Source file src/archive/tar/common.go

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package tar implements access to tar archives.
     6  //
     7  // Tape archives (tar) are a file format for storing a sequence of files that
     8  // can be read and written in a streaming manner.
     9  // This package aims to cover most variations of the format,
    10  // including those produced by GNU and BSD tar tools.
    11  package tar
    12  
    13  import (
    14  	"errors"
    15  	"fmt"
    16  	"internal/godebug"
    17  	"io/fs"
    18  	"maps"
    19  	"math"
    20  	"path"
    21  	"reflect"
    22  	"strconv"
    23  	"strings"
    24  	"time"
    25  )
    26  
    27  // BUG: Use of the Uid and Gid fields in Header could overflow on 32-bit
    28  // architectures. If a large value is encountered when decoding, the result
    29  // stored in Header will be the truncated version.
    30  
// tarinsecurepath is the handle for the "tarinsecurepath" GODEBUG setting.
// NOTE(review): presumably consulted elsewhere in the package when deciding
// whether to report ErrInsecurePath; no use is visible in this file — confirm.
var tarinsecurepath = godebug.New("tarinsecurepath")
    32  
    33  var (
    34  	ErrHeader          = errors.New("archive/tar: invalid tar header")
    35  	ErrWriteTooLong    = errors.New("archive/tar: write too long")
    36  	ErrFieldTooLong    = errors.New("archive/tar: header field too long")
    37  	ErrWriteAfterClose = errors.New("archive/tar: write after close")
    38  	ErrInsecurePath    = errors.New("archive/tar: insecure file path")
    39  	errMissData        = errors.New("archive/tar: sparse file references non-existent data")
    40  	errUnrefData       = errors.New("archive/tar: sparse file contains unreferenced data")
    41  	errWriteHole       = errors.New("archive/tar: write non-NUL byte in sparse hole")
    42  	errSparseTooLong   = errors.New("archive/tar: sparse map too long")
    43  )
    44  
    45  type headerError []string
    46  
    47  func (he headerError) Error() string {
    48  	const prefix = "archive/tar: cannot encode header"
    49  	var ss []string
    50  	for _, s := range he {
    51  		if s != "" {
    52  			ss = append(ss, s)
    53  		}
    54  	}
    55  	if len(ss) == 0 {
    56  		return prefix
    57  	}
    58  	return fmt.Sprintf("%s: %v", prefix, strings.Join(ss, "; and "))
    59  }
    60  
// Type flags for Header.Typeflag.
//
// The flags are untyped character constants, so they can be assigned to or
// compared with the byte-typed Typeflag field directly.
const (
	// Type '0' indicates a regular file.
	TypeReg = '0'

	// Type '\x00' is the zero value of Header.Typeflag; see that field for
	// how the zero value is promoted.
	//
	// Deprecated: Use TypeReg instead.
	TypeRegA = '\x00'

	// Type '1' to '6' are header-only flags and may not have a data body.
	TypeLink    = '1' // Hard link
	TypeSymlink = '2' // Symbolic link
	TypeChar    = '3' // Character device node
	TypeBlock   = '4' // Block device node
	TypeDir     = '5' // Directory
	TypeFifo    = '6' // FIFO node

	// Type '7' is reserved.
	TypeCont = '7'

	// Type 'x' is used by the PAX format to store key-value records that
	// are only relevant to the next file.
	// This package transparently handles these types.
	TypeXHeader = 'x'

	// Type 'g' is used by the PAX format to store key-value records that
	// are relevant to all subsequent files.
	// This package only supports parsing and composing such headers,
	// but does not currently support persisting the global state across files.
	TypeXGlobalHeader = 'g'

	// Type 'S' indicates a sparse file in the GNU format.
	TypeGNUSparse = 'S'

	// Types 'L' and 'K' are used by the GNU format for a meta file
	// used to store the path or link name for the next file.
	// This package transparently handles these types.
	TypeGNULongName = 'L'
	TypeGNULongLink = 'K'
)
   100  
// Keywords for PAX extended header records.
const (
	paxNone     = "" // Indicates that no PAX key is suitable
	paxPath     = "path"
	paxLinkpath = "linkpath"
	paxSize     = "size"
	paxUid      = "uid"
	paxGid      = "gid"
	paxUname    = "uname"
	paxGname    = "gname"
	paxMtime    = "mtime"
	paxAtime    = "atime"
	paxCtime    = "ctime"   // Removed from later revision of PAX spec, but was valid
	paxCharset  = "charset" // Currently unused
	paxComment  = "comment" // Currently unused

	// Key prefix under which entries of Header.Xattrs are stored
	// as PAX records.
	paxSchilyXattr = "SCHILY.xattr."

	// Keywords for GNU sparse files in a PAX extended header.
	// paxGNUSparse is the prefix shared by all of the keys that follow it.
	paxGNUSparse          = "GNU.sparse."
	paxGNUSparseNumBlocks = "GNU.sparse.numblocks"
	paxGNUSparseOffset    = "GNU.sparse.offset"
	paxGNUSparseNumBytes  = "GNU.sparse.numbytes"
	paxGNUSparseMap       = "GNU.sparse.map"
	paxGNUSparseName      = "GNU.sparse.name"
	paxGNUSparseMajor     = "GNU.sparse.major"
	paxGNUSparseMinor     = "GNU.sparse.minor"
	paxGNUSparseSize      = "GNU.sparse.size"
	paxGNUSparseRealSize  = "GNU.sparse.realsize"
)
   131  
   132  // basicKeys is a set of the PAX keys for which we have built-in support.
   133  // This does not contain "charset" or "comment", which are both PAX-specific,
   134  // so adding them as first-class features of Header is unlikely.
   135  // Users can use the PAXRecords field to set it themselves.
   136  var basicKeys = map[string]bool{
   137  	paxPath: true, paxLinkpath: true, paxSize: true, paxUid: true, paxGid: true,
   138  	paxUname: true, paxGname: true, paxMtime: true, paxAtime: true, paxCtime: true,
   139  }
   140  
// A Header represents a single header in a tar archive.
// Some fields may not be populated.
//
// For forward compatibility, users that retrieve a Header from Reader.Next,
// mutate it in some ways, and then pass it back to Writer.WriteHeader
// should do so by creating a new Header and copying the fields
// that they are interested in preserving.
type Header struct {
	// Typeflag is the type of header entry.
	// The zero value is automatically promoted to either TypeReg or TypeDir
	// depending on the presence of a trailing slash in Name.
	Typeflag byte

	Name     string // Name of file entry
	Linkname string // Target name of link (valid for TypeLink or TypeSymlink)

	Size  int64  // Logical file size in bytes
	Mode  int64  // Permission and mode bits
	Uid   int    // User ID of owner (large values may be truncated on 32-bit architectures)
	Gid   int    // Group ID of owner (large values may be truncated on 32-bit architectures)
	Uname string // User name of owner
	Gname string // Group name of owner

	// If the Format is unspecified, then Writer.WriteHeader rounds ModTime
	// to the nearest second and ignores the AccessTime and ChangeTime fields.
	//
	// To use AccessTime or ChangeTime, specify the Format as PAX or GNU.
	// To use sub-second resolution, specify the Format as PAX.
	ModTime    time.Time // Modification time
	AccessTime time.Time // Access time (requires either PAX or GNU support)
	ChangeTime time.Time // Change time (requires either PAX or GNU support)

	Devmajor int64 // Major device number (valid for TypeChar or TypeBlock)
	Devminor int64 // Minor device number (valid for TypeChar or TypeBlock)

	// Xattrs stores extended attributes as PAX records under the
	// "SCHILY.xattr." namespace.
	//
	// The following are semantically equivalent:
	//  h.Xattrs[key] = value
	//  h.PAXRecords["SCHILY.xattr."+key] = value
	//
	// When Writer.WriteHeader is called, the contents of Xattrs will take
	// precedence over those in PAXRecords.
	//
	// Deprecated: Use PAXRecords instead.
	Xattrs map[string]string

	// PAXRecords is a map of PAX extended header records.
	//
	// User-defined records should have keys of the following form:
	//	VENDOR.keyword
	// Where VENDOR is some namespace in all uppercase, and keyword may
	// not contain the '=' character (e.g., "GOLANG.pkg.version").
	// The key and value should be non-empty UTF-8 strings.
	//
	// When Writer.WriteHeader is called, PAX records derived from the
	// other fields in Header take precedence over PAXRecords.
	PAXRecords map[string]string

	// Format specifies the format of the tar header.
	//
	// This is set by Reader.Next as a best-effort guess at the format.
	// Since the Reader liberally reads some non-compliant files,
	// it is possible for this to be FormatUnknown.
	//
	// If the format is unspecified when Writer.WriteHeader is called,
	// then it uses the first format (in the order of USTAR, PAX, GNU)
	// capable of encoding this Header (see Format).
	Format Format
}
   212  
   213  // sparseEntry represents a Length-sized fragment at Offset in the file.
   214  type sparseEntry struct{ Offset, Length int64 }
   215  
   216  func (s sparseEntry) endOffset() int64 { return s.Offset + s.Length }
   217  
// A sparse file can be represented as either a sparseDatas or a sparseHoles.
// As long as the total size is known, they are equivalent and one can be
// converted to the other form and back. The various tar formats with sparse
// file support represent sparse files in the sparseDatas form. That is, they
// specify the fragments in the file that have data, and treat everything else
// as having zero bytes. As such, the encoding and decoding logic in this
// package deals with sparseDatas.
//
// However, the external API uses sparseHoles instead of sparseDatas because the
// zero value of sparseHoles logically represents a normal file (i.e., there are
// no holes in it). On the other hand, the zero value of sparseDatas implies
// that the file has no data in it, which is rather odd.
//
// As an example, if the underlying raw file contains the 8-byte data:
//
//	var compactFile = "abcdefgh"
//
// And the sparse map has the following entries:
//
//	var spd sparseDatas = []sparseEntry{
//		{Offset: 2,  Length: 5},  // Data fragment for 2..6
//		{Offset: 18, Length: 3},  // Data fragment for 18..20
//	}
//	var sph sparseHoles = []sparseEntry{
//		{Offset: 0,  Length: 2},  // Hole fragment for 0..1
//		{Offset: 7,  Length: 11}, // Hole fragment for 7..17
//		{Offset: 21, Length: 4},  // Hole fragment for 21..24
//	}
//
// Then the content of the resulting sparse file with a Header.Size of 25 is:
//
//	var sparseFile = "\x00"*2 + "abcde" + "\x00"*11 + "fgh" + "\x00"*4
type (
	sparseDatas []sparseEntry
	sparseHoles []sparseEntry
)
   254  
   255  // validateSparseEntries reports whether sp is a valid sparse map.
   256  // It does not matter whether sp represents data fragments or hole fragments.
   257  func validateSparseEntries(sp []sparseEntry, size int64) bool {
   258  	// Validate all sparse entries. These are the same checks as performed by
   259  	// the BSD tar utility.
   260  	if size < 0 {
   261  		return false
   262  	}
   263  	var pre sparseEntry
   264  	for _, cur := range sp {
   265  		switch {
   266  		case cur.Offset < 0 || cur.Length < 0:
   267  			return false // Negative values are never okay
   268  		case cur.Offset > math.MaxInt64-cur.Length:
   269  			return false // Integer overflow with large length
   270  		case cur.endOffset() > size:
   271  			return false // Region extends beyond the actual size
   272  		case pre.endOffset() > cur.Offset:
   273  			return false // Regions cannot overlap and must be in order
   274  		}
   275  		pre = cur
   276  	}
   277  	return true
   278  }
   279  
   280  // alignSparseEntries mutates src and returns dst where each fragment's
   281  // starting offset is aligned up to the nearest block edge, and each
   282  // ending offset is aligned down to the nearest block edge.
   283  //
   284  // Even though the Go tar Reader and the BSD tar utility can handle entries
   285  // with arbitrary offsets and lengths, the GNU tar utility can only handle
   286  // offsets and lengths that are multiples of blockSize.
   287  func alignSparseEntries(src []sparseEntry, size int64) []sparseEntry {
   288  	dst := src[:0]
   289  	for _, s := range src {
   290  		pos, end := s.Offset, s.endOffset()
   291  		pos += blockPadding(+pos) // Round-up to nearest blockSize
   292  		if end != size {
   293  			end -= blockPadding(-end) // Round-down to nearest blockSize
   294  		}
   295  		if pos < end {
   296  			dst = append(dst, sparseEntry{Offset: pos, Length: end - pos})
   297  		}
   298  	}
   299  	return dst
   300  }
   301  
   302  // invertSparseEntries converts a sparse map from one form to the other.
   303  // If the input is sparseHoles, then it will output sparseDatas and vice-versa.
   304  // The input must have been already validated.
   305  //
   306  // This function mutates src and returns a normalized map where:
   307  //   - adjacent fragments are coalesced together
   308  //   - only the last fragment may be empty
   309  //   - the endOffset of the last fragment is the total size
   310  func invertSparseEntries(src []sparseEntry, size int64) []sparseEntry {
   311  	dst := src[:0]
   312  	var pre sparseEntry
   313  	for _, cur := range src {
   314  		if cur.Length == 0 {
   315  			continue // Skip empty fragments
   316  		}
   317  		pre.Length = cur.Offset - pre.Offset
   318  		if pre.Length > 0 {
   319  			dst = append(dst, pre) // Only add non-empty fragments
   320  		}
   321  		pre.Offset = cur.endOffset()
   322  	}
   323  	pre.Length = size - pre.Offset // Possibly the only empty fragment
   324  	return append(dst, pre)
   325  }
   326  
// fileState tracks the number of logical (includes sparse holes) and physical
// (actual in tar archive) bytes remaining for the current file.
//
// Invariant: logicalRemaining >= physicalRemaining
type fileState interface {
	// logicalRemaining reports the logical bytes remaining for the current
	// file, counting sparse holes.
	logicalRemaining() int64
	// physicalRemaining reports the bytes remaining for the current file
	// that are actually present in the tar archive.
	physicalRemaining() int64
}
   335  
// allowedFormats determines which formats can be used.
// The value returned is the logical OR of multiple possible formats.
// If the value is FormatUnknown, then the input Header cannot be encoded
// and an error is returned explaining why.
//
// As a by-product of checking the fields, this function returns paxHdrs, which
// contain all fields that could not be directly encoded.
// A value receiver ensures that this method does not mutate the source Header.
func (h Header) allowedFormats() (format Format, paxHdrs map[string]string, err error) {
	// Start with every format allowed; the checks below strike formats out
	// as fields turn out not to be representable in them.
	format = FormatUSTAR | FormatPAX | FormatGNU
	paxHdrs = make(map[string]string)

	// Each whyNo* string holds the most recent reason a format was ruled
	// out; a later failure for the same format overwrites an earlier one.
	var whyNoUSTAR, whyNoPAX, whyNoGNU string
	var preferPAX bool // Prefer PAX over USTAR

	// verifyString checks string field s (reported as name in messages)
	// against a header slot of size bytes. paxKey is the PAX record that
	// can carry the value instead, or paxNone if there is none.
	verifyString := func(s string, size int, name, paxKey string) {
		// NUL-terminator is optional for path and linkpath.
		// Technically, it is required for uname and gname,
		// but neither GNU nor BSD tar checks for it.
		tooLong := len(s) > size
		allowLongGNU := paxKey == paxPath || paxKey == paxLinkpath
		if hasNUL(s) || (tooLong && !allowLongGNU) {
			whyNoGNU = fmt.Sprintf("GNU cannot encode %s=%q", name, s)
			format.mustNotBe(FormatGNU)
		}
		if !isASCII(s) || tooLong {
			// Only the path field may be split (via splitUSTARPath) to
			// still fit in USTAR.
			canSplitUSTAR := paxKey == paxPath
			if _, _, ok := splitUSTARPath(s); !canSplitUSTAR || !ok {
				whyNoUSTAR = fmt.Sprintf("USTAR cannot encode %s=%q", name, s)
				format.mustNotBe(FormatUSTAR)
			}
			if paxKey == paxNone {
				whyNoPAX = fmt.Sprintf("PAX cannot encode %s=%q", name, s)
				format.mustNotBe(FormatPAX)
			} else {
				paxHdrs[paxKey] = s
			}
		}
		// Keep a caller-supplied PAX record that agrees with the field
		// value, even when the field would have fit on its own.
		if v, ok := h.PAXRecords[paxKey]; ok && v == s {
			paxHdrs[paxKey] = v
		}
	}

	// verifyNumeric is the numeric counterpart of verifyString: n must fit
	// in a size-byte slot in base-256 (for GNU) or octal (for USTAR),
	// otherwise it is emitted as the decimal PAX record paxKey.
	verifyNumeric := func(n int64, size int, name, paxKey string) {
		if !fitsInBase256(size, n) {
			whyNoGNU = fmt.Sprintf("GNU cannot encode %s=%d", name, n)
			format.mustNotBe(FormatGNU)
		}
		if !fitsInOctal(size, n) {
			whyNoUSTAR = fmt.Sprintf("USTAR cannot encode %s=%d", name, n)
			format.mustNotBe(FormatUSTAR)
			if paxKey == paxNone {
				whyNoPAX = fmt.Sprintf("PAX cannot encode %s=%d", name, n)
				format.mustNotBe(FormatPAX)
			} else {
				paxHdrs[paxKey] = strconv.FormatInt(n, 10)
			}
		}
		// Keep a caller-supplied PAX record that agrees with the field.
		if v, ok := h.PAXRecords[paxKey]; ok && v == strconv.FormatInt(n, 10) {
			paxHdrs[paxKey] = v
		}
	}

	// verifyTime checks a timestamp field. The zero time is always
	// accepted and produces no PAX record.
	verifyTime := func(ts time.Time, size int, name, paxKey string) {
		if ts.IsZero() {
			return // Always okay
		}
		if !fitsInBase256(size, ts.Unix()) {
			whyNoGNU = fmt.Sprintf("GNU cannot encode %s=%v", name, ts)
			format.mustNotBe(FormatGNU)
		}
		// USTAR is ruled out for every timestamp other than mtime, and for
		// an mtime whose seconds do not fit in octal.
		isMtime := paxKey == paxMtime
		fitsOctal := fitsInOctal(size, ts.Unix())
		if (isMtime && !fitsOctal) || !isMtime {
			whyNoUSTAR = fmt.Sprintf("USTAR cannot encode %s=%v", name, ts)
			format.mustNotBe(FormatUSTAR)
		}
		needsNano := ts.Nanosecond() != 0
		if !isMtime || !fitsOctal || needsNano {
			preferPAX = true // USTAR may truncate sub-second measurements
			if paxKey == paxNone {
				whyNoPAX = fmt.Sprintf("PAX cannot encode %s=%v", name, ts)
				format.mustNotBe(FormatPAX)
			} else {
				paxHdrs[paxKey] = formatPAXTime(ts)
			}
		}
		// Keep a caller-supplied PAX record that agrees with the field.
		if v, ok := h.PAXRecords[paxKey]; ok && v == formatPAXTime(ts) {
			paxHdrs[paxKey] = v
		}
	}

	// Check basic fields.
	// A zero block is used only to obtain the width of each header slot.
	var blk block
	v7 := blk.toV7()
	ustar := blk.toUSTAR()
	gnu := blk.toGNU()
	verifyString(h.Name, len(v7.name()), "Name", paxPath)
	verifyString(h.Linkname, len(v7.linkName()), "Linkname", paxLinkpath)
	verifyString(h.Uname, len(ustar.userName()), "Uname", paxUname)
	verifyString(h.Gname, len(ustar.groupName()), "Gname", paxGname)
	verifyNumeric(h.Mode, len(v7.mode()), "Mode", paxNone)
	verifyNumeric(int64(h.Uid), len(v7.uid()), "Uid", paxUid)
	verifyNumeric(int64(h.Gid), len(v7.gid()), "Gid", paxGid)
	verifyNumeric(h.Size, len(v7.size()), "Size", paxSize)
	verifyNumeric(h.Devmajor, len(ustar.devMajor()), "Devmajor", paxNone)
	verifyNumeric(h.Devminor, len(ustar.devMinor()), "Devminor", paxNone)
	verifyTime(h.ModTime, len(v7.modTime()), "ModTime", paxMtime)
	verifyTime(h.AccessTime, len(gnu.accessTime()), "AccessTime", paxAtime)
	verifyTime(h.ChangeTime, len(gnu.changeTime()), "ChangeTime", paxCtime)

	// Check for header-only types.
	// whyOnlyGNU is never assigned while the sparse-file check further down
	// remains commented out, so it is always empty in the errors below.
	var whyOnlyPAX, whyOnlyGNU string
	switch h.Typeflag {
	case TypeReg, TypeChar, TypeBlock, TypeFifo, TypeGNUSparse:
		// Exclude TypeLink and TypeSymlink, since they may reference directories.
		if strings.HasSuffix(h.Name, "/") {
			return FormatUnknown, nil, headerError{"filename may not have trailing slash"}
		}
	case TypeXHeader, TypeGNULongName, TypeGNULongLink:
		return FormatUnknown, nil, headerError{"cannot manually encode TypeXHeader, TypeGNULongName, or TypeGNULongLink headers"}
	case TypeXGlobalHeader:
		// h2 carries over only the fields a global header may set; any
		// other non-zero field makes the comparison with h fail.
		h2 := Header{Name: h.Name, Typeflag: h.Typeflag, Xattrs: h.Xattrs, PAXRecords: h.PAXRecords, Format: h.Format}
		if !reflect.DeepEqual(h, h2) {
			return FormatUnknown, nil, headerError{"only PAXRecords should be set for TypeXGlobalHeader"}
		}
		whyOnlyPAX = "only PAX supports TypeXGlobalHeader"
		format.mayOnlyBe(FormatPAX)
	}
	// NOTE(review): the condition rejects a negative size on types that are
	// NOT header-only, while the message says "header-only type" — confirm
	// which of the two is intended.
	if !isHeaderOnlyType(h.Typeflag) && h.Size < 0 {
		return FormatUnknown, nil, headerError{"negative size on header-only type"}
	}

	// Check PAX records.
	if len(h.Xattrs) > 0 {
		for k, v := range h.Xattrs {
			paxHdrs[paxSchilyXattr+k] = v
		}
		whyOnlyPAX = "only PAX supports Xattrs"
		format.mayOnlyBe(FormatPAX)
	}
	if len(h.PAXRecords) > 0 {
		for k, v := range h.PAXRecords {
			// Tagless switch: the init statement looks k up in paxHdrs and
			// the first true case wins. A record matching none of the
			// cases is dropped.
			switch _, exists := paxHdrs[k]; {
			case exists:
				continue // Do not overwrite existing records
			case h.Typeflag == TypeXGlobalHeader:
				paxHdrs[k] = v // Copy all records
			case !basicKeys[k] && !strings.HasPrefix(k, paxGNUSparse):
				paxHdrs[k] = v // Ignore local records that may conflict
			}
		}
		whyOnlyPAX = "only PAX supports PAXRecords"
		format.mayOnlyBe(FormatPAX)
	}
	for k, v := range paxHdrs {
		if !validPAXRecord(k, v) {
			return FormatUnknown, nil, headerError{fmt.Sprintf("invalid PAX record: %q", k+" = "+v)}
		}
	}

	// TODO(dsnet): Re-enable this when adding sparse support.
	// See https://golang.org/issue/22735
	/*
		// Check sparse files.
		if len(h.SparseHoles) > 0 || h.Typeflag == TypeGNUSparse {
			if isHeaderOnlyType(h.Typeflag) {
				return FormatUnknown, nil, headerError{"header-only type cannot be sparse"}
			}
			if !validateSparseEntries(h.SparseHoles, h.Size) {
				return FormatUnknown, nil, headerError{"invalid sparse holes"}
			}
			if h.Typeflag == TypeGNUSparse {
				whyOnlyGNU = "only GNU supports TypeGNUSparse"
				format.mayOnlyBe(FormatGNU)
			} else {
				whyNoGNU = "GNU supports sparse files only with TypeGNUSparse"
				format.mustNotBe(FormatGNU)
			}
			whyNoUSTAR = "USTAR does not support sparse files"
			format.mustNotBe(FormatUSTAR)
		}
	*/

	// Check desired format.
	if wantFormat := h.Format; wantFormat != FormatUnknown {
		if wantFormat.has(FormatPAX) && !preferPAX {
			wantFormat.mayBe(FormatUSTAR) // PAX implies USTAR allowed too
		}
		// NOTE(review): the original comment here called this a "union" of
		// the allowed and wanted formats; the method name suggests the
		// allowed set is restricted to wantFormat instead — confirm against
		// Format.mayOnlyBe.
		format.mayOnlyBe(wantFormat)
	}
	if format == FormatUnknown {
		// No format is left: report the reasons relevant to the format the
		// caller asked for, or all of them if none was requested.
		switch h.Format {
		case FormatUSTAR:
			err = headerError{"Format specifies USTAR", whyNoUSTAR, whyOnlyPAX, whyOnlyGNU}
		case FormatPAX:
			err = headerError{"Format specifies PAX", whyNoPAX, whyOnlyGNU}
		case FormatGNU:
			err = headerError{"Format specifies GNU", whyNoGNU, whyOnlyPAX}
		default:
			err = headerError{whyNoUSTAR, whyNoPAX, whyNoGNU, whyOnlyPAX, whyOnlyGNU}
		}
	}
	return format, paxHdrs, err
}
   538  
   539  // FileInfo returns an fs.FileInfo for the Header.
   540  func (h *Header) FileInfo() fs.FileInfo {
   541  	return headerFileInfo{h}
   542  }
   543  
   544  // headerFileInfo implements fs.FileInfo.
   545  type headerFileInfo struct {
   546  	h *Header
   547  }
   548  
   549  func (fi headerFileInfo) Size() int64        { return fi.h.Size }
   550  func (fi headerFileInfo) IsDir() bool        { return fi.Mode().IsDir() }
   551  func (fi headerFileInfo) ModTime() time.Time { return fi.h.ModTime }
   552  func (fi headerFileInfo) Sys() any           { return fi.h }
   553  
   554  // Name returns the base name of the file.
   555  func (fi headerFileInfo) Name() string {
   556  	if fi.IsDir() {
   557  		return path.Base(path.Clean(fi.h.Name))
   558  	}
   559  	return path.Base(fi.h.Name)
   560  }
   561  
   562  // Mode returns the permission and mode bits for the headerFileInfo.
   563  func (fi headerFileInfo) Mode() (mode fs.FileMode) {
   564  	// Set file permission bits.
   565  	mode = fs.FileMode(fi.h.Mode).Perm()
   566  
   567  	// Set setuid, setgid and sticky bits.
   568  	if fi.h.Mode&c_ISUID != 0 {
   569  		mode |= fs.ModeSetuid
   570  	}
   571  	if fi.h.Mode&c_ISGID != 0 {
   572  		mode |= fs.ModeSetgid
   573  	}
   574  	if fi.h.Mode&c_ISVTX != 0 {
   575  		mode |= fs.ModeSticky
   576  	}
   577  
   578  	// Set file mode bits; clear perm, setuid, setgid, and sticky bits.
   579  	switch m := fs.FileMode(fi.h.Mode) &^ 07777; m {
   580  	case c_ISDIR:
   581  		mode |= fs.ModeDir
   582  	case c_ISFIFO:
   583  		mode |= fs.ModeNamedPipe
   584  	case c_ISLNK:
   585  		mode |= fs.ModeSymlink
   586  	case c_ISBLK:
   587  		mode |= fs.ModeDevice
   588  	case c_ISCHR:
   589  		mode |= fs.ModeDevice
   590  		mode |= fs.ModeCharDevice
   591  	case c_ISSOCK:
   592  		mode |= fs.ModeSocket
   593  	}
   594  
   595  	switch fi.h.Typeflag {
   596  	case TypeSymlink:
   597  		mode |= fs.ModeSymlink
   598  	case TypeChar:
   599  		mode |= fs.ModeDevice
   600  		mode |= fs.ModeCharDevice
   601  	case TypeBlock:
   602  		mode |= fs.ModeDevice
   603  	case TypeDir:
   604  		mode |= fs.ModeDir
   605  	case TypeFifo:
   606  		mode |= fs.ModeNamedPipe
   607  	}
   608  
   609  	return mode
   610  }
   611  
// String returns the result of fs.FormatFileInfo for fi.
func (fi headerFileInfo) String() string {
	return fs.FormatFileInfo(fi)
}
   615  
// sysStat, if non-nil, populates h from system-dependent fields of fi.
// FileInfoHeader calls it as its final step and returns its error together
// with h. doNameLookups is false when fi implements FileInfoNames and true
// otherwise.
// NOTE(review): presumably assigned by a platform-specific file of this
// package; no assignment is visible in this file — confirm.
var sysStat func(fi fs.FileInfo, h *Header, doNameLookups bool) error
   618  
const (
	// Mode constants from the USTAR spec:
	// See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_06
	c_ISUID = 04000 // Set uid
	c_ISGID = 02000 // Set gid
	c_ISVTX = 01000 // Save text (sticky bit)

	// Common Unix mode constants; these are not defined in any common tar standard.
	// Header.FileInfo understands these, but FileInfoHeader will never produce these.
	// The file type switch in headerFileInfo.Mode has no case for c_ISREG,
	// so that value adds no file type bits there.
	c_ISDIR  = 040000  // Directory
	c_ISFIFO = 010000  // FIFO
	c_ISREG  = 0100000 // Regular file
	c_ISLNK  = 0120000 // Symbolic link
	c_ISBLK  = 060000  // Block special file
	c_ISCHR  = 020000  // Character special file
	c_ISSOCK = 0140000 // Socket
)
   636  
   637  // FileInfoHeader creates a partially-populated [Header] from fi.
   638  // If fi describes a symlink, FileInfoHeader records link as the link target.
   639  // If fi describes a directory, a slash is appended to the name.
   640  //
   641  // Since fs.FileInfo's Name method only returns the base name of
   642  // the file it describes, it may be necessary to modify Header.Name
   643  // to provide the full path name of the file.
   644  //
   645  // If fi implements [FileInfoNames]
   646  // Header.Gname and Header.Uname
   647  // are provided by the methods of the interface.
   648  func FileInfoHeader(fi fs.FileInfo, link string) (*Header, error) {
   649  	if fi == nil {
   650  		return nil, errors.New("archive/tar: FileInfo is nil")
   651  	}
   652  	fm := fi.Mode()
   653  	h := &Header{
   654  		Name:    fi.Name(),
   655  		ModTime: fi.ModTime(),
   656  		Mode:    int64(fm.Perm()), // or'd with c_IS* constants later
   657  	}
   658  	switch {
   659  	case fm.IsRegular():
   660  		h.Typeflag = TypeReg
   661  		h.Size = fi.Size()
   662  	case fi.IsDir():
   663  		h.Typeflag = TypeDir
   664  		h.Name += "/"
   665  	case fm&fs.ModeSymlink != 0:
   666  		h.Typeflag = TypeSymlink
   667  		h.Linkname = link
   668  	case fm&fs.ModeDevice != 0:
   669  		if fm&fs.ModeCharDevice != 0 {
   670  			h.Typeflag = TypeChar
   671  		} else {
   672  			h.Typeflag = TypeBlock
   673  		}
   674  	case fm&fs.ModeNamedPipe != 0:
   675  		h.Typeflag = TypeFifo
   676  	case fm&fs.ModeSocket != 0:
   677  		return nil, fmt.Errorf("archive/tar: sockets not supported")
   678  	default:
   679  		return nil, fmt.Errorf("archive/tar: unknown file mode %v", fm)
   680  	}
   681  	if fm&fs.ModeSetuid != 0 {
   682  		h.Mode |= c_ISUID
   683  	}
   684  	if fm&fs.ModeSetgid != 0 {
   685  		h.Mode |= c_ISGID
   686  	}
   687  	if fm&fs.ModeSticky != 0 {
   688  		h.Mode |= c_ISVTX
   689  	}
   690  	// If possible, populate additional fields from OS-specific
   691  	// FileInfo fields.
   692  	if sys, ok := fi.Sys().(*Header); ok {
   693  		// This FileInfo came from a Header (not the OS). Use the
   694  		// original Header to populate all remaining fields.
   695  		h.Uid = sys.Uid
   696  		h.Gid = sys.Gid
   697  		h.Uname = sys.Uname
   698  		h.Gname = sys.Gname
   699  		h.AccessTime = sys.AccessTime
   700  		h.ChangeTime = sys.ChangeTime
   701  		h.Xattrs = maps.Clone(sys.Xattrs)
   702  		if sys.Typeflag == TypeLink {
   703  			// hard link
   704  			h.Typeflag = TypeLink
   705  			h.Size = 0
   706  			h.Linkname = sys.Linkname
   707  		}
   708  		h.PAXRecords = maps.Clone(sys.PAXRecords)
   709  	}
   710  	var doNameLookups = true
   711  	if iface, ok := fi.(FileInfoNames); ok {
   712  		doNameLookups = false
   713  		var err error
   714  		h.Gname, err = iface.Gname()
   715  		if err != nil {
   716  			return nil, err
   717  		}
   718  		h.Uname, err = iface.Uname()
   719  		if err != nil {
   720  			return nil, err
   721  		}
   722  	}
   723  	if sysStat != nil {
   724  		return h, sysStat(fi, h, doNameLookups)
   725  	}
   726  	return h, nil
   727  }
   728  
// FileInfoNames extends [fs.FileInfo].
// Passing an instance of this to [FileInfoHeader] permits the caller
// to avoid a system-dependent name lookup by specifying the Uname and Gname directly.
//
// If either method returns an error, [FileInfoHeader] returns that error
// and no Header.
type FileInfoNames interface {
	fs.FileInfo
	// Uname should give a user name.
	// FileInfoHeader stores the result in Header.Uname.
	Uname() (string, error)
	// Gname should give a group name.
	// FileInfoHeader stores the result in Header.Gname.
	Gname() (string, error)
}
   739  
   740  // isHeaderOnlyType checks if the given type flag is of the type that has no
   741  // data section even if a size is specified.
   742  func isHeaderOnlyType(flag byte) bool {
   743  	switch flag {
   744  	case TypeLink, TypeSymlink, TypeChar, TypeBlock, TypeDir, TypeFifo:
   745  		return true
   746  	default:
   747  		return false
   748  	}
   749  }
   750  

View as plain text