Source file src/archive/tar/common.go

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package tar implements access to tar archives.
     6  //
     7  // Tape archives (tar) are a file format for storing a sequence of files that
     8  // can be read and written in a streaming manner.
     9  // This package aims to cover most variations of the format,
    10  // including those produced by GNU and BSD tar tools.
    11  package tar
    12  
    13  import (
    14  	"errors"
    15  	"fmt"
    16  	"internal/godebug"
    17  	"io/fs"
    18  	"maps"
    19  	"math"
    20  	"path"
    21  	"reflect"
    22  	"strconv"
    23  	"strings"
    24  	"time"
    25  )
    26  
    27  // BUG: Use of the Uid and Gid fields in Header could overflow on 32-bit
    28  // architectures. If a large value is encountered when decoding, the result
    29  // stored in Header will be the truncated version.
    30  
// tarinsecurepath is the godebug setting registered under the name
// "tarinsecurepath".
// NOTE(review): presumably this gates the ErrInsecurePath check; the code
// that consults it is not in this part of the file — confirm at the use site.
var tarinsecurepath = godebug.New("tarinsecurepath")
    32  
// Sentinel errors defined by this package.
// The exported values are part of the public API; the unexported ones
// describe inconsistencies between a sparse map and the file data.
var (
	ErrHeader          = errors.New("archive/tar: invalid tar header")
	ErrWriteTooLong    = errors.New("archive/tar: write too long")
	ErrFieldTooLong    = errors.New("archive/tar: header field too long")
	ErrWriteAfterClose = errors.New("archive/tar: write after close")
	ErrInsecurePath    = errors.New("archive/tar: insecure file path")
	errMissData        = errors.New("archive/tar: sparse file references non-existent data")
	errUnrefData       = errors.New("archive/tar: sparse file contains unreferenced data")
	errWriteHole       = errors.New("archive/tar: write non-NUL byte in sparse hole")
)
    43  
    44  type headerError []string
    45  
    46  func (he headerError) Error() string {
    47  	const prefix = "archive/tar: cannot encode header"
    48  	var ss []string
    49  	for _, s := range he {
    50  		if s != "" {
    51  			ss = append(ss, s)
    52  		}
    53  	}
    54  	if len(ss) == 0 {
    55  		return prefix
    56  	}
    57  	return fmt.Sprintf("%s: %v", prefix, strings.Join(ss, "; and "))
    58  }
    59  
// Type flags for Header.Typeflag.
const (
	// Type '0' indicates a regular file.
	TypeReg = '0'

	// Type '\x00' is the zero value of Header.Typeflag.
	//
	// Deprecated: Use TypeReg instead.
	TypeRegA = '\x00'

	// Type '1' to '6' are header-only flags and may not have a data body.
	TypeLink    = '1' // Hard link
	TypeSymlink = '2' // Symbolic link
	TypeChar    = '3' // Character device node
	TypeBlock   = '4' // Block device node
	TypeDir     = '5' // Directory
	TypeFifo    = '6' // FIFO node

	// Type '7' is reserved.
	TypeCont = '7'

	// Type 'x' is used by the PAX format to store key-value records that
	// are only relevant to the next file.
	// This package transparently handles these types.
	TypeXHeader = 'x'

	// Type 'g' is used by the PAX format to store key-value records that
	// are relevant to all subsequent files.
	// This package only supports parsing and composing such headers,
	// but does not currently support persisting the global state across files.
	TypeXGlobalHeader = 'g'

	// Type 'S' indicates a sparse file in the GNU format.
	TypeGNUSparse = 'S'

	// Types 'L' and 'K' are used by the GNU format for a meta file
	// used to store the path or link name for the next file.
	// This package transparently handles these types.
	TypeGNULongName = 'L'
	TypeGNULongLink = 'K'
)
    99  
// Keywords for PAX extended header records.
const (
	paxNone     = "" // Indicates that no PAX key is suitable
	paxPath     = "path"
	paxLinkpath = "linkpath"
	paxSize     = "size"
	paxUid      = "uid"
	paxGid      = "gid"
	paxUname    = "uname"
	paxGname    = "gname"
	paxMtime    = "mtime"
	paxAtime    = "atime"
	paxCtime    = "ctime"   // Removed from later revision of PAX spec, but was valid
	paxCharset  = "charset" // Currently unused
	paxComment  = "comment" // Currently unused

	// Key prefix under which Header.Xattrs entries are stored as PAX records.
	paxSchilyXattr = "SCHILY.xattr."

	// Keywords for GNU sparse files in a PAX extended header.
	// paxGNUSparse is the common prefix of all the keys that follow.
	paxGNUSparse          = "GNU.sparse."
	paxGNUSparseNumBlocks = "GNU.sparse.numblocks"
	paxGNUSparseOffset    = "GNU.sparse.offset"
	paxGNUSparseNumBytes  = "GNU.sparse.numbytes"
	paxGNUSparseMap       = "GNU.sparse.map"
	paxGNUSparseName      = "GNU.sparse.name"
	paxGNUSparseMajor     = "GNU.sparse.major"
	paxGNUSparseMinor     = "GNU.sparse.minor"
	paxGNUSparseSize      = "GNU.sparse.size"
	paxGNUSparseRealSize  = "GNU.sparse.realsize"
)
   130  
   131  // basicKeys is a set of the PAX keys for which we have built-in support.
   132  // This does not contain "charset" or "comment", which are both PAX-specific,
   133  // so adding them as first-class features of Header is unlikely.
   134  // Users can use the PAXRecords field to set it themselves.
   135  var basicKeys = map[string]bool{
   136  	paxPath: true, paxLinkpath: true, paxSize: true, paxUid: true, paxGid: true,
   137  	paxUname: true, paxGname: true, paxMtime: true, paxAtime: true, paxCtime: true,
   138  }
   139  
// A Header represents a single header in a tar archive.
// Some fields may not be populated.
//
// For forward compatibility, users that retrieve a Header from Reader.Next,
// mutate it in some ways, and then pass it back to Writer.WriteHeader
// should do so by creating a new Header and copying the fields
// that they are interested in preserving.
type Header struct {
	// Typeflag is the type of header entry.
	// The zero value is automatically promoted to either TypeReg or TypeDir
	// depending on the presence of a trailing slash in Name.
	Typeflag byte

	Name     string // Name of file entry
	Linkname string // Target name of link (valid for TypeLink or TypeSymlink)

	// Uid and Gid are plain ints; see the BUG note in this file about
	// truncation of large values on 32-bit architectures.
	Size  int64  // Logical file size in bytes
	Mode  int64  // Permission and mode bits
	Uid   int    // User ID of owner
	Gid   int    // Group ID of owner
	Uname string // User name of owner
	Gname string // Group name of owner

	// If the Format is unspecified, then Writer.WriteHeader rounds ModTime
	// to the nearest second and ignores the AccessTime and ChangeTime fields.
	//
	// To use AccessTime or ChangeTime, specify the Format as PAX or GNU.
	// To use sub-second resolution, specify the Format as PAX.
	ModTime    time.Time // Modification time
	AccessTime time.Time // Access time (requires either PAX or GNU support)
	ChangeTime time.Time // Change time (requires either PAX or GNU support)

	Devmajor int64 // Major device number (valid for TypeChar or TypeBlock)
	Devminor int64 // Minor device number (valid for TypeChar or TypeBlock)

	// Xattrs stores extended attributes as PAX records under the
	// "SCHILY.xattr." namespace.
	//
	// The following are semantically equivalent:
	//  h.Xattrs[key] = value
	//  h.PAXRecords["SCHILY.xattr."+key] = value
	//
	// When Writer.WriteHeader is called, the contents of Xattrs will take
	// precedence over those in PAXRecords.
	//
	// Deprecated: Use PAXRecords instead.
	Xattrs map[string]string

	// PAXRecords is a map of PAX extended header records.
	//
	// User-defined records should have keys of the following form:
	//	VENDOR.keyword
	// Where VENDOR is some namespace in all uppercase, and keyword may
	// not contain the '=' character (e.g., "GOLANG.pkg.version").
	// The key and value should be non-empty UTF-8 strings.
	//
	// When Writer.WriteHeader is called, PAX records derived from the
	// other fields in Header take precedence over PAXRecords.
	PAXRecords map[string]string

	// Format specifies the format of the tar header.
	//
	// This is set by Reader.Next as a best-effort guess at the format.
	// Since the Reader liberally reads some non-compliant files,
	// it is possible for this to be FormatUnknown.
	//
	// If the format is unspecified when Writer.WriteHeader is called,
	// then it uses the first format (in the order of USTAR, PAX, GNU)
	// capable of encoding this Header (see Format).
	Format Format
}
   211  
   212  // sparseEntry represents a Length-sized fragment at Offset in the file.
   213  type sparseEntry struct{ Offset, Length int64 }
   214  
   215  func (s sparseEntry) endOffset() int64 { return s.Offset + s.Length }
   216  
// A sparse file can be represented as either a sparseDatas or a sparseHoles.
// As long as the total size is known, they are equivalent and one can be
// converted to the other form and back. The various tar formats with sparse
// file support represent sparse files in the sparseDatas form. That is, they
// specify the fragments in the file that have data, and treat everything else
// as having zero bytes. As such, the encoding and decoding logic in this
// package deals with sparseDatas.
//
// However, the external API uses sparseHoles instead of sparseDatas because the
// zero value of sparseHoles logically represents a normal file (i.e., there are
// no holes in it). On the other hand, the zero value of sparseDatas implies
// that the file has no data in it, which is rather odd.
//
// As an example, if the underlying raw file contains the 8-byte data:
//
//	var compactFile = "abcdefgh"
//
// And the sparse map has the following entries:
//
//	var spd sparseDatas = []sparseEntry{
//		{Offset: 2,  Length: 5},  // Data fragment for 2..6
//		{Offset: 18, Length: 3},  // Data fragment for 18..20
//	}
//	var sph sparseHoles = []sparseEntry{
//		{Offset: 0,  Length: 2},  // Hole fragment for 0..1
//		{Offset: 7,  Length: 11}, // Hole fragment for 7..17
//		{Offset: 21, Length: 4},  // Hole fragment for 21..24
//	}
//
// Then the content of the resulting sparse file with a Header.Size of 25 is:
//
//	var sparseFile = "\x00"*2 + "abcde" + "\x00"*11 + "fgh" + "\x00"*4
type (
	sparseDatas []sparseEntry
	sparseHoles []sparseEntry
)
   253  
   254  // validateSparseEntries reports whether sp is a valid sparse map.
   255  // It does not matter whether sp represents data fragments or hole fragments.
   256  func validateSparseEntries(sp []sparseEntry, size int64) bool {
   257  	// Validate all sparse entries. These are the same checks as performed by
   258  	// the BSD tar utility.
   259  	if size < 0 {
   260  		return false
   261  	}
   262  	var pre sparseEntry
   263  	for _, cur := range sp {
   264  		switch {
   265  		case cur.Offset < 0 || cur.Length < 0:
   266  			return false // Negative values are never okay
   267  		case cur.Offset > math.MaxInt64-cur.Length:
   268  			return false // Integer overflow with large length
   269  		case cur.endOffset() > size:
   270  			return false // Region extends beyond the actual size
   271  		case pre.endOffset() > cur.Offset:
   272  			return false // Regions cannot overlap and must be in order
   273  		}
   274  		pre = cur
   275  	}
   276  	return true
   277  }
   278  
   279  // alignSparseEntries mutates src and returns dst where each fragment's
   280  // starting offset is aligned up to the nearest block edge, and each
   281  // ending offset is aligned down to the nearest block edge.
   282  //
   283  // Even though the Go tar Reader and the BSD tar utility can handle entries
   284  // with arbitrary offsets and lengths, the GNU tar utility can only handle
   285  // offsets and lengths that are multiples of blockSize.
   286  func alignSparseEntries(src []sparseEntry, size int64) []sparseEntry {
   287  	dst := src[:0]
   288  	for _, s := range src {
   289  		pos, end := s.Offset, s.endOffset()
   290  		pos += blockPadding(+pos) // Round-up to nearest blockSize
   291  		if end != size {
   292  			end -= blockPadding(-end) // Round-down to nearest blockSize
   293  		}
   294  		if pos < end {
   295  			dst = append(dst, sparseEntry{Offset: pos, Length: end - pos})
   296  		}
   297  	}
   298  	return dst
   299  }
   300  
   301  // invertSparseEntries converts a sparse map from one form to the other.
   302  // If the input is sparseHoles, then it will output sparseDatas and vice-versa.
   303  // The input must have been already validated.
   304  //
   305  // This function mutates src and returns a normalized map where:
   306  //   - adjacent fragments are coalesced together
   307  //   - only the last fragment may be empty
   308  //   - the endOffset of the last fragment is the total size
   309  func invertSparseEntries(src []sparseEntry, size int64) []sparseEntry {
   310  	dst := src[:0]
   311  	var pre sparseEntry
   312  	for _, cur := range src {
   313  		if cur.Length == 0 {
   314  			continue // Skip empty fragments
   315  		}
   316  		pre.Length = cur.Offset - pre.Offset
   317  		if pre.Length > 0 {
   318  			dst = append(dst, pre) // Only add non-empty fragments
   319  		}
   320  		pre.Offset = cur.endOffset()
   321  	}
   322  	pre.Length = size - pre.Offset // Possibly the only empty fragment
   323  	return append(dst, pre)
   324  }
   325  
// fileState tracks the number of logical (includes sparse holes) and physical
// (actual in tar archive) bytes remaining for the current file.
//
// Invariant: logicalRemaining >= physicalRemaining
type fileState interface {
	// logicalRemaining reports the remaining byte count including sparse holes.
	logicalRemaining() int64
	// physicalRemaining reports the remaining byte count actually stored in
	// the tar archive.
	physicalRemaining() int64
}
   334  
// allowedFormats determines which formats can be used.
// The value returned is the logical OR of multiple possible formats.
// If the value is FormatUnknown, then the input Header cannot be encoded
// and an error is returned explaining why.
//
// As a by-product of checking the fields, this function returns paxHdrs, which
// contain all fields that could not be directly encoded.
// A value receiver ensures that this method does not mutate the source Header.
func (h Header) allowedFormats() (format Format, paxHdrs map[string]string, err error) {
	// Start by assuming every format works, then rule formats out as
	// individual fields are checked.
	format = FormatUSTAR | FormatPAX | FormatGNU
	paxHdrs = make(map[string]string)

	// Each whyNo* string holds the most recent reason a format was excluded;
	// a later failing check overwrites an earlier one.
	var whyNoUSTAR, whyNoPAX, whyNoGNU string
	var preferPAX bool // Prefer PAX over USTAR

	// verifyString checks a string field against a header slot of the given
	// size. paxKey names the PAX record that can carry the value instead,
	// or is paxNone when there is none.
	verifyString := func(s string, size int, name, paxKey string) {
		// NUL-terminator is optional for path and linkpath.
		// Technically, it is required for uname and gname,
		// but neither GNU nor BSD tar checks for it.
		tooLong := len(s) > size
		allowLongGNU := paxKey == paxPath || paxKey == paxLinkpath
		if hasNUL(s) || (tooLong && !allowLongGNU) {
			whyNoGNU = fmt.Sprintf("GNU cannot encode %s=%q", name, s)
			format.mustNotBe(FormatGNU)
		}
		if !isASCII(s) || tooLong {
			// Only the path field may be rescued by splitUSTARPath.
			canSplitUSTAR := paxKey == paxPath
			if _, _, ok := splitUSTARPath(s); !canSplitUSTAR || !ok {
				whyNoUSTAR = fmt.Sprintf("USTAR cannot encode %s=%q", name, s)
				format.mustNotBe(FormatUSTAR)
			}
			if paxKey == paxNone {
				whyNoPAX = fmt.Sprintf("PAX cannot encode %s=%q", name, s)
				format.mustNotBe(FormatPAX)
			} else {
				paxHdrs[paxKey] = s
			}
		}
		// Keep a user-supplied PAX record when it agrees with the field value.
		if v, ok := h.PAXRecords[paxKey]; ok && v == s {
			paxHdrs[paxKey] = v
		}
	}

	// verifyNumeric checks an integer field against a header slot of the
	// given size: base-256 encoding for GNU, octal for USTAR, and a decimal
	// PAX record as the fallback when paxKey is not paxNone.
	verifyNumeric := func(n int64, size int, name, paxKey string) {
		if !fitsInBase256(size, n) {
			whyNoGNU = fmt.Sprintf("GNU cannot encode %s=%d", name, n)
			format.mustNotBe(FormatGNU)
		}
		if !fitsInOctal(size, n) {
			whyNoUSTAR = fmt.Sprintf("USTAR cannot encode %s=%d", name, n)
			format.mustNotBe(FormatUSTAR)
			if paxKey == paxNone {
				whyNoPAX = fmt.Sprintf("PAX cannot encode %s=%d", name, n)
				format.mustNotBe(FormatPAX)
			} else {
				paxHdrs[paxKey] = strconv.FormatInt(n, 10)
			}
		}
		// Keep a user-supplied PAX record when it agrees with the field value.
		if v, ok := h.PAXRecords[paxKey]; ok && v == strconv.FormatInt(n, 10) {
			paxHdrs[paxKey] = v
		}
	}

	// verifyTime checks a timestamp field. Any non-zero time other than the
	// modification time excludes USTAR; the modification time excludes it
	// only when it does not fit in octal.
	verifyTime := func(ts time.Time, size int, name, paxKey string) {
		if ts.IsZero() {
			return // Always okay
		}
		if !fitsInBase256(size, ts.Unix()) {
			whyNoGNU = fmt.Sprintf("GNU cannot encode %s=%v", name, ts)
			format.mustNotBe(FormatGNU)
		}
		isMtime := paxKey == paxMtime
		fitsOctal := fitsInOctal(size, ts.Unix())
		if (isMtime && !fitsOctal) || !isMtime {
			whyNoUSTAR = fmt.Sprintf("USTAR cannot encode %s=%v", name, ts)
			format.mustNotBe(FormatUSTAR)
		}
		needsNano := ts.Nanosecond() != 0
		if !isMtime || !fitsOctal || needsNano {
			preferPAX = true // USTAR may truncate sub-second measurements
			if paxKey == paxNone {
				whyNoPAX = fmt.Sprintf("PAX cannot encode %s=%v", name, ts)
				format.mustNotBe(FormatPAX)
			} else {
				paxHdrs[paxKey] = formatPAXTime(ts)
			}
		}
		// Keep a user-supplied PAX record when it agrees with the field value.
		if v, ok := h.PAXRecords[paxKey]; ok && v == formatPAXTime(ts) {
			paxHdrs[paxKey] = v
		}
	}

	// Check basic fields.
	// The zero block is used only to obtain the byte width of each field
	// in the respective header layout.
	var blk block
	v7 := blk.toV7()
	ustar := blk.toUSTAR()
	gnu := blk.toGNU()
	verifyString(h.Name, len(v7.name()), "Name", paxPath)
	verifyString(h.Linkname, len(v7.linkName()), "Linkname", paxLinkpath)
	verifyString(h.Uname, len(ustar.userName()), "Uname", paxUname)
	verifyString(h.Gname, len(ustar.groupName()), "Gname", paxGname)
	verifyNumeric(h.Mode, len(v7.mode()), "Mode", paxNone)
	verifyNumeric(int64(h.Uid), len(v7.uid()), "Uid", paxUid)
	verifyNumeric(int64(h.Gid), len(v7.gid()), "Gid", paxGid)
	verifyNumeric(h.Size, len(v7.size()), "Size", paxSize)
	verifyNumeric(h.Devmajor, len(ustar.devMajor()), "Devmajor", paxNone)
	verifyNumeric(h.Devminor, len(ustar.devMinor()), "Devminor", paxNone)
	verifyTime(h.ModTime, len(v7.modTime()), "ModTime", paxMtime)
	verifyTime(h.AccessTime, len(gnu.accessTime()), "AccessTime", paxAtime)
	verifyTime(h.ChangeTime, len(gnu.changeTime()), "ChangeTime", paxCtime)

	// Check for header-only types.
	var whyOnlyPAX, whyOnlyGNU string
	switch h.Typeflag {
	case TypeReg, TypeChar, TypeBlock, TypeFifo, TypeGNUSparse:
		// Exclude TypeLink and TypeSymlink, since they may reference directories.
		if strings.HasSuffix(h.Name, "/") {
			return FormatUnknown, nil, headerError{"filename may not have trailing slash"}
		}
	case TypeXHeader, TypeGNULongName, TypeGNULongLink:
		return FormatUnknown, nil, headerError{"cannot manually encode TypeXHeader, TypeGNULongName, or TypeGNULongLink headers"}
	case TypeXGlobalHeader:
		// A global header may only carry the fields copied into h2; any
		// other non-zero field makes the DeepEqual comparison fail.
		h2 := Header{Name: h.Name, Typeflag: h.Typeflag, Xattrs: h.Xattrs, PAXRecords: h.PAXRecords, Format: h.Format}
		if !reflect.DeepEqual(h, h2) {
			return FormatUnknown, nil, headerError{"only PAXRecords should be set for TypeXGlobalHeader"}
		}
		whyOnlyPAX = "only PAX supports TypeXGlobalHeader"
		format.mayOnlyBe(FormatPAX)
	}
	// NOTE(review): this guard fires for types that are NOT header-only,
	// while the message text says "header-only type" — confirm the wording.
	if !isHeaderOnlyType(h.Typeflag) && h.Size < 0 {
		return FormatUnknown, nil, headerError{"negative size on header-only type"}
	}

	// Check PAX records.
	if len(h.Xattrs) > 0 {
		for k, v := range h.Xattrs {
			paxHdrs[paxSchilyXattr+k] = v
		}
		whyOnlyPAX = "only PAX supports Xattrs"
		format.mayOnlyBe(FormatPAX)
	}
	if len(h.PAXRecords) > 0 {
		for k, v := range h.PAXRecords {
			switch _, exists := paxHdrs[k]; {
			case exists:
				continue // Do not overwrite existing records
			case h.Typeflag == TypeXGlobalHeader:
				paxHdrs[k] = v // Copy all records
			case !basicKeys[k] && !strings.HasPrefix(k, paxGNUSparse):
				paxHdrs[k] = v // Ignore local records that may conflict
			}
		}
		whyOnlyPAX = "only PAX supports PAXRecords"
		format.mayOnlyBe(FormatPAX)
	}
	for k, v := range paxHdrs {
		if !validPAXRecord(k, v) {
			return FormatUnknown, nil, headerError{fmt.Sprintf("invalid PAX record: %q", k+" = "+v)}
		}
	}

	// TODO(dsnet): Re-enable this when adding sparse support.
	// See https://golang.org/issue/22735
	/*
		// Check sparse files.
		if len(h.SparseHoles) > 0 || h.Typeflag == TypeGNUSparse {
			if isHeaderOnlyType(h.Typeflag) {
				return FormatUnknown, nil, headerError{"header-only type cannot be sparse"}
			}
			if !validateSparseEntries(h.SparseHoles, h.Size) {
				return FormatUnknown, nil, headerError{"invalid sparse holes"}
			}
			if h.Typeflag == TypeGNUSparse {
				whyOnlyGNU = "only GNU supports TypeGNUSparse"
				format.mayOnlyBe(FormatGNU)
			} else {
				whyNoGNU = "GNU supports sparse files only with TypeGNUSparse"
				format.mustNotBe(FormatGNU)
			}
			whyNoUSTAR = "USTAR does not support sparse files"
			format.mustNotBe(FormatUSTAR)
		}
	*/

	// Check desired format.
	if wantFormat := h.Format; wantFormat != FormatUnknown {
		if wantFormat.has(FormatPAX) && !preferPAX {
			wantFormat.mayBe(FormatUSTAR) // PAX implies USTAR allowed too
		}
		format.mayOnlyBe(wantFormat) // Set union of formats allowed and format wanted
	}
	if format == FormatUnknown {
		// Build the error from the reasons relevant to the requested format;
		// empty reason strings are dropped by headerError.Error.
		switch h.Format {
		case FormatUSTAR:
			err = headerError{"Format specifies USTAR", whyNoUSTAR, whyOnlyPAX, whyOnlyGNU}
		case FormatPAX:
			err = headerError{"Format specifies PAX", whyNoPAX, whyOnlyGNU}
		case FormatGNU:
			err = headerError{"Format specifies GNU", whyNoGNU, whyOnlyPAX}
		default:
			err = headerError{whyNoUSTAR, whyNoPAX, whyNoGNU, whyOnlyPAX, whyOnlyGNU}
		}
	}
	return format, paxHdrs, err
}
   537  
   538  // FileInfo returns an fs.FileInfo for the Header.
   539  func (h *Header) FileInfo() fs.FileInfo {
   540  	return headerFileInfo{h}
   541  }
   542  
   543  // headerFileInfo implements fs.FileInfo.
   544  type headerFileInfo struct {
   545  	h *Header
   546  }
   547  
   548  func (fi headerFileInfo) Size() int64        { return fi.h.Size }
   549  func (fi headerFileInfo) IsDir() bool        { return fi.Mode().IsDir() }
   550  func (fi headerFileInfo) ModTime() time.Time { return fi.h.ModTime }
   551  func (fi headerFileInfo) Sys() any           { return fi.h }
   552  
   553  // Name returns the base name of the file.
   554  func (fi headerFileInfo) Name() string {
   555  	if fi.IsDir() {
   556  		return path.Base(path.Clean(fi.h.Name))
   557  	}
   558  	return path.Base(fi.h.Name)
   559  }
   560  
   561  // Mode returns the permission and mode bits for the headerFileInfo.
   562  func (fi headerFileInfo) Mode() (mode fs.FileMode) {
   563  	// Set file permission bits.
   564  	mode = fs.FileMode(fi.h.Mode).Perm()
   565  
   566  	// Set setuid, setgid and sticky bits.
   567  	if fi.h.Mode&c_ISUID != 0 {
   568  		mode |= fs.ModeSetuid
   569  	}
   570  	if fi.h.Mode&c_ISGID != 0 {
   571  		mode |= fs.ModeSetgid
   572  	}
   573  	if fi.h.Mode&c_ISVTX != 0 {
   574  		mode |= fs.ModeSticky
   575  	}
   576  
   577  	// Set file mode bits; clear perm, setuid, setgid, and sticky bits.
   578  	switch m := fs.FileMode(fi.h.Mode) &^ 07777; m {
   579  	case c_ISDIR:
   580  		mode |= fs.ModeDir
   581  	case c_ISFIFO:
   582  		mode |= fs.ModeNamedPipe
   583  	case c_ISLNK:
   584  		mode |= fs.ModeSymlink
   585  	case c_ISBLK:
   586  		mode |= fs.ModeDevice
   587  	case c_ISCHR:
   588  		mode |= fs.ModeDevice
   589  		mode |= fs.ModeCharDevice
   590  	case c_ISSOCK:
   591  		mode |= fs.ModeSocket
   592  	}
   593  
   594  	switch fi.h.Typeflag {
   595  	case TypeSymlink:
   596  		mode |= fs.ModeSymlink
   597  	case TypeChar:
   598  		mode |= fs.ModeDevice
   599  		mode |= fs.ModeCharDevice
   600  	case TypeBlock:
   601  		mode |= fs.ModeDevice
   602  	case TypeDir:
   603  		mode |= fs.ModeDir
   604  	case TypeFifo:
   605  		mode |= fs.ModeNamedPipe
   606  	}
   607  
   608  	return mode
   609  }
   610  
// String returns the result of fs.FormatFileInfo applied to fi.
func (fi headerFileInfo) String() string {
	return fs.FormatFileInfo(fi)
}
   614  
// sysStat, if non-nil, populates h from system-dependent fields of fi.
// FileInfoHeader calls it last and returns its error; it passes
// doNameLookups as false when fi implements FileInfoNames.
var sysStat func(fi fs.FileInfo, h *Header, doNameLookups bool) error
   617  
// Bits of Header.Mode recognized by this package (octal values).
const (
	// Mode constants from the USTAR spec:
	// See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_06
	c_ISUID = 04000 // Set uid
	c_ISGID = 02000 // Set gid
	c_ISVTX = 01000 // Save text (sticky bit)

	// Common Unix mode constants; these are not defined in any common tar standard.
	// Header.FileInfo understands these, but FileInfoHeader will never produce these.
	// Note that headerFileInfo.Mode has no case for c_ISREG, so that value
	// adds no mode bits of its own.
	c_ISDIR  = 040000  // Directory
	c_ISFIFO = 010000  // FIFO
	c_ISREG  = 0100000 // Regular file
	c_ISLNK  = 0120000 // Symbolic link
	c_ISBLK  = 060000  // Block special file
	c_ISCHR  = 020000  // Character special file
	c_ISSOCK = 0140000 // Socket
)
   635  
   636  // FileInfoHeader creates a partially-populated [Header] from fi.
   637  // If fi describes a symlink, FileInfoHeader records link as the link target.
   638  // If fi describes a directory, a slash is appended to the name.
   639  //
   640  // Since fs.FileInfo's Name method only returns the base name of
   641  // the file it describes, it may be necessary to modify Header.Name
   642  // to provide the full path name of the file.
   643  //
   644  // If fi implements [FileInfoNames]
   645  // Header.Gname and Header.Uname
   646  // are provided by the methods of the interface.
   647  func FileInfoHeader(fi fs.FileInfo, link string) (*Header, error) {
   648  	if fi == nil {
   649  		return nil, errors.New("archive/tar: FileInfo is nil")
   650  	}
   651  	fm := fi.Mode()
   652  	h := &Header{
   653  		Name:    fi.Name(),
   654  		ModTime: fi.ModTime(),
   655  		Mode:    int64(fm.Perm()), // or'd with c_IS* constants later
   656  	}
   657  	switch {
   658  	case fm.IsRegular():
   659  		h.Typeflag = TypeReg
   660  		h.Size = fi.Size()
   661  	case fi.IsDir():
   662  		h.Typeflag = TypeDir
   663  		h.Name += "/"
   664  	case fm&fs.ModeSymlink != 0:
   665  		h.Typeflag = TypeSymlink
   666  		h.Linkname = link
   667  	case fm&fs.ModeDevice != 0:
   668  		if fm&fs.ModeCharDevice != 0 {
   669  			h.Typeflag = TypeChar
   670  		} else {
   671  			h.Typeflag = TypeBlock
   672  		}
   673  	case fm&fs.ModeNamedPipe != 0:
   674  		h.Typeflag = TypeFifo
   675  	case fm&fs.ModeSocket != 0:
   676  		return nil, fmt.Errorf("archive/tar: sockets not supported")
   677  	default:
   678  		return nil, fmt.Errorf("archive/tar: unknown file mode %v", fm)
   679  	}
   680  	if fm&fs.ModeSetuid != 0 {
   681  		h.Mode |= c_ISUID
   682  	}
   683  	if fm&fs.ModeSetgid != 0 {
   684  		h.Mode |= c_ISGID
   685  	}
   686  	if fm&fs.ModeSticky != 0 {
   687  		h.Mode |= c_ISVTX
   688  	}
   689  	// If possible, populate additional fields from OS-specific
   690  	// FileInfo fields.
   691  	if sys, ok := fi.Sys().(*Header); ok {
   692  		// This FileInfo came from a Header (not the OS). Use the
   693  		// original Header to populate all remaining fields.
   694  		h.Uid = sys.Uid
   695  		h.Gid = sys.Gid
   696  		h.Uname = sys.Uname
   697  		h.Gname = sys.Gname
   698  		h.AccessTime = sys.AccessTime
   699  		h.ChangeTime = sys.ChangeTime
   700  		h.Xattrs = maps.Clone(sys.Xattrs)
   701  		if sys.Typeflag == TypeLink {
   702  			// hard link
   703  			h.Typeflag = TypeLink
   704  			h.Size = 0
   705  			h.Linkname = sys.Linkname
   706  		}
   707  		h.PAXRecords = maps.Clone(sys.PAXRecords)
   708  	}
   709  	var doNameLookups = true
   710  	if iface, ok := fi.(FileInfoNames); ok {
   711  		doNameLookups = false
   712  		var err error
   713  		h.Gname, err = iface.Gname()
   714  		if err != nil {
   715  			return nil, err
   716  		}
   717  		h.Uname, err = iface.Uname()
   718  		if err != nil {
   719  			return nil, err
   720  		}
   721  	}
   722  	if sysStat != nil {
   723  		return h, sysStat(fi, h, doNameLookups)
   724  	}
   725  	return h, nil
   726  }
   727  
// FileInfoNames extends [fs.FileInfo].
// Passing an instance of this to [FileInfoHeader] permits the caller
// to avoid a system-dependent name lookup by specifying the Uname and Gname directly.
// If either method returns an error, [FileInfoHeader] returns that error.
type FileInfoNames interface {
	fs.FileInfo
	// Uname should give a user name.
	Uname() (string, error)
	// Gname should give a group name.
	Gname() (string, error)
}
   738  
   739  // isHeaderOnlyType checks if the given type flag is of the type that has no
   740  // data section even if a size is specified.
   741  func isHeaderOnlyType(flag byte) bool {
   742  	switch flag {
   743  	case TypeLink, TypeSymlink, TypeChar, TypeBlock, TypeDir, TypeFifo:
   744  		return true
   745  	default:
   746  		return false
   747  	}
   748  }
   749  

View as plain text