Source file src/archive/zip/zip64_test.go

     1  // Copyright 2026 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package zip
     6  
     7  import (
     8  	"bytes"
     9  	"encoding/binary"
    10  	"errors"
    11  	"fmt"
    12  	"io"
    13  	"path/filepath"
    14  	"slices"
    15  	"strings"
    16  	"testing"
    17  )
    18  
    19  // TestZip64WriterCDGoldens checks that the archive/zip Writer emits a Central
    20  // Directory that matches the Zip64 conventions used by Info-ZIP, libarchive,
    21  // and the pre-CL archive/zip writer (go126-*), for archives at or above 4 GiB,
    22  // except where we intentionally diverged.
    23  //
    24  // For each golden in testdata/zip64/*.zsparse (see [sparseFile] for the
    25  // committed format), the test:
    26  //  1. Parses the golden's CD into a producer-independent snapshot — which
    27  //     fields hold 0xFFFFFFFF placeholders, which Zip64 extra sub-fields are
    28  //     present and in what order, and the EOCD/EOCD64 values.
    29  //  2. Verifies the production [NewReader] parses the same archive.
    30  //  3. Replays the same entries through a fresh [Writer] into a [sparseBuffer]
    31  //     and parses our own CD.
    32  //  4. Verifies the production [NewReader] parses our reproduced archive too.
    33  //  5. Compares the two snapshots field-by-field, ignoring producer-specific
    34  //     details (creator version, external attrs, non-Zip64 extras, absolute
    35  //     byte offsets that depend on LFH/data-descriptor layout).
    36  func TestZip64WriterCDGoldens(t *testing.T) {
    37  	if testing.Short() {
    38  		t.Skip("skipping in short mode; each golden replays a multi-GiB write")
    39  	}
    40  
    41  	matches, err := filepath.Glob("testdata/zip64/*.zsparse")
    42  	if err != nil {
    43  		t.Fatal(err)
    44  	}
    45  	if len(matches) == 0 {
    46  		t.Fatal("missing Zip64 goldens in testdata/zip64")
    47  	}
    48  
    49  	// Tail materialized for parseCD. Goldens have ≤ 2 entries; their CD
    50  	// plus EOCD records fits in well under 1 MiB.
    51  	const tailKeep = 1 << 20
    52  
    53  	// archive/zip's writer takes the most defensive position on every
    54  	// spec-fuzzy point: it always emits the Zip64 extra at the 0xFFFFFFFF
    55  	// boundary (matching libarchive but more conservative than Info-ZIP) AND
    56  	// emits EOCD64 whenever any entry has a Zip64 extra in its CD record
    57  	// (matching Info-ZIP but more conservative than libarchive). The go126-
    58  	// goldens are output of an older archive/zip writer, and the format
    59  	// deliberately diverges; they are kept here so the reader-side check
    60  	// enforces backwards compatibility with archives produced by our own past
    61  	// writer, and to ensure we only diverge where intended.
    62  	expectedDiff := map[string]bool{
    63  		// Info-ZIP treats a CD size field of exactly 0xFFFFFFFF as a real
    64  		// value and omits the Zip64 extra; archive/zip defensively emits
    65  		// the Zip64 extra with USize64+CSize64.
    66  		"infozip-store-4g-minus-1": true,
    67  
    68  		// Info-ZIP treats a CD offset field of exactly 0xFFFFFFFF as a real
    69  		// value and omits the Zip64 extra for offset; archive/zip defensively
    70  		// emits the Zip64 extra with the offset sub-field.
    71  		"infozip-offset-eq-4g": true,
    72  
    73  		// libarchive's writer emits EOCD64 only on EOCD-level overflow (CD
    74  		// size/offset > 4GiB, records > 0xFFFF); archive/zip also emits
    75  		// EOCD64 when any per-entry CD record uses a Zip64 extra, even if
    76  		// the EOCD fields fit in 32 bits.
    77  		"libarchive-deflate-zeros-5g": true,
    78  
    79  		// libarchive's LFH always carries a UT timestamp extra (~9 bytes),
    80  		// so its dirOffset for a body of 4GiB-59 lands just past 0xFFFFFFFF
    81  		// and it emits EOCD64. archive/zip's streaming LFH has no such
    82  		// extras and stays under uint32max.
    83  		"libarchive-store-just-under-4g": true,
    84  
    85  		// The old archive/zip writer differs from the current writer on
    86  		// every Zip64-using entry: it always wrote a fixed 24-byte Zip64
    87  		// extra with all three sub-fields (usize, csize, offset) and set
    88  		// both 32-bit size fields to 0xFFFFFFFF whenever the per-entry
    89  		// trigger fired; it also set the EOCD records/size/offset to the
    90  		// placeholder values whenever EOCD64 was present.
    91  		"go126-store-5g":            true,
    92  		"go126-deflate-zeros-5g":    true,
    93  		"go126-store-4g-minus-1":    true,
    94  		"go126-store-4g-minus-2":    true,
    95  		"go126-store-exact-4g":      true,
    96  		"go126-offset-past-4g":      true,
    97  		"go126-offset-eq-4g":        true,
    98  		"go126-store-just-under-4g": false,
    99  	}
   100  
   101  	for _, path := range matches {
   102  		name := strings.TrimSuffix(filepath.Base(path), ".zsparse")
   103  		t.Run(name, func(t *testing.T) {
   104  			t.Parallel()
   105  			goldenSF, err := readSparseFile(path)
   106  			if err != nil {
   107  				t.Fatalf("read golden: %v", err)
   108  			}
   109  			goldenData, goldenBase := goldenSF.materializeTail(tailKeep)
   110  			golden, err := parseCD(goldenData, goldenBase)
   111  			if err != nil {
   112  				t.Fatalf("parse golden CD: %v", err)
   113  			}
   114  
   115  			// Verify the production Reader can parse the full golden.
   116  			checkReaderMatchesSnapshot(t, "golden", goldenSF, golden)
   117  
   118  			oursSF := reproduceCD(t, golden)
   119  			oursData, oursBase := oursSF.materializeTail(tailKeep)
   120  			got, err := parseCD(oursData, oursBase)
   121  			if err != nil {
   122  				t.Fatalf("parse reproduced CD: %v\nbytes:\n%s", err, hexDump(oursData))
   123  			}
   124  			// Verify the production Reader can parse archive/zip's own
   125  			// output and gets the same view of the entries.
   126  			checkReaderMatchesSnapshot(t, "reproduced", oursSF, got)
   127  
   128  			if expectedDiff[name] {
   129  				var cap captureReporter
   130  				compareCDSnapshots(&cap, golden, got)
   131  				if !cap.failed {
   132  					t.Errorf("expected this golden to fail equivalence, but it passed")
   133  				} else {
   134  					t.Logf("expected mismatch:\n%s", indent(cap.msg.String(), "  "))
   135  				}
   136  				return
   137  			}
   138  			compareCDSnapshots(t, golden, got)
   139  		})
   140  	}
   141  }
   142  
   143  // errReporter is the subset of [testing.TB] that [compareCDSnapshots] uses.
   144  // The captureReporter implementation lets the test capture mismatches for
   145  // expected-failure cases instead of propagating them to the outer t.
   146  type errReporter interface {
   147  	Errorf(format string, args ...any)
   148  	Helper()
   149  }
   150  
   151  type captureReporter struct {
   152  	failed bool
   153  	msg    strings.Builder
   154  }
   155  
   156  func (c *captureReporter) Errorf(format string, args ...any) {
   157  	c.failed = true
   158  	fmt.Fprintf(&c.msg, format+"\n", args...)
   159  }
   160  
   161  func (c *captureReporter) Helper() {}
   162  
   163  // checkReaderMatchesSnapshot opens the archive backed by the sparseFile
   164  // using the production [NewReader] and asserts that the entry list it
   165  // returns matches the [cdSnapshot] (entry count, names, resolved 64-bit
   166  // sizes).
   167  func checkReaderMatchesSnapshot(t *testing.T, label string, f *sparseFile, snap *cdSnapshot) {
   168  	t.Helper()
   169  	zr, err := NewReader(f, f.Size)
   170  	if err != nil {
   171  		t.Fatalf("%s: NewReader: %v", label, err)
   172  	}
   173  	if g, w := len(zr.File), len(snap.Entries); g != w {
   174  		t.Errorf("%s: NewReader returned %d files, parseCD found %d", label, g, w)
   175  		return
   176  	}
   177  	for i, f := range zr.File {
   178  		want := &snap.Entries[i]
   179  		if f.Name != want.Name {
   180  			t.Errorf("%s entry %d: Name = %q, want %q", label, i, f.Name, want.Name)
   181  		}
   182  		if f.UncompressedSize64 != want.USize64 {
   183  			t.Errorf("%s entry %d %q: UncompressedSize64 = %d, want %d", label, i, want.Name, f.UncompressedSize64, want.USize64)
   184  		}
   185  		if f.CompressedSize64 != want.CSize64 {
   186  			t.Errorf("%s entry %d %q: CompressedSize64 = %d, want %d", label, i, want.Name, f.CompressedSize64, want.CSize64)
   187  		}
   188  	}
   189  }
   190  
   191  // indent prefixes every line of s with prefix.
   192  func indent(s, prefix string) string {
   193  	if s == "" {
   194  		return s
   195  	}
   196  	lines := strings.Split(strings.TrimRight(s, "\n"), "\n")
   197  	for i, l := range lines {
   198  		lines[i] = prefix + l
   199  	}
   200  	return strings.Join(lines, "\n") + "\n"
   201  }
   202  
   203  // reproduceCD writes a zip archive with the same logical entries as golden
   204  // into a [sparseBuffer] (which drops all-zero chunks, so pushing multi-GiB
   205  // streams of zeros through the writer is essentially free) and returns the
   206  // resulting [sparseFile].
   207  //
   208  // For entries where compressed == uncompressed (Store, or other 1:1 cases)
   209  // we drive the Writer through [Writer.CreateHeader] so that the data
   210  // descriptor, offset accounting, and Close-time CD emission all exercise
   211  // the production streaming path. The CRC32 hasher is replaced with
   212  // [fakeHash32] to avoid hashing many GiB of zeros.
   213  //
   214  // For entries where compressed ≪ uncompressed (Method=Deflate over zeros),
   215  // actually deflating multi-GiB streams at test time is prohibitively slow,
   216  // so we fall back to [Writer.CreateRaw] and declare the sizes directly.
   217  // The Central Directory output is identical either way.
   218  func reproduceCD(t *testing.T, golden *cdSnapshot) *sparseFile {
   219  	t.Helper()
   220  	sb := &sparseBuffer{}
   221  	w := NewWriter(sb)
   222  	for i, e := range golden.Entries {
   223  		if e.CSize64 == e.USize64 {
   224  			fh := &FileHeader{Name: e.Name, Method: e.Method}
   225  			fw, err := w.CreateHeader(fh)
   226  			if err != nil {
   227  				t.Fatalf("CreateHeader[%d %q]: %v", i, e.Name, err)
   228  			}
   229  			fw.(*fileWriter).crc32 = fakeHash32{}
   230  			if _, err := io.CopyN(fw, zeros{}, int64(e.USize64)); err != nil {
   231  				t.Fatalf("CopyN[%d %q]: %v", i, e.Name, err)
   232  			}
   233  			continue
   234  		}
   235  		fh := &FileHeader{
   236  			Name:               e.Name,
   237  			Method:             e.Method,
   238  			CompressedSize64:   e.CSize64,
   239  			UncompressedSize64: e.USize64,
   240  		}
   241  		fw, err := w.CreateRaw(fh)
   242  		if err != nil {
   243  			t.Fatalf("CreateRaw[%d %q]: %v", i, e.Name, err)
   244  		}
   245  		if _, err := io.CopyN(fw, zeros{}, int64(e.CSize64)); err != nil {
   246  			t.Fatalf("CopyN[%d %q]: %v", i, e.Name, err)
   247  		}
   248  	}
   249  	if err := w.Close(); err != nil {
   250  		t.Fatalf("Close: %v", err)
   251  	}
   252  	return &sb.f
   253  }
   254  
   255  // compareCDSnapshots asserts that got matches want on Zip64-relevant fields.
   256  //
   257  // Per-entry size fields (RawCSize, RawUSize, CSize64, USize64) are compared
   258  // exactly — we feed them in from the golden when reproducing, so the writer
   259  // has no excuse to disagree. Per-entry RawOffset and the EOCD records/size/
   260  // offset fields are compared only as placeholder-or-not: their absolute
   261  // values depend on producer-specific LFH layout (Info-ZIP packs sizes into
   262  // the LFH; archive/zip's streaming path uses a data descriptor; libarchive
   263  // adds UT extras) and that's not what this test is pinning down.
   264  func compareCDSnapshots(t errReporter, want, got *cdSnapshot) {
   265  	t.Helper()
   266  	if g, w := len(got.Entries), len(want.Entries); g != w {
   267  		t.Errorf("entry count = %d, want %d", g, w)
   268  		return
   269  	}
   270  	for i := range want.Entries {
   271  		we, ge := &want.Entries[i], &got.Entries[i]
   272  		// csize and usize come from the declared FileHeader values, so the
   273  		// raw 32-bit fields must match exactly (real value vs. placeholder
   274  		// choice and, when not placeholder, the value itself).
   275  		if we.RawCSize != ge.RawCSize {
   276  			t.Errorf("entry %d %q: RawCSize = %#08x, want %#08x", i, we.Name, ge.RawCSize, we.RawCSize)
   277  		}
   278  		if we.RawUSize != ge.RawUSize {
   279  			t.Errorf("entry %d %q: RawUSize = %#08x, want %#08x", i, we.Name, ge.RawUSize, we.RawUSize)
   280  		}
   281  		// Resolved csize/usize must match — we fed them in from the golden.
   282  		if we.CSize64 != ge.CSize64 {
   283  			t.Errorf("entry %d %q: CSize64 = %d, want %d", i, we.Name, ge.CSize64, we.CSize64)
   284  		}
   285  		if we.USize64 != ge.USize64 {
   286  			t.Errorf("entry %d %q: USize64 = %d, want %d", i, we.Name, ge.USize64, we.USize64)
   287  		}
   288  		// Offset is layout-dependent. Compare placeholder-or-not, not value.
   289  		if isPlaceholder32(we.RawOffset) != isPlaceholder32(ge.RawOffset) {
   290  			t.Errorf("entry %d %q: RawOffset placeholder = %#08x, want %#08x", i, we.Name, ge.RawOffset, we.RawOffset)
   291  		}
   292  
   293  		// Zip64 sub-field presence/order, must match exactly.
   294  		if !slices.Equal(we.Z64ExtraFields, ge.Z64ExtraFields) {
   295  			t.Errorf("entry %d %q: Zip64 sub-field order = %v, want %v", i, we.Name, ge.Z64ExtraFields, we.Z64ExtraFields)
   296  		}
   297  		// ReaderVersion ≥ 45 whenever a Zip64 extra is present.
   298  		if len(we.Z64ExtraFields) > 0 && ge.ReaderVersion < zipVersion45 {
   299  			t.Errorf("entry %d %q: ReaderVersion = %d, want ≥ %d (Zip64 extra present)", i, we.Name, ge.ReaderVersion, zipVersion45)
   300  		}
   301  	}
   302  
   303  	// EOCD: compare placeholder-or-not for each field. Exact values are
   304  	// layout-dependent.
   305  	if isPlaceholder16(want.EOCD.Records) != isPlaceholder16(got.EOCD.Records) {
   306  		t.Errorf("EOCD records placeholder = %#x, want %#x", got.EOCD.Records, want.EOCD.Records)
   307  	}
   308  	if isPlaceholder32(want.EOCD.Size) != isPlaceholder32(got.EOCD.Size) {
   309  		t.Errorf("EOCD size placeholder = %#x, want %#x", got.EOCD.Size, want.EOCD.Size)
   310  	}
   311  	if isPlaceholder32(want.EOCD.Offset) != isPlaceholder32(got.EOCD.Offset) {
   312  		t.Errorf("EOCD offset placeholder = %#x, want %#x", got.EOCD.Offset, want.EOCD.Offset)
   313  	}
   314  
   315  	if got.HasEOCD64 != want.HasEOCD64 {
   316  		t.Errorf("EOCD64 present = %v, want %v", got.HasEOCD64, want.HasEOCD64)
   317  	}
   318  	if want.HasEOCD64 && got.HasEOCD64 {
   319  		if got.EOCD64.Records != want.EOCD64.Records {
   320  			t.Errorf("EOCD64 records = %d, want %d", got.EOCD64.Records, want.EOCD64.Records)
   321  		}
   322  		// EOCD64.Size and EOCD64.Offset are layout-dependent.
   323  	}
   324  }
   325  
   326  func isPlaceholder32(v uint32) bool { return v == uint32max }
   327  func isPlaceholder16(v uint16) bool { return v == uint16max }
   328  
   329  // CD snapshot types and parser
   330  
   331  // zip64SubID identifies one of the three sub-fields that may appear in a
   332  // Zip64 extended-information extra field, in the spec-defined order.
   333  type zip64SubID int
   334  
   335  const (
   336  	z64USize zip64SubID = iota + 1
   337  	z64CSize
   338  	z64Offset
   339  )
   340  
   341  func (s zip64SubID) String() string {
   342  	switch s {
   343  	case z64USize:
   344  		return "usize"
   345  	case z64CSize:
   346  		return "csize"
   347  	case z64Offset:
   348  		return "offset"
   349  	}
   350  	return fmt.Sprintf("zip64SubID(%d)", int(s))
   351  }
   352  
   353  type cdEntry struct {
   354  	Name          string
   355  	Method        uint16
   356  	ReaderVersion uint16
   357  
   358  	// Raw 32-bit fields from the CD record. A value of 0xFFFFFFFF indicates
   359  	// the real value is in the Zip64 extended-information extra field.
   360  	RawCSize  uint32
   361  	RawUSize  uint32
   362  	RawOffset uint32
   363  
   364  	// Resolved 64-bit values (from the 32-bit field if not a placeholder,
   365  	// otherwise from the Zip64 extra).
   366  	CSize64  uint64
   367  	USize64  uint64
   368  	Offset64 uint64
   369  
   370  	// Sub-fields present in the Zip64 extra, in the order they appear.
   371  	Z64ExtraFields []zip64SubID
   372  }
   373  
   374  type eocdRec struct {
   375  	Records uint16 // 0xFFFF if placeholder
   376  	Size    uint32 // 0xFFFFFFFF if placeholder
   377  	Offset  uint32 // 0xFFFFFFFF if placeholder
   378  }
   379  
   380  type eocd64Rec struct {
   381  	Records uint64
   382  	Size    uint64
   383  	Offset  uint64
   384  }
   385  
// cdSnapshot is the result of parseCD: the parsed Central Directory entries
// together with the EOCD record and, when one was found, the EOCD64 record.
type cdSnapshot struct {
	Entries   []cdEntry // one element per CD record, in the order parseCD read them
	EOCD      eocdRec   // raw fields of the EOCD record
	HasEOCD64 bool      // set by parseCD when it found an EOCD64 locator and record
	EOCD64    eocd64Rec // EOCD64 values; left zero unless HasEOCD64 is set
}
   392  
// le is shorthand for the little-endian byte order that the helpers in this
// file use to decode fixed-width integers from raw archive bytes.
var le = binary.LittleEndian
   394  
   395  // parseCD parses the Central Directory and EOCD records of a zip archive
   396  // from its raw bytes. data must be the tail of the archive, with baseOffset
   397  // indicating where data[0] sits in the original archive (0 for whole-archive
   398  // input).
   399  func parseCD(data []byte, baseOffset uint64) (*cdSnapshot, error) {
   400  	sigOff, err := findEOCD(data)
   401  	if err != nil {
   402  		return nil, err
   403  	}
   404  	snap := &cdSnapshot{}
   405  	snap.EOCD.Records = le.Uint16(data[sigOff+10:])
   406  	snap.EOCD.Size = le.Uint32(data[sigOff+12:])
   407  	snap.EOCD.Offset = le.Uint32(data[sigOff+16:])
   408  
   409  	dirOffset := uint64(snap.EOCD.Offset)
   410  	nRecords := uint64(snap.EOCD.Records)
   411  
   412  	// toData converts an absolute archive offset to a data slice offset,
   413  	// returning false if it lies before our captured tail.
   414  	toData := func(absOff uint64) (uint64, bool) {
   415  		if absOff < baseOffset {
   416  			return 0, false
   417  		}
   418  		return absOff - baseOffset, true
   419  	}
   420  
   421  	// Look for an EOCD64 locator immediately preceding the EOCD record.
   422  	if sigOff >= directory64LocLen {
   423  		locOff := sigOff - directory64LocLen
   424  		if le.Uint32(data[locOff:]) == directory64LocSignature {
   425  			eocd64Off := le.Uint64(data[locOff+8:])
   426  			eocd64DataOff, ok := toData(eocd64Off)
   427  			if !ok {
   428  				return nil, fmt.Errorf("zip: EOCD64 at %#x before captured tail (base %#x)", eocd64Off, baseOffset)
   429  			}
   430  			if eocd64DataOff+directory64EndLen > uint64(len(data)) {
   431  				return nil, errors.New("zip: EOCD64 offset out of range")
   432  			}
   433  			if le.Uint32(data[eocd64DataOff:]) != directory64EndSignature {
   434  				return nil, errors.New("zip: EOCD64 signature mismatch")
   435  			}
   436  			snap.HasEOCD64 = true
   437  			snap.EOCD64.Records = le.Uint64(data[eocd64DataOff+32:])
   438  			snap.EOCD64.Size = le.Uint64(data[eocd64DataOff+40:])
   439  			snap.EOCD64.Offset = le.Uint64(data[eocd64DataOff+48:])
   440  			dirOffset = snap.EOCD64.Offset
   441  			nRecords = snap.EOCD64.Records
   442  		}
   443  	}
   444  
   445  	off, ok := toData(dirOffset)
   446  	if !ok {
   447  		return nil, fmt.Errorf("zip: CD at %#x before captured tail (base %#x)", dirOffset, baseOffset)
   448  	}
   449  	for i := uint64(0); i < nRecords; i++ {
   450  		if off+directoryHeaderLen > uint64(len(data)) {
   451  			return nil, fmt.Errorf("zip: CD entry %d out of range", i)
   452  		}
   453  		rec := data[off:]
   454  		if le.Uint32(rec) != directoryHeaderSignature {
   455  			return nil, fmt.Errorf("zip: bad CD signature at offset %d", off)
   456  		}
   457  		var e cdEntry
   458  		e.ReaderVersion = le.Uint16(rec[6:])
   459  		e.Method = le.Uint16(rec[10:])
   460  		e.RawCSize = le.Uint32(rec[20:])
   461  		e.RawUSize = le.Uint32(rec[24:])
   462  		nameLen := uint64(le.Uint16(rec[28:]))
   463  		extraLen := uint64(le.Uint16(rec[30:]))
   464  		commLen := uint64(le.Uint16(rec[32:]))
   465  		e.RawOffset = le.Uint32(rec[42:])
   466  
   467  		recLen := uint64(directoryHeaderLen) + nameLen + extraLen + commLen
   468  		if off+recLen > uint64(len(data)) {
   469  			return nil, fmt.Errorf("zip: CD entry %d truncated", i)
   470  		}
   471  		nameOff := off + directoryHeaderLen
   472  		extraOff := nameOff + nameLen
   473  		e.Name = string(data[nameOff:extraOff])
   474  		extra := data[extraOff : extraOff+extraLen]
   475  
   476  		e.CSize64 = uint64(e.RawCSize)
   477  		e.USize64 = uint64(e.RawUSize)
   478  		e.Offset64 = uint64(e.RawOffset)
   479  
   480  		// Walk extra fields; consume the Zip64 sub-field if present.
   481  		// Per the spec and Info-ZIP convention, the Zip64 extra contains
   482  		// 8-byte values for exactly the size/offset fields whose 32-bit
   483  		// counterpart is 0xFFFFFFFF, in the order: USize, CSize, Offset.
   484  		for len(extra) >= 4 {
   485  			tag := le.Uint16(extra)
   486  			size := uint64(le.Uint16(extra[2:]))
   487  			if 4+size > uint64(len(extra)) {
   488  				break
   489  			}
   490  			field := extra[4 : 4+size]
   491  			extra = extra[4+size:]
   492  			if tag != zip64ExtraID {
   493  				continue
   494  			}
   495  			if e.RawUSize == uint32max && len(field) >= 8 {
   496  				e.USize64 = le.Uint64(field)
   497  				e.Z64ExtraFields = append(e.Z64ExtraFields, z64USize)
   498  				field = field[8:]
   499  			}
   500  			if e.RawCSize == uint32max && len(field) >= 8 {
   501  				e.CSize64 = le.Uint64(field)
   502  				e.Z64ExtraFields = append(e.Z64ExtraFields, z64CSize)
   503  				field = field[8:]
   504  			}
   505  			if e.RawOffset == uint32max && len(field) >= 8 {
   506  				e.Offset64 = le.Uint64(field)
   507  				e.Z64ExtraFields = append(e.Z64ExtraFields, z64Offset)
   508  				field = field[8:]
   509  			}
   510  		}
   511  
   512  		snap.Entries = append(snap.Entries, e)
   513  		off += recLen
   514  	}
   515  	return snap, nil
   516  }
   517  
   518  // findEOCD locates the EOCD record by scanning back from the end of data,
   519  // matching both the signature and the trailing comment-length field.
   520  func findEOCD(data []byte) (uint64, error) {
   521  	if len(data) < directoryEndLen {
   522  		return 0, errors.New("zip: too short for EOCD")
   523  	}
   524  	maxComment := uint16max
   525  	lo := len(data) - directoryEndLen
   526  	hi := lo
   527  	if hi > maxComment {
   528  		lo = hi - maxComment
   529  	} else {
   530  		lo = 0
   531  	}
   532  	for i := hi; i >= lo; i-- {
   533  		if le.Uint32(data[i:]) != directoryEndSignature {
   534  			continue
   535  		}
   536  		cl := int(le.Uint16(data[i+20:]))
   537  		if i+directoryEndLen+cl == len(data) {
   538  			return uint64(i), nil
   539  		}
   540  	}
   541  	return 0, errors.New("zip: EOCD not found")
   542  }
   543  
   544  // hexDump returns a short hex dump of data for failure messages.
   545  func hexDump(data []byte) string {
   546  	if len(data) > 4096 {
   547  		data = data[len(data)-4096:]
   548  	}
   549  	var b strings.Builder
   550  	for i := 0; i < len(data); i += 16 {
   551  		end := min(i+16, len(data))
   552  		fmt.Fprintf(&b, "%04x  % x\n", i, data[i:end])
   553  	}
   554  	return b.String()
   555  }
   556  
   557  // TestZip64LFHBothPlaceholders covers the [Writer.CreateRaw] + no-data-
   558  // descriptor path where the entry's uncompressed or compressed size exceeds
   559  // 4 GiB. The Local File Header carries a Zip64 extra with both 8-byte
   560  // USize64 and CSize64 sub-fields (matching Info-ZIP), so per APPNOTE 4.5.3
   561  // both 32-bit size fields in the LFH must be the 0xFFFFFFFF placeholder —
   562  // even if only one of the sizes actually overflows.
   563  func TestZip64LFHBothPlaceholders(t *testing.T) {
   564  	var buf bytes.Buffer
   565  	w := NewWriter(&buf)
   566  	fh := &FileHeader{
   567  		Name:               "x",
   568  		Method:             Deflate,
   569  		CompressedSize64:   1024,
   570  		UncompressedSize64: 5 << 30, // > 4 GiB
   571  	}
   572  	fw, err := w.CreateRaw(fh)
   573  	if err != nil {
   574  		t.Fatal(err)
   575  	}
   576  	if _, err := io.CopyN(fw, zeros{}, int64(fh.CompressedSize64)); err != nil {
   577  		t.Fatal(err)
   578  	}
   579  	if err := w.Close(); err != nil {
   580  		t.Fatal(err)
   581  	}
   582  
   583  	b := buf.Bytes()
   584  	if got := le.Uint32(b[14:18]); got != fh.CRC32 {
   585  		t.Errorf("LFH CRC32 = %#x, want %#x", got, fh.CRC32)
   586  	}
   587  	if got := le.Uint32(b[18:22]); got != uint32max {
   588  		t.Errorf("LFH CompressedSize = %#x, want %#x (placeholder)", got, uint32(uint32max))
   589  	}
   590  	if got := le.Uint32(b[22:26]); got != uint32max {
   591  		t.Errorf("LFH UncompressedSize = %#x, want %#x (placeholder)", got, uint32(uint32max))
   592  	}
   593  
   594  	// The Zip64 LFH extra should carry both 64-bit sub-fields in
   595  	// USize64-then-CSize64 order.
   596  	nameLen := uint64(le.Uint16(b[26:28]))
   597  	extraLen := uint64(le.Uint16(b[28:30]))
   598  	if want := uint64(20); extraLen != want {
   599  		t.Fatalf("LFH extra length = %d, want %d", extraLen, want)
   600  	}
   601  	extra := b[30+nameLen : 30+nameLen+extraLen]
   602  	if tag := le.Uint16(extra[:2]); tag != zip64ExtraID {
   603  		t.Errorf("Zip64 extra tag = %#x, want %#x", tag, zip64ExtraID)
   604  	}
   605  	if dataLen := le.Uint16(extra[2:4]); dataLen != 16 {
   606  		t.Errorf("Zip64 extra data length = %d, want 16", dataLen)
   607  	}
   608  	if got := le.Uint64(extra[4:12]); got != fh.UncompressedSize64 {
   609  		t.Errorf("Zip64 USize64 = %d, want %d", got, fh.UncompressedSize64)
   610  	}
   611  	if got := le.Uint64(extra[12:20]); got != fh.CompressedSize64 {
   612  		t.Errorf("Zip64 CSize64 = %d, want %d", got, fh.CompressedSize64)
   613  	}
   614  }
   615  

View as plain text