// Copyright 2011 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package zip import ( "bufio" "encoding/binary" "errors" "hash" "hash/crc32" "io" "io/fs" "strings" "unicode/utf8" ) var ( errLongName = errors.New("zip: FileHeader.Name too long") errLongExtra = errors.New("zip: FileHeader.Extra too long") ) // Writer implements a zip file writer. type Writer struct { cw *countWriter dir []*header last *fileWriter closed bool compressors map[uint16]Compressor comment string // testHookCloseSizeOffset if non-nil is called with the size // of offset of the central directory at Close. testHookCloseSizeOffset func(size, offset uint64) } type header struct { *FileHeader offset uint64 raw bool } // NewWriter returns a new [Writer] writing a zip file to w. func NewWriter(w io.Writer) *Writer { return &Writer{cw: &countWriter{w: bufio.NewWriter(w)}} } // SetOffset sets the offset of the beginning of the zip data within the // underlying writer. It should be used when the zip data is appended to an // existing file, such as a binary executable. // It must be called before any data is written. func (w *Writer) SetOffset(n int64) { if w.cw.count != 0 { panic("zip: SetOffset called after data was written") } w.cw.count = n } // Flush flushes any buffered data to the underlying writer. // Calling Flush is not normally necessary; calling Close is sufficient. func (w *Writer) Flush() error { return w.cw.w.(*bufio.Writer).Flush() } // SetComment sets the end-of-central-directory comment field. // It can only be called before [Writer.Close]. func (w *Writer) SetComment(comment string) error { if len(comment) > uint16max { return errors.New("zip: Writer.Comment too long") } w.comment = comment return nil } // Close finishes writing the zip file by writing the central directory. // It does not close the underlying writer. func (w *Writer) Close() error { if w.last != nil && !w.last.closed { if err := w.last.close(); err != nil { return err } w.last = nil } if w.closed { return errors.New("zip: writer closed twice") } w.closed = true // write central directory start := w.cw.count for _, h := range w.dir { var buf [directoryHeaderLen]byte b := writeBuf(buf[:]) b.uint32(uint32(directoryHeaderSignature)) b.uint16(h.CreatorVersion) b.uint16(h.ReaderVersion) b.uint16(h.Flags) b.uint16(h.Method) b.uint16(h.ModifiedTime) b.uint16(h.ModifiedDate) b.uint32(h.CRC32) if h.isZip64() || h.offset >= uint32max { // the file needs a zip64 header. store maxint in both // 32 bit size fields (and offset later) to signal that the // zip64 extra header should be used. b.uint32(uint32max) // compressed size b.uint32(uint32max) // uncompressed size // append a zip64 extra block to Extra var buf [28]byte // 2x uint16 + 3x uint64 eb := writeBuf(buf[:]) eb.uint16(zip64ExtraID) eb.uint16(24) // size = 3x uint64 eb.uint64(h.UncompressedSize64) eb.uint64(h.CompressedSize64) eb.uint64(h.offset) h.Extra = append(h.Extra, buf[:]...) } else { b.uint32(h.CompressedSize) b.uint32(h.UncompressedSize) } b.uint16(uint16(len(h.Name))) b.uint16(uint16(len(h.Extra))) b.uint16(uint16(len(h.Comment))) b = b[4:] // skip disk number start and internal file attr (2x uint16) b.uint32(h.ExternalAttrs) if h.offset > uint32max { b.uint32(uint32max) } else { b.uint32(uint32(h.offset)) } if _, err := w.cw.Write(buf[:]); err != nil { return err } if _, err := io.WriteString(w.cw, h.Name); err != nil { return err } if _, err := w.cw.Write(h.Extra); err != nil { return err } if _, err := io.WriteString(w.cw, h.Comment); err != nil { return err } } end := w.cw.count records := uint64(len(w.dir)) size := uint64(end - start) offset := uint64(start) if f := w.testHookCloseSizeOffset; f != nil { f(size, offset) } if records >= uint16max || size >= uint32max || offset >= uint32max { var buf [directory64EndLen + directory64LocLen]byte b := writeBuf(buf[:]) // zip64 end of central directory record b.uint32(directory64EndSignature) b.uint64(directory64EndLen - 12) // length minus signature (uint32) and length fields (uint64) b.uint16(zipVersion45) // version made by b.uint16(zipVersion45) // version needed to extract b.uint32(0) // number of this disk b.uint32(0) // number of the disk with the start of the central directory b.uint64(records) // total number of entries in the central directory on this disk b.uint64(records) // total number of entries in the central directory b.uint64(size) // size of the central directory b.uint64(offset) // offset of start of central directory with respect to the starting disk number // zip64 end of central directory locator b.uint32(directory64LocSignature) b.uint32(0) // number of the disk with the start of the zip64 end of central directory b.uint64(uint64(end)) // relative offset of the zip64 end of central directory record b.uint32(1) // total number of disks if _, err := w.cw.Write(buf[:]); err != nil { return err } // store max values in the regular end record to signal // that the zip64 values should be used instead records = uint16max size = uint32max offset = uint32max } // write end record var buf [directoryEndLen]byte b := writeBuf(buf[:]) b.uint32(uint32(directoryEndSignature)) b = b[4:] // skip over disk number and first disk number (2x uint16) b.uint16(uint16(records)) // number of entries this disk b.uint16(uint16(records)) // number of entries total b.uint32(uint32(size)) // size of directory b.uint32(uint32(offset)) // start of directory b.uint16(uint16(len(w.comment))) // byte size of EOCD comment if _, err := w.cw.Write(buf[:]); err != nil { return err } if _, err := io.WriteString(w.cw, w.comment); err != nil { return err } return w.cw.w.(*bufio.Writer).Flush() } // Create adds a file to the zip file using the provided name. // It returns a [Writer] to which the file contents should be written. // The file contents will be compressed using the [Deflate] method. // The name must be a relative path: it must not start with a drive // letter (e.g. C:) or leading slash, and only forward slashes are // allowed. To create a directory instead of a file, add a trailing // slash to the name. Duplicate names will not overwrite previous entries // and are appended to the zip file. // The file's contents must be written to the [io.Writer] before the next // call to [Writer.Create], [Writer.CreateHeader], or [Writer.Close]. func (w *Writer) Create(name string) (io.Writer, error) { header := &FileHeader{ Name: name, Method: Deflate, } return w.CreateHeader(header) } // detectUTF8 reports whether s is a valid UTF-8 string, and whether the string // must be considered UTF-8 encoding (i.e., not compatible with CP-437, ASCII, // or any other common encoding). func detectUTF8(s string) (valid, require bool) { for i := 0; i < len(s); { r, size := utf8.DecodeRuneInString(s[i:]) i += size // Officially, ZIP uses CP-437, but many readers use the system's // local character encoding. Most encoding are compatible with a large // subset of CP-437, which itself is ASCII-like. // // Forbid 0x7e and 0x5c since EUC-KR and Shift-JIS replace those // characters with localized currency and overline characters. if r < 0x20 || r > 0x7d || r == 0x5c { if !utf8.ValidRune(r) || (r == utf8.RuneError && size == 1) { return false, false } require = true } } return true, require } // prepare performs the bookkeeping operations required at the start of // CreateHeader and CreateRaw. func (w *Writer) prepare(fh *FileHeader) error { if w.last != nil && !w.last.closed { if err := w.last.close(); err != nil { return err } } if len(w.dir) > 0 && w.dir[len(w.dir)-1].FileHeader == fh { // See https://golang.org/issue/11144 confusion. return errors.New("archive/zip: invalid duplicate FileHeader") } return nil } // CreateHeader adds a file to the zip archive using the provided [FileHeader] // for the file metadata. [Writer] takes ownership of fh and may mutate // its fields. The caller must not modify fh after calling [Writer.CreateHeader]. // // This returns a [Writer] to which the file contents should be written. // The file's contents must be written to the io.Writer before the next // call to [Writer.Create], [Writer.CreateHeader], [Writer.CreateRaw], or [Writer.Close]. func (w *Writer) CreateHeader(fh *FileHeader) (io.Writer, error) { if err := w.prepare(fh); err != nil { return nil, err } // The ZIP format has a sad state of affairs regarding character encoding. // Officially, the name and comment fields are supposed to be encoded // in CP-437 (which is mostly compatible with ASCII), unless the UTF-8 // flag bit is set. However, there are several problems: // // * Many ZIP readers still do not support UTF-8. // * If the UTF-8 flag is cleared, several readers simply interpret the // name and comment fields as whatever the local system encoding is. // // In order to avoid breaking readers without UTF-8 support, // we avoid setting the UTF-8 flag if the strings are CP-437 compatible. // However, if the strings require multibyte UTF-8 encoding and is a // valid UTF-8 string, then we set the UTF-8 bit. // // For the case, where the user explicitly wants to specify the encoding // as UTF-8, they will need to set the flag bit themselves. utf8Valid1, utf8Require1 := detectUTF8(fh.Name) utf8Valid2, utf8Require2 := detectUTF8(fh.Comment) switch { case fh.NonUTF8: fh.Flags &^= 0x800 case (utf8Require1 || utf8Require2) && (utf8Valid1 && utf8Valid2): fh.Flags |= 0x800 } fh.CreatorVersion = fh.CreatorVersion&0xff00 | zipVersion20 // preserve compatibility byte fh.ReaderVersion = zipVersion20 // If Modified is set, this takes precedence over MS-DOS timestamp fields. if !fh.Modified.IsZero() { // Contrary to the FileHeader.SetModTime method, we intentionally // do not convert to UTC, because we assume the user intends to encode // the date using the specified timezone. A user may want this control // because many legacy ZIP readers interpret the timestamp according // to the local timezone. // // The timezone is only non-UTC if a user directly sets the Modified // field directly themselves. All other approaches sets UTC. fh.ModifiedDate, fh.ModifiedTime = timeToMsDosTime(fh.Modified) // Use "extended timestamp" format since this is what Info-ZIP uses. // Nearly every major ZIP implementation uses a different format, // but at least most seem to be able to understand the other formats. // // This format happens to be identical for both local and central header // if modification time is the only timestamp being encoded. var mbuf [9]byte // 2*SizeOf(uint16) + SizeOf(uint8) + SizeOf(uint32) mt := uint32(fh.Modified.Unix()) eb := writeBuf(mbuf[:]) eb.uint16(extTimeExtraID) eb.uint16(5) // Size: SizeOf(uint8) + SizeOf(uint32) eb.uint8(1) // Flags: ModTime eb.uint32(mt) // ModTime fh.Extra = append(fh.Extra, mbuf[:]...) } var ( ow io.Writer fw *fileWriter ) h := &header{ FileHeader: fh, offset: uint64(w.cw.count), } if strings.HasSuffix(fh.Name, "/") { // Set the compression method to Store to ensure data length is truly zero, // which the writeHeader method always encodes for the size fields. // This is necessary as most compression formats have non-zero lengths // even when compressing an empty string. fh.Method = Store fh.Flags &^= 0x8 // we will not write a data descriptor // Explicitly clear sizes as they have no meaning for directories. fh.CompressedSize = 0 fh.CompressedSize64 = 0 fh.UncompressedSize = 0 fh.UncompressedSize64 = 0 ow = dirWriter{} } else { fh.Flags |= 0x8 // we will write a data descriptor fw = &fileWriter{ zipw: w.cw, compCount: &countWriter{w: w.cw}, crc32: crc32.NewIEEE(), } comp := w.compressor(fh.Method) if comp == nil { return nil, ErrAlgorithm } var err error fw.comp, err = comp(fw.compCount) if err != nil { return nil, err } fw.rawCount = &countWriter{w: fw.comp} fw.header = h ow = fw } w.dir = append(w.dir, h) if err := writeHeader(w.cw, h); err != nil { return nil, err } // If we're creating a directory, fw is nil. w.last = fw return ow, nil } func writeHeader(w io.Writer, h *header) error { const maxUint16 = 1<<16 - 1 if len(h.Name) > maxUint16 { return errLongName } if len(h.Extra) > maxUint16 { return errLongExtra } var buf [fileHeaderLen]byte b := writeBuf(buf[:]) b.uint32(uint32(fileHeaderSignature)) b.uint16(h.ReaderVersion) b.uint16(h.Flags) b.uint16(h.Method) b.uint16(h.ModifiedTime) b.uint16(h.ModifiedDate) // In raw mode (caller does the compression), the values are either // written here or in the trailing data descriptor based on the header // flags. if h.raw && !h.hasDataDescriptor() { b.uint32(h.CRC32) b.uint32(uint32(min(h.CompressedSize64, uint32max))) b.uint32(uint32(min(h.UncompressedSize64, uint32max))) } else { // When this package handle the compression, these values are // always written to the trailing data descriptor. b.uint32(0) // crc32 b.uint32(0) // compressed size b.uint32(0) // uncompressed size } b.uint16(uint16(len(h.Name))) b.uint16(uint16(len(h.Extra))) if _, err := w.Write(buf[:]); err != nil { return err } if _, err := io.WriteString(w, h.Name); err != nil { return err } _, err := w.Write(h.Extra) return err } // CreateRaw adds a file to the zip archive using the provided [FileHeader] and // returns a [Writer] to which the file contents should be written. The file's // contents must be written to the io.Writer before the next call to [Writer.Create], // [Writer.CreateHeader], [Writer.CreateRaw], or [Writer.Close]. // // In contrast to [Writer.CreateHeader], the bytes passed to Writer are not compressed. // // CreateRaw's argument is stored in w. If the argument is a pointer to the embedded // [FileHeader] in a [File] obtained from a [Reader] created from in-memory data, // then w will refer to all of that memory. func (w *Writer) CreateRaw(fh *FileHeader) (io.Writer, error) { if err := w.prepare(fh); err != nil { return nil, err } fh.CompressedSize = uint32(min(fh.CompressedSize64, uint32max)) fh.UncompressedSize = uint32(min(fh.UncompressedSize64, uint32max)) h := &header{ FileHeader: fh, offset: uint64(w.cw.count), raw: true, } w.dir = append(w.dir, h) if err := writeHeader(w.cw, h); err != nil { return nil, err } if strings.HasSuffix(fh.Name, "/") { w.last = nil return dirWriter{}, nil } fw := &fileWriter{ header: h, zipw: w.cw, } w.last = fw return fw, nil } // Copy copies the file f (obtained from a [Reader]) into w. It copies the raw // form directly bypassing decompression, compression, and validation. func (w *Writer) Copy(f *File) error { r, err := f.OpenRaw() if err != nil { return err } // Copy the FileHeader so w doesn't store a pointer to the data // of f's entire archive. See #65499. fh := f.FileHeader fw, err := w.CreateRaw(&fh) if err != nil { return err } _, err = io.Copy(fw, r) return err } // RegisterCompressor registers or overrides a custom compressor for a specific // method ID. If a compressor for a given method is not found, [Writer] will // default to looking up the compressor at the package level. func (w *Writer) RegisterCompressor(method uint16, comp Compressor) { if w.compressors == nil { w.compressors = make(map[uint16]Compressor) } w.compressors[method] = comp } // AddFS adds the files from fs.FS to the archive. // It walks the directory tree starting at the root of the filesystem // adding each file to the zip using deflate while maintaining the directory structure. func (w *Writer) AddFS(fsys fs.FS) error { return fs.WalkDir(fsys, ".", func(name string, d fs.DirEntry, err error) error { if err != nil { return err } if name == "." { return nil } info, err := d.Info() if err != nil { return err } if !d.IsDir() && !info.Mode().IsRegular() { return errors.New("zip: cannot add non-regular file") } h, err := FileInfoHeader(info) if err != nil { return err } h.Name = name h.Method = Deflate fw, err := w.CreateHeader(h) if err != nil { return err } if d.IsDir() { return nil } f, err := fsys.Open(name) if err != nil { return err } defer f.Close() _, err = io.Copy(fw, f) return err }) } func (w *Writer) compressor(method uint16) Compressor { comp := w.compressors[method] if comp == nil { comp = compressor(method) } return comp } type dirWriter struct{} func (dirWriter) Write(b []byte) (int, error) { if len(b) == 0 { return 0, nil } return 0, errors.New("zip: write to directory") } type fileWriter struct { *header zipw io.Writer rawCount *countWriter comp io.WriteCloser compCount *countWriter crc32 hash.Hash32 closed bool } func (w *fileWriter) Write(p []byte) (int, error) { if w.closed { return 0, errors.New("zip: write to closed file") } if w.raw { return w.zipw.Write(p) } w.crc32.Write(p) return w.rawCount.Write(p) } func (w *fileWriter) close() error { if w.closed { return errors.New("zip: file closed twice") } w.closed = true if w.raw { return w.writeDataDescriptor() } if err := w.comp.Close(); err != nil { return err } // update FileHeader fh := w.header.FileHeader fh.CRC32 = w.crc32.Sum32() fh.CompressedSize64 = uint64(w.compCount.count) fh.UncompressedSize64 = uint64(w.rawCount.count) if fh.isZip64() { fh.CompressedSize = uint32max fh.UncompressedSize = uint32max fh.ReaderVersion = zipVersion45 // requires 4.5 - File uses ZIP64 format extensions } else { fh.CompressedSize = uint32(fh.CompressedSize64) fh.UncompressedSize = uint32(fh.UncompressedSize64) } return w.writeDataDescriptor() } func (w *fileWriter) writeDataDescriptor() error { if !w.hasDataDescriptor() { return nil } // Write data descriptor. This is more complicated than one would // think, see e.g. comments in zipfile.c:putextended() and // https://bugs.openjdk.org/browse/JDK-7073588. // The approach here is to write 8 byte sizes if needed without // adding a zip64 extra in the local header (too late anyway). var buf []byte if w.isZip64() { buf = make([]byte, dataDescriptor64Len) } else { buf = make([]byte, dataDescriptorLen) } b := writeBuf(buf) b.uint32(dataDescriptorSignature) // de-facto standard, required by OS X b.uint32(w.CRC32) if w.isZip64() { b.uint64(w.CompressedSize64) b.uint64(w.UncompressedSize64) } else { b.uint32(w.CompressedSize) b.uint32(w.UncompressedSize) } _, err := w.zipw.Write(buf) return err } type countWriter struct { w io.Writer count int64 } func (w *countWriter) Write(p []byte) (int, error) { n, err := w.w.Write(p) w.count += int64(n) return n, err } type nopCloser struct { io.Writer } func (w nopCloser) Close() error { return nil } type writeBuf []byte func (b *writeBuf) uint8(v uint8) { (*b)[0] = v *b = (*b)[1:] } func (b *writeBuf) uint16(v uint16) { binary.LittleEndian.PutUint16(*b, v) *b = (*b)[2:] } func (b *writeBuf) uint32(v uint32) { binary.LittleEndian.PutUint32(*b, v) *b = (*b)[4:] } func (b *writeBuf) uint64(v uint64) { binary.LittleEndian.PutUint64(*b, v) *b = (*b)[8:] }