1
2
3
4
5
6
7
8 package flate
9
10 import (
11 "bufio"
12 "io"
13 "math/bits"
14 "strconv"
15 "sync"
16 )
17
const (
	// maxCodeLen sizes the per-length count and nextcode tables built in
	// huffmanDecoder.init, so every code length handed to init must be
	// strictly smaller than this value.
	maxCodeLen = 16

	// maxNumLit and maxNumDist are the largest literal/length and distance
	// alphabet sizes that readHuffman accepts from a dynamic block header.
	// Their sum sizes the decompressor.bits scratch array, and both are also
	// used by huffmanBlock as the first invalid symbol value.
	maxNumLit  = 286
	maxNumDist = 30

	// numCodes sizes the decompressor.codebits scratch array; it matches the
	// 19 entries of codeOrder.
	numCodes = 19
)
27
28
29 var fixedOnce sync.Once
30 var fixedHuffmanDecoder huffmanDecoder
31
32
// A CorruptInputError reports corrupt input. Its value is the input offset
// that was recorded when the corruption was detected.
type CorruptInputError int64

// Error renders the offset in decimal after the package's fixed
// corrupt-input prefix.
func (e CorruptInputError) Error() string {
	offset := strconv.FormatInt(int64(e), 10)
	return "flate: corrupt input before offset " + offset
}
38
39
// An InternalError reports a condition the decoder itself considers
// impossible; the string value describes it.
type InternalError string

// Error prefixes the description with the package's internal-error tag.
func (e InternalError) Error() string {
	const prefix = "flate: internal error: "
	return prefix + string(e)
}
43
44
45
46
// A ReadError describes a failure while reading input: the offset at which
// it happened and the underlying error.
type ReadError struct {
	Offset int64 // offset reported in the message
	Err    error // underlying error; its text is appended to the message
}

// Error formats the offset and the underlying error's text. Err must be
// non-nil, since its Error method is called unconditionally.
func (e *ReadError) Error() string {
	where := strconv.FormatInt(e.Offset, 10)
	return "flate: read error at offset " + where + ": " + e.Err.Error()
}
55
56
57
58
// A WriteError describes a failure while writing output: the offset at which
// it happened and the underlying error.
type WriteError struct {
	Offset int64 // offset reported in the message
	Err    error // underlying error; its text is appended to the message
}

// Error formats the offset and the underlying error's text. Err must be
// non-nil, since its Error method is called unconditionally.
func (e *WriteError) Error() string {
	where := strconv.FormatInt(e.Offset, 10)
	return "flate: write error at offset " + where + ": " + e.Err.Error()
}
67
68
69
70
// Resetter is implemented by readers whose state can be reset so that they
// decode a different input stream. The decompressor type in this file
// provides a Reset method with this signature.
type Resetter interface {
	// Reset discards the receiver's state and makes it read from r, with
	// dict supplied as the initial dictionary contents.
	Reset(r io.Reader, dict []byte) error
}
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
// Layout of the decoding tables used by huffmanDecoder.
//
// Each table entry ("chunk") is a uint32 holding value<<huffmanValueShift |
// count, where count is the code length in bits. A count greater than
// huffmanChunkBits marks an indirect entry: its value is then an index into
// huffmanDecoder.links rather than a decoded symbol (see init and huffSym).
const (
	huffmanChunkBits  = 9                     // input bits used to index the primary chunks table
	huffmanNumChunks  = 1 << huffmanChunkBits // number of entries in the primary chunks table
	huffmanCountMask  = 15                    // mask selecting the count (code length) bits of a chunk
	huffmanValueShift = 4                     // shift selecting the value bits of a chunk
)
103
// huffmanDecoder holds the lookup tables for one Huffman code, as built by
// init and consumed by decompressor.huffSym.
type huffmanDecoder struct {
	min      int                      // smallest non-zero code length; huffSym fetches at least this many bits first
	chunks   [huffmanNumChunks]uint32 // primary table, indexed by the low huffmanChunkBits bits of the bit buffer
	links    [][]uint32               // secondary tables for codes longer than huffmanChunkBits
	linkMask uint32                   // mask applied to the bits above huffmanChunkBits to index a links table
}
110
111
112
113
114
115
// init builds the decoding tables for the code described by lengths, where
// lengths[i] is the code length in bits of symbol i and 0 means the symbol
// is unused. Every length must be smaller than maxCodeLen.
//
// It reports false when the lengths do not form a complete code (with one
// exception for a single 1-bit code, see below). An all-zero lengths slice
// is accepted and leaves the tables empty; huffSym reports corrupt input if
// such a decoder is later used.
func (h *huffmanDecoder) init(lengths []int) bool {
	// sanity enables the consistency checks (and their panics) below. It is
	// a compile-time constant, so with false the checks are dead code.
	const sanity = false

	// A non-zero min means h was initialized before: start from a clean
	// decoder so stale table entries cannot leak into the new code.
	if h.min != 0 {
		*h = huffmanDecoder{}
	}

	// Count the number of codes of each length and find the shortest and
	// longest non-zero lengths.
	var count [maxCodeLen]int
	var min, max int
	for _, n := range lengths {
		if n == 0 {
			continue
		}
		if min == 0 || n < min {
			min = n
		}
		if n > max {
			max = n
		}
		count[n]++
	}

	// No symbol has a code: nothing to build. h.min stays 0 and every chunk
	// stays 0, which huffSym treats as corrupt input.
	if max == 0 {
		return true
	}

	// Assign the first code value of each length: codes of one length are
	// consecutive, and the next length starts at twice the value reached.
	code := 0
	var nextcode [maxCodeLen]int
	for i := min; i <= max; i++ {
		code <<= 1
		nextcode[i] = code
		code += count[i]
	}

	// The code is complete when all 1<<max bit patterns of the longest
	// length have been assigned. The only incomplete code accepted is the
	// degenerate one consisting of a single 1-bit code.
	if code != 1<<uint(max) && !(code == 1 && max == 1) {
		return false
	}

	h.min = min
	if max > huffmanChunkBits {
		// Codes longer than huffmanChunkBits need secondary tables, each
		// indexed by the max-huffmanChunkBits bits that follow the prefix.
		numLinks := 1 << (uint(max) - huffmanChunkBits)
		h.linkMask = uint32(numLinks - 1)

		// link is the first huffmanChunkBits-bit prefix belonging to a
		// longer code; every prefix from there up gets its own link table.
		link := nextcode[huffmanChunkBits+1] >> 1
		h.links = make([][]uint32, huffmanNumChunks-link)
		for j := uint(link); j < huffmanNumChunks; j++ {
			// Table indices are bit-reversed codes, so reverse the prefix
			// and keep its huffmanChunkBits significant bits.
			reverse := int(bits.Reverse16(uint16(j)))
			reverse >>= uint(16 - huffmanChunkBits)
			off := j - uint(link)
			if sanity && h.chunks[reverse] != 0 {
				panic("impossible: overwriting existing chunk")
			}
			// Indirect entry: the count huffmanChunkBits+1 flags it, the
			// value is the index of the link table to consult.
			h.chunks[reverse] = uint32(off<<huffmanValueShift | (huffmanChunkBits + 1))
			h.links[off] = make([]uint32, numLinks)
		}
	}

	// Fill in the table entries for every used symbol.
	for i, n := range lengths {
		if n == 0 {
			continue
		}
		code := nextcode[n]
		nextcode[n]++
		chunk := uint32(i<<huffmanValueShift | n)
		reverse := int(bits.Reverse16(uint16(code)))
		reverse >>= uint(16 - n)
		if n <= huffmanChunkBits {
			// A short code matches every index whose low n bits equal the
			// reversed code, so replicate the entry with stride 1<<n.
			for off := reverse; off < len(h.chunks); off += 1 << uint(n) {
				if sanity && h.chunks[off] != 0 {
					panic("impossible: overwriting existing chunk")
				}
				h.chunks[off] = chunk
			}
		} else {
			// A long code: locate its link table through the indirect
			// entry for the low huffmanChunkBits bits, then replicate the
			// entry across that table using the remaining bits.
			j := reverse & (huffmanNumChunks - 1)
			if sanity && h.chunks[j]&huffmanCountMask != huffmanChunkBits+1 {
				panic("impossible: not an indirect chunk")
			}
			value := h.chunks[j] >> huffmanValueShift
			linktab := h.links[value]
			reverse >>= huffmanChunkBits
			for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) {
				if sanity && linktab[off] != 0 {
					panic("impossible: overwriting existing chunk")
				}
				linktab[off] = chunk
			}
		}
	}

	if sanity {
		// Every table entry must have been assigned above.
		for i, chunk := range h.chunks {
			if chunk == 0 {
				// In the degenerate single-code case (code == 1) only
				// the even indices are filled, so odd ones may be empty.
				if code == 1 && i%2 == 1 {
					continue
				}
				panic("impossible: missing chunk")
			}
		}
		for _, linktab := range h.links {
			for _, chunk := range linktab {
				if chunk == 0 {
					panic("impossible: missing chunk")
				}
			}
		}
	}

	return true
}
257
258
259
260
// Reader is the input interface the decompressor reads from: bulk reads via
// io.Reader plus single-byte reads via io.ByteReader. An io.Reader that does
// not also provide ReadByte is wrapped in a bufio.Reader by makeReader.
type Reader interface {
	io.Reader
	io.ByteReader
}
265
266
// decompressor holds the complete state of one decoding stream. Decoding is
// driven by Read, which repeatedly calls step until output is available or
// an error is recorded.
type decompressor struct {
	// Input source.
	r       Reader        // where compressed bytes are read from
	rBuf    *bufio.Reader // buffer created by makeReader when the caller's reader lacks ReadByte; nil otherwise
	roffset int64         // number of bytes consumed from r so far; used as the offset in CorruptInputError

	// Bit buffer: bytes are shifted in above the nb bits already held in b.
	b  uint32
	nb uint

	// Huffman decoders built by readHuffman: h1 for literal/length symbols,
	// h2 for distances. h1 is also used temporarily for the code-length code.
	h1, h2 huffmanDecoder

	// Scratch arrays for code lengths, allocated once per decompressor and
	// kept across Reset.
	bits     *[maxNumLit + maxNumDist]int
	codebits *[numCodes]int

	// Output history; dictDecoder is defined elsewhere in the package.
	dict dictDecoder

	// Scratch space for the 4-byte header read by dataBlock.
	buf [4]byte

	// State carried between calls to step, so decoding can stop when
	// output is ready and resume later.
	step      func(*decompressor) // next decoding step for Read to run
	stepState int                 // resume point inside huffmanBlock
	final     bool                // whether the current block has its final bit set
	err       error               // first error encountered; io.EOF after the final block
	toRead    []byte              // decoded bytes not yet handed to the caller of Read
	hl, hd    *huffmanDecoder     // decoders for the current block; hd is nil for fixed-table blocks
	copyLen   int                 // bytes still to copy for the pending match or stored block
	copyDist  int                 // distance of the pending match
}
301
302 func (f *decompressor) nextBlock() {
303 for f.nb < 1+2 {
304 if f.err = f.moreBits(); f.err != nil {
305 return
306 }
307 }
308 f.final = f.b&1 == 1
309 f.b >>= 1
310 typ := f.b & 3
311 f.b >>= 2
312 f.nb -= 1 + 2
313 switch typ {
314 case 0:
315 f.dataBlock()
316 case 1:
317
318 f.hl = &fixedHuffmanDecoder
319 f.hd = nil
320 f.huffmanBlock()
321 case 2:
322
323 if f.err = f.readHuffman(); f.err != nil {
324 break
325 }
326 f.hl = &f.h1
327 f.hd = &f.h2
328 f.huffmanBlock()
329 default:
330
331 f.err = CorruptInputError(f.roffset)
332 }
333 }
334
335 func (f *decompressor) Read(b []byte) (int, error) {
336 for {
337 if len(f.toRead) > 0 {
338 n := copy(b, f.toRead)
339 f.toRead = f.toRead[n:]
340 if len(f.toRead) == 0 {
341 return n, f.err
342 }
343 return n, nil
344 }
345 if f.err != nil {
346 return 0, f.err
347 }
348 f.step(f)
349 if f.err != nil && len(f.toRead) == 0 {
350 f.toRead = f.dict.readFlush()
351 }
352 }
353 }
354
355 func (f *decompressor) Close() error {
356 if f.err == io.EOF {
357 return nil
358 }
359 return f.err
360 }
361
362
363
364
// codeOrder gives, for the i-th code length read from a dynamic block
// header, the index in decompressor.codebits where readHuffman stores it.
var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
366
// readHuffman parses the header of a dynamic-table block and builds the
// literal/length decoder in f.h1 and the distance decoder in f.h2.
//
// It returns a CorruptInputError for header values or code lengths that are
// out of range or do not form valid codes, and passes through any error from
// reading input.
func (f *decompressor) readHuffman() error {
	// Three counts packed as 5 + 5 + 4 bits (HLIT, HDIST and HCLEN in
	// RFC 1951 terms).
	for f.nb < 5+5+4 {
		if err := f.moreBits(); err != nil {
			return err
		}
	}
	// Number of literal/length code lengths that follow.
	nlit := int(f.b&0x1F) + 257
	if nlit > maxNumLit {
		return CorruptInputError(f.roffset)
	}
	f.b >>= 5
	// Number of distance code lengths that follow.
	ndist := int(f.b&0x1F) + 1
	if ndist > maxNumDist {
		return CorruptInputError(f.roffset)
	}
	f.b >>= 5
	// Number of code-length code lengths that follow. The 4-bit field makes
	// this at most 15+4 = 19 = len(codeOrder), so no range check is needed.
	nclen := int(f.b&0xF) + 4
	f.b >>= 4
	f.nb -= 5 + 5 + 4

	// Read nclen 3-bit lengths, stored in codeOrder order.
	for i := 0; i < nclen; i++ {
		for f.nb < 3 {
			if err := f.moreBits(); err != nil {
				return err
			}
		}
		f.codebits[codeOrder[i]] = int(f.b & 0x7)
		f.b >>= 3
		f.nb -= 3
	}
	// Lengths not transmitted are zero; clear them explicitly because
	// codebits is reused from block to block.
	for i := nclen; i < len(codeOrder); i++ {
		f.codebits[codeOrder[i]] = 0
	}
	// h1 temporarily decodes the code-length alphabet.
	if !f.h1.init(f.codebits[0:]) {
		return CorruptInputError(f.roffset)
	}

	// Decode nlit+ndist code lengths into f.bits as one continuous run:
	// literal/length lengths first, distance lengths directly after.
	for i, n := 0, nlit+ndist; i < n; {
		x, err := f.huffSym(&f.h1)
		if err != nil {
			return err
		}
		if x < 16 {
			// Symbols 0..15 are the code length itself.
			f.bits[i] = x
			i++
			continue
		}
		// Symbols 16..18 are run-length instructions: repeat value b a
		// base count of rep times plus an nb-bit extra count.
		var rep int
		var nb uint
		var b int
		switch x {
		default:
			return InternalError("unexpected length code")
		case 16:
			// Repeat the previous length; invalid with nothing before it.
			rep = 3
			nb = 2
			if i == 0 {
				return CorruptInputError(f.roffset)
			}
			b = f.bits[i-1]
		case 17:
			// Short run of zeros.
			rep = 3
			nb = 3
			b = 0
		case 18:
			// Long run of zeros.
			rep = 11
			nb = 7
			b = 0
		}
		for f.nb < nb {
			if err := f.moreBits(); err != nil {
				return err
			}
		}
		rep += int(f.b & uint32(1<<nb-1))
		f.b >>= nb
		f.nb -= nb
		// A run may not extend past the last expected length.
		if i+rep > n {
			return CorruptInputError(f.roffset)
		}
		for j := 0; j < rep; j++ {
			f.bits[i] = b
			i++
		}
	}

	// Build the real decoders, replacing the code-length decoder in h1.
	if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) {
		return CorruptInputError(f.roffset)
	}

	// Raise h1.min to the code length recorded for endBlockMarker, so that
	// huffSym fetches at least that many bits per literal/length symbol.
	// NOTE(review): endBlockMarker is defined elsewhere; presumably it is
	// the end-of-block symbol (huffmanBlock ends a block on symbol 256),
	// which would make this safe because each block must still contain that
	// symbol — confirm against its definition.
	if f.h1.min < f.bits[endBlockMarker] {
		f.h1.min = f.bits[endBlockMarker]
	}

	return nil
}
474
475
476
477
478
// huffmanBlock decodes symbols of a Huffman-coded block using f.hl for
// literal/length symbols and f.hd for distances (f.hd == nil selects plain
// 5-bit distance codes).
//
// The function is resumable: whenever output has to be flushed it stores
// itself in f.step together with a resume point in f.stepState and returns,
// and the next call jumps straight back to that point. Errors are recorded
// in f.err.
//
// NOTE(review): stepState is not reset when a block ends (symbol 256), so a
// new block can enter at copyHistory with the previous block's stateDict.
// By then copyLen appears to always be 0, making the copy a no-op — confirm
// that dictDecoder treats a zero-length copy as harmless.
func (f *decompressor) huffmanBlock() {
	const (
		stateInit = iota // resume by reading the next literal/length symbol
		stateDict        // resume the pending history copy
	)

	switch f.stepState {
	case stateInit:
		goto readLiteral
	case stateDict:
		goto copyHistory
	}

readLiteral:
	// Read one literal/length symbol.
	{
		v, err := f.huffSym(f.hl)
		if err != nil {
			f.err = err
			return
		}
		var n uint // number of extra length bits
		var length int
		switch {
		case v < 256:
			// Literal byte: emit it, flushing when the window is full.
			f.dict.writeByte(byte(v))
			if f.dict.availWrite() == 0 {
				f.toRead = f.dict.readFlush()
				f.step = (*decompressor).huffmanBlock
				f.stepState = stateInit
				return
			}
			goto readLiteral
		case v == 256:
			// End of block.
			f.finishBlock()
			return
		// Symbols 257 and up encode a match length: a base value plus n
		// extra bits, with n growing by one every four symbols from 265.
		case v < 265:
			length = v - (257 - 3)
			n = 0
		case v < 269:
			length = v*2 - (265*2 - 11)
			n = 1
		case v < 273:
			length = v*4 - (269*4 - 19)
			n = 2
		case v < 277:
			length = v*8 - (273*8 - 35)
			n = 3
		case v < 281:
			length = v*16 - (277*16 - 67)
			n = 4
		case v < 285:
			length = v*32 - (281*32 - 131)
			n = 5
		case v < maxNumLit:
			// Symbol 285: fixed length, no extra bits.
			length = 258
			n = 0
		default:
			// Symbols at or beyond maxNumLit are invalid.
			f.err = CorruptInputError(f.roffset)
			return
		}
		if n > 0 {
			for f.nb < n {
				if err = f.moreBits(); err != nil {
					f.err = err
					return
				}
			}
			length += int(f.b & uint32(1<<n-1))
			f.b >>= n
			f.nb -= n
		}

		var dist int
		if f.hd == nil {
			// No distance decoder: the distance code is 5 raw bits whose
			// bit order is reversed relative to the bit buffer.
			for f.nb < 5 {
				if err = f.moreBits(); err != nil {
					f.err = err
					return
				}
			}
			dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3)))
			f.b >>= 5
			f.nb -= 5
		} else {
			if dist, err = f.huffSym(f.hd); err != nil {
				f.err = err
				return
			}
		}

		// Turn the distance code into an actual distance.
		switch {
		case dist < 4:
			// Codes 0..3 are distances 1..4.
			dist++
		case dist < maxNumDist:
			nb := uint(dist-2) >> 1
			// The low bit of the code supplies the top extra bit; nb more
			// bits are read from the input.
			extra := (dist & 1) << nb
			for f.nb < nb {
				if err = f.moreBits(); err != nil {
					f.err = err
					return
				}
			}
			extra |= int(f.b & uint32(1<<nb-1))
			f.b >>= nb
			f.nb -= nb
			dist = 1<<(nb+1) + 1 + extra
		default:
			// Distance codes at or beyond maxNumDist are invalid.
			f.err = CorruptInputError(f.roffset)
			return
		}

		// The distance is checked against histSize (presumably the amount
		// of history available — verify in dictDecoder); length is not
		// checked here.
		if dist > f.dict.histSize() {
			f.err = CorruptInputError(f.roffset)
			return
		}

		f.copyLen, f.copyDist = length, dist
		goto copyHistory
	}

copyHistory:
	// Copy copyLen bytes from copyDist bytes back in the history.
	{
		// tryWriteCopy is attempted first; when it reports 0 bytes the
		// copy falls back to writeCopy. Both are dictDecoder helpers
		// defined elsewhere.
		cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen)
		if cnt == 0 {
			cnt = f.dict.writeCopy(f.copyDist, f.copyLen)
		}
		f.copyLen -= cnt

		// Flush and suspend when the window is full or the copy could not
		// be completed; the next step resumes the copy.
		if f.dict.availWrite() == 0 || f.copyLen > 0 {
			f.toRead = f.dict.readFlush()
			f.step = (*decompressor).huffmanBlock
			f.stepState = stateDict
			return
		}
		goto readLiteral
	}
}
621
622
623 func (f *decompressor) dataBlock() {
624
625
626 f.nb = 0
627 f.b = 0
628
629
630 nr, err := io.ReadFull(f.r, f.buf[0:4])
631 f.roffset += int64(nr)
632 if err != nil {
633 f.err = noEOF(err)
634 return
635 }
636 n := int(f.buf[0]) | int(f.buf[1])<<8
637 nn := int(f.buf[2]) | int(f.buf[3])<<8
638 if uint16(nn) != uint16(^n) {
639 f.err = CorruptInputError(f.roffset)
640 return
641 }
642
643 if n == 0 {
644 f.toRead = f.dict.readFlush()
645 f.finishBlock()
646 return
647 }
648
649 f.copyLen = n
650 f.copyData()
651 }
652
653
654
655 func (f *decompressor) copyData() {
656 buf := f.dict.writeSlice()
657 if len(buf) > f.copyLen {
658 buf = buf[:f.copyLen]
659 }
660
661 cnt, err := io.ReadFull(f.r, buf)
662 f.roffset += int64(cnt)
663 f.copyLen -= cnt
664 f.dict.writeMark(cnt)
665 if err != nil {
666 f.err = noEOF(err)
667 return
668 }
669
670 if f.dict.availWrite() == 0 || f.copyLen > 0 {
671 f.toRead = f.dict.readFlush()
672 f.step = (*decompressor).copyData
673 return
674 }
675 f.finishBlock()
676 }
677
678 func (f *decompressor) finishBlock() {
679 if f.final {
680 if f.dict.availRead() > 0 {
681 f.toRead = f.dict.readFlush()
682 }
683 f.err = io.EOF
684 }
685 f.step = (*decompressor).nextBlock
686 }
687
688
// noEOF converts io.EOF into io.ErrUnexpectedEOF and returns every other
// error (including nil) unchanged. It is applied to read errors that occur
// in the middle of a block, where running out of input is not a clean end.
func noEOF(e error) error {
	switch e {
	case io.EOF:
		return io.ErrUnexpectedEOF
	default:
		return e
	}
}
695
696 func (f *decompressor) moreBits() error {
697 c, err := f.r.ReadByte()
698 if err != nil {
699 return noEOF(err)
700 }
701 f.roffset++
702 f.b |= uint32(c) << f.nb
703 f.nb += 8
704 return nil
705 }
706
707
// huffSym decodes and returns the next symbol from the input using h.
//
// It returns a read error (with io.EOF mapped to io.ErrUnexpectedEOF) when
// input runs out, and a CorruptInputError, also stored in f.err, when the
// bits match no code in h, which includes the case of an empty decoder.
func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) {
	// Start by requiring only h.min bits: asking for more up front could
	// read input beyond what the symbol actually needs.
	n := uint(h.min)
	// Work on local copies of the bit buffer and write them back to f on
	// every return path; the byte fetch below mirrors moreBits inline.
	nb, b := f.nb, f.b
	for {
		// Make sure at least n bits are buffered.
		for nb < n {
			c, err := f.r.ReadByte()
			if err != nil {
				f.b = b
				f.nb = nb
				return 0, noEOF(err)
			}
			f.roffset++
			b |= uint32(c) << (nb & 31)
			nb += 8
		}
		// Look up the low huffmanChunkBits bits in the primary table.
		chunk := h.chunks[b&(huffmanNumChunks-1)]
		n = uint(chunk & huffmanCountMask)
		if n > huffmanChunkBits {
			// Indirect entry: the value selects a link table, which is
			// indexed by the bits above the first huffmanChunkBits.
			chunk = h.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&h.linkMask]
			n = uint(chunk & huffmanCountMask)
		}
		// n is now the length of the matched code. If more bits are needed
		// than are buffered, loop to fetch them and look up again.
		if n <= nb {
			if n == 0 {
				// An empty table entry: these bits match no code.
				f.b = b
				f.nb = nb
				f.err = CorruptInputError(f.roffset)
				return 0, f.err
			}
			// Consume the code's bits and return its symbol value.
			f.b = b >> (n & 31)
			f.nb = nb - n
			return int(chunk >> huffmanValueShift), nil
		}
	}
}
749
750 func (f *decompressor) makeReader(r io.Reader) {
751 if rr, ok := r.(Reader); ok {
752 f.rBuf = nil
753 f.r = rr
754 return
755 }
756
757 if f.rBuf != nil {
758 f.rBuf.Reset(r)
759 } else {
760
761 f.rBuf = bufio.NewReader(r)
762 }
763 f.r = f.rBuf
764 }
765
766 func fixedHuffmanDecoderInit() {
767 fixedOnce.Do(func() {
768
769 var bits [288]int
770 for i := 0; i < 144; i++ {
771 bits[i] = 8
772 }
773 for i := 144; i < 256; i++ {
774 bits[i] = 9
775 }
776 for i := 256; i < 280; i++ {
777 bits[i] = 7
778 }
779 for i := 280; i < 288; i++ {
780 bits[i] = 8
781 }
782 fixedHuffmanDecoder.init(bits[:])
783 })
784 }
785
786 func (f *decompressor) Reset(r io.Reader, dict []byte) error {
787 *f = decompressor{
788 rBuf: f.rBuf,
789 bits: f.bits,
790 codebits: f.codebits,
791 dict: f.dict,
792 step: (*decompressor).nextBlock,
793 }
794 f.makeReader(r)
795 f.dict.init(maxMatchOffset, dict)
796 return nil
797 }
798
799
800
801
802
803
804
805
806
807 func NewReader(r io.Reader) io.ReadCloser {
808 fixedHuffmanDecoderInit()
809
810 var f decompressor
811 f.makeReader(r)
812 f.bits = new([maxNumLit + maxNumDist]int)
813 f.codebits = new([numCodes]int)
814 f.step = (*decompressor).nextBlock
815 f.dict.init(maxMatchOffset, nil)
816 return &f
817 }
818
819
820
821
822
823
824
825
826 func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser {
827 fixedHuffmanDecoderInit()
828
829 var f decompressor
830 f.makeReader(r)
831 f.bits = new([maxNumLit + maxNumDist]int)
832 f.codebits = new([numCodes]int)
833 f.step = (*decompressor).nextBlock
834 f.dict.init(maxMatchOffset, dict)
835 return &f
836 }
837
View as plain text