Source file
src/compress/flate/huffman_bit_writer.go
1
2
3
4
5 package flate
6
7 import (
8 "io"
9 )
10
11 const (
12
13 offsetCodeCount = 30
14
15
16 endBlockMarker = 256
17
18
19 lengthCodesStart = 257
20
21
22 codegenCodeCount = 19
23 badCode = 255
24
25
26
27
28
29 bufferFlushSize = 240
30
31
32
33
34 bufferSize = bufferFlushSize + 8
35 )
36
37
38 var lengthExtraBits = []int8{
39 0, 0, 0,
40 0, 0, 0, 0, 0, 1, 1, 1, 1, 2,
41 2, 2, 2, 3, 3, 3, 3, 4, 4, 4,
42 4, 5, 5, 5, 5, 0,
43 }
44
45
46 var lengthBase = []uint32{
47 0, 1, 2, 3, 4, 5, 6, 7, 8, 10,
48 12, 14, 16, 20, 24, 28, 32, 40, 48, 56,
49 64, 80, 96, 112, 128, 160, 192, 224, 255,
50 }
51
52
53 var offsetExtraBits = []int8{
54 0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
55 4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
56 9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
57 }
58
59 var offsetBase = []uint32{
60 0x000000, 0x000001, 0x000002, 0x000003, 0x000004,
61 0x000006, 0x000008, 0x00000c, 0x000010, 0x000018,
62 0x000020, 0x000030, 0x000040, 0x000060, 0x000080,
63 0x0000c0, 0x000100, 0x000180, 0x000200, 0x000300,
64 0x000400, 0x000600, 0x000800, 0x000c00, 0x001000,
65 0x001800, 0x002000, 0x003000, 0x004000, 0x006000,
66 }
67
68
69 var codegenOrder = []uint32{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
70
71 type huffmanBitWriter struct {
72
73
74
75 writer io.Writer
76
77
78
79
80 bits uint64
81 nbits uint
82 bytes [bufferSize]byte
83 codegenFreq [codegenCodeCount]int32
84 nbytes int
85 literalFreq []int32
86 offsetFreq []int32
87 codegen []uint8
88 literalEncoding *huffmanEncoder
89 offsetEncoding *huffmanEncoder
90 codegenEncoding *huffmanEncoder
91 err error
92 }
93
94 func newHuffmanBitWriter(w io.Writer) *huffmanBitWriter {
95 return &huffmanBitWriter{
96 writer: w,
97 literalFreq: make([]int32, maxNumLit),
98 offsetFreq: make([]int32, offsetCodeCount),
99 codegen: make([]uint8, maxNumLit+offsetCodeCount+1),
100 literalEncoding: newHuffmanEncoder(maxNumLit),
101 codegenEncoding: newHuffmanEncoder(codegenCodeCount),
102 offsetEncoding: newHuffmanEncoder(offsetCodeCount),
103 }
104 }
105
106 func (w *huffmanBitWriter) reset(writer io.Writer) {
107 w.writer = writer
108 w.bits, w.nbits, w.nbytes, w.err = 0, 0, 0, nil
109 }
110
111 func (w *huffmanBitWriter) flush() {
112 if w.err != nil {
113 w.nbits = 0
114 return
115 }
116 n := w.nbytes
117 for w.nbits != 0 {
118 w.bytes[n] = byte(w.bits)
119 w.bits >>= 8
120 if w.nbits > 8 {
121 w.nbits -= 8
122 } else {
123 w.nbits = 0
124 }
125 n++
126 }
127 w.bits = 0
128 w.write(w.bytes[:n])
129 w.nbytes = 0
130 }
131
132 func (w *huffmanBitWriter) write(b []byte) {
133 if w.err != nil {
134 return
135 }
136 _, w.err = w.writer.Write(b)
137 }
138
139 func (w *huffmanBitWriter) writeBits(b int32, nb uint) {
140 if w.err != nil {
141 return
142 }
143 w.bits |= uint64(b) << w.nbits
144 w.nbits += nb
145 if w.nbits >= 48 {
146 bits := w.bits
147 w.bits >>= 48
148 w.nbits -= 48
149 n := w.nbytes
150 bytes := w.bytes[n : n+6]
151 bytes[0] = byte(bits)
152 bytes[1] = byte(bits >> 8)
153 bytes[2] = byte(bits >> 16)
154 bytes[3] = byte(bits >> 24)
155 bytes[4] = byte(bits >> 32)
156 bytes[5] = byte(bits >> 40)
157 n += 6
158 if n >= bufferFlushSize {
159 w.write(w.bytes[:n])
160 n = 0
161 }
162 w.nbytes = n
163 }
164 }
165
166 func (w *huffmanBitWriter) writeBytes(bytes []byte) {
167 if w.err != nil {
168 return
169 }
170 n := w.nbytes
171 if w.nbits&7 != 0 {
172 w.err = InternalError("writeBytes with unfinished bits")
173 return
174 }
175 for w.nbits != 0 {
176 w.bytes[n] = byte(w.bits)
177 w.bits >>= 8
178 w.nbits -= 8
179 n++
180 }
181 if n != 0 {
182 w.write(w.bytes[:n])
183 }
184 w.nbytes = 0
185 w.write(bytes)
186 }
187
188
189
190
191
192
193
194
195
196
197
198
199
200 func (w *huffmanBitWriter) generateCodegen(numLiterals int, numOffsets int, litEnc, offEnc *huffmanEncoder) {
201 clear(w.codegenFreq[:])
202
203
204
205
206 codegen := w.codegen
207
208 cgnl := codegen[:numLiterals]
209 for i := range cgnl {
210 cgnl[i] = uint8(litEnc.codes[i].len)
211 }
212
213 cgnl = codegen[numLiterals : numLiterals+numOffsets]
214 for i := range cgnl {
215 cgnl[i] = uint8(offEnc.codes[i].len)
216 }
217 codegen[numLiterals+numOffsets] = badCode
218
219 size := codegen[0]
220 count := 1
221 outIndex := 0
222 for inIndex := 1; size != badCode; inIndex++ {
223
224
225 nextSize := codegen[inIndex]
226 if nextSize == size {
227 count++
228 continue
229 }
230
231 if size != 0 {
232 codegen[outIndex] = size
233 outIndex++
234 w.codegenFreq[size]++
235 count--
236 for count >= 3 {
237 n := 6
238 if n > count {
239 n = count
240 }
241 codegen[outIndex] = 16
242 outIndex++
243 codegen[outIndex] = uint8(n - 3)
244 outIndex++
245 w.codegenFreq[16]++
246 count -= n
247 }
248 } else {
249 for count >= 11 {
250 n := 138
251 if n > count {
252 n = count
253 }
254 codegen[outIndex] = 18
255 outIndex++
256 codegen[outIndex] = uint8(n - 11)
257 outIndex++
258 w.codegenFreq[18]++
259 count -= n
260 }
261 if count >= 3 {
262
263 codegen[outIndex] = 17
264 outIndex++
265 codegen[outIndex] = uint8(count - 3)
266 outIndex++
267 w.codegenFreq[17]++
268 count = 0
269 }
270 }
271 count--
272 for ; count >= 0; count-- {
273 codegen[outIndex] = size
274 outIndex++
275 w.codegenFreq[size]++
276 }
277
278 size = nextSize
279 count = 1
280 }
281
282 codegen[outIndex] = badCode
283 }
284
285
286 func (w *huffmanBitWriter) dynamicSize(litEnc, offEnc *huffmanEncoder, extraBits int) (size, numCodegens int) {
287 numCodegens = len(w.codegenFreq)
288 for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 {
289 numCodegens--
290 }
291 header := 3 + 5 + 5 + 4 + (3 * numCodegens) +
292 w.codegenEncoding.bitLength(w.codegenFreq[:]) +
293 int(w.codegenFreq[16])*2 +
294 int(w.codegenFreq[17])*3 +
295 int(w.codegenFreq[18])*7
296 size = header +
297 litEnc.bitLength(w.literalFreq) +
298 offEnc.bitLength(w.offsetFreq) +
299 extraBits
300
301 return size, numCodegens
302 }
303
304
305 func (w *huffmanBitWriter) fixedSize(extraBits int) int {
306 return 3 +
307 fixedLiteralEncoding.bitLength(w.literalFreq) +
308 fixedOffsetEncoding.bitLength(w.offsetFreq) +
309 extraBits
310 }
311
312
313
314
315 func (w *huffmanBitWriter) storedSize(in []byte) (int, bool) {
316 if in == nil {
317 return 0, false
318 }
319 if len(in) <= maxStoreBlockSize {
320 return (len(in) + 5) * 8, true
321 }
322 return 0, false
323 }
324
325 func (w *huffmanBitWriter) writeCode(c hcode) {
326 if w.err != nil {
327 return
328 }
329 w.bits |= uint64(c.code) << w.nbits
330 w.nbits += uint(c.len)
331 if w.nbits >= 48 {
332 bits := w.bits
333 w.bits >>= 48
334 w.nbits -= 48
335 n := w.nbytes
336 bytes := w.bytes[n : n+6]
337 bytes[0] = byte(bits)
338 bytes[1] = byte(bits >> 8)
339 bytes[2] = byte(bits >> 16)
340 bytes[3] = byte(bits >> 24)
341 bytes[4] = byte(bits >> 32)
342 bytes[5] = byte(bits >> 40)
343 n += 6
344 if n >= bufferFlushSize {
345 w.write(w.bytes[:n])
346 n = 0
347 }
348 w.nbytes = n
349 }
350 }
351
352
353
354
355
356
357 func (w *huffmanBitWriter) writeDynamicHeader(numLiterals int, numOffsets int, numCodegens int, isEof bool) {
358 if w.err != nil {
359 return
360 }
361 var firstBits int32 = 4
362 if isEof {
363 firstBits = 5
364 }
365 w.writeBits(firstBits, 3)
366 w.writeBits(int32(numLiterals-257), 5)
367 w.writeBits(int32(numOffsets-1), 5)
368 w.writeBits(int32(numCodegens-4), 4)
369
370 for i := 0; i < numCodegens; i++ {
371 value := uint(w.codegenEncoding.codes[codegenOrder[i]].len)
372 w.writeBits(int32(value), 3)
373 }
374
375 i := 0
376 for {
377 var codeWord int = int(w.codegen[i])
378 i++
379 if codeWord == badCode {
380 break
381 }
382 w.writeCode(w.codegenEncoding.codes[uint32(codeWord)])
383
384 switch codeWord {
385 case 16:
386 w.writeBits(int32(w.codegen[i]), 2)
387 i++
388 case 17:
389 w.writeBits(int32(w.codegen[i]), 3)
390 i++
391 case 18:
392 w.writeBits(int32(w.codegen[i]), 7)
393 i++
394 }
395 }
396 }
397
398 func (w *huffmanBitWriter) writeStoredHeader(length int, isEof bool) {
399 if w.err != nil {
400 return
401 }
402 var flag int32
403 if isEof {
404 flag = 1
405 }
406 w.writeBits(flag, 3)
407 w.flush()
408 w.writeBits(int32(length), 16)
409 w.writeBits(int32(^uint16(length)), 16)
410 }
411
412 func (w *huffmanBitWriter) writeFixedHeader(isEof bool) {
413 if w.err != nil {
414 return
415 }
416
417 var value int32 = 2
418 if isEof {
419 value = 3
420 }
421 w.writeBits(value, 3)
422 }
423
424
425
426
427
428
429 func (w *huffmanBitWriter) writeBlock(tokens []token, eof bool, input []byte) {
430 if w.err != nil {
431 return
432 }
433
434 tokens = append(tokens, endBlockMarker)
435 numLiterals, numOffsets := w.indexTokens(tokens)
436
437 var extraBits int
438 storedSize, storable := w.storedSize(input)
439 if storable {
440
441
442
443
444 for lengthCode := lengthCodesStart + 8; lengthCode < numLiterals; lengthCode++ {
445
446 extraBits += int(w.literalFreq[lengthCode]) * int(lengthExtraBits[lengthCode-lengthCodesStart])
447 }
448 for offsetCode := 4; offsetCode < numOffsets; offsetCode++ {
449
450 extraBits += int(w.offsetFreq[offsetCode]) * int(offsetExtraBits[offsetCode])
451 }
452 }
453
454
455
456 var literalEncoding = fixedLiteralEncoding
457 var offsetEncoding = fixedOffsetEncoding
458 var size = w.fixedSize(extraBits)
459
460
461 var numCodegens int
462
463
464
465 w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding)
466 w.codegenEncoding.generate(w.codegenFreq[:], 7)
467 dynamicSize, numCodegens := w.dynamicSize(w.literalEncoding, w.offsetEncoding, extraBits)
468
469 if dynamicSize < size {
470 size = dynamicSize
471 literalEncoding = w.literalEncoding
472 offsetEncoding = w.offsetEncoding
473 }
474
475
476 if storable && storedSize < size {
477 w.writeStoredHeader(len(input), eof)
478 w.writeBytes(input)
479 return
480 }
481
482
483 if literalEncoding == fixedLiteralEncoding {
484 w.writeFixedHeader(eof)
485 } else {
486 w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
487 }
488
489
490 w.writeTokens(tokens, literalEncoding.codes, offsetEncoding.codes)
491 }
492
493
494
495
496
497
498 func (w *huffmanBitWriter) writeBlockDynamic(tokens []token, eof bool, input []byte) {
499 if w.err != nil {
500 return
501 }
502
503 tokens = append(tokens, endBlockMarker)
504 numLiterals, numOffsets := w.indexTokens(tokens)
505
506
507
508 w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding)
509 w.codegenEncoding.generate(w.codegenFreq[:], 7)
510 size, numCodegens := w.dynamicSize(w.literalEncoding, w.offsetEncoding, 0)
511
512
513 if ssize, storable := w.storedSize(input); storable && ssize < (size+size>>4) {
514 w.writeStoredHeader(len(input), eof)
515 w.writeBytes(input)
516 return
517 }
518
519
520 w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
521
522
523 w.writeTokens(tokens, w.literalEncoding.codes, w.offsetEncoding.codes)
524 }
525
526
527
528
529
530 func (w *huffmanBitWriter) indexTokens(tokens []token) (numLiterals, numOffsets int) {
531 clear(w.literalFreq)
532 clear(w.offsetFreq)
533
534 for _, t := range tokens {
535 if t < matchType {
536 w.literalFreq[t.literal()]++
537 continue
538 }
539 length := t.length()
540 offset := t.offset()
541 w.literalFreq[lengthCodesStart+lengthCode(length)]++
542 w.offsetFreq[offsetCode(offset)]++
543 }
544
545
546 numLiterals = len(w.literalFreq)
547 for w.literalFreq[numLiterals-1] == 0 {
548 numLiterals--
549 }
550
551 numOffsets = len(w.offsetFreq)
552 for numOffsets > 0 && w.offsetFreq[numOffsets-1] == 0 {
553 numOffsets--
554 }
555 if numOffsets == 0 {
556
557
558 w.offsetFreq[0] = 1
559 numOffsets = 1
560 }
561 w.literalEncoding.generate(w.literalFreq, 15)
562 w.offsetEncoding.generate(w.offsetFreq, 15)
563 return
564 }
565
566
567
568 func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode) {
569 if w.err != nil {
570 return
571 }
572 for _, t := range tokens {
573 if t < matchType {
574 w.writeCode(leCodes[t.literal()])
575 continue
576 }
577
578 length := t.length()
579 lengthCode := lengthCode(length)
580 w.writeCode(leCodes[lengthCode+lengthCodesStart])
581 extraLengthBits := uint(lengthExtraBits[lengthCode])
582 if extraLengthBits > 0 {
583 extraLength := int32(length - lengthBase[lengthCode])
584 w.writeBits(extraLength, extraLengthBits)
585 }
586
587 offset := t.offset()
588 offsetCode := offsetCode(offset)
589 w.writeCode(oeCodes[offsetCode])
590 extraOffsetBits := uint(offsetExtraBits[offsetCode])
591 if extraOffsetBits > 0 {
592 extraOffset := int32(offset - offsetBase[offsetCode])
593 w.writeBits(extraOffset, extraOffsetBits)
594 }
595 }
596 }
597
598
599
600 var huffOffset *huffmanEncoder
601
602 func init() {
603 offsetFreq := make([]int32, offsetCodeCount)
604 offsetFreq[0] = 1
605 huffOffset = newHuffmanEncoder(offsetCodeCount)
606 huffOffset.generate(offsetFreq, 15)
607 }
608
609
610
611
612 func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte) {
613 if w.err != nil {
614 return
615 }
616
617
618 clear(w.literalFreq)
619
620
621 histogram(input, w.literalFreq)
622
623 w.literalFreq[endBlockMarker] = 1
624
625 const numLiterals = endBlockMarker + 1
626 w.offsetFreq[0] = 1
627 const numOffsets = 1
628
629 w.literalEncoding.generate(w.literalFreq, 15)
630
631
632
633 var numCodegens int
634
635
636
637 w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, huffOffset)
638 w.codegenEncoding.generate(w.codegenFreq[:], 7)
639 size, numCodegens := w.dynamicSize(w.literalEncoding, huffOffset, 0)
640
641
642 if ssize, storable := w.storedSize(input); storable && ssize < (size+size>>4) {
643 w.writeStoredHeader(len(input), eof)
644 w.writeBytes(input)
645 return
646 }
647
648
649 w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
650 encoding := w.literalEncoding.codes[:257]
651 n := w.nbytes
652 for _, t := range input {
653
654 c := encoding[t]
655 w.bits |= uint64(c.code) << w.nbits
656 w.nbits += uint(c.len)
657 if w.nbits < 48 {
658 continue
659 }
660
661 bits := w.bits
662 w.bits >>= 48
663 w.nbits -= 48
664 bytes := w.bytes[n : n+6]
665 bytes[0] = byte(bits)
666 bytes[1] = byte(bits >> 8)
667 bytes[2] = byte(bits >> 16)
668 bytes[3] = byte(bits >> 24)
669 bytes[4] = byte(bits >> 32)
670 bytes[5] = byte(bits >> 40)
671 n += 6
672 if n < bufferFlushSize {
673 continue
674 }
675 w.write(w.bytes[:n])
676 if w.err != nil {
677 return
678 }
679 n = 0
680 }
681 w.nbytes = n
682 w.writeCode(encoding[endBlockMarker])
683 }
684
685
686
687
688 func histogram(b []byte, h []int32) {
689 h = h[:256]
690 for _, t := range b {
691 h[t]++
692 }
693 }
694
View as plain text