// Copyright 2015, Joe Tsai. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.

package brotli

const (
	// RFC section 3.5.
	// This is the maximum bit-width of a prefix code.
	// Thus, it is okay to use uint32 to store codes.
	maxPrefixBits = 15

	// RFC section 3.3.
	// The size of the alphabet for various prefix codes.
	// Names starting with "max" are upper bounds for alphabets whose actual
	// size depends on parameters read from the stream.
	numLitSyms        = 256                  // Literal symbols
	maxNumDistSyms    = 16 + 120 + (48 << 3) // Distance symbols: 16 short codes + max NDIRECT + (48 << max NPOSTFIX)
	numIaCSyms        = 704                  // Insert-and-copy length symbols
	numBlkCntSyms     = 26                   // Block count symbols
	maxNumBlkTypeSyms = 256 + 2              // Block type symbols: max NBLTYPES + 2
	maxNumCtxMapSyms  = 256 + 16             // Context map symbols: max NTREES + max RLEMAX

	// This should be the max of each of the constants above.
	// initPrefixLUTs panics at start-up if it is not.
	maxNumAlphabetSyms = numIaCSyms
)

var (
	// RFC section 3.4.
	// Prefix code lengths for simple codes.
	// The numeric suffix is the number of symbols in the simple code; the
	// four-symbol case has two possible length assignments (a and b).
	simpleLens1  = [1]uint{0}
	simpleLens2  = [2]uint{1, 1}
	simpleLens3  = [3]uint{1, 2, 2}
	simpleLens4a = [4]uint{2, 2, 2, 2}
	simpleLens4b = [4]uint{1, 2, 3, 3}

	// RFC section 3.5.
	// Code-length symbols of a complex prefix code, listed in the order in
	// which their own code lengths appear in the stream.
	complexLens = [18]uint{
		1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15,
	}
)

// rangeCode describes one entry of a range lookup table: a value is formed
// by adding an integer of the given bit-width to the base offset.
type rangeCode struct {
	base uint32 // Starting base offset of the range
	bits uint32 // Bit-width of a subsequent integer to add to base offset
}

var (
|
|
// RFC section 5.
|
|
// LUT to convert an insert symbol to an actual insert length.
|
|
insLenRanges []rangeCode
|
|
|
|
// RFC section 5.
|
|
// LUT to convert an copy symbol to an actual copy length.
|
|
cpyLenRanges []rangeCode
|
|
|
|
// RFC section 6.
|
|
// LUT to convert an block-type length symbol to an actual length.
|
|
blkLenRanges []rangeCode
|
|
|
|
// RFC section 7.3.
|
|
// LUT to convert RLE symbol to an actual repeat length.
|
|
maxRLERanges []rangeCode
|
|
)
|
|
|
|
// prefixCode associates a symbol with the bit pattern that encodes it.
type prefixCode struct {
	sym uint32 // The symbol being mapped
	val uint32 // Value of the prefix code (must be in [0, 1<<len), i.e. fit in len bits)
	len uint32 // Bit length of the prefix code
}

var (
|
|
// RFC section 3.5.
|
|
// Prefix codecs for code lengths in complex prefix definition.
|
|
codeCLens []prefixCode
|
|
decCLens prefixDecoder
|
|
encCLens prefixEncoder
|
|
|
|
// RFC section 7.3.
|
|
// Prefix codecs for RLEMAX in context map definition.
|
|
codeMaxRLE []prefixCode
|
|
decMaxRLE prefixDecoder
|
|
encMaxRLE prefixEncoder
|
|
|
|
// RFC section 9.1.
|
|
// Prefix codecs for WBITS in stream header definition.
|
|
codeWinBits []prefixCode
|
|
decWinBits prefixDecoder
|
|
encWinBits prefixEncoder
|
|
|
|
// RFC section 9.2.
|
|
// Prefix codecs used for size fields in meta-block header definition.
|
|
codeCounts []prefixCode
|
|
decCounts prefixDecoder
|
|
encCounts prefixEncoder
|
|
)
|
|
|
|
var (
|
|
// RFC section 5.
|
|
// Table to convert insert-and-copy symbols to insert and copy lengths.
|
|
iacLUT [numIaCSyms]struct{ ins, cpy rangeCode }
|
|
|
|
// RFC section 4.
|
|
// Table to help convert short-codes (first 16 symbols) to distances using
|
|
// the ring buffer of past distances.
|
|
distShortLUT [16]struct{ index, delta int }
|
|
|
|
// RFC section 4.
|
|
// Table to help convert long-codes to distances. This is two dimensional
|
|
// slice keyed by the NPOSTFIX and the normalized distance symbol.
|
|
distLongLUT [4][]rangeCode
|
|
)
|
|
|
|
func initPrefixLUTs() {
|
|
// Sanity check some constants.
|
|
for _, numMax := range []uint{
|
|
numLitSyms, maxNumDistSyms, numIaCSyms, numBlkCntSyms, maxNumBlkTypeSyms, maxNumCtxMapSyms,
|
|
} {
|
|
if numMax > maxNumAlphabetSyms {
|
|
panic("maximum alphabet size is not updated")
|
|
}
|
|
}
|
|
if maxNumAlphabetSyms >= 1<<prefixSymbolBits {
|
|
panic("maximum alphabet size is too large to represent")
|
|
}
|
|
if maxPrefixBits >= 1<<prefixCountBits {
|
|
panic("maximum prefix bit-length is too large to represent")
|
|
}
|
|
|
|
initPrefixRangeLUTs()
|
|
initPrefixCodeLUTs()
|
|
initLengthLUTs()
|
|
}
|
|
|
|
func initPrefixRangeLUTs() {
|
|
var makeRanges = func(base uint, bits []uint) (rc []rangeCode) {
|
|
for _, nb := range bits {
|
|
rc = append(rc, rangeCode{base: uint32(base), bits: uint32(nb)})
|
|
base += 1 << nb
|
|
}
|
|
return rc
|
|
}
|
|
|
|
insLenRanges = makeRanges(0, []uint{
|
|
0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 12, 14, 24,
|
|
}) // RFC section 5
|
|
cpyLenRanges = makeRanges(2, []uint{
|
|
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 24,
|
|
}) // RFC section 5
|
|
blkLenRanges = makeRanges(1, []uint{
|
|
2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 11, 12, 13, 24,
|
|
}) // RFC section 6
|
|
maxRLERanges = makeRanges(2, []uint{
|
|
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
|
|
}) // RFC section 7.3
|
|
}
|
|
|
|
func initPrefixCodeLUTs() {
|
|
// Prefix code for reading code lengths in RFC section 3.5.
|
|
codeCLens = nil
|
|
for sym, clen := range []uint{2, 4, 3, 2, 2, 4} {
|
|
var code = prefixCode{sym: uint32(sym), len: uint32(clen)}
|
|
codeCLens = append(codeCLens, code)
|
|
}
|
|
decCLens.Init(codeCLens, true)
|
|
encCLens.Init(codeCLens)
|
|
|
|
// Prefix code for reading RLEMAX in RFC section 7.3.
|
|
codeMaxRLE = []prefixCode{{sym: 0, val: 0, len: 1}}
|
|
for i := uint32(0); i < 16; i++ {
|
|
var code = prefixCode{sym: i + 1, val: i<<1 | 1, len: 5}
|
|
codeMaxRLE = append(codeMaxRLE, code)
|
|
}
|
|
decMaxRLE.Init(codeMaxRLE, false)
|
|
encMaxRLE.Init(codeMaxRLE)
|
|
|
|
// Prefix code for reading WBITS in RFC section 9.1.
|
|
codeWinBits = nil
|
|
for i := uint32(9); i <= 24; i++ {
|
|
var code prefixCode
|
|
switch {
|
|
case i == 16:
|
|
code = prefixCode{sym: i, val: (i-16)<<0 | 0, len: 1} // Symbols: 16
|
|
case i > 17:
|
|
code = prefixCode{sym: i, val: (i-17)<<1 | 1, len: 4} // Symbols: 18..24
|
|
case i < 17:
|
|
code = prefixCode{sym: i, val: (i-8)<<4 | 1, len: 7} // Symbols: 9..15
|
|
default:
|
|
code = prefixCode{sym: i, val: (i-17)<<4 | 1, len: 7} // Symbols: 17
|
|
}
|
|
codeWinBits = append(codeWinBits, code)
|
|
}
|
|
codeWinBits[0].sym = 0 // Invalid code "1000100" to use symbol zero
|
|
decWinBits.Init(codeWinBits, false)
|
|
encWinBits.Init(codeWinBits)
|
|
|
|
// Prefix code for reading counts in RFC section 9.2.
|
|
// This is used for: NBLTYPESL, NBLTYPESI, NBLTYPESD, NTREESL, and NTREESD.
|
|
codeCounts = []prefixCode{{sym: 1, val: 0, len: 1}}
|
|
var code = codeCounts[len(codeCounts)-1]
|
|
for i := uint32(0); i < 8; i++ {
|
|
for j := uint32(0); j < 1<<i; j++ {
|
|
code.sym = code.sym + 1
|
|
code.val = j<<4 | i<<1 | 1
|
|
code.len = uint32(i + 4)
|
|
codeCounts = append(codeCounts, code)
|
|
}
|
|
}
|
|
decCounts.Init(codeCounts, false)
|
|
encCounts.Init(codeCounts)
|
|
}
|
|
|
|
func initLengthLUTs() {
|
|
// RFC section 5.
|
|
// The insert-and-copy length symbol is converted into an insert length
|
|
// and a copy length. Thus, create a table to precompute the result for
|
|
// all input symbols.
|
|
for iacSym := range iacLUT {
|
|
var insSym, cpySym int
|
|
switch iacSym / 64 {
|
|
case 0, 2: // 0..63 and 128..191
|
|
insSym, cpySym = 0, 0
|
|
case 1, 3: // 64..127 and 192..255
|
|
insSym, cpySym = 0, 8
|
|
case 4: // 256..319
|
|
insSym, cpySym = 8, 0
|
|
case 5: // 320..383
|
|
insSym, cpySym = 8, 8
|
|
case 6: // 384..447
|
|
insSym, cpySym = 0, 16
|
|
case 7: // 448..511
|
|
insSym, cpySym = 16, 0
|
|
case 8: // 512..575
|
|
insSym, cpySym = 8, 16
|
|
case 9: // 576..639
|
|
insSym, cpySym = 16, 8
|
|
case 10: // 640..703
|
|
insSym, cpySym = 16, 16
|
|
}
|
|
|
|
r64 := iacSym % 64
|
|
insSym += r64 >> 3 // Lower 3 bits
|
|
cpySym += r64 & 0x07 // Upper 3 bits
|
|
|
|
iacLUT[iacSym].ins = insLenRanges[insSym]
|
|
iacLUT[iacSym].cpy = cpyLenRanges[cpySym]
|
|
}
|
|
|
|
// RFC section 4.
|
|
// The first 16 symbols modify a previously seen symbol. Thus, we can create
|
|
// a table to determine which distance to use and how much to modify it by.
|
|
for distSym := range distShortLUT {
|
|
var index, delta int
|
|
switch {
|
|
case distSym < 4:
|
|
index, delta = distSym, 0
|
|
case distSym < 10:
|
|
index, delta = 0, int(distSym/2-1)
|
|
case distSym < 16:
|
|
index, delta = 1, int(distSym/2-4)
|
|
}
|
|
if distSym%2 == 0 {
|
|
delta *= -1
|
|
}
|
|
distShortLUT[distSym].index = index
|
|
distShortLUT[distSym].delta = delta
|
|
}
|
|
|
|
// RFC section 4.
|
|
// Longer distances are computed according the equation in the RFC.
|
|
// To reduce computation during runtime, we precompute as much of the output
|
|
// as possible. Thus, we compute the final distance using the following:
|
|
// rec := distLongLUT[NPOSTFIX][distSym - (16+NDIRECT)]
|
|
// distance := NDIRECT + rec.base + ReadBits(rec.bits)<<NPOSTFIX
|
|
for npostfix := range distLongLUT {
|
|
numDistSyms := 48 << uint(npostfix)
|
|
distLongLUT[npostfix] = make([]rangeCode, numDistSyms)
|
|
for distSym := range distLongLUT[npostfix] {
|
|
postfixMask := 1<<uint(npostfix) - 1
|
|
hcode := distSym >> uint(npostfix)
|
|
lcode := distSym & postfixMask
|
|
nbits := 1 + distSym>>uint(npostfix+1)
|
|
offset := ((2 + (hcode & 1)) << uint(nbits)) - 4
|
|
distLongLUT[npostfix][distSym] = rangeCode{
|
|
base: uint32(offset<<uint(npostfix) + lcode + 1),
|
|
bits: uint32(nbits),
|
|
}
|
|
}
|
|
}
|
|
}
|