0
Fork 0
mirror of https://codeberg.org/forgejo/forgejo.git synced 2024-12-25 00:44:15 -05:00
forgejo/vendor/github.com/xi2/xz/dec_lzma2.go
PhilippHomann 684b7a999f
Dump: add output format tar and output to stdout (#10376)
* Dump: Use mholt/archive/v3 to support tar including many compressions

Signed-off-by: Philipp Homann <homann.philipp@googlemail.com>

* Dump: Allow dump output to stdout

Signed-off-by: Philipp Homann <homann.philipp@googlemail.com>

* Dump: Fixed bug present since #6677 where SessionConfig.Provider is never "file"

Signed-off-by: Philipp Homann <homann.philipp@googlemail.com>

* Dump: never pack RepoRootPath, LFS.ContentPath and LogRootPath when they are below AppDataPath

Signed-off-by: Philipp Homann <homann.philipp@googlemail.com>

* Dump: also dump LFS (fixes #10058)

Signed-off-by: Philipp Homann <homann.philipp@googlemail.com>

* Dump: never dump CustomPath if CustomPath is a subdir of or equal to AppDataPath (fixes #10365)

Signed-off-by: Philipp Homann <homann.philipp@googlemail.com>

* Use log.Info instead of fmt.Fprintf

Signed-off-by: Philipp Homann <homann.philipp@googlemail.com>

* import ordering

* make fmt

Co-authored-by: zeripath <art27@cantab.net>
Co-authored-by: techknowlogick <techknowlogick@gitea.io>
Co-authored-by: Matti R <matti@mdranta.net>
2020-06-05 16:47:39 -04:00

1235 lines
31 KiB
Go
Vendored

/*
* LZMA2 decoder
*
* Authors: Lasse Collin <lasse.collin@tukaani.org>
* Igor Pavlov <http://7-zip.org/>
*
* Translation to Go: Michael Cross <https://github.com/xi2>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package xz
/* from linux/lib/xz/xz_lzma2.h ***************************************/
/* Range coder constants */
const (
rcShiftBits = 8
rcTopBits = 24
rcTopValue = 1 << rcTopBits
rcBitModelTotalBits = 11
rcBitModelTotal = 1 << rcBitModelTotalBits
rcMoveBits = 5
)
/*
* Maximum number of position states. A position state is the lowest pb
* number of bits of the current uncompressed offset. In some places there
* are different sets of probabilities for different position states.
*/
const posStatesMax = 1 << 4
/*
* lzmaState is used to track which LZMA symbols have occurred most recently
* and in which order. This information is used to predict the next symbol.
*
* Symbols:
* - Literal: One 8-bit byte
* - Match: Repeat a chunk of data at some distance
* - Long repeat: Multi-byte match at a recently seen distance
* - Short repeat: One-byte repeat at a recently seen distance
*
* The symbol names are in from STATE-oldest-older-previous. REP means
* either short or long repeated match, and NONLIT means any non-literal.
*/
type lzmaState int
const (
stateLitLit lzmaState = iota
stateMatchLitLit
stateRepLitLit
stateShortrepLitLit
stateMatchLit
stateRepList
stateShortrepLit
stateLitMatch
stateLitLongrep
stateLitShortrep
stateNonlitMatch
stateNonlitRep
)
/* Total number of states */
const states = 12
/* The lowest 7 states indicate that the previous state was a literal. */
const litStates = 7
/* Indicate that the latest symbol was a literal. */
func lzmaStateLiteral(state *lzmaState) {
switch {
case *state <= stateShortrepLitLit:
*state = stateLitLit
case *state <= stateLitShortrep:
*state -= 3
default:
*state -= 6
}
}
/* Indicate that the latest symbol was a match. */
func lzmaStateMatch(state *lzmaState) {
if *state < litStates {
*state = stateLitMatch
} else {
*state = stateNonlitMatch
}
}
/* Indicate that the latest state was a long repeated match. */
func lzmaStateLongRep(state *lzmaState) {
if *state < litStates {
*state = stateLitLongrep
} else {
*state = stateNonlitRep
}
}
/* Indicate that the latest symbol was a short match. */
func lzmaStateShortRep(state *lzmaState) {
if *state < litStates {
*state = stateLitShortrep
} else {
*state = stateNonlitRep
}
}
/* Test if the previous symbol was a literal. */
func lzmaStateIsLiteral(state lzmaState) bool {
return state < litStates
}
/* Each literal coder is divided in three sections:
* - 0x001-0x0FF: Without match byte
* - 0x101-0x1FF: With match byte; match bit is 0
* - 0x201-0x2FF: With match byte; match bit is 1
*
* Match byte is used when the previous LZMA symbol was something else than
* a literal (that is, it was some kind of match).
*/
const literalCoderSize = 0x300
/* Maximum number of literal coders */
const literalCodersMax = 1 << 4
/* Minimum length of a match is two bytes. */
const matchLenMin = 2
/* Match length is encoded with 4, 5, or 10 bits.
*
* Length Bits
* 2-9 4 = Choice=0 + 3 bits
* 10-17 5 = Choice=1 + Choice2=0 + 3 bits
* 18-273 10 = Choice=1 + Choice2=1 + 8 bits
*/
const (
lenLowBits = 3
lenLowSymbols = 1 << lenLowBits
lenMidBits = 3
lenMidSymbols = 1 << lenMidBits
lenHighBits = 8
lenHighSymbols = 1 << lenHighBits
)
/*
* Different sets of probabilities are used for match distances that have
* very short match length: Lengths of 2, 3, and 4 bytes have a separate
* set of probabilities for each length. The matches with longer length
* use a shared set of probabilities.
*/
const distStates = 4
/*
* Get the index of the appropriate probability array for decoding
* the distance slot.
*/
func lzmaGetDistState(len uint32) uint32 {
if len < distStates+matchLenMin {
return len - matchLenMin
} else {
return distStates - 1
}
}
/*
* The highest two bits of a 32-bit match distance are encoded using six bits.
* This six-bit value is called a distance slot. This way encoding a 32-bit
* value takes 6-36 bits, larger values taking more bits.
*/
const (
distSlotBits = 6
distSlots = 1 << distSlotBits
)
/* Match distances up to 127 are fully encoded using probabilities. Since
* the highest two bits (distance slot) are always encoded using six bits,
* the distances 0-3 don't need any additional bits to encode, since the
* distance slot itself is the same as the actual distance. distModelStart
* indicates the first distance slot where at least one additional bit is
* needed.
*/
const distModelStart = 4
/*
* Match distances greater than 127 are encoded in three pieces:
* - distance slot: the highest two bits
* - direct bits: 2-26 bits below the highest two bits
* - alignment bits: four lowest bits
*
* Direct bits don't use any probabilities.
*
* The distance slot value of 14 is for distances 128-191.
*/
const distModelEnd = 14
/* Distance slots that indicate a distance <= 127. */
const (
fullDistancesBits = distModelEnd / 2
fullDistances = 1 << fullDistancesBits
)
/*
* For match distances greater than 127, only the highest two bits and the
* lowest four bits (alignment) is encoded using probabilities.
*/
const (
alignBits = 4
alignSize = 1 << alignBits
)
/* from linux/lib/xz/xz_dec_lzma2.c ***********************************/
/*
* Range decoder initialization eats the first five bytes of each LZMA chunk.
*/
const rcInitBytes = 5
/*
* Minimum number of usable input buffer to safely decode one LZMA symbol.
* The worst case is that we decode 22 bits using probabilities and 26
* direct bits. This may decode at maximum of 20 bytes of input. However,
* lzmaMain does an extra normalization before returning, thus we
* need to put 21 here.
*/
const lzmaInRequired = 21
/*
* Dictionary (history buffer)
*
* These are always true:
* start <= pos <= full <= end
* pos <= limit <= end
* end == size
* size <= sizeMax
* len(buf) <= size
*/
type dictionary struct {
/* The history buffer */
buf []byte
/* Old position in buf (before decoding more data) */
start uint32
/* Position in buf */
pos uint32
/*
* How full dictionary is. This is used to detect corrupt input that
* would read beyond the beginning of the uncompressed stream.
*/
full uint32
/* Write limit; we don't write to buf[limit] or later bytes. */
limit uint32
/*
* End of the dictionary buffer. This is the same as the
* dictionary size.
*/
end uint32
/*
* Size of the dictionary as specified in Block Header. This is used
* together with "full" to detect corrupt input that would make us
* read beyond the beginning of the uncompressed stream.
*/
size uint32
/* Maximum allowed dictionary size. */
sizeMax uint32
}
/* Range decoder */
type rcDec struct {
rnge uint32
code uint32
/*
* Number of initializing bytes remaining to be read
* by rcReadInit.
*/
initBytesLeft uint32
/*
* Buffer from which we read our input. It can be either
* temp.buf or the caller-provided input buffer.
*/
in []byte
inPos int
inLimit int
}
/* Probabilities for a length decoder. */
type lzmaLenDec struct {
/* Probability of match length being at least 10 */
choice uint16
/* Probability of match length being at least 18 */
choice2 uint16
/* Probabilities for match lengths 2-9 */
low [posStatesMax][lenLowSymbols]uint16
/* Probabilities for match lengths 10-17 */
mid [posStatesMax][lenMidSymbols]uint16
/* Probabilities for match lengths 18-273 */
high [lenHighSymbols]uint16
}
type lzmaDec struct {
/* Distances of latest four matches */
rep0 uint32
rep1 uint32
rep2 uint32
rep3 uint32
/* Types of the most recently seen LZMA symbols */
state lzmaState
/*
* Length of a match. This is updated so that dictRepeat can
* be called again to finish repeating the whole match.
*/
len uint32
/*
* LZMA properties or related bit masks (number of literal
* context bits, a mask derived from the number of literal
* position bits, and a mask derived from the number
* position bits)
*/
lc uint32
literalPosMask uint32
posMask uint32
/* If 1, it's a match. Otherwise it's a single 8-bit literal. */
isMatch [states][posStatesMax]uint16
/* If 1, it's a repeated match. The distance is one of rep0 .. rep3. */
isRep [states]uint16
/*
* If 0, distance of a repeated match is rep0.
* Otherwise check is_rep1.
*/
isRep0 [states]uint16
/*
* If 0, distance of a repeated match is rep1.
* Otherwise check is_rep2.
*/
isRep1 [states]uint16
/* If 0, distance of a repeated match is rep2. Otherwise it is rep3. */
isRep2 [states]uint16
/*
* If 1, the repeated match has length of one byte. Otherwise
* the length is decoded from rep_len_decoder.
*/
isRep0Long [states][posStatesMax]uint16
/*
* Probability tree for the highest two bits of the match
* distance. There is a separate probability tree for match
* lengths of 2 (i.e. MATCH_LEN_MIN), 3, 4, and [5, 273].
*/
distSlot [distStates][distSlots]uint16
/*
* Probility trees for additional bits for match distance
* when the distance is in the range [4, 127].
*/
distSpecial [fullDistances - distModelEnd]uint16
/*
* Probability tree for the lowest four bits of a match
* distance that is equal to or greater than 128.
*/
distAlign [alignSize]uint16
/* Length of a normal match */
matchLenDec lzmaLenDec
/* Length of a repeated match */
repLenDec lzmaLenDec
/* Probabilities of literals */
literal [literalCodersMax][literalCoderSize]uint16
}
// type of lzma2Dec.sequence
type lzma2Seq int
const (
seqControl lzma2Seq = iota
seqUncompressed1
seqUncompressed2
seqCompressed0
seqCompressed1
seqProperties
seqLZMAPrepare
seqLZMARun
seqCopy
)
type lzma2Dec struct {
/* Position in xzDecLZMA2Run. */
sequence lzma2Seq
/* Next position after decoding the compressed size of the chunk. */
nextSequence lzma2Seq
/* Uncompressed size of LZMA chunk (2 MiB at maximum) */
uncompressed int
/*
* Compressed size of LZMA chunk or compressed/uncompressed
* size of uncompressed chunk (64 KiB at maximum)
*/
compressed int
/*
* True if dictionary reset is needed. This is false before
* the first chunk (LZMA or uncompressed).
*/
needDictReset bool
/*
* True if new LZMA properties are needed. This is false
* before the first LZMA chunk.
*/
needProps bool
}
type xzDecLZMA2 struct {
/*
* The order below is important on x86 to reduce code size and
* it shouldn't hurt on other platforms. Everything up to and
* including lzma.pos_mask are in the first 128 bytes on x86-32,
* which allows using smaller instructions to access those
* variables. On x86-64, fewer variables fit into the first 128
* bytes, but this is still the best order without sacrificing
* the readability by splitting the structures.
*/
rc rcDec
dict dictionary
lzma2 lzma2Dec
lzma lzmaDec
/*
* Temporary buffer which holds small number of input bytes between
* decoder calls. See lzma2LZMA for details.
*/
temp struct {
buf []byte // slice buf will be backed by bufArray
bufArray [3 * lzmaInRequired]byte
}
}
/**************
* Dictionary *
**************/
/*
* Reset the dictionary state. When in single-call mode, set up the beginning
* of the dictionary to point to the actual output buffer.
*/
func dictReset(dict *dictionary, b *xzBuf) {
dict.start = 0
dict.pos = 0
dict.limit = 0
dict.full = 0
}
/* Set dictionary write limit */
func dictLimit(dict *dictionary, outMax int) {
if dict.end-dict.pos <= uint32(outMax) {
dict.limit = dict.end
} else {
dict.limit = dict.pos + uint32(outMax)
}
}
/* Return true if at least one byte can be written into the dictionary. */
func dictHasSpace(dict *dictionary) bool {
return dict.pos < dict.limit
}
/*
* Get a byte from the dictionary at the given distance. The distance is
* assumed to valid, or as a special case, zero when the dictionary is
* still empty. This special case is needed for single-call decoding to
* avoid writing a '\x00' to the end of the destination buffer.
*/
func dictGet(dict *dictionary, dist uint32) uint32 {
var offset uint32 = dict.pos - dist - 1
if dist >= dict.pos {
offset += dict.end
}
if dict.full > 0 {
return uint32(dict.buf[offset])
}
return 0
}
/*
* Put one byte into the dictionary. It is assumed that there is space for it.
*/
func dictPut(dict *dictionary, byte byte) {
dict.buf[dict.pos] = byte
dict.pos++
if dict.full < dict.pos {
dict.full = dict.pos
}
}
/*
* Repeat given number of bytes from the given distance. If the distance is
* invalid, false is returned. On success, true is returned and *len is
* updated to indicate how many bytes were left to be repeated.
*/
func dictRepeat(dict *dictionary, len *uint32, dist uint32) bool {
var back uint32
var left uint32
if dist >= dict.full || dist >= dict.size {
return false
}
left = dict.limit - dict.pos
if left > *len {
left = *len
}
*len -= left
back = dict.pos - dist - 1
if dist >= dict.pos {
back += dict.end
}
for {
dict.buf[dict.pos] = dict.buf[back]
dict.pos++
back++
if back == dict.end {
back = 0
}
left--
if !(left > 0) {
break
}
}
if dict.full < dict.pos {
dict.full = dict.pos
}
return true
}
/* Copy uncompressed data as is from input to dictionary and output buffers. */
func dictUncompressed(dict *dictionary, b *xzBuf, left *int) {
var copySize int
for *left > 0 && b.inPos < len(b.in) && b.outPos < len(b.out) {
copySize = len(b.in) - b.inPos
if copySize > len(b.out)-b.outPos {
copySize = len(b.out) - b.outPos
}
if copySize > int(dict.end-dict.pos) {
copySize = int(dict.end - dict.pos)
}
if copySize > *left {
copySize = *left
}
*left -= copySize
copy(dict.buf[dict.pos:], b.in[b.inPos:b.inPos+copySize])
dict.pos += uint32(copySize)
if dict.full < dict.pos {
dict.full = dict.pos
}
if dict.pos == dict.end {
dict.pos = 0
}
copy(b.out[b.outPos:], b.in[b.inPos:b.inPos+copySize])
dict.start = dict.pos
b.outPos += copySize
b.inPos += copySize
}
}
/*
* Flush pending data from dictionary to b.out. It is assumed that there is
* enough space in b.out. This is guaranteed because caller uses dictLimit
* before decoding data into the dictionary.
*/
func dictFlush(dict *dictionary, b *xzBuf) int {
var copySize int = int(dict.pos - dict.start)
if dict.pos == dict.end {
dict.pos = 0
}
copy(b.out[b.outPos:], dict.buf[dict.start:dict.start+uint32(copySize)])
dict.start = dict.pos
b.outPos += copySize
return copySize
}
/*****************
* Range decoder *
*****************/
/* Reset the range decoder. */
func rcReset(rc *rcDec) {
rc.rnge = ^uint32(0)
rc.code = 0
rc.initBytesLeft = rcInitBytes
}
/*
* Read the first five initial bytes into rc->code if they haven't been
* read already. (Yes, the first byte gets completely ignored.)
*/
func rcReadInit(rc *rcDec, b *xzBuf) bool {
for rc.initBytesLeft > 0 {
if b.inPos == len(b.in) {
return false
}
rc.code = rc.code<<8 + uint32(b.in[b.inPos])
b.inPos++
rc.initBytesLeft--
}
return true
}
/* Return true if there may not be enough input for the next decoding loop. */
func rcLimitExceeded(rc *rcDec) bool {
return rc.inPos > rc.inLimit
}
/*
* Return true if it is possible (from point of view of range decoder) that
* we have reached the end of the LZMA chunk.
*/
func rcIsFinished(rc *rcDec) bool {
return rc.code == 0
}
/* Read the next input byte if needed. */
func rcNormalize(rc *rcDec) {
if rc.rnge < rcTopValue {
rc.rnge <<= rcShiftBits
rc.code = rc.code<<rcShiftBits + uint32(rc.in[rc.inPos])
rc.inPos++
}
}
/* Decode one bit. */
func rcBit(rc *rcDec, prob *uint16) bool {
var bound uint32
var bit bool
rcNormalize(rc)
bound = (rc.rnge >> rcBitModelTotalBits) * uint32(*prob)
if rc.code < bound {
rc.rnge = bound
*prob += (rcBitModelTotal - *prob) >> rcMoveBits
bit = false
} else {
rc.rnge -= bound
rc.code -= bound
*prob -= *prob >> rcMoveBits
bit = true
}
return bit
}
/* Decode a bittree starting from the most significant bit. */
func rcBittree(rc *rcDec, probs []uint16, limit uint32) uint32 {
var symbol uint32 = 1
for {
if rcBit(rc, &probs[symbol-1]) {
symbol = symbol<<1 + 1
} else {
symbol <<= 1
}
if !(symbol < limit) {
break
}
}
return symbol
}
/* Decode a bittree starting from the least significant bit. */
func rcBittreeReverse(rc *rcDec, probs []uint16, dest *uint32, limit uint32) {
var symbol uint32 = 1
var i uint32 = 0
for {
if rcBit(rc, &probs[symbol-1]) {
symbol = symbol<<1 + 1
*dest += 1 << i
} else {
symbol <<= 1
}
i++
if !(i < limit) {
break
}
}
}
/* Decode direct bits (fixed fifty-fifty probability) */
func rcDirect(rc *rcDec, dest *uint32, limit uint32) {
var mask uint32
for {
rcNormalize(rc)
rc.rnge >>= 1
rc.code -= rc.rnge
mask = 0 - rc.code>>31
rc.code += rc.rnge & mask
*dest = *dest<<1 + mask + 1
limit--
if !(limit > 0) {
break
}
}
}
/********
* LZMA *
********/
/* Get pointer to literal coder probability array. */
func lzmaLiteralProbs(s *xzDecLZMA2) []uint16 {
var prevByte uint32 = dictGet(&s.dict, 0)
var low uint32 = prevByte >> (8 - s.lzma.lc)
var high uint32 = (s.dict.pos & s.lzma.literalPosMask) << s.lzma.lc
return s.lzma.literal[low+high][:]
}
/* Decode a literal (one 8-bit byte) */
func lzmaLiteral(s *xzDecLZMA2) {
var probs []uint16
var symbol uint32
var matchByte uint32
var matchBit uint32
var offset uint32
var i uint32
probs = lzmaLiteralProbs(s)
if lzmaStateIsLiteral(s.lzma.state) {
symbol = rcBittree(&s.rc, probs[1:], 0x100)
} else {
symbol = 1
matchByte = dictGet(&s.dict, s.lzma.rep0) << 1
offset = 0x100
for {
matchBit = matchByte & offset
matchByte <<= 1
i = offset + matchBit + symbol
if rcBit(&s.rc, &probs[i]) {
symbol = symbol<<1 + 1
offset &= matchBit
} else {
symbol <<= 1
offset &= ^matchBit
}
if !(symbol < 0x100) {
break
}
}
}
dictPut(&s.dict, byte(symbol))
lzmaStateLiteral(&s.lzma.state)
}
/* Decode the length of the match into s.lzma.len. */
func lzmaLen(s *xzDecLZMA2, l *lzmaLenDec, posState uint32) {
var probs []uint16
var limit uint32
switch {
case !rcBit(&s.rc, &l.choice):
probs = l.low[posState][:]
limit = lenLowSymbols
s.lzma.len = matchLenMin
case !rcBit(&s.rc, &l.choice2):
probs = l.mid[posState][:]
limit = lenMidSymbols
s.lzma.len = matchLenMin + lenLowSymbols
default:
probs = l.high[:]
limit = lenHighSymbols
s.lzma.len = matchLenMin + lenLowSymbols + lenMidSymbols
}
s.lzma.len += rcBittree(&s.rc, probs[1:], limit) - limit
}
/* Decode a match. The distance will be stored in s.lzma.rep0. */
func lzmaMatch(s *xzDecLZMA2, posState uint32) {
var probs []uint16
var distSlot uint32
var limit uint32
lzmaStateMatch(&s.lzma.state)
s.lzma.rep3 = s.lzma.rep2
s.lzma.rep2 = s.lzma.rep1
s.lzma.rep1 = s.lzma.rep0
lzmaLen(s, &s.lzma.matchLenDec, posState)
probs = s.lzma.distSlot[lzmaGetDistState(s.lzma.len)][:]
distSlot = rcBittree(&s.rc, probs[1:], distSlots) - distSlots
if distSlot < distModelStart {
s.lzma.rep0 = distSlot
} else {
limit = distSlot>>1 - 1
s.lzma.rep0 = 2 + distSlot&1
if distSlot < distModelEnd {
s.lzma.rep0 <<= limit
probs = s.lzma.distSpecial[s.lzma.rep0-distSlot:]
rcBittreeReverse(&s.rc, probs, &s.lzma.rep0, limit)
} else {
rcDirect(&s.rc, &s.lzma.rep0, limit-alignBits)
s.lzma.rep0 <<= alignBits
rcBittreeReverse(
&s.rc, s.lzma.distAlign[1:], &s.lzma.rep0, alignBits)
}
}
}
/*
* Decode a repeated match. The distance is one of the four most recently
* seen matches. The distance will be stored in s.lzma.rep0.
*/
func lzmaRepMatch(s *xzDecLZMA2, posState uint32) {
var tmp uint32
if !rcBit(&s.rc, &s.lzma.isRep0[s.lzma.state]) {
if !rcBit(&s.rc, &s.lzma.isRep0Long[s.lzma.state][posState]) {
lzmaStateShortRep(&s.lzma.state)
s.lzma.len = 1
return
}
} else {
if !rcBit(&s.rc, &s.lzma.isRep1[s.lzma.state]) {
tmp = s.lzma.rep1
} else {
if !rcBit(&s.rc, &s.lzma.isRep2[s.lzma.state]) {
tmp = s.lzma.rep2
} else {
tmp = s.lzma.rep3
s.lzma.rep3 = s.lzma.rep2
}
s.lzma.rep2 = s.lzma.rep1
}
s.lzma.rep1 = s.lzma.rep0
s.lzma.rep0 = tmp
}
lzmaStateLongRep(&s.lzma.state)
lzmaLen(s, &s.lzma.repLenDec, posState)
}
/* LZMA decoder core */
func lzmaMain(s *xzDecLZMA2) bool {
var posState uint32
/*
* If the dictionary was reached during the previous call, try to
* finish the possibly pending repeat in the dictionary.
*/
if dictHasSpace(&s.dict) && s.lzma.len > 0 {
dictRepeat(&s.dict, &s.lzma.len, s.lzma.rep0)
}
/*
* Decode more LZMA symbols. One iteration may consume up to
* lzmaInRequired - 1 bytes.
*/
for dictHasSpace(&s.dict) && !rcLimitExceeded(&s.rc) {
posState = s.dict.pos & s.lzma.posMask
if !rcBit(&s.rc, &s.lzma.isMatch[s.lzma.state][posState]) {
lzmaLiteral(s)
} else {
if rcBit(&s.rc, &s.lzma.isRep[s.lzma.state]) {
lzmaRepMatch(s, posState)
} else {
lzmaMatch(s, posState)
}
if !dictRepeat(&s.dict, &s.lzma.len, s.lzma.rep0) {
return false
}
}
}
/*
* Having the range decoder always normalized when we are outside
* this function makes it easier to correctly handle end of the chunk.
*/
rcNormalize(&s.rc)
return true
}
/*
* Reset the LZMA decoder and range decoder state. Dictionary is not reset
* here, because LZMA state may be reset without resetting the dictionary.
*/
func lzmaReset(s *xzDecLZMA2) {
s.lzma.state = stateLitLit
s.lzma.rep0 = 0
s.lzma.rep1 = 0
s.lzma.rep2 = 0
s.lzma.rep3 = 0
/* All probabilities are initialized to the same value, v */
v := uint16(rcBitModelTotal / 2)
s.lzma.matchLenDec.choice = v
s.lzma.matchLenDec.choice2 = v
s.lzma.repLenDec.choice = v
s.lzma.repLenDec.choice2 = v
for _, m := range [][]uint16{
s.lzma.isRep[:], s.lzma.isRep0[:], s.lzma.isRep1[:],
s.lzma.isRep2[:], s.lzma.distSpecial[:], s.lzma.distAlign[:],
s.lzma.matchLenDec.high[:], s.lzma.repLenDec.high[:],
} {
for j := range m {
m[j] = v
}
}
for i := range s.lzma.isMatch {
for j := range s.lzma.isMatch[i] {
s.lzma.isMatch[i][j] = v
}
}
for i := range s.lzma.isRep0Long {
for j := range s.lzma.isRep0Long[i] {
s.lzma.isRep0Long[i][j] = v
}
}
for i := range s.lzma.distSlot {
for j := range s.lzma.distSlot[i] {
s.lzma.distSlot[i][j] = v
}
}
for i := range s.lzma.literal {
for j := range s.lzma.literal[i] {
s.lzma.literal[i][j] = v
}
}
for i := range s.lzma.matchLenDec.low {
for j := range s.lzma.matchLenDec.low[i] {
s.lzma.matchLenDec.low[i][j] = v
}
}
for i := range s.lzma.matchLenDec.mid {
for j := range s.lzma.matchLenDec.mid[i] {
s.lzma.matchLenDec.mid[i][j] = v
}
}
for i := range s.lzma.repLenDec.low {
for j := range s.lzma.repLenDec.low[i] {
s.lzma.repLenDec.low[i][j] = v
}
}
for i := range s.lzma.repLenDec.mid {
for j := range s.lzma.repLenDec.mid[i] {
s.lzma.repLenDec.mid[i][j] = v
}
}
rcReset(&s.rc)
}
/*
* Decode and validate LZMA properties (lc/lp/pb) and calculate the bit masks
* from the decoded lp and pb values. On success, the LZMA decoder state is
* reset and true is returned.
*/
func lzmaProps(s *xzDecLZMA2, props byte) bool {
if props > (4*5+4)*9+8 {
return false
}
s.lzma.posMask = 0
for props >= 9*5 {
props -= 9 * 5
s.lzma.posMask++
}
s.lzma.posMask = 1<<s.lzma.posMask - 1
s.lzma.literalPosMask = 0
for props >= 9 {
props -= 9
s.lzma.literalPosMask++
}
s.lzma.lc = uint32(props)
if s.lzma.lc+s.lzma.literalPosMask > 4 {
return false
}
s.lzma.literalPosMask = 1<<s.lzma.literalPosMask - 1
lzmaReset(s)
return true
}
/*********
* LZMA2 *
*********/
/*
* The LZMA decoder assumes that if the input limit (s.rc.inLimit) hasn't
* been exceeded, it is safe to read up to lzmaInRequired bytes. This
* wrapper function takes care of making the LZMA decoder's assumption safe.
*
* As long as there is plenty of input left to be decoded in the current LZMA
* chunk, we decode directly from the caller-supplied input buffer until
* there's lzmaInRequired bytes left. Those remaining bytes are copied into
* s.temp.buf, which (hopefully) gets filled on the next call to this
* function. We decode a few bytes from the temporary buffer so that we can
* continue decoding from the caller-supplied input buffer again.
*/
func lzma2LZMA(s *xzDecLZMA2, b *xzBuf) bool {
var inAvail int
var tmp int
inAvail = len(b.in) - b.inPos
if len(s.temp.buf) > 0 || s.lzma2.compressed == 0 {
tmp = 2*lzmaInRequired - len(s.temp.buf)
if tmp > s.lzma2.compressed-len(s.temp.buf) {
tmp = s.lzma2.compressed - len(s.temp.buf)
}
if tmp > inAvail {
tmp = inAvail
}
copy(s.temp.bufArray[len(s.temp.buf):], b.in[b.inPos:b.inPos+tmp])
switch {
case len(s.temp.buf)+tmp == s.lzma2.compressed:
for i := len(s.temp.buf) + tmp; i < len(s.temp.bufArray); i++ {
s.temp.bufArray[i] = 0
}
s.rc.inLimit = len(s.temp.buf) + tmp
case len(s.temp.buf)+tmp < lzmaInRequired:
s.temp.buf = s.temp.bufArray[:len(s.temp.buf)+tmp]
b.inPos += tmp
return true
default:
s.rc.inLimit = len(s.temp.buf) + tmp - lzmaInRequired
}
s.rc.in = s.temp.bufArray[:]
s.rc.inPos = 0
if !lzmaMain(s) || s.rc.inPos > len(s.temp.buf)+tmp {
return false
}
s.lzma2.compressed -= s.rc.inPos
if s.rc.inPos < len(s.temp.buf) {
copy(s.temp.buf, s.temp.buf[s.rc.inPos:])
s.temp.buf = s.temp.buf[:len(s.temp.buf)-s.rc.inPos]
return true
}
b.inPos += s.rc.inPos - len(s.temp.buf)
s.temp.buf = nil
}
inAvail = len(b.in) - b.inPos
if inAvail >= lzmaInRequired {
s.rc.in = b.in
s.rc.inPos = b.inPos
if inAvail >= s.lzma2.compressed+lzmaInRequired {
s.rc.inLimit = b.inPos + s.lzma2.compressed
} else {
s.rc.inLimit = len(b.in) - lzmaInRequired
}
if !lzmaMain(s) {
return false
}
inAvail = s.rc.inPos - b.inPos
if inAvail > s.lzma2.compressed {
return false
}
s.lzma2.compressed -= inAvail
b.inPos = s.rc.inPos
}
inAvail = len(b.in) - b.inPos
if inAvail < lzmaInRequired {
if inAvail > s.lzma2.compressed {
inAvail = s.lzma2.compressed
}
s.temp.buf = s.temp.bufArray[:inAvail]
copy(s.temp.buf, b.in[b.inPos:])
b.inPos += inAvail
}
return true
}
/*
* Take care of the LZMA2 control layer, and forward the job of actual LZMA
* decoding or copying of uncompressed chunks to other functions.
*/
func xzDecLZMA2Run(s *xzDecLZMA2, b *xzBuf) xzRet {
var tmp int
for b.inPos < len(b.in) || s.lzma2.sequence == seqLZMARun {
switch s.lzma2.sequence {
case seqControl:
/*
* LZMA2 control byte
*
* Exact values:
* 0x00 End marker
* 0x01 Dictionary reset followed by
* an uncompressed chunk
* 0x02 Uncompressed chunk (no dictionary reset)
*
* Highest three bits (s.control & 0xE0):
* 0xE0 Dictionary reset, new properties and state
* reset, followed by LZMA compressed chunk
* 0xC0 New properties and state reset, followed
* by LZMA compressed chunk (no dictionary
* reset)
* 0xA0 State reset using old properties,
* followed by LZMA compressed chunk (no
* dictionary reset)
* 0x80 LZMA chunk (no dictionary or state reset)
*
* For LZMA compressed chunks, the lowest five bits
* (s.control & 1F) are the highest bits of the
* uncompressed size (bits 16-20).
*
* A new LZMA2 stream must begin with a dictionary
* reset. The first LZMA chunk must set new
* properties and reset the LZMA state.
*
* Values that don't match anything described above
* are invalid and we return xzDataError.
*/
tmp = int(b.in[b.inPos])
b.inPos++
if tmp == 0x00 {
return xzStreamEnd
}
switch {
case tmp >= 0xe0 || tmp == 0x01:
s.lzma2.needProps = true
s.lzma2.needDictReset = false
dictReset(&s.dict, b)
case s.lzma2.needDictReset:
return xzDataError
}
if tmp >= 0x80 {
s.lzma2.uncompressed = (tmp & 0x1f) << 16
s.lzma2.sequence = seqUncompressed1
switch {
case tmp >= 0xc0:
/*
* When there are new properties,
* state reset is done at
* seqProperties.
*/
s.lzma2.needProps = false
s.lzma2.nextSequence = seqProperties
case s.lzma2.needProps:
return xzDataError
default:
s.lzma2.nextSequence = seqLZMAPrepare
if tmp >= 0xa0 {
lzmaReset(s)
}
}
} else {
if tmp > 0x02 {
return xzDataError
}
s.lzma2.sequence = seqCompressed0
s.lzma2.nextSequence = seqCopy
}
case seqUncompressed1:
s.lzma2.uncompressed += int(b.in[b.inPos]) << 8
b.inPos++
s.lzma2.sequence = seqUncompressed2
case seqUncompressed2:
s.lzma2.uncompressed += int(b.in[b.inPos]) + 1
b.inPos++
s.lzma2.sequence = seqCompressed0
case seqCompressed0:
s.lzma2.compressed += int(b.in[b.inPos]) << 8
b.inPos++
s.lzma2.sequence = seqCompressed1
case seqCompressed1:
s.lzma2.compressed += int(b.in[b.inPos]) + 1
b.inPos++
s.lzma2.sequence = s.lzma2.nextSequence
case seqProperties:
if !lzmaProps(s, b.in[b.inPos]) {
return xzDataError
}
b.inPos++
s.lzma2.sequence = seqLZMAPrepare
fallthrough
case seqLZMAPrepare:
if s.lzma2.compressed < rcInitBytes {
return xzDataError
}
if !rcReadInit(&s.rc, b) {
return xzOK
}
s.lzma2.compressed -= rcInitBytes
s.lzma2.sequence = seqLZMARun
fallthrough
case seqLZMARun:
/*
* Set dictionary limit to indicate how much we want
* to be encoded at maximum. Decode new data into the
* dictionary. Flush the new data from dictionary to
* b.out. Check if we finished decoding this chunk.
* In case the dictionary got full but we didn't fill
* the output buffer yet, we may run this loop
* multiple times without changing s.lzma2.sequence.
*/
outMax := len(b.out) - b.outPos
if outMax > s.lzma2.uncompressed {
outMax = s.lzma2.uncompressed
}
dictLimit(&s.dict, outMax)
if !lzma2LZMA(s, b) {
return xzDataError
}
s.lzma2.uncompressed -= dictFlush(&s.dict, b)
switch {
case s.lzma2.uncompressed == 0:
if s.lzma2.compressed > 0 || s.lzma.len > 0 ||
!rcIsFinished(&s.rc) {
return xzDataError
}
rcReset(&s.rc)
s.lzma2.sequence = seqControl
case b.outPos == len(b.out) ||
b.inPos == len(b.in) &&
len(s.temp.buf) < s.lzma2.compressed:
return xzOK
}
case seqCopy:
dictUncompressed(&s.dict, b, &s.lzma2.compressed)
if s.lzma2.compressed > 0 {
return xzOK
}
s.lzma2.sequence = seqControl
}
}
return xzOK
}
/*
* Allocate memory for LZMA2 decoder. xzDecLZMA2Reset must be used
* before calling xzDecLZMA2Run.
*/
func xzDecLZMA2Create(dictMax uint32) *xzDecLZMA2 {
s := new(xzDecLZMA2)
s.dict.sizeMax = dictMax
return s
}
/*
* Decode the LZMA2 properties (one byte) and reset the decoder. Return
* xzOK on success, xzMemlimitError if the preallocated dictionary is not
* big enough, and xzOptionsError if props indicates something that this
* decoder doesn't support.
*/
func xzDecLZMA2Reset(s *xzDecLZMA2, props byte) xzRet {
if props > 40 {
return xzOptionsError // Bigger than 4 GiB
}
if props == 40 {
s.dict.size = ^uint32(0)
} else {
s.dict.size = uint32(2 + props&1)
s.dict.size <<= props>>1 + 11
}
if s.dict.size > s.dict.sizeMax {
return xzMemlimitError
}
s.dict.end = s.dict.size
if len(s.dict.buf) < int(s.dict.size) {
s.dict.buf = make([]byte, s.dict.size)
}
s.lzma.len = 0
s.lzma2.sequence = seqControl
s.lzma2.compressed = 0
s.lzma2.uncompressed = 0
s.lzma2.needDictReset = true
s.temp.buf = nil
return xzOK
}