From 3d7377b87424877c59e068e97f3d9b930aca630a Mon Sep 17 00:00:00 2001 From: Nick Terrell <terrelln@fb.com> Date: Fri, 15 Feb 2019 10:29:03 -0800 Subject: [PATCH] [libzstd] Handle uncompressed literals --- lib/compress/zstd_compress.c | 7 ++++-- lib/compress/zstd_compress_internal.h | 1 + lib/compress/zstd_opt.c | 34 ++++++++++++++++++++------- tests/regression/results.csv | 18 +++++++------- 4 files changed, 40 insertions(+), 20 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index e18051f00..9ea7f04ef 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -402,7 +402,6 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param) case ZSTD_c_minMatch: case ZSTD_c_targetLength: case ZSTD_c_strategy: - case ZSTD_c_literalCompressionMode: return 1; case ZSTD_c_format: @@ -421,6 +420,7 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param) case ZSTD_c_ldmBucketSizeLog: case ZSTD_c_ldmHashRateLog: case ZSTD_c_forceAttachDict: + case ZSTD_c_literalCompressionMode: default: return 0; } @@ -2677,7 +2677,10 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, goto out; /* don't even attempt compression below a certain srcSize */ } ZSTD_resetSeqStore(&(zc->seqStore)); - ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy; /* required for optimal parser to read stats from dictionary */ + /* required for optimal parser to read stats from dictionary */ + ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy; + /* tell the optimal parser how we expect to compress literals */ + ms->opt.literalCompressionMode = zc->appliedParams.literalCompressionMode; /* a gap between an attached dict and the current window is not safe, * they must remain adjacent, diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index e0b54299d..a828de3b4 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -107,6 +107,7 @@ typedef struct { 
U32 offCodeSumBasePrice; /* to compare to log2(offreq) */ ZSTD_OptPrice_e priceType; /* prices can be determined dynamically, or follow a pre-defined cost structure */ const ZSTD_entropyCTables_t* symbolCosts; /* pre-calculated dictionary statistics */ + ZSTD_literalCompressionMode_e literalCompressionMode; } optState_t; typedef struct { diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c index 44de6e97f..cf2f70b11 100644 --- a/lib/compress/zstd_opt.c +++ b/lib/compress/zstd_opt.c @@ -64,9 +64,15 @@ MEM_STATIC double ZSTD_fCost(U32 price) } #endif +static int ZSTD_compressedLiterals(optState_t const* const optPtr) +{ + return optPtr->literalCompressionMode != ZSTD_lcm_uncompressed; +} + static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel) { - optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel); + if (ZSTD_compressedLiterals(optPtr)) + optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel); optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel); optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel); optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel); @@ -99,6 +105,7 @@ ZSTD_rescaleFreqs(optState_t* const optPtr, const BYTE* const src, size_t const srcSize, int const optLevel) { + int const compressedLiterals = ZSTD_compressedLiterals(optPtr); DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize); optPtr->priceType = zop_dynamic; @@ -113,9 +120,10 @@ ZSTD_rescaleFreqs(optState_t* const optPtr, /* huffman table presumed generated by dictionary */ optPtr->priceType = zop_dynamic; - assert(optPtr->litFreq != NULL); - optPtr->litSum = 0; - { unsigned lit; + if (compressedLiterals) { + unsigned lit; + assert(optPtr->litFreq != NULL); + optPtr->litSum = 0; for (lit=0; lit<=MaxLit; lit++) { U32 const scaleLog = 11; /* scale to 2K */ U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->huf.CTable, lit); @@ -163,10 +171,11 @@ ZSTD_rescaleFreqs(optState_t* const optPtr, } else { 
/* not a dictionary */ assert(optPtr->litFreq != NULL); - { unsigned lit = MaxLit; + if (compressedLiterals) { + unsigned lit = MaxLit; HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */ + optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1); } - optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1); { unsigned ll; for (ll=0; ll<=MaxLL; ll++) @@ -190,7 +199,8 @@ ZSTD_rescaleFreqs(optState_t* const optPtr, } else { /* new block : re-use previous statistics, scaled down */ - optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1); + if (compressedLiterals) + optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1); optPtr->litLengthSum = ZSTD_downscaleStat(optPtr->litLengthFreq, MaxLL, 0); optPtr->matchLengthSum = ZSTD_downscaleStat(optPtr->matchLengthFreq, MaxML, 0); optPtr->offCodeSum = ZSTD_downscaleStat(optPtr->offCodeFreq, MaxOff, 0); @@ -207,6 +217,10 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength, int optLevel) { if (litLength == 0) return 0; + + if (!ZSTD_compressedLiterals(optPtr)) + return (litLength << 3) * BITCOST_MULTIPLIER; /* Uncompressed - 8 bits per literal. 
*/ + if (optPtr->priceType == zop_predef) return (litLength*6) * BITCOST_MULTIPLIER; /* 6 bit per literal - no statistic used */ @@ -310,7 +324,8 @@ static void ZSTD_updateStats(optState_t* const optPtr, U32 offsetCode, U32 matchLength) { /* literals */ - { U32 u; + if (ZSTD_compressedLiterals(optPtr)) { + U32 u; for (u=0; u < litLength; u++) optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD; optPtr->litSum += litLength*ZSTD_LITFREQ_ADD; @@ -1108,7 +1123,8 @@ static U32 ZSTD_upscaleStat(unsigned* table, U32 lastEltIndex, int bonus) /* used in 2-pass strategy */ MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr) { - optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0); + if (ZSTD_compressedLiterals(optPtr)) + optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0); optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 0); optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 0); optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 0); diff --git a/tests/regression/results.csv b/tests/regression/results.csv index 076e3b454..6d0d1b515 100644 --- a/tests/regression/results.csv +++ b/tests/regression/results.csv @@ -179,7 +179,7 @@ silesia, small hash log, advanced one silesia, small chain log, advanced one pass, 4931093 silesia, explicit params, advanced one pass, 4815369 silesia, uncompressed literals, advanced one pass, 5155424 -silesia, uncompressed literals optimal, advanced one pass, 4426654 +silesia, uncompressed literals optimal, advanced one pass, 4325427 silesia, huffman literals, advanced one pass, 5341356 silesia.tar, level -5, advanced one pass, 7160438 silesia.tar, level -3, advanced one pass, 6789024 @@ -204,7 +204,7 @@ silesia.tar, small hash log, advanced one silesia.tar, small chain log, advanced one pass, 4943255 silesia.tar, explicit params, advanced one pass, 4829974 silesia.tar, uncompressed literals, advanced one pass, 5157992 -silesia.tar, uncompressed literals optimal, advanced 
one pass, 4372744 +silesia.tar, uncompressed literals optimal, advanced one pass, 4321094 silesia.tar, huffman literals, advanced one pass, 5358079 github, level -5, advanced one pass, 232744 github, level -5 with dict, advanced one pass, 46718 @@ -243,7 +243,7 @@ github, small hash log, advanced one github, small chain log, advanced one pass, 136314 github, explicit params, advanced one pass, 137670 github, uncompressed literals, advanced one pass, 167004 -github, uncompressed literals optimal, advanced one pass, 164600 +github, uncompressed literals optimal, advanced one pass, 156824 github, huffman literals, advanced one pass, 143457 silesia, level -5, advanced one pass small out, 7152294 silesia, level -3, advanced one pass small out, 6789969 @@ -268,7 +268,7 @@ silesia, small hash log, advanced one silesia, small chain log, advanced one pass small out, 4931093 silesia, explicit params, advanced one pass small out, 4815369 silesia, uncompressed literals, advanced one pass small out, 5155424 -silesia, uncompressed literals optimal, advanced one pass small out, 4426654 +silesia, uncompressed literals optimal, advanced one pass small out, 4325427 silesia, huffman literals, advanced one pass small out, 5341356 silesia.tar, level -5, advanced one pass small out, 7160438 silesia.tar, level -3, advanced one pass small out, 6789024 @@ -293,7 +293,7 @@ silesia.tar, small hash log, advanced one silesia.tar, small chain log, advanced one pass small out, 4943255 silesia.tar, explicit params, advanced one pass small out, 4829974 silesia.tar, uncompressed literals, advanced one pass small out, 5157992 -silesia.tar, uncompressed literals optimal, advanced one pass small out, 4372744 +silesia.tar, uncompressed literals optimal, advanced one pass small out, 4321094 silesia.tar, huffman literals, advanced one pass small out, 5358079 github, level -5, advanced one pass small out, 232744 github, level -5 with dict, advanced one pass small out, 46718 @@ -332,7 +332,7 @@ github, 
small hash log, advanced one github, small chain log, advanced one pass small out, 136314 github, explicit params, advanced one pass small out, 137670 github, uncompressed literals, advanced one pass small out, 167004 -github, uncompressed literals optimal, advanced one pass small out, 164600 +github, uncompressed literals optimal, advanced one pass small out, 156824 github, huffman literals, advanced one pass small out, 143457 silesia, level -5, advanced streaming, 7152294 silesia, level -3, advanced streaming, 6789973 @@ -357,7 +357,7 @@ silesia, small hash log, advanced str silesia, small chain log, advanced streaming, 4931093 silesia, explicit params, advanced streaming, 4815380 silesia, uncompressed literals, advanced streaming, 5155424 -silesia, uncompressed literals optimal, advanced streaming, 4426654 +silesia, uncompressed literals optimal, advanced streaming, 4325427 silesia, huffman literals, advanced streaming, 5341357 silesia.tar, level -5, advanced streaming, 7160440 silesia.tar, level -3, advanced streaming, 6789026 @@ -382,7 +382,7 @@ silesia.tar, small hash log, advanced str silesia.tar, small chain log, advanced streaming, 4943260 silesia.tar, explicit params, advanced streaming, 4830002 silesia.tar, uncompressed literals, advanced streaming, 5157995 -silesia.tar, uncompressed literals optimal, advanced streaming, 4372744 +silesia.tar, uncompressed literals optimal, advanced streaming, 4321094 silesia.tar, huffman literals, advanced streaming, 5358083 github, level -5, advanced streaming, 232744 github, level -5 with dict, advanced streaming, 46718 @@ -421,7 +421,7 @@ github, small hash log, advanced str github, small chain log, advanced streaming, 136314 github, explicit params, advanced streaming, 137670 github, uncompressed literals, advanced streaming, 167004 -github, uncompressed literals optimal, advanced streaming, 164600 +github, uncompressed literals optimal, advanced streaming, 156824 github, huffman literals, advanced streaming, 143457 
silesia, level -5, old streaming, 7152294 silesia, level -3, old streaming, 6789973 -- GitLab