From 3d7377b87424877c59e068e97f3d9b930aca630a Mon Sep 17 00:00:00 2001
From: Nick Terrell <terrelln@fb.com>
Date: Fri, 15 Feb 2019 10:29:03 -0800
Subject: [PATCH] [libzstd] Handle uncompressed literals

---
 lib/compress/zstd_compress.c          |  7 ++++--
 lib/compress/zstd_compress_internal.h |  1 +
 lib/compress/zstd_opt.c               | 34 ++++++++++++++++++++-------
 tests/regression/results.csv          | 18 +++++++-------
 4 files changed, 40 insertions(+), 20 deletions(-)

diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index e18051f00..9ea7f04ef 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -402,7 +402,6 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
     case ZSTD_c_minMatch:
     case ZSTD_c_targetLength:
     case ZSTD_c_strategy:
-    case ZSTD_c_literalCompressionMode:
         return 1;
 
     case ZSTD_c_format:
@@ -421,6 +420,7 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
     case ZSTD_c_ldmBucketSizeLog:
     case ZSTD_c_ldmHashRateLog:
     case ZSTD_c_forceAttachDict:
+    case ZSTD_c_literalCompressionMode:
     default:
         return 0;
     }
@@ -2677,7 +2677,10 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
         goto out;  /* don't even attempt compression below a certain srcSize */
     }
     ZSTD_resetSeqStore(&(zc->seqStore));
-    ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy;   /* required for optimal parser to read stats from dictionary */
+    /* required for optimal parser to read stats from dictionary */
+    ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy;
+    /* tell the optimal parser how we expect to compress literals */
+    ms->opt.literalCompressionMode = zc->appliedParams.literalCompressionMode;
 
     /* a gap between an attached dict and the current window is not safe,
      * they must remain adjacent,
diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h
index e0b54299d..a828de3b4 100644
--- a/lib/compress/zstd_compress_internal.h
+++ b/lib/compress/zstd_compress_internal.h
@@ -107,6 +107,7 @@ typedef struct {
     U32  offCodeSumBasePrice;    /* to compare to log2(offreq)  */
     ZSTD_OptPrice_e priceType;   /* prices can be determined dynamically, or follow a pre-defined cost structure */
     const ZSTD_entropyCTables_t* symbolCosts;  /* pre-calculated dictionary statistics */
+    ZSTD_literalCompressionMode_e literalCompressionMode;
 } optState_t;
 
 typedef struct {
diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c
index 44de6e97f..cf2f70b11 100644
--- a/lib/compress/zstd_opt.c
+++ b/lib/compress/zstd_opt.c
@@ -64,9 +64,15 @@ MEM_STATIC double ZSTD_fCost(U32 price)
 }
 #endif
 
+static int ZSTD_compressedLiterals(optState_t const* const optPtr)
+{
+    return optPtr->literalCompressionMode != ZSTD_lcm_uncompressed;
+}
+
 static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
 {
-    optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel);
+    if (ZSTD_compressedLiterals(optPtr))
+        optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel);
     optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel);
     optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel);
     optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel);
@@ -99,6 +105,7 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
             const BYTE* const src, size_t const srcSize,
                   int const optLevel)
 {
+    int const compressedLiterals = ZSTD_compressedLiterals(optPtr);
     DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
     optPtr->priceType = zop_dynamic;
 
@@ -113,9 +120,10 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
             /* huffman table presumed generated by dictionary */
             optPtr->priceType = zop_dynamic;
 
-            assert(optPtr->litFreq != NULL);
-            optPtr->litSum = 0;
-            {   unsigned lit;
+            if (compressedLiterals) {
+                unsigned lit;
+                assert(optPtr->litFreq != NULL);
+                optPtr->litSum = 0;
                 for (lit=0; lit<=MaxLit; lit++) {
                     U32 const scaleLog = 11;   /* scale to 2K */
                     U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->huf.CTable, lit);
@@ -163,10 +171,11 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
         } else {  /* not a dictionary */
 
             assert(optPtr->litFreq != NULL);
-            {   unsigned lit = MaxLit;
+            if (compressedLiterals) {
+                unsigned lit = MaxLit;
                 HIST_count_simple(optPtr->litFreq, &lit, src, srcSize);   /* use raw first block to init statistics */
+                optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
             }
-            optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
 
             {   unsigned ll;
                 for (ll=0; ll<=MaxLL; ll++)
@@ -190,7 +199,8 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
 
     } else {   /* new block : re-use previous statistics, scaled down */
 
-        optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
+        if (compressedLiterals)
+            optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
         optPtr->litLengthSum = ZSTD_downscaleStat(optPtr->litLengthFreq, MaxLL, 0);
         optPtr->matchLengthSum = ZSTD_downscaleStat(optPtr->matchLengthFreq, MaxML, 0);
         optPtr->offCodeSum = ZSTD_downscaleStat(optPtr->offCodeFreq, MaxOff, 0);
@@ -207,6 +217,10 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
                                 int optLevel)
 {
     if (litLength == 0) return 0;
+
+    if (!ZSTD_compressedLiterals(optPtr))
+        return (litLength << 3) * BITCOST_MULTIPLIER;  /* Uncompressed - 8 bits per literal. */
+
     if (optPtr->priceType == zop_predef)
         return (litLength*6) * BITCOST_MULTIPLIER;  /* 6 bit per literal - no statistic used */
 
@@ -310,7 +324,8 @@ static void ZSTD_updateStats(optState_t* const optPtr,
                              U32 offsetCode, U32 matchLength)
 {
     /* literals */
-    {   U32 u;
+    if (ZSTD_compressedLiterals(optPtr)) {
+        U32 u;
         for (u=0; u < litLength; u++)
             optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
         optPtr->litSum += litLength*ZSTD_LITFREQ_ADD;
@@ -1108,7 +1123,8 @@ static U32 ZSTD_upscaleStat(unsigned* table, U32 lastEltIndex, int bonus)
 /* used in 2-pass strategy */
 MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr)
 {
-    optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0);
+    if (ZSTD_compressedLiterals(optPtr))
+        optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0);
     optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 0);
     optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 0);
     optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 0);
diff --git a/tests/regression/results.csv b/tests/regression/results.csv
index 076e3b454..6d0d1b515 100644
--- a/tests/regression/results.csv
+++ b/tests/regression/results.csv
@@ -179,7 +179,7 @@ silesia,                          small hash log,                   advanced one
 silesia,                          small chain log,                  advanced one pass,                4931093
 silesia,                          explicit params,                  advanced one pass,                4815369
 silesia,                          uncompressed literals,            advanced one pass,                5155424
-silesia,                          uncompressed literals optimal,    advanced one pass,                4426654
+silesia,                          uncompressed literals optimal,    advanced one pass,                4325427
 silesia,                          huffman literals,                 advanced one pass,                5341356
 silesia.tar,                      level -5,                         advanced one pass,                7160438
 silesia.tar,                      level -3,                         advanced one pass,                6789024
@@ -204,7 +204,7 @@ silesia.tar,                      small hash log,                   advanced one
 silesia.tar,                      small chain log,                  advanced one pass,                4943255
 silesia.tar,                      explicit params,                  advanced one pass,                4829974
 silesia.tar,                      uncompressed literals,            advanced one pass,                5157992
-silesia.tar,                      uncompressed literals optimal,    advanced one pass,                4372744
+silesia.tar,                      uncompressed literals optimal,    advanced one pass,                4321094
 silesia.tar,                      huffman literals,                 advanced one pass,                5358079
 github,                           level -5,                         advanced one pass,                232744
 github,                           level -5 with dict,               advanced one pass,                46718
@@ -243,7 +243,7 @@ github,                           small hash log,                   advanced one
 github,                           small chain log,                  advanced one pass,                136314
 github,                           explicit params,                  advanced one pass,                137670
 github,                           uncompressed literals,            advanced one pass,                167004
-github,                           uncompressed literals optimal,    advanced one pass,                164600
+github,                           uncompressed literals optimal,    advanced one pass,                156824
 github,                           huffman literals,                 advanced one pass,                143457
 silesia,                          level -5,                         advanced one pass small out,      7152294
 silesia,                          level -3,                         advanced one pass small out,      6789969
@@ -268,7 +268,7 @@ silesia,                          small hash log,                   advanced one
 silesia,                          small chain log,                  advanced one pass small out,      4931093
 silesia,                          explicit params,                  advanced one pass small out,      4815369
 silesia,                          uncompressed literals,            advanced one pass small out,      5155424
-silesia,                          uncompressed literals optimal,    advanced one pass small out,      4426654
+silesia,                          uncompressed literals optimal,    advanced one pass small out,      4325427
 silesia,                          huffman literals,                 advanced one pass small out,      5341356
 silesia.tar,                      level -5,                         advanced one pass small out,      7160438
 silesia.tar,                      level -3,                         advanced one pass small out,      6789024
@@ -293,7 +293,7 @@ silesia.tar,                      small hash log,                   advanced one
 silesia.tar,                      small chain log,                  advanced one pass small out,      4943255
 silesia.tar,                      explicit params,                  advanced one pass small out,      4829974
 silesia.tar,                      uncompressed literals,            advanced one pass small out,      5157992
-silesia.tar,                      uncompressed literals optimal,    advanced one pass small out,      4372744
+silesia.tar,                      uncompressed literals optimal,    advanced one pass small out,      4321094
 silesia.tar,                      huffman literals,                 advanced one pass small out,      5358079
 github,                           level -5,                         advanced one pass small out,      232744
 github,                           level -5 with dict,               advanced one pass small out,      46718
@@ -332,7 +332,7 @@ github,                           small hash log,                   advanced one
 github,                           small chain log,                  advanced one pass small out,      136314
 github,                           explicit params,                  advanced one pass small out,      137670
 github,                           uncompressed literals,            advanced one pass small out,      167004
-github,                           uncompressed literals optimal,    advanced one pass small out,      164600
+github,                           uncompressed literals optimal,    advanced one pass small out,      156824
 github,                           huffman literals,                 advanced one pass small out,      143457
 silesia,                          level -5,                         advanced streaming,               7152294
 silesia,                          level -3,                         advanced streaming,               6789973
@@ -357,7 +357,7 @@ silesia,                          small hash log,                   advanced str
 silesia,                          small chain log,                  advanced streaming,               4931093
 silesia,                          explicit params,                  advanced streaming,               4815380
 silesia,                          uncompressed literals,            advanced streaming,               5155424
-silesia,                          uncompressed literals optimal,    advanced streaming,               4426654
+silesia,                          uncompressed literals optimal,    advanced streaming,               4325427
 silesia,                          huffman literals,                 advanced streaming,               5341357
 silesia.tar,                      level -5,                         advanced streaming,               7160440
 silesia.tar,                      level -3,                         advanced streaming,               6789026
@@ -382,7 +382,7 @@ silesia.tar,                      small hash log,                   advanced str
 silesia.tar,                      small chain log,                  advanced streaming,               4943260
 silesia.tar,                      explicit params,                  advanced streaming,               4830002
 silesia.tar,                      uncompressed literals,            advanced streaming,               5157995
-silesia.tar,                      uncompressed literals optimal,    advanced streaming,               4372744
+silesia.tar,                      uncompressed literals optimal,    advanced streaming,               4321094
 silesia.tar,                      huffman literals,                 advanced streaming,               5358083
 github,                           level -5,                         advanced streaming,               232744
 github,                           level -5 with dict,               advanced streaming,               46718
@@ -421,7 +421,7 @@ github,                           small hash log,                   advanced str
 github,                           small chain log,                  advanced streaming,               136314
 github,                           explicit params,                  advanced streaming,               137670
 github,                           uncompressed literals,            advanced streaming,               167004
-github,                           uncompressed literals optimal,    advanced streaming,               164600
+github,                           uncompressed literals optimal,    advanced streaming,               156824
 github,                           huffman literals,                 advanced streaming,               143457
 silesia,                          level -5,                         old streaming,                    7152294
 silesia,                          level -3,                         old streaming,                    6789973
-- 
GitLab