Skip to content
Snippets Groups Projects
Commit 88e9a984 authored by Yann Collet's avatar Yann Collet
Browse files

early update literals

for better cost estimation in the following series of matches.

Unfortunately, this does not necessarily result in better compression.
Results are all over the place,
with best outcome observed for silesia/x-ray
but most other files tend to get slightly worse after this change.

It's strange because it seems that we are just providing more accurate information for the cost estimator.

Anyway, as it also increases code complexity,
it's probably not interesting enough for now.
parent 5f2c3d97
Branches fix3764
No related tags found
No related merge requests found
...@@ -324,19 +324,28 @@ ZSTD_getMatchPrice(U32 const offcode, ...@@ -324,19 +324,28 @@ ZSTD_getMatchPrice(U32 const offcode,
return price; return price;
} }
/* ZSTD_updateLiterals() :
 * Folds a run of literals into the literal statistics:
 * bumps the per-byte-value histogram optPtr->litFreq and the running
 * total optPtr->litSum, each weighted by ZSTD_LITFREQ_ADD.
 * Statistics are only tracked when literals are entropy-compressed
 * (ZSTD_compressedLiterals()); otherwise this is a no-op.
 * assumption : literals + litLength <= iend */
static void
ZSTD_updateLiterals(optState_t* const optPtr, const BYTE* literals, U32 litLength)
{
    if (ZSTD_compressedLiterals(optPtr)) {
        U32 u;
        for (u=0; u < litLength; u++)
            optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
        optPtr->litSum += litLength*ZSTD_LITFREQ_ADD;  /* keep total consistent with per-symbol counts */
    }
}
/* ZSTD_updateStats() :
 * assumption : literals + litLength <= iend */
static void ZSTD_updateStats(optState_t* const optPtr,
U32 litLength, const BYTE* literals,
U32 offsetCode, U32 matchLength)
{
DEBUGLOG(6, "ZSTD_updateStats (ll=%u, ml=%u)", litLength, matchLength);
/* literals */
ZSTD_updateLiterals(optPtr, literals, litLength);
/* literal Length */ /* literal Length */
{ U32 const llCode = ZSTD_LLcode(litLength); { U32 const llCode = ZSTD_LLcode(litLength);
...@@ -1061,6 +1070,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, ...@@ -1061,6 +1070,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
ZSTD_match_t* const matches = optStatePtr->matchTable; ZSTD_match_t* const matches = optStatePtr->matchTable;
ZSTD_optimal_t lastSequence; ZSTD_optimal_t lastSequence;
ZSTD_optLdm_t optLdm; ZSTD_optLdm_t optLdm;
size_t literalsAlreadyCounted = 0;
optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore; optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0; optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
...@@ -1085,6 +1095,14 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, ...@@ -1085,6 +1095,14 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
(U32)(ip-istart), (U32)(iend - ip)); (U32)(ip-istart), (U32)(iend - ip));
if (!nbMatches) { ip++; continue; } if (!nbMatches) { ip++; continue; }
if (ip-anchor > 0) {
/* if (literalsAlreadyCounted > 0) : avoid double counting */
size_t const newlits = (size_t)(ip-anchor) - literalsAlreadyCounted;
assert(literalsAlreadyCounted <= (size_t)(ip-anchor));
ZSTD_updateLiterals(optStatePtr, anchor + literalsAlreadyCounted, newlits);
literalsAlreadyCounted += newlits;
}
/* initialize opt[0] */ /* initialize opt[0] */
{ U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; } { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
opt[0].mlen = 0; /* means is_a_literal */ opt[0].mlen = 0; /* means is_a_literal */
...@@ -1301,14 +1319,16 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */ ...@@ -1301,14 +1319,16 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u", DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u",
anchor - istart, (unsigned)llen, (unsigned)mlen); anchor - istart, (unsigned)llen, (unsigned)mlen);
if (mlen==0) { /* only literals => must be last "sequence", actually starting a new stream of sequences */ if (mlen==0) { /* only literals */
assert(storePos == storeEnd); /* must be last sequence */ assert(storePos == storeEnd); /* must be last sequence */
ip = anchor + llen; /* last "sequence" is a bunch of literals => don't progress anchor */ ip = anchor + llen; /* last "sequence" is a bunch of literals => don't progress anchor */
continue; /* will finish */ continue;
} }
assert(anchor + llen <= iend); assert(anchor + llen <= iend);
ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen); assert(llen >= literalsAlreadyCounted);
ZSTD_updateStats(optStatePtr, llen - literalsAlreadyCounted, anchor + literalsAlreadyCounted, offCode, mlen);
literalsAlreadyCounted = 0;
ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen); ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen);
anchor += advance; anchor += advance;
ip = anchor; ip = anchor;
...@@ -1317,6 +1337,9 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */ ...@@ -1317,6 +1337,9 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
} }
} /* while (ip < ilimit) */ } /* while (ip < ilimit) */
/* update literals statistics, for next block */
assert((size_t)(iend - anchor) >= literalsAlreadyCounted);
ZSTD_updateLiterals(optStatePtr, anchor + literalsAlreadyCounted, (size_t)(iend - anchor) - literalsAlreadyCounted);
/* Return the last literals size */ /* Return the last literals size */
return (size_t)(iend - anchor); return (size_t)(iend - anchor);
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment