diff --git a/lib/common/compiler.h b/lib/common/compiler.h index cc830b2b475126eef1d875141e16d55954c2761d..e6267e90b974a66d609ea11f1c1b43cbbd4035a4 100644 --- a/lib/common/compiler.h +++ b/lib/common/compiler.h @@ -92,18 +92,18 @@ * can be disabled, by declaring NO_PREFETCH build macro */ #if defined(NO_PREFETCH) # define PREFETCH_L1(ptr) (void)(ptr) /* disabled */ -# define PREFETCH(ptr) (void)(ptr) /* disabled */ +# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */ #else # if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */ # include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ # define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0) -# define PREFETCH(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1) +# define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1) # elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) ) # define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */) -# define PREFETCH(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */) +# define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */) # else # define PREFETCH_L1(ptr) (void)(ptr) /* disabled */ -# define PREFETCH(ptr) (void)(ptr) /* disabled */ +# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */ # endif #endif /* NO_PREFETCH */ @@ -114,7 +114,7 @@ size_t const _size = (size_t)(s); \ size_t _pos; \ for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \ - PREFETCH(_ptr + _pos); \ + PREFETCH_L2(_ptr + _pos); \ } \ } diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c index af615e07763d6126383bc78a02042a12cb29fc25..e86f225af4a62da4847af328d56b6491a531dd75 100644 --- a/lib/compress/zstd_lazy.c +++ b/lib/compress/zstd_lazy.c @@ -63,12 +63,13 @@ ZSTD_updateDUBT(ZSTD_matchState_t* ms, static void ZSTD_insertDUBT1(ZSTD_matchState_t* ms, U32 current, const BYTE* inputEnd, - U32 nbCompares, U32 btLow, const ZSTD_dictMode_e dictMode) + U32 nbCompares, U32 btLow, + const ZSTD_dictMode_e dictMode) { const ZSTD_compressionParameters* const cParams = &ms->cParams; - U32* const bt = ms->chainTable; - U32 const btLog = cParams->chainLog - 1; - U32 const btMask = (1 << btLog) - 1; + U32* const bt = ms->chainTable; + U32 const btLog = cParams->chainLog - 1; + U32 const btMask = (1 << btLog) - 1; size_t commonLengthSmaller=0, commonLengthLarger=0; const BYTE* const base = ms->window.base; const BYTE* const dictBase = ms->window.dictBase; @@ -80,7 +81,7 @@ ZSTD_insertDUBT1(ZSTD_matchState_t* ms, const BYTE* match; U32* smallerPtr = bt + 2*(current&btMask); U32* largerPtr = smallerPtr + 1; - U32 matchIndex = *smallerPtr; + U32 matchIndex = *smallerPtr; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */ U32 dummy32; /* to be nullified at the end */ U32 const windowLow = ms->window.lowLimit; @@ -93,6 +94,9 @@ ZSTD_insertDUBT1(ZSTD_matchState_t* ms, U32* const nextPtr = bt + 2*(matchIndex & btMask); size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ assert(matchIndex < current); + /* note : all candidates are now supposed sorted, + * but it's still possible to have nextPtr[1] == ZSTD_DUBT_UNSORTED_MARK + * when a real index has the same value as ZSTD_DUBT_UNSORTED_MARK */ if ( (dictMode != ZSTD_extDict) || (matchIndex+matchLength >= dictLimit) /* both in current segment*/ @@ -108,7 +112,7 @@ ZSTD_insertDUBT1(ZSTD_matchState_t* ms, match = dictBase + matchIndex; matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); if (matchIndex+matchLength >= dictLimit) - match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ + match = base + matchIndex; /* preparation for next read of match[matchLength] */ } DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ", @@ -258,7 +262,7 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms, && (nbCandidates > 1) ) { DEBUGLOG(8, "ZSTD_DUBT_findBestMatch: candidate %u is unsorted", matchIndex); - *unsortedMark = previousCandidate; + *unsortedMark = previousCandidate; /* the unsortedMark becomes a reversed chain, to move up back to original position */ previousCandidate = matchIndex; matchIndex = *nextCandidate; nextCandidate = bt + 2*(matchIndex&btMask); @@ -266,11 +270,13 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms, nbCandidates --; } + /* nullify last candidate if it's still unsorted + * simplification, detrimental to compression ratio, beneficial for speed */ if ( (matchIndex > unsortLimit) && (*unsortedMark==ZSTD_DUBT_UNSORTED_MARK) ) { DEBUGLOG(7, "ZSTD_DUBT_findBestMatch: nullify last unsorted candidate %u", matchIndex); - *nextCandidate = *unsortedMark = 0; /* nullify next candidate if it's still unsorted (note : simplification, detrimental to compression ratio, beneficial for speed) */ + *nextCandidate = *unsortedMark = 0; } /* batch sort stacked candidates */ @@ -285,14 +291,14 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms, } /* find longest match */ - { size_t commonLengthSmaller=0, commonLengthLarger=0; + { size_t commonLengthSmaller = 0, commonLengthLarger = 0; const BYTE* const dictBase = ms->window.dictBase; const U32 dictLimit = ms->window.dictLimit; const BYTE* const dictEnd = dictBase + dictLimit; const BYTE* const prefixStart = base + dictLimit; U32* smallerPtr = bt + 2*(current&btMask); U32* largerPtr = bt + 2*(current&btMask) + 1; - U32 matchEndIdx = current+8+1; + U32 matchEndIdx = current + 8 + 1; U32 dummy32; /* to be nullified at the end */ size_t bestLength = 0; @@ -433,7 +439,7 @@ static size_t ZSTD_BtFindBestMatch_extDict_selectMLS ( /* ********************************* * Hash Chain ***********************************/ -#define NEXT_IN_CHAIN(d, mask) chainTable[(d) & mask] +#define NEXT_IN_CHAIN(d, mask) chainTable[(d) & (mask)] /* Update chains up to ip (excluded) Assumption : always within prefix (i.e. not within extDict) */ @@ -497,6 +503,7 @@ size_t ZSTD_HcFindBestMatch_generic ( size_t currentMl=0; if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) { const BYTE* const match = base + matchIndex; + assert(matchIndex >= dictLimit); /* ensures this is true if dictMode != ZSTD_extDict */ if (match[ml] == ip[ml]) /* potentially better */ currentMl = ZSTD_count(ip, match, iLimit); } else {