diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index 7721325d78a9d31307e7311034c89c5d689ae010..c6d72f584bcac1ec3c6acd054302c10cfb25e57e 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -412,11 +412,12 @@ size_t ZSTD_CCtxParam_setParameter(
         CCtxParams->forceWindow = (value > 0);
         return CCtxParams->forceWindow;
 
-    case ZSTD_p_forceAttachDict :
-        CCtxParams->attachDictPref = value ?
-                                     (value > 0 ? ZSTD_dictForceAttach : ZSTD_dictForceCopy) :
-                                     ZSTD_dictDefaultAttach;
+    case ZSTD_p_forceAttachDict : {
+        const ZSTD_dictAttachPref_e pref = (ZSTD_dictAttachPref_e)value;
+        CLAMPCHECK(pref, ZSTD_dictDefaultAttach, ZSTD_dictForceCopy);
+        CCtxParams->attachDictPref = pref;
         return CCtxParams->attachDictPref;
+    }
 
     case ZSTD_p_nbWorkers :
 #ifndef ZSTD_MULTITHREAD
diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h
index ec38e5d4b781075a2190355fcb239cfd3f65fb12..ffbb53a78a9743e1632389a24f3349535c9c6d74 100644
--- a/lib/compress/zstd_compress_internal.h
+++ b/lib/compress/zstd_compress_internal.h
@@ -48,12 +48,6 @@ extern "C" {
 typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e;
 typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage;
 
-typedef enum {
-    ZSTD_dictDefaultAttach = 0,
-    ZSTD_dictForceAttach = 1,
-    ZSTD_dictForceCopy = -1,
-} ZSTD_dictAttachPref_e;
-
 typedef struct ZSTD_prefixDict_s {
     const void* dict;
     size_t dictSize;
diff --git a/lib/zstd.h b/lib/zstd.h
index d7646fdaf6ad63fda7c79ba22b5f83e65a0dc585..30090ca491b781ddc45e5994f21bca53b85b7824 100644
--- a/lib/zstd.h
+++ b/lib/zstd.h
@@ -996,6 +996,38 @@ typedef enum {
      * Decoder cannot recognise automatically this format, requiring instructions. */
 } ZSTD_format_e;
 
+typedef enum {
+    /* Note: this enum and the behavior it controls are effectively internal
+     * implementation details of the compressor. They are expected to continue
+     * to evolve and should be considered only in the context of extremely
+     * advanced performance tuning.
+     *
+     * Zstd currently supports the use of a CDict in two ways:
+     *
+     * - The contents of the CDict can be copied into the working context. This
+     *   means that the compression can search both the dictionary and input
+     *   while operating on a single set of internal tables. This makes
+     *   the compression faster per-byte of input. However, the initial copy of
+     *   the CDict's tables incurs a fixed cost at the beginning of the
+     *   compression. For small compressions (< 8 KB), that copy can dominate
+     *   the cost of the compression.
+     *
+     * - The CDict's tables can be used in-place. In this model, compression is
+     *   slower per input byte, because the compressor has to search two sets of
+     *   tables. However, this model incurs no start-up cost (as long as the
+     *   working context's tables can be reused). For small inputs, this can be
+     *   faster than copying the CDict's tables.
+     *
+     * Zstd has a simple internal heuristic that selects which strategy to use
+     * at the beginning of a compression. However, if experimentation shows that
+     * Zstd is making poor choices, it is possible to override that choice with
+     * this enum.
+     */
+    ZSTD_dictDefaultAttach = 0, /* Use the default heuristic. */
+    ZSTD_dictForceAttach   = 1, /* Never copy the dictionary. */
+    ZSTD_dictForceCopy     = 2  /* Always copy the dictionary. */
+} ZSTD_dictAttachPref_e;
+
 typedef enum {
     /* compression format */
     ZSTD_p_format = 10,      /* See ZSTD_format_e enum definition.
@@ -1109,29 +1141,14 @@ typedef enum {
 
     ZSTD_p_forceMaxWindow=1100, /* Force back-reference distances to remain < windowSize,
                                  * even when referencing into Dictionary content (default:0) */
-    ZSTD_p_forceAttachDict,  /* ZSTD supports usage of a CDict in-place
-                              * (avoiding having to copy the compression tables
-                              * from the CDict into the working context). Using
-                              * a CDict in this way saves an initial setup step,
-                              * but comes at the cost of more work per byte of
-                              * input. ZSTD has a simple internal heuristic that
-                              * guesses which strategy will be faster. You can
-                              * use this flag to override that guess.
+    ZSTD_p_forceAttachDict,  /* Controls whether the contents of a CDict are
+                              * used in place, or whether they are copied into
+                              * the working context.
                               *
-                              * Note that the by-reference, in-place strategy is
-                              * only used when reusing a compression context
-                              * with compatible compression parameters. (If
-                              * incompatible / uninitialized, the working
-                              * context needs to be cleared anyways, which is
-                              * about as expensive as overwriting it with the
-                              * dictionary context, so there's no savings in
-                              * using the CDict by-ref.)
-                              *
-                              * Values greater than 0 force attaching the dict.
-                              * Values less than 0 force copying the dict.
-                              * 0 selects the default heuristic-guided behavior.
+                              * Accepts values from the ZSTD_dictAttachPref_e
+                              * enum. See the comments on that enum for an
+                              * explanation of the feature.
                               */
-
 } ZSTD_cParameter;
 
 
diff --git a/tests/fuzz/zstd_helpers.c b/tests/fuzz/zstd_helpers.c
index bf5eccff83c7025164bdf915765247ef0c4d6ec8..75d0359a65df545828d3c526842749802a887dcb 100644
--- a/tests/fuzz/zstd_helpers.c
+++ b/tests/fuzz/zstd_helpers.c
@@ -71,7 +71,7 @@ void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, uint32_t *state)
     setRand(cctx, ZSTD_p_contentSizeFlag, 0, 1, state);
     setRand(cctx, ZSTD_p_checksumFlag, 0, 1, state);
     setRand(cctx, ZSTD_p_dictIDFlag, 0, 1, state);
-    setRand(cctx, ZSTD_p_forceAttachDict, -2, 2, state);
+    setRand(cctx, ZSTD_p_forceAttachDict, 0, 2, state);
     /* Select long distance matchig parameters */
     setRand(cctx, ZSTD_p_enableLongDistanceMatching, 0, 1, state);
     setRand(cctx, ZSTD_p_ldmHashLog, ZSTD_HASHLOG_MIN, 16, state);