diff --git a/lib/Makefile b/lib/Makefile
index 9bf83f1a1e90f067d0aaecfe09a3a5cfcf085e14..e096caf718600d21391b71962d4929435a845bcc 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -31,9 +31,9 @@
 # ################################################################
 
 # Version numbers
-LIBVER_MAJOR_SCRIPT:=`sed -n '/define ZSTD_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < zstd.h`
-LIBVER_MINOR_SCRIPT:=`sed -n '/define ZSTD_VERSION_MINOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < zstd.h`
-LIBVER_PATCH_SCRIPT:=`sed -n '/define ZSTD_VERSION_RELEASE/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < zstd.h`
+LIBVER_MAJOR_SCRIPT:=`sed -n '/define ZSTD_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ./common/zstd.h`
+LIBVER_MINOR_SCRIPT:=`sed -n '/define ZSTD_VERSION_MINOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ./common/zstd.h`
+LIBVER_PATCH_SCRIPT:=`sed -n '/define ZSTD_VERSION_RELEASE/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ./common/zstd.h`
 LIBVER_SCRIPT:= $(LIBVER_MAJOR_SCRIPT).$(LIBVER_MINOR_SCRIPT).$(LIBVER_PATCH_SCRIPT)
 LIBVER_MAJOR := $(shell echo $(LIBVER_MAJOR_SCRIPT))
 LIBVER_MINOR := $(shell echo $(LIBVER_MINOR_SCRIPT))
@@ -43,7 +43,7 @@ VERSION?= $(LIBVER)
 
 DESTDIR?=
 PREFIX ?= /usr/local
-CPPFLAGS= -I.
+CPPFLAGS= -I./common
 CFLAGS ?= -O3
 CFLAGS += -std=c99 -Wall -Wextra -Wundef -Wshadow -Wcast-qual -Wcast-align -Wstrict-prototypes -Wstrict-aliasing=1
 FLAGS   = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(MOREFLAGS)
@@ -51,7 +51,10 @@ FLAGS   = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(MOREFLAGS)
 LIBDIR ?= $(PREFIX)/lib
 INCLUDEDIR=$(PREFIX)/include
 
-ZSTD_FILES := zstd_compress.c zstd_decompress.c fse.c huff0.c zbuff.c zdict.c divsufsort.c
+ZSTDCOMP_FILES := compress/zstd_compress.c compress/fse_compress.c compress/huf_compress.c compress/zbuff_compress.c 
+ZSTDDECOMP_FILES := decompress/zstd_decompress.c decompress/fse_decompress.c decompress/huf_decompress.c decompress/zbuff_decompress.c
+ZSTDDICT_FILES := dictBuilder/zdict.c dictBuilder/divsufsort.c
+ZSTD_FILES := $(ZSTDDECOMP_FILES) $(ZSTDCOMP_FILES) $(ZSTDDICT_FILES)
 ZSTD_LEGACY:= legacy/zstd_v01.c legacy/zstd_v02.c legacy/zstd_v03.c legacy/zstd_v04.c legacy/zstd_v05.c
 
 ifeq ($(ZSTD_LEGACY_SUPPORT), 0)
diff --git a/lib/README.md b/lib/README.md
index 91ef00c4094fb52f0ff803ae1fbaa12b788befa2..ef446427aecda83ff99dda58fcde6eab87a883ac 100644
--- a/lib/README.md
+++ b/lib/README.md
@@ -22,9 +22,9 @@ as their definition may change in future version of the library.
 - fse.c
 - fse.h
 - fse_static.h
-- huff0.c
-- huff0.h
-- huff0_static.h
+- huf.c
+- huf.h
+- huf_static.h
 - zstd_compress.c
 - zstd_decompress.c
 - zstd_internal.h
diff --git a/lib/bitstream.h b/lib/common/bitstream.h
similarity index 100%
rename from lib/bitstream.h
rename to lib/common/bitstream.h
diff --git a/lib/error_private.h b/lib/common/error_private.h
similarity index 100%
rename from lib/error_private.h
rename to lib/common/error_private.h
diff --git a/lib/error_public.h b/lib/common/error_public.h
similarity index 100%
rename from lib/error_public.h
rename to lib/common/error_public.h
diff --git a/lib/fse.h b/lib/common/fse.h
similarity index 100%
rename from lib/fse.h
rename to lib/common/fse.h
diff --git a/lib/fse_static.h b/lib/common/fse_static.h
similarity index 89%
rename from lib/fse_static.h
rename to lib/common/fse_static.h
index f3c3d44e0f2a3934475e4cde26784eed3391e95d..0661dbd3e6e4ce8177f241fd30ce0dfcdb4a0131 100644
--- a/lib/fse_static.h
+++ b/lib/common/fse_static.h
@@ -334,6 +334,54 @@ MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
 }
 
 
+
+#ifndef FSE_COMMONDEFS_ONLY
+
+/* **************************************************************
+*  Tuning parameters
+****************************************************************/
+/*!MEMORY_USAGE :
+*  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+*  Increasing memory usage improves compression ratio
+*  Reduced memory usage can improve speed, due to cache effect
+*  Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
+#define FSE_MAX_MEMORY_USAGE 14
+#define FSE_DEFAULT_MEMORY_USAGE 13
+
+/*!FSE_MAX_SYMBOL_VALUE :
+*  Maximum symbol value authorized.
+*  Required for proper stack allocation */
+#define FSE_MAX_SYMBOL_VALUE 255
+
+
+/* **************************************************************
+*  template functions type & suffix
+****************************************************************/
+#define FSE_FUNCTION_TYPE BYTE
+#define FSE_FUNCTION_EXTENSION
+#define FSE_DECODE_TYPE FSE_decode_t
+
+
+#endif   /* !FSE_COMMONDEFS_ONLY */
+
+
+/* ***************************************************************
+*  Constants
+*****************************************************************/
+#define FSE_MAX_TABLELOG  (FSE_MAX_MEMORY_USAGE-2)
+#define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
+#define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
+#define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
+#define FSE_MIN_TABLELOG 5
+
+#define FSE_TABLELOG_ABSOLUTE_MAX 15
+#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
+#error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
+#endif
+
+#define FSE_TABLESTEP(tableSize) ((tableSize>>1) + (tableSize>>3) + 3)
+
+
 #if defined (__cplusplus)
 }
 #endif
diff --git a/lib/huff0.h b/lib/common/huf.h
similarity index 94%
rename from lib/huff0.h
rename to lib/common/huf.h
index 9d6e11f76b9f7064c2b8d3e700f2a75ebec24728..d07080b15128a6064ab3cda696f93b6d04622dc9 100644
--- a/lib/huff0.h
+++ b/lib/common/huf.h
@@ -1,5 +1,5 @@
 /* ******************************************************************
-   Huff0 : Huffman coder, part of New Generation Entropy library
+   Huffman coder, part of New Generation Entropy library
    header file
    Copyright (C) 2013-2016, Yann Collet.
 
@@ -31,8 +31,8 @@
    You can contact the author at :
    - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
 ****************************************************************** */
-#ifndef HUFF0_H
-#define HUFF0_H
+#ifndef HUF_H
+#define HUF_H
 
 #if defined (__cplusplus)
 extern "C" {
@@ -46,7 +46,7 @@ extern "C" {
 
 
 /* ****************************************
-*  Huff0 simple functions
+*  HUF simple functions
 ******************************************/
 size_t HUF_compress(void* dst, size_t dstCapacity,
               const void* src, size_t srcSize);
@@ -63,7 +63,7 @@ HUF_compress() :
                      if HUF_isError(return), compression failed (more details using HUF_getErrorName())
 
 HUF_decompress() :
-    Decompress Huff0 data from buffer 'cSrc', of size 'cSrcSize',
+    Decompress HUF data from buffer 'cSrc', of size 'cSrcSize',
     into already allocated destination buffer 'dst', of size 'dstSize'.
     `dstSize` : must be the **exact** size of original (uncompressed) data.
     Note : in contrast with FSE, HUF_decompress can regenerate
@@ -94,4 +94,4 @@ size_t HUF_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize
 }
 #endif
 
-#endif   /* HUFF0_H */
+#endif   /* HUF_H */
diff --git a/lib/huff0_static.h b/lib/common/huf_static.h
similarity index 63%
rename from lib/huff0_static.h
rename to lib/common/huf_static.h
index 32d66d3523b7deb6bc81edb8970a2db9a2552170..d2d29a42bdbd4eee9cdf65df753fbff24253b9ed 100644
--- a/lib/huff0_static.h
+++ b/lib/common/huf_static.h
@@ -1,5 +1,5 @@
 /* ******************************************************************
-   Huff0 : Huffman codec, part of New Generation Entropy library
+   Huffman codec, part of New Generation Entropy library
    header file, for static linking only
    Copyright (C) 2013-2016, Yann Collet
 
@@ -31,8 +31,8 @@
    You can contact the author at :
    - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
 ****************************************************************** */
-#ifndef HUFF0_STATIC_H
-#define HUFF0_STATIC_H
+#ifndef HUF_STATIC_H
+#define HUF_STATIC_H
 
 #if defined (__cplusplus)
 extern "C" {
@@ -42,24 +42,26 @@ extern "C" {
 /* ****************************************
 *  Dependency
 ******************************************/
-#include "huff0.h"
+#include "huf.h"
+#include "fse.h"
+#include "bitstream.h"
 
 
 /* ****************************************
 *  Static allocation
 ******************************************/
-/* Huff0 buffer bounds */
+/* HUF buffer bounds */
 #define HUF_CTABLEBOUND 129
 #define HUF_BLOCKBOUND(size) (size + (size>>8) + 8)   /* only true if incompressible pre-filtered with fast heuristic */
 #define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size))   /* Macro version, useful for static allocation */
 
-/* static allocation of Huff0's Compression Table */
+/* static allocation of HUF's Compression Table */
 #define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \
     U32 name##hb[maxSymbolValue+1]; \
     void* name##hv = &(name##hb); \
     HUF_CElt* name = (HUF_CElt*)(name##hv)   /* no final ; */
 
-/* static allocation of Huff0's DTable */
+/* static allocation of HUF's DTable */
 #define HUF_DTABLE_SIZE(maxTableLog)   (1 + (1<<maxTableLog))
 #define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \
         unsigned short DTable[HUF_DTABLE_SIZE(maxTableLog)] = { maxTableLog }
@@ -78,7 +80,7 @@ size_t HUF_decompress4X6 (void* dst, size_t dstSize, const void* cSrc, size_t cS
 
 
 /* ****************************************
-*  Huff0 detailed API
+*  HUF detailed API
 ******************************************/
 /*!
 HUF_compress() does the following:
@@ -132,8 +134,95 @@ size_t HUF_decompress1X6_usingDTable(void* dst, size_t maxDstSize, const void* c
 size_t HUF_readCTable (HUF_CElt* CTable, unsigned maxSymbolValue, const void* src, size_t srcSize);
 
 
+/* **************************************************************
+*  Constants
+****************************************************************/
+#define HUF_ABSOLUTEMAX_TABLELOG  16   /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
+#define HUF_MAX_TABLELOG  12           /* max configured tableLog (for static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */
+#define HUF_DEFAULT_TABLELOG  HUF_MAX_TABLELOG   /* tableLog by default, when not specified */
+#define HUF_MAX_SYMBOL_VALUE 255
+#if (HUF_MAX_TABLELOG > HUF_ABSOLUTEMAX_TABLELOG)
+#  error "HUF_MAX_TABLELOG is too large !"
+#endif
+
+
+
+/*! HUF_readStats() :
+    Read compact Huffman tree, saved by HUF_writeCTable().
+    `huffWeight` is destination buffer.
+    @return : size read from `src`
+*/
+MEM_STATIC size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+                            U32* nbSymbolsPtr, U32* tableLogPtr,
+                            const void* src, size_t srcSize)
+{
+    U32 weightTotal;
+    U32 tableLog;
+    const BYTE* ip = (const BYTE*) src;
+    size_t iSize = ip[0];
+    size_t oSize;
+
+    //memset(huffWeight, 0, hwSize);   /* is not necessary, even though some analyzer complain ... */
+
+    if (iSize >= 128)  { /* special header */
+        if (iSize >= (242)) {  /* RLE */
+            static U32 l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 };
+            oSize = l[iSize-242];
+            memset(huffWeight, 1, hwSize);
+            iSize = 0;
+        }
+        else {   /* Incompressible */
+            oSize = iSize - 127;
+            iSize = ((oSize+1)/2);
+            if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+            if (oSize >= hwSize) return ERROR(corruption_detected);
+            ip += 1;
+            {   U32 n;
+                for (n=0; n<oSize; n+=2) {
+                    huffWeight[n]   = ip[n/2] >> 4;
+                    huffWeight[n+1] = ip[n/2] & 15;
+    }   }   }   }
+    else  {   /* header compressed with FSE (normal case) */
+        if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+        oSize = FSE_decompress(huffWeight, hwSize-1, ip+1, iSize);   /* max (hwSize-1) values decoded, as last one is implied */
+        if (FSE_isError(oSize)) return oSize;
+    }
+
+    /* collect weight stats */
+    memset(rankStats, 0, (HUF_ABSOLUTEMAX_TABLELOG + 1) * sizeof(U32));
+    weightTotal = 0;
+    { U32 n; for (n=0; n<oSize; n++) {
+        if (huffWeight[n] >= HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
+        rankStats[huffWeight[n]]++;
+        weightTotal += (1 << huffWeight[n]) >> 1;
+    }}
+
+    /* get last non-null symbol weight (implied, total must be 2^n) */
+    tableLog = BIT_highbit32(weightTotal) + 1;
+    if (tableLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
+    /* determine last weight */
+    {   U32 const total = 1 << tableLog;
+        U32 const rest = total - weightTotal;
+        U32 const verif = 1 << BIT_highbit32(rest);
+        U32 const lastWeight = BIT_highbit32(rest) + 1;
+        if (verif != rest) return ERROR(corruption_detected);    /* last value must be a clean power of 2 */
+        huffWeight[oSize] = (BYTE)lastWeight;
+        rankStats[lastWeight]++;
+    }
+
+    /* check tree construction validity */
+    if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected);   /* by construction : at least 2 elts of rank 1, must be even */
+
+    /* results */
+    *nbSymbolsPtr = (U32)(oSize+1);
+    *tableLogPtr = tableLog;
+    return iSize+1;
+}
+
+
+
 #if defined (__cplusplus)
 }
 #endif
 
-#endif /* HUFF0_STATIC_H */
+#endif /* HUF_STATIC_H */
diff --git a/lib/mem.h b/lib/common/mem.h
similarity index 100%
rename from lib/mem.h
rename to lib/common/mem.h
diff --git a/lib/zbuff.h b/lib/common/zbuff.h
similarity index 100%
rename from lib/zbuff.h
rename to lib/common/zbuff.h
diff --git a/lib/zbuff_static.h b/lib/common/zbuff_static.h
similarity index 91%
rename from lib/zbuff_static.h
rename to lib/common/zbuff_static.h
index 9fb522e5f596aab096e0c472541771e5d608d33b..4c52eef2271816a1bffbc275879d73f90f8f97a6 100644
--- a/lib/zbuff_static.h
+++ b/lib/common/zbuff_static.h
@@ -46,6 +46,7 @@ extern "C" {
 ***************************************/
 #include "zstd_static.h"     /* ZSTD_parameters */
 #include "zbuff.h"
+#include "zstd_internal.h"  /* MIN  */
 
 
 /* *************************************
@@ -55,6 +56,13 @@ ZSTDLIB_API size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* cctx,
                                                const void* dict, size_t dictSize,
                                                ZSTD_parameters params, U64 pledgedSrcSize);
 
+MEM_STATIC size_t ZBUFF_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+    size_t length = MIN(dstCapacity, srcSize);
+    memcpy(dst, src, length);
+    return length;
+}
+
 
 #if defined (__cplusplus)
 }
diff --git a/lib/zstd.h b/lib/common/zstd.h
similarity index 100%
rename from lib/zstd.h
rename to lib/common/zstd.h
diff --git a/lib/zstd_internal.h b/lib/common/zstd_internal.h
similarity index 100%
rename from lib/zstd_internal.h
rename to lib/common/zstd_internal.h
diff --git a/lib/zstd_static.h b/lib/common/zstd_static.h
similarity index 100%
rename from lib/zstd_static.h
rename to lib/common/zstd_static.h
diff --git a/lib/.debug/zstd_stats.h b/lib/compress/.debug/zstd_stats.h
similarity index 100%
rename from lib/.debug/zstd_stats.h
rename to lib/compress/.debug/zstd_stats.h
diff --git a/lib/fse.c b/lib/compress/fse_compress.c
similarity index 69%
rename from lib/fse.c
rename to lib/compress/fse_compress.c
index 63898ab1c0d5c66aae68a85b87cabc3f75f08b50..dad0be7942d7384fb0382280512981ff120c1253 100644
--- a/lib/fse.c
+++ b/lib/compress/fse_compress.c
@@ -1,5 +1,5 @@
 /* ******************************************************************
-   FSE : Finite State Entropy coder
+   FSE : Finite State Entropy encoder
    Copyright (C) 2013-2015, Yann Collet.
 
    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
@@ -32,35 +32,6 @@
     - Public forum : https://groups.google.com/forum/#!forum/lz4c
 ****************************************************************** */
 
-#ifndef FSE_COMMONDEFS_ONLY
-
-/* **************************************************************
-*  Tuning parameters
-****************************************************************/
-/*!MEMORY_USAGE :
-*  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
-*  Increasing memory usage improves compression ratio
-*  Reduced memory usage can improve speed, due to cache effect
-*  Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
-#define FSE_MAX_MEMORY_USAGE 14
-#define FSE_DEFAULT_MEMORY_USAGE 13
-
-/*!FSE_MAX_SYMBOL_VALUE :
-*  Maximum symbol value authorized.
-*  Required for proper stack allocation */
-#define FSE_MAX_SYMBOL_VALUE 255
-
-
-/* **************************************************************
-*  template functions type & suffix
-****************************************************************/
-#define FSE_FUNCTION_TYPE BYTE
-#define FSE_FUNCTION_EXTENSION
-#define FSE_DECODE_TYPE FSE_decode_t
-
-
-#endif   /* !FSE_COMMONDEFS_ONLY */
-
 /* **************************************************************
 *  Compiler specifics
 ****************************************************************/
@@ -89,21 +60,6 @@
 #include "fse_static.h"
 
 
-/* ***************************************************************
-*  Constants
-*****************************************************************/
-#define FSE_MAX_TABLELOG  (FSE_MAX_MEMORY_USAGE-2)
-#define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
-#define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
-#define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
-#define FSE_MIN_TABLELOG 5
-
-#define FSE_TABLELOG_ABSOLUTE_MAX 15
-#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
-#error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
-#endif
-
-
 /* **************************************************************
 *  Error Management
 ****************************************************************/
@@ -114,7 +70,6 @@
 *  Complex types
 ****************************************************************/
 typedef U32 CTable_max_t[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
-typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
 
 
 /* **************************************************************
@@ -141,8 +96,6 @@ typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
 
 
 /* Function templates */
-static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; }
-
 size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
 {
     U32 const tableSize = 1 << tableLog;
@@ -151,12 +104,15 @@ size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned
     U16* const tableU16 = ( (U16*) ptr) + 2;
     void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ;
     FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
-    U32 const step = FSE_tableStep(tableSize);
+    U32 const step = FSE_TABLESTEP(tableSize);
     U32 cumul[FSE_MAX_SYMBOL_VALUE+2];
+
     FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE]; /* memset() is not necessary, even if static analyzer complain about it */
     U32 highThreshold = tableSize-1;
 
     /* CTable header */
+
+
     tableU16[-2] = (U16) tableLog;
     tableU16[-1] = (U16) maxSymbolValue;
 
@@ -186,6 +142,7 @@ size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned
                 position = (position + step) & tableMask;
                 while (position > highThreshold) position = (position + step) & tableMask;   /* Low proba area */
         }   }
+
         if (position!=0) return ERROR(GENERIC);   /* Must have gone through all positions */
     }
 
@@ -202,6 +159,7 @@ size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned
             switch (normalizedCounter[s])
             {
             case  0: break;
+
             case -1:
             case  1:
                 symbolTT[s].deltaNbBits = (tableLog << 16) - (1<<tableLog);
@@ -221,81 +179,8 @@ size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned
 }
 
 
-FSE_DTable* FSE_createDTable (unsigned tableLog)
-{
-    if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
-    return (FSE_DTable*)malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) );
-}
-
-void FSE_freeDTable (FSE_DTable* dt)
-{
-    free(dt);
-}
-
-size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
-{
-    FSE_DTableHeader DTableH;
-    void* const tdPtr = dt+1;   /* because dt is unsigned, 32-bits aligned on 32-bits */
-    FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr);
-    const U32 tableSize = 1 << tableLog;
-    const U32 tableMask = tableSize-1;
-    const U32 step = FSE_tableStep(tableSize);
-    U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
-    U32 highThreshold = tableSize-1;
-    S16 const largeLimit= (S16)(1 << (tableLog-1));
-    U32 noLarge = 1;
-    U32 s;
-
-    /* Sanity Checks */
-    if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
-    if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
-
-    /* Init, lay down lowprob symbols */
-    DTableH.tableLog = (U16)tableLog;
-    for (s=0; s<=maxSymbolValue; s++) {
-        if (normalizedCounter[s]==-1) {
-            tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
-            symbolNext[s] = 1;
-        } else {
-            if (normalizedCounter[s] >= largeLimit) noLarge=0;
-            symbolNext[s] = normalizedCounter[s];
-    }   }
-
-    /* Spread symbols */
-    {   U32 position = 0;
-        for (s=0; s<=maxSymbolValue; s++) {
-            int i;
-            for (i=0; i<normalizedCounter[s]; i++) {
-                tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
-                position = (position + step) & tableMask;
-                while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
-        }   }
-        if (position!=0) return ERROR(GENERIC);   /* position must reach all cells once, otherwise normalizedCounter is incorrect */
-    }
-
-    /* Build Decoding table */
-    {   U32 u;
-        for (u=0; u<tableSize; u++) {
-            FSE_FUNCTION_TYPE symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol);
-            U16 nextState = symbolNext[symbol]++;
-            tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32 ((U32)nextState) );
-            tableDecode[u].newState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
-    }   }
-
-    DTableH.fastMode = (U16)noLarge;
-    memcpy(dt, &DTableH, sizeof(DTableH));
-    return 0;
-}
-
 
 #ifndef FSE_COMMONDEFS_ONLY
-/*-****************************************
-*  FSE helper functions
-******************************************/
-unsigned FSE_isError(size_t code) { return ERR_isError(code); }
-
-const char* FSE_getErrorName(size_t code) { return ERR_getErrorName(code); }
-
 
 /*-**************************************************************
 *  FSE NCount encoding-decoding
@@ -408,98 +293,6 @@ size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalized
 }
 
 
-size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
-                 const void* headerBuffer, size_t hbSize)
-{
-    const BYTE* const istart = (const BYTE*) headerBuffer;
-    const BYTE* const iend = istart + hbSize;
-    const BYTE* ip = istart;
-    int nbBits;
-    int remaining;
-    int threshold;
-    U32 bitStream;
-    int bitCount;
-    unsigned charnum = 0;
-    int previous0 = 0;
-
-    if (hbSize < 4) return ERROR(srcSize_wrong);
-    bitStream = MEM_readLE32(ip);
-    nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG;   /* extract tableLog */
-    if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
-    bitStream >>= 4;
-    bitCount = 4;
-    *tableLogPtr = nbBits;
-    remaining = (1<<nbBits)+1;
-    threshold = 1<<nbBits;
-    nbBits++;
-
-    while ((remaining>1) && (charnum<=*maxSVPtr)) {
-        if (previous0) {
-            unsigned n0 = charnum;
-            while ((bitStream & 0xFFFF) == 0xFFFF) {
-                n0+=24;
-                if (ip < iend-5) {
-                    ip+=2;
-                    bitStream = MEM_readLE32(ip) >> bitCount;
-                } else {
-                    bitStream >>= 16;
-                    bitCount+=16;
-            }   }
-            while ((bitStream & 3) == 3) {
-                n0+=3;
-                bitStream>>=2;
-                bitCount+=2;
-            }
-            n0 += bitStream & 3;
-            bitCount += 2;
-            if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
-            while (charnum < n0) normalizedCounter[charnum++] = 0;
-            if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
-                ip += bitCount>>3;
-                bitCount &= 7;
-                bitStream = MEM_readLE32(ip) >> bitCount;
-            }
-            else
-                bitStream >>= 2;
-        }
-        {   short const max = (short)((2*threshold-1)-remaining);
-            short count;
-
-            if ((bitStream & (threshold-1)) < (U32)max) {
-                count = (short)(bitStream & (threshold-1));
-                bitCount   += nbBits-1;
-            } else {
-                count = (short)(bitStream & (2*threshold-1));
-                if (count >= threshold) count -= max;
-                bitCount   += nbBits;
-            }
-
-            count--;   /* extra accuracy */
-            remaining -= FSE_abs(count);
-            normalizedCounter[charnum++] = count;
-            previous0 = !count;
-            while (remaining < threshold) {
-                nbBits--;
-                threshold >>= 1;
-            }
-
-            if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
-                ip += bitCount>>3;
-                bitCount &= 7;
-            } else {
-                bitCount -= (int)(8 * (iend - 4 - ip));
-                ip = iend - 4;
-            }
-            bitStream = MEM_readLE32(ip) >> (bitCount & 31);
-    }   }
-    if (remaining != 1) return ERROR(GENERIC);
-    *maxSVPtr = charnum-1;
-
-    ip += (bitCount+7)>>3;
-    if ((size_t)(ip-istart) > hbSize) return ERROR(srcSize_wrong);
-    return ip-istart;
-}
-
 
 /*-**************************************************************
 *  Counting histogram
@@ -519,6 +312,7 @@ static size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
     unsigned maxSymbolValue = *maxSymbolValuePtr;
     unsigned max=0;
 
+
     memset(count, 0, (maxSymbolValue+1)*sizeof(*count));
     if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
 
@@ -542,6 +336,7 @@ static size_t FSE_count_parallel(unsigned* count, unsigned* maxSymbolValuePtr,
     unsigned maxSymbolValue = *maxSymbolValuePtr;
     unsigned max=0;
 
+
     U32 Counting1[256] = { 0 };
     U32 Counting2[256] = { 0 };
     U32 Counting3[256] = { 0 };
@@ -619,6 +414,7 @@ size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr,
 }
 
 
+
 /*-**************************************************************
 *  FSE Compression Code
 ****************************************************************/
@@ -763,6 +559,7 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
     if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC);   /* Too small tableLog, compression potentially impossible */
 
     {   U32 const rtbTable[] = {     0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };
+
         U64 const scale = 62 - tableLog;
         U64 const step = ((U64)1<<62) / total;   /* <== here, one division ! */
         U64 const vStep = 1ULL<<(scale-20);
@@ -839,11 +636,13 @@ size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits)
 
     /* Build Symbol Transformation Table */
     {   const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits);
+
         for (s=0; s<=maxSymbolValue; s++) {
             symbolTT[s].deltaNbBits = deltaNbBits;
             symbolTT[s].deltaFindState = s-1;
     }   }
 
+
     return 0;
 }
 
@@ -878,6 +677,8 @@ static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize,
     const BYTE* const istart = (const BYTE*) src;
     const BYTE* const iend = istart + srcSize;
     const BYTE* ip=iend;
+
+
     BIT_CStream_t bitC;
     FSE_CState_t CState1, CState2;
 
@@ -908,6 +709,7 @@ static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize,
 
     /* 2 or 4 encoding per loop */
     for ( ; ip>istart ; ) {
+
         FSE_encodeSymbol(&bitC, &CState2, *--ip);
 
         if (sizeof(bitC.bitContainer)*8 < FSE_MAX_TABLELOG*2+7 )   /* this test must be static */
@@ -998,162 +800,4 @@ size_t FSE_compress (void* dst, size_t dstSize, const void* src, size_t srcSize)
 }
 
 
-/*-*******************************************************
-*  Decompression (Byte symbols)
-*********************************************************/
-size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
-{
-    void* ptr = dt;
-    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
-    void* dPtr = dt + 1;
-    FSE_decode_t* const cell = (FSE_decode_t*)dPtr;
-
-    DTableH->tableLog = 0;
-    DTableH->fastMode = 0;
-
-    cell->newState = 0;
-    cell->symbol = symbolValue;
-    cell->nbBits = 0;
-
-    return 0;
-}
-
-
-size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
-{
-    void* ptr = dt;
-    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
-    void* dPtr = dt + 1;
-    FSE_decode_t* const dinfo = (FSE_decode_t*)dPtr;
-    const unsigned tableSize = 1 << nbBits;
-    const unsigned tableMask = tableSize - 1;
-    const unsigned maxSymbolValue = tableMask;
-    unsigned s;
-
-    /* Sanity checks */
-    if (nbBits < 1) return ERROR(GENERIC);         /* min size */
-
-    /* Build Decoding Table */
-    DTableH->tableLog = (U16)nbBits;
-    DTableH->fastMode = 1;
-    for (s=0; s<=maxSymbolValue; s++) {
-        dinfo[s].newState = 0;
-        dinfo[s].symbol = (BYTE)s;
-        dinfo[s].nbBits = (BYTE)nbBits;
-    }
-
-    return 0;
-}
-
-FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
-          void* dst, size_t maxDstSize,
-    const void* cSrc, size_t cSrcSize,
-    const FSE_DTable* dt, const unsigned fast)
-{
-    BYTE* const ostart = (BYTE*) dst;
-    BYTE* op = ostart;
-    BYTE* const omax = op + maxDstSize;
-    BYTE* const olimit = omax-3;
-
-    BIT_DStream_t bitD;
-    FSE_DState_t state1;
-    FSE_DState_t state2;
-    size_t errorCode;
-
-    /* Init */
-    errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize);   /* replaced last arg by maxCompressed Size */
-    if (FSE_isError(errorCode)) return errorCode;
-
-    FSE_initDState(&state1, &bitD, dt);
-    FSE_initDState(&state2, &bitD, dt);
-
-#define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
-
-    /* 4 symbols per loop */
-    for ( ; (BIT_reloadDStream(&bitD)==BIT_DStream_unfinished) && (op<olimit) ; op+=4) {
-        op[0] = FSE_GETSYMBOL(&state1);
-
-        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
-            BIT_reloadDStream(&bitD);
-
-        op[1] = FSE_GETSYMBOL(&state2);
-
-        if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
-            { if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { op+=2; break; } }
-
-        op[2] = FSE_GETSYMBOL(&state1);
-
-        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
-            BIT_reloadDStream(&bitD);
-
-        op[3] = FSE_GETSYMBOL(&state2);
-    }
-
-    /* tail */
-    /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */
-    while (1) {
-        if (op>(omax-2)) return ERROR(dstSize_tooSmall);
-
-        *op++ = FSE_GETSYMBOL(&state1);
-
-        if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) {
-            *op++ = FSE_GETSYMBOL(&state2);
-            break;
-        }
-
-        if (op>(omax-2)) return ERROR(dstSize_tooSmall);
-
-        *op++ = FSE_GETSYMBOL(&state2);
-
-        if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) {
-            *op++ = FSE_GETSYMBOL(&state1);
-            break;
-    }   }
-
-    return op-ostart;
-}
-
-
-size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
-                            const void* cSrc, size_t cSrcSize,
-                            const FSE_DTable* dt)
-{
-    const void* ptr = dt;
-    const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
-    const U32 fastMode = DTableH->fastMode;
-
-    /* select fast mode (static) */
-    if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
-    return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
-}
-
-
-size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
-{
-    const BYTE* const istart = (const BYTE*)cSrc;
-    const BYTE* ip = istart;
-    short counting[FSE_MAX_SYMBOL_VALUE+1];
-    DTable_max_t dt;   /* Static analyzer seems unable to understand this table will be properly initialized later */
-    unsigned tableLog;
-    unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
-    size_t errorCode;
-
-    if (cSrcSize<2) return ERROR(srcSize_wrong);   /* too small input size */
-
-    /* normal FSE decoding mode */
-    errorCode = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
-    if (FSE_isError(errorCode)) return errorCode;
-    if (errorCode >= cSrcSize) return ERROR(srcSize_wrong);   /* too small input size */
-    ip += errorCode;
-    cSrcSize -= errorCode;
-
-    errorCode = FSE_buildDTable (dt, counting, maxSymbolValue, tableLog);
-    if (FSE_isError(errorCode)) return errorCode;
-
-    /* always return, even if it is an error code */
-    return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt);
-}
-
-
-
 #endif   /* FSE_COMMONDEFS_ONLY */
diff --git a/lib/compress/huf_compress.c b/lib/compress/huf_compress.c
new file mode 100644
index 0000000000000000000000000000000000000000..d126305c6f4f817a9e6f048e2d7af7d50a5931d1
--- /dev/null
+++ b/lib/compress/huf_compress.c
@@ -0,0 +1,560 @@
+/* ******************************************************************
+   Huffman encoder, part of New Generation Entropy library
+   Copyright (C) 2013-2016, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
+    - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+/* **************************************************************
+*  Compiler specifics
+****************************************************************/
+#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+/* inline is defined */
+#elif defined(_MSC_VER)
+#  define inline __inline
+#else
+#  define inline /* disable inline */
+#endif
+
+
+#ifdef _MSC_VER    /* Visual Studio */
+#  define FORCE_INLINE static __forceinline
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#else
+#  ifdef __GNUC__
+#    define FORCE_INLINE static inline __attribute__((always_inline))
+#  else
+#    define FORCE_INLINE static inline
+#  endif
+#endif
+
+
+/* **************************************************************
+*  Includes
+****************************************************************/
+#include <stdlib.h>     /* malloc, free, qsort */
+#include <string.h>     /* memcpy, memset */
+#include <stdio.h>      /* printf (debug) */
+#include "huf_static.h"
+#include "bitstream.h"
+#include "fse.h"        /* header compression */
+
+
+/* **************************************************************
+*  Error Management
+****************************************************************/
+#define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
+
+
+/* *******************************************************
+*  HUF : Huffman block compression
+*********************************************************/
+struct HUF_CElt_s {
+  U16  val;
+  BYTE nbBits;
+};   /* typedef'd to HUF_CElt within huf_static.h */
+
+typedef struct nodeElt_s {
+    U32 count;
+    U16 parent;
+    BYTE byte;
+    BYTE nbBits;
+} nodeElt;
+
+/*! HUF_writeCTable() :
+    `CTable` : huffman tree to save, using huf representation.
+    @return : size of saved CTable */
+size_t HUF_writeCTable (void* dst, size_t maxDstSize,
+                        const HUF_CElt* CTable, U32 maxSymbolValue, U32 huffLog)
+{
+    BYTE bitsToWeight[HUF_MAX_TABLELOG + 1];
+    BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1];
+    U32 n;
+    BYTE* op = (BYTE*)dst;
+    size_t size;
+
+     /* check conditions */
+    if (maxSymbolValue > HUF_MAX_SYMBOL_VALUE + 1)
+        return ERROR(GENERIC);
+
+    /* convert to weight */
+    bitsToWeight[0] = 0;
+    for (n=1; n<=huffLog; n++)
+        bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
+    for (n=0; n<maxSymbolValue; n++)
+        huffWeight[n] = bitsToWeight[CTable[n].nbBits];
+
+    size = FSE_compress(op+1, maxDstSize-1, huffWeight, maxSymbolValue);   /* don't need last symbol stat : implied */
+    if (HUF_isError(size)) return size;
+    if (size >= 128) return ERROR(GENERIC);   /* should never happen, since maxSymbolValue <= 255 */
+    if ((size <= 1) || (size >= maxSymbolValue/2)) {
+        if (size==1) {  /* RLE */
+            /* only possible case : serie of 1 (because there are at least 2) */
+            /* can only be 2^n or (2^n-1), otherwise not an huffman tree */
+            BYTE code;
+            switch(maxSymbolValue)
+            {
+            case 1: code = 0; break;
+            case 2: code = 1; break;
+            case 3: code = 2; break;
+            case 4: code = 3; break;
+            case 7: code = 4; break;
+            case 8: code = 5; break;
+            case 15: code = 6; break;
+            case 16: code = 7; break;
+            case 31: code = 8; break;
+            case 32: code = 9; break;
+            case 63: code = 10; break;
+            case 64: code = 11; break;
+            case 127: code = 12; break;
+            case 128: code = 13; break;
+            default : return ERROR(corruption_detected);
+            }
+            op[0] = (BYTE)(255-13 + code);
+            return 1;
+        }
+         /* Not compressible */
+        if (maxSymbolValue > (241-128)) return ERROR(GENERIC);   /* not implemented (not possible with current format) */
+        if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall);   /* not enough space within dst buffer */
+        op[0] = (BYTE)(128 /*special case*/ + 0 /* Not Compressible */ + (maxSymbolValue-1));
+        huffWeight[maxSymbolValue] = 0;   /* to be sure it doesn't cause issue in final combination */
+        for (n=0; n<maxSymbolValue; n+=2)
+            op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]);
+        return ((maxSymbolValue+1)/2) + 1;
+    }
+
+    /* normal header case */
+    op[0] = (BYTE)size;
+    return size+1;
+}
+
+
+
+size_t HUF_readCTable (HUF_CElt* CTable, U32 maxSymbolValue, const void* src, size_t srcSize)
+{
+    BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1];
+    U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];   /* large enough for values from 0 to 16 */
+    U32 tableLog = 0;
+    size_t readSize;
+    U32 nbSymbols = 0;
+    //memset(huffWeight, 0, sizeof(huffWeight));   /* is not necessary, even though some analyzer complain ... */
+
+    /* get symbol weights */
+    readSize = HUF_readStats(huffWeight, HUF_MAX_SYMBOL_VALUE+1, rankVal, &nbSymbols, &tableLog, src, srcSize);
+    if (HUF_isError(readSize)) return readSize;
+
+    /* check result */
+    if (tableLog > HUF_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
+    if (nbSymbols > maxSymbolValue+1) return ERROR(maxSymbolValue_tooSmall);
+
+    /* Prepare base value per rank */
+    {   U32 n, nextRankStart = 0;
+        for (n=1; n<=tableLog; n++) {
+            U32 current = nextRankStart;
+            nextRankStart += (rankVal[n] << (n-1));
+            rankVal[n] = current;
+    }   }
+
+    /* fill nbBits */
+    { U32 n; for (n=0; n<nbSymbols; n++) {
+        const U32 w = huffWeight[n];
+        CTable[n].nbBits = (BYTE)(tableLog + 1 - w);
+    }}
+
+    /* fill val */
+    {   U16 nbPerRank[HUF_MAX_TABLELOG+1] = {0};
+        U16 valPerRank[HUF_MAX_TABLELOG+1] = {0};
+        { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[CTable[n].nbBits]++; }
+        /* determine stating value per rank */
+        {   U16 min = 0;
+            U32 n; for (n=HUF_MAX_TABLELOG; n>0; n--) {
+                valPerRank[n] = min;      /* get starting value within each rank */
+                min += nbPerRank[n];
+                min >>= 1;
+        }   }
+        /* assign value within rank, symbol order */
+        { U32 n; for (n=0; n<=maxSymbolValue; n++) CTable[n].val = valPerRank[CTable[n].nbBits]++; }
+    }
+
+    return readSize;
+}
+
+
+static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
+{
+    const U32 largestBits = huffNode[lastNonNull].nbBits;
+    if (largestBits <= maxNbBits) return largestBits;   /* early exit : no elt > maxNbBits */
+
+    /* there are several too large elements (at least >= 2) */
+    {   int totalCost = 0;
+        const U32 baseCost = 1 << (largestBits - maxNbBits);
+        U32 n = lastNonNull;
+
+        while (huffNode[n].nbBits > maxNbBits) {
+            totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
+            huffNode[n].nbBits = (BYTE)maxNbBits;
+            n --;
+        }  /* n stops at huffNode[n].nbBits <= maxNbBits */
+        while (huffNode[n].nbBits == maxNbBits) n--;   /* n end at index of smallest symbol using < maxNbBits */
+
+        /* renorm totalCost */
+        totalCost >>= (largestBits - maxNbBits);  /* note : totalCost is necessarily a multiple of baseCost */
+
+        /* repay normalized cost */
+        {   U32 const noSymbol = 0xF0F0F0F0;
+            U32 rankLast[HUF_MAX_TABLELOG+1];
+            int pos;
+
+            /* Get pos of last (smallest) symbol per rank */
+            memset(rankLast, 0xF0, sizeof(rankLast));
+            {   U32 currentNbBits = maxNbBits;
+                for (pos=n ; pos >= 0; pos--) {
+                    if (huffNode[pos].nbBits >= currentNbBits) continue;
+                    currentNbBits = huffNode[pos].nbBits;   /* < maxNbBits */
+                    rankLast[maxNbBits-currentNbBits] = pos;
+            }   }
+
+            while (totalCost > 0) {
+                U32 nBitsToDecrease = BIT_highbit32(totalCost) + 1;
+                for ( ; nBitsToDecrease > 1; nBitsToDecrease--) {
+                    U32 highPos = rankLast[nBitsToDecrease];
+                    U32 lowPos = rankLast[nBitsToDecrease-1];
+                    if (highPos == noSymbol) continue;
+                    if (lowPos == noSymbol) break;
+                    {   U32 const highTotal = huffNode[highPos].count;
+                        U32 const lowTotal = 2 * huffNode[lowPos].count;
+                        if (highTotal <= lowTotal) break;
+                }   }
+                /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */
+                while ((nBitsToDecrease<=HUF_MAX_TABLELOG) && (rankLast[nBitsToDecrease] == noSymbol))  /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */
+                    nBitsToDecrease ++;
+                totalCost -= 1 << (nBitsToDecrease-1);
+                if (rankLast[nBitsToDecrease-1] == noSymbol)
+                    rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease];   /* this rank is no longer empty */
+                huffNode[rankLast[nBitsToDecrease]].nbBits ++;
+                if (rankLast[nBitsToDecrease] == 0)    /* special case, reached largest symbol */
+                    rankLast[nBitsToDecrease] = noSymbol;
+                else {
+                    rankLast[nBitsToDecrease]--;
+                    if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease)
+                        rankLast[nBitsToDecrease] = noSymbol;   /* this rank is now empty */
+            }   }   /* while (totalCost > 0) */
+
+            while (totalCost < 0) {  /* Sometimes, cost correction overshoot */
+                if (rankLast[1] == noSymbol) {  /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */
+                    while (huffNode[n].nbBits == maxNbBits) n--;
+                    huffNode[n+1].nbBits--;
+                    rankLast[1] = n+1;
+                    totalCost++;
+                    continue;
+                }
+                huffNode[ rankLast[1] + 1 ].nbBits--;
+                rankLast[1]++;
+                totalCost ++;
+    }   }   }   /* there are several too large elements (at least >= 2) */
+
+    return maxNbBits;
+}
+
+
+typedef struct {
+    U32 base;
+    U32 current;
+} rankPos;
+
+static void HUF_sort(nodeElt* huffNode, const U32* count, U32 maxSymbolValue)
+{
+    rankPos rank[32];
+    U32 n;
+
+    memset(rank, 0, sizeof(rank));
+    for (n=0; n<=maxSymbolValue; n++) {
+        U32 r = BIT_highbit32(count[n] + 1);
+        rank[r].base ++;
+    }
+    for (n=30; n>0; n--) rank[n-1].base += rank[n].base;
+    for (n=0; n<32; n++) rank[n].current = rank[n].base;
+    for (n=0; n<=maxSymbolValue; n++) {
+        U32 const c = count[n];
+        U32 const r = BIT_highbit32(c+1) + 1;
+        U32 pos = rank[r].current++;
+        while ((pos > rank[r].base) && (c > huffNode[pos-1].count)) huffNode[pos]=huffNode[pos-1], pos--;
+        huffNode[pos].count = c;
+        huffNode[pos].byte  = (BYTE)n;
+    }
+}
+
+
+#define STARTNODE (HUF_MAX_SYMBOL_VALUE+1)
+size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits)
+{
+    nodeElt huffNode0[2*HUF_MAX_SYMBOL_VALUE+1 +1];
+    nodeElt* huffNode = huffNode0 + 1;
+    U32 n, nonNullRank;
+    int lowS, lowN;
+    U16 nodeNb = STARTNODE;
+    U32 nodeRoot;
+
+    /* safety checks */
+    if (maxNbBits == 0) maxNbBits = HUF_DEFAULT_TABLELOG;
+    if (maxSymbolValue > HUF_MAX_SYMBOL_VALUE) return ERROR(GENERIC);
+    memset(huffNode0, 0, sizeof(huffNode0));
+
+    /* sort, decreasing order */
+    HUF_sort(huffNode, count, maxSymbolValue);
+
+    /* init for parents */
+    nonNullRank = maxSymbolValue;
+    while(huffNode[nonNullRank].count == 0) nonNullRank--;
+    lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb;
+    huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count;
+    huffNode[lowS].parent = huffNode[lowS-1].parent = nodeNb;
+    nodeNb++; lowS-=2;
+    for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30);
+    huffNode0[0].count = (U32)(1U<<31);
+
+    /* create parents */
+    while (nodeNb <= nodeRoot) {
+        U32 n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
+        U32 n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
+        huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count;
+        huffNode[n1].parent = huffNode[n2].parent = nodeNb;
+        nodeNb++;
+    }
+
+    /* distribute weights (unlimited tree height) */
+    huffNode[nodeRoot].nbBits = 0;
+    for (n=nodeRoot-1; n>=STARTNODE; n--)
+        huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
+    for (n=0; n<=nonNullRank; n++)
+        huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
+
+    /* enforce maxTableLog */
+    maxNbBits = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits);
+
+    /* fill result into tree (val, nbBits) */
+    {   U16 nbPerRank[HUF_MAX_TABLELOG+1] = {0};
+        U16 valPerRank[HUF_MAX_TABLELOG+1] = {0};
+        if (maxNbBits > HUF_MAX_TABLELOG) return ERROR(GENERIC);   /* check fit into table */
+        for (n=0; n<=nonNullRank; n++)
+            nbPerRank[huffNode[n].nbBits]++;
+        /* determine stating value per rank */
+        {   U16 min = 0;
+            for (n=maxNbBits; n>0; n--) {
+                valPerRank[n] = min;      /* get starting value within each rank */
+                min += nbPerRank[n];
+                min >>= 1;
+        }   }
+        for (n=0; n<=maxSymbolValue; n++)
+            tree[huffNode[n].byte].nbBits = huffNode[n].nbBits;   /* push nbBits per symbol, symbol order */
+        for (n=0; n<=maxSymbolValue; n++)
+            tree[n].val = valPerRank[tree[n].nbBits]++;   /* assign value within rank, symbol order */
+    }
+
+    return maxNbBits;
+}
+
+static void HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
+{
+    BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
+}
+
+size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
+
+#define HUF_FLUSHBITS(s)  (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s))
+
+#define HUF_FLUSHBITS_1(stream) \
+    if (sizeof((stream)->bitContainer)*8 < HUF_MAX_TABLELOG*2+7) HUF_FLUSHBITS(stream)
+
+#define HUF_FLUSHBITS_2(stream) \
+    if (sizeof((stream)->bitContainer)*8 < HUF_MAX_TABLELOG*4+7) HUF_FLUSHBITS(stream)
+
+size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
+{
+    const BYTE* ip = (const BYTE*) src;
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = ostart + dstSize;
+    BYTE* op = ostart;
+    size_t n;
+    const unsigned fast = (dstSize >= HUF_BLOCKBOUND(srcSize));
+    BIT_CStream_t bitC;
+
+    /* init */
+    if (dstSize < 8) return 0;   /* not enough space to compress */
+    { size_t const errorCode = BIT_initCStream(&bitC, op, oend-op);
+      if (HUF_isError(errorCode)) return 0; }
+
+    n = srcSize & ~3;  /* join to mod 4 */
+    switch (srcSize & 3)
+    {
+        case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable);
+                 HUF_FLUSHBITS_2(&bitC);
+        case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable);
+                 HUF_FLUSHBITS_1(&bitC);
+        case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable);
+                 HUF_FLUSHBITS(&bitC);
+        case 0 :
+        default: ;
+    }
+
+    for (; n>0; n-=4) {  /* note : n&3==0 at this stage */
+        HUF_encodeSymbol(&bitC, ip[n- 1], CTable);
+        HUF_FLUSHBITS_1(&bitC);
+        HUF_encodeSymbol(&bitC, ip[n- 2], CTable);
+        HUF_FLUSHBITS_2(&bitC);
+        HUF_encodeSymbol(&bitC, ip[n- 3], CTable);
+        HUF_FLUSHBITS_1(&bitC);
+        HUF_encodeSymbol(&bitC, ip[n- 4], CTable);
+        HUF_FLUSHBITS(&bitC);
+    }
+
+    return BIT_closeCStream(&bitC);
+}
+
+
+size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
+{
+    size_t segmentSize = (srcSize+3)/4;   /* first 3 segments */
+    const BYTE* ip = (const BYTE*) src;
+    const BYTE* const iend = ip + srcSize;
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* const oend = ostart + dstSize;
+    BYTE* op = ostart;
+    size_t errorCode;
+
+    if (dstSize < 6 + 1 + 1 + 1 + 8) return 0;   /* minimum space to compress successfully */
+    if (srcSize < 12) return 0;   /* no saving possible : too small input */
+    op += 6;   /* jumpTable */
+
+    errorCode = HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable);
+    if (HUF_isError(errorCode)) return errorCode;
+    if (errorCode==0) return 0;
+    MEM_writeLE16(ostart, (U16)errorCode);
+
+    ip += segmentSize;
+    op += errorCode;
+    errorCode = HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable);
+    if (HUF_isError(errorCode)) return errorCode;
+    if (errorCode==0) return 0;
+    MEM_writeLE16(ostart+2, (U16)errorCode);
+
+    ip += segmentSize;
+    op += errorCode;
+    errorCode = HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable);
+    if (HUF_isError(errorCode)) return errorCode;
+    if (errorCode==0) return 0;
+    MEM_writeLE16(ostart+4, (U16)errorCode);
+
+    ip += segmentSize;
+    op += errorCode;
+    errorCode = HUF_compress1X_usingCTable(op, oend-op, ip, iend-ip, CTable);
+    if (HUF_isError(errorCode)) return errorCode;
+    if (errorCode==0) return 0;
+
+    op += errorCode;
+    return op-ostart;
+}
+
+
+static size_t HUF_compress_internal (
+                void* dst, size_t dstSize,
+                const void* src, size_t srcSize,
+                unsigned maxSymbolValue, unsigned huffLog,
+                unsigned singleStream)
+{
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = ostart + dstSize;
+    BYTE* op = ostart;
+
+    U32 count[HUF_MAX_SYMBOL_VALUE+1];
+    HUF_CElt CTable[HUF_MAX_SYMBOL_VALUE+1];
+    size_t errorCode;
+
+    /* checks & inits */
+    if (srcSize < 1) return 0;  /* Uncompressed - note : 1 means rle, so first byte must be correct */
+    if (dstSize < 1) return 0;  /* not compressible within dst budget */
+    if (srcSize > 128 * 1024) return ERROR(srcSize_wrong);   /* current block size limit */
+    if (huffLog > HUF_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
+    if (!maxSymbolValue) maxSymbolValue = HUF_MAX_SYMBOL_VALUE;
+    if (!huffLog) huffLog = HUF_DEFAULT_TABLELOG;
+
+    /* Scan input and build symbol stats */
+    errorCode = FSE_count (count, &maxSymbolValue, (const BYTE*)src, srcSize);
+    if (HUF_isError(errorCode)) return errorCode;
+    if (errorCode == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; }
+    if (errorCode <= (srcSize >> 7)+1) return 0;   /* Heuristic : not compressible enough */
+
+    /* Build Huffman Tree */
+    errorCode = HUF_buildCTable (CTable, count, maxSymbolValue, huffLog);
+    if (HUF_isError(errorCode)) return errorCode;
+    huffLog = (U32)errorCode;
+
+    /* Write table description header */
+    errorCode = HUF_writeCTable (op, dstSize, CTable, maxSymbolValue, huffLog);
+    if (HUF_isError(errorCode)) return errorCode;
+    if (errorCode + 12 >= srcSize) return 0;   /* not useful to try compression */
+    op += errorCode;
+
+    /* Compress */
+    if (singleStream)
+        errorCode = HUF_compress1X_usingCTable(op, oend - op, src, srcSize, CTable);   /* single segment */
+    else
+        errorCode = HUF_compress4X_usingCTable(op, oend - op, src, srcSize, CTable);
+    if (HUF_isError(errorCode)) return errorCode;
+    if (errorCode==0) return 0;
+    op += errorCode;
+
+    /* check compressibility */
+    if ((size_t)(op-ostart) >= srcSize-1)
+        return 0;
+
+    return op-ostart;
+}
+
+
+size_t HUF_compress1X (void* dst, size_t dstSize,
+                 const void* src, size_t srcSize,
+                 unsigned maxSymbolValue, unsigned huffLog)
+{
+    return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1);
+}
+
+size_t HUF_compress2 (void* dst, size_t dstSize,
+                const void* src, size_t srcSize,
+                unsigned maxSymbolValue, unsigned huffLog)
+{
+    return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0);
+}
+
+
+size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    return HUF_compress2(dst, maxDstSize, src, (U32)srcSize, 255, HUF_DEFAULT_TABLELOG);
+}
diff --git a/lib/zbuff.c b/lib/compress/zbuff_compress.c
similarity index 57%
rename from lib/zbuff.c
rename to lib/compress/zbuff_compress.c
index eab0c482bb5f9e4425a7c03db26cbc278658fbd7..260aca0870ef9ce77ab066002f6ff9f668d7f157 100644
--- a/lib/zbuff.c
+++ b/lib/compress/zbuff_compress.c
@@ -169,13 +169,6 @@ size_t ZBUFF_compressInit(ZBUFF_CCtx* zbc, int compressionLevel)
 
 /* *** Compression *** */
 
-static size_t ZBUFF_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
-{
-    size_t length = MIN(dstCapacity, srcSize);
-    memcpy(dst, src, length);
-    return length;
-}
-
 static size_t ZBUFF_compressContinue_generic(ZBUFF_CCtx* zbc,
                               void* dst, size_t* dstCapacityPtr,
                         const void* src, size_t* srcSizePtr,
@@ -290,215 +283,9 @@ size_t ZBUFF_compressEnd(ZBUFF_CCtx* zbc, void* dst, size_t* dstCapacityPtr)
 }
 
 
-/*-***************************************************************************
-*  Streaming decompression howto
-*
-*  A ZBUFF_DCtx object is required to track streaming operations.
-*  Use ZBUFF_createDCtx() and ZBUFF_freeDCtx() to create/release resources.
-*  Use ZBUFF_decompressInit() to start a new decompression operation,
-*   or ZBUFF_decompressInitDictionary() if decompression requires a dictionary.
-*  Note that ZBUFF_DCtx objects can be re-init multiple times.
-*
-*  Use ZBUFF_decompressContinue() repetitively to consume your input.
-*  *srcSizePtr and *dstCapacityPtr can be any size.
-*  The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr.
-*  Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again.
-*  The content of @dst will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters, or change @dst.
-*  @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to help latency),
-*            or 0 when a frame is completely decoded,
-*            or an error code, which can be tested using ZBUFF_isError().
-*
-*  Hint : recommended buffer sizes (not compulsory) : ZBUFF_recommendedDInSize() and ZBUFF_recommendedDOutSize()
-*  output : ZBUFF_recommendedDOutSize==128 KB block size is the internal unit, it ensures it's always possible to write a full block when decoded.
-*  input  : ZBUFF_recommendedDInSize == 128KB + 3;
-*           just follow indications from ZBUFF_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 .
-* *******************************************************************************/
-
-typedef enum { ZBUFFds_init, ZBUFFds_readHeader,
-               ZBUFFds_read, ZBUFFds_load, ZBUFFds_flush } ZBUFF_dStage;
-
-/* *** Resource management *** */
-struct ZBUFF_DCtx_s {
-    ZSTD_DCtx* zd;
-    ZSTD_frameParams fParams;
-    size_t blockSize;
-    char*  inBuff;
-    size_t inBuffSize;
-    size_t inPos;
-    char*  outBuff;
-    size_t outBuffSize;
-    size_t outStart;
-    size_t outEnd;
-    ZBUFF_dStage stage;
-};   /* typedef'd to ZBUFF_DCtx within "zstd_buffered.h" */
-
-
-ZBUFF_DCtx* ZBUFF_createDCtx(void)
-{
-    ZBUFF_DCtx* zbd = (ZBUFF_DCtx*)malloc(sizeof(ZBUFF_DCtx));
-    if (zbd==NULL) return NULL;
-    memset(zbd, 0, sizeof(*zbd));
-    zbd->zd = ZSTD_createDCtx();
-    zbd->stage = ZBUFFds_init;
-    return zbd;
-}
-
-size_t ZBUFF_freeDCtx(ZBUFF_DCtx* zbd)
-{
-    if (zbd==NULL) return 0;   /* support free on null */
-    ZSTD_freeDCtx(zbd->zd);
-    free(zbd->inBuff);
-    free(zbd->outBuff);
-    free(zbd);
-    return 0;
-}
-
-
-/* *** Initialization *** */
-
-size_t ZBUFF_decompressInitDictionary(ZBUFF_DCtx* zbd, const void* dict, size_t dictSize)
-{
-    zbd->stage = ZBUFFds_readHeader;
-    zbd->inPos = zbd->outStart = zbd->outEnd = 0;
-    return ZSTD_decompressBegin_usingDict(zbd->zd, dict, dictSize);
-}
-
-size_t ZBUFF_decompressInit(ZBUFF_DCtx* zbd)
-{
-    return ZBUFF_decompressInitDictionary(zbd, NULL, 0);
-}
-
-
-/* *** Decompression *** */
-
-size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbd,
-                                void* dst, size_t* dstCapacityPtr,
-                          const void* src, size_t* srcSizePtr)
-{
-    const char* const istart = (const char*)src;
-    const char* const iend = istart + *srcSizePtr;
-    const char* ip = istart;
-    char* const ostart = (char*)dst;
-    char* const oend = ostart + *dstCapacityPtr;
-    char* op = ostart;
-    U32 notDone = 1;
-
-    while (notDone) {
-        switch(zbd->stage)
-        {
-        case ZBUFFds_init :
-            return ERROR(init_missing);
-
-        case ZBUFFds_readHeader :
-            /* read header from src */
-            {   size_t const headerSize = ZSTD_getFrameParams(&(zbd->fParams), src, *srcSizePtr);
-                if (ZSTD_isError(headerSize)) return headerSize;
-                if (headerSize) {
-                    /* not enough input to decode header : needs headerSize > *srcSizePtr */
-                    *dstCapacityPtr = 0;
-                    *srcSizePtr = 0;
-                    return headerSize;
-            }   }
-
-            /* Frame header instruct buffer sizes */
-            {   size_t const blockSize = MIN(1 << zbd->fParams.windowLog, ZSTD_BLOCKSIZE_MAX);
-                zbd->blockSize = blockSize;
-                if (zbd->inBuffSize < blockSize) {
-                    free(zbd->inBuff);
-                    zbd->inBuffSize = blockSize;
-                    zbd->inBuff = (char*)malloc(blockSize);
-                    if (zbd->inBuff == NULL) return ERROR(memory_allocation);
-                }
-                {   size_t const neededOutSize = ((size_t)1 << zbd->fParams.windowLog) + blockSize;
-                    if (zbd->outBuffSize < neededOutSize) {
-                        free(zbd->outBuff);
-                        zbd->outBuffSize = neededOutSize;
-                        zbd->outBuff = (char*)malloc(neededOutSize);
-                        if (zbd->outBuff == NULL) return ERROR(memory_allocation);
-            }   }   }
-            zbd->stage = ZBUFFds_read;
-
-        case ZBUFFds_read:
-            {   size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zbd->zd);
-                if (neededInSize==0) {  /* end of frame */
-                    zbd->stage = ZBUFFds_init;
-                    notDone = 0;
-                    break;
-                }
-                if ((size_t)(iend-ip) >= neededInSize) {
-                    /* directly decode from src */
-                    size_t const decodedSize = ZSTD_decompressContinue(zbd->zd,
-                        zbd->outBuff + zbd->outStart, zbd->outBuffSize - zbd->outStart,
-                        ip, neededInSize);
-                    if (ZSTD_isError(decodedSize)) return decodedSize;
-                    ip += neededInSize;
-                    if (!decodedSize) break;   /* this was just a header */
-                    zbd->outEnd = zbd->outStart +  decodedSize;
-                    zbd->stage = ZBUFFds_flush;
-                    break;
-                }
-                if (ip==iend) { notDone = 0; break; }   /* no more input */
-                zbd->stage = ZBUFFds_load;
-            }
-
-        case ZBUFFds_load:
-            {   size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zbd->zd);
-                size_t const toLoad = neededInSize - zbd->inPos;   /* should always be <= remaining space within inBuff */
-                size_t loadedSize;
-                if (toLoad > zbd->inBuffSize - zbd->inPos) return ERROR(corruption_detected);   /* should never happen */
-                loadedSize = ZBUFF_limitCopy(zbd->inBuff + zbd->inPos, toLoad, ip, iend-ip);
-                ip += loadedSize;
-                zbd->inPos += loadedSize;
-                if (loadedSize < toLoad) { notDone = 0; break; }   /* not enough input, wait for more */
-                /* decode loaded input */
-                {   size_t const decodedSize = ZSTD_decompressContinue(zbd->zd,
-                        zbd->outBuff + zbd->outStart, zbd->outBuffSize - zbd->outStart,
-                        zbd->inBuff, neededInSize);
-                    if (ZSTD_isError(decodedSize)) return decodedSize;
-                    zbd->inPos = 0;   /* input is consumed */
-                    if (!decodedSize) { zbd->stage = ZBUFFds_read; break; }   /* this was just a header */
-                    zbd->outEnd = zbd->outStart +  decodedSize;
-                    zbd->stage = ZBUFFds_flush;
-                    // break; /* ZBUFFds_flush follows */
-            }   }
-
-        case ZBUFFds_flush:
-            {   size_t const toFlushSize = zbd->outEnd - zbd->outStart;
-                size_t const flushedSize = ZBUFF_limitCopy(op, oend-op, zbd->outBuff + zbd->outStart, toFlushSize);
-                op += flushedSize;
-                zbd->outStart += flushedSize;
-                if (flushedSize == toFlushSize) {
-                    zbd->stage = ZBUFFds_read;
-                    if (zbd->outStart + zbd->blockSize > zbd->outBuffSize)
-                        zbd->outStart = zbd->outEnd = 0;
-                    break;
-                }
-                /* cannot flush everything */
-                notDone = 0;
-                break;
-            }
-        default: return ERROR(GENERIC);   /* impossible */
-    }   }
-
-    /* result */
-    *srcSizePtr = ip-istart;
-    *dstCapacityPtr = op-ostart;
-    {   size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zbd->zd);
-        if (nextSrcSizeHint > ZSTD_blockHeaderSize) nextSrcSizeHint+= ZSTD_blockHeaderSize;   /* get following block header too */
-        nextSrcSizeHint -= zbd->inPos;   /* already loaded*/
-        return nextSrcSizeHint;
-    }
-}
-
-
 
 /* *************************************
 *  Tool functions
 ***************************************/
-unsigned ZBUFF_isError(size_t errorCode) { return ERR_isError(errorCode); }
-const char* ZBUFF_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); }
-
 size_t ZBUFF_recommendedCInSize(void)  { return ZSTD_BLOCKSIZE_MAX; }
 size_t ZBUFF_recommendedCOutSize(void) { return ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + ZBUFF_endFrameSize; }
-size_t ZBUFF_recommendedDInSize(void)  { return ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize /* block header size*/ ; }
-size_t ZBUFF_recommendedDOutSize(void) { return ZSTD_BLOCKSIZE_MAX; }
diff --git a/lib/zstd_compress.c b/lib/compress/zstd_compress.c
similarity index 99%
rename from lib/zstd_compress.c
rename to lib/compress/zstd_compress.c
index 6ca1bafa3b5e757cc9c8a707b6a8e2cf1070cb45..1475d5fb3b9aa8408ca67652caf878dfcf971a32 100644
--- a/lib/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -55,7 +55,7 @@
 #include <string.h>   /* memset */
 #include "mem.h"
 #include "fse_static.h"
-#include "huff0_static.h"
+#include "huf_static.h"
 #include "zstd_internal.h"
 
 
diff --git a/lib/zstd_opt.h b/lib/compress/zstd_opt.h
similarity index 100%
rename from lib/zstd_opt.h
rename to lib/compress/zstd_opt.h
diff --git a/lib/decompress/fse_decompress.c b/lib/decompress/fse_decompress.c
new file mode 100644
index 0000000000000000000000000000000000000000..e86a37204a519840d41c7389edb77d116e33164b
--- /dev/null
+++ b/lib/decompress/fse_decompress.c
@@ -0,0 +1,438 @@
+/* ******************************************************************
+   FSE : Finite State Entropy decoder
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+    - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+
+/* **************************************************************
+*  Compiler specifics
+****************************************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  define FORCE_INLINE static __forceinline
+#  include <intrin.h>                    /* For Visual 2005 */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4214)        /* disable: C4214: non-int bitfields */
+#else
+#  ifdef __GNUC__
+#    define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+#    define FORCE_INLINE static inline __attribute__((always_inline))
+#  else
+#    define FORCE_INLINE static inline
+#  endif
+#endif
+
+
+/* **************************************************************
+*  Includes
+****************************************************************/
+#include <stdlib.h>     /* malloc, free, qsort */
+#include <string.h>     /* memcpy, memset */
+#include <stdio.h>      /* printf (debug) */
+#include "bitstream.h"
+#include "fse_static.h"
+
+
+/* **************************************************************
+*  Error Management
+****************************************************************/
+#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
+
+
+/* **************************************************************
+*  Complex types
+****************************************************************/
+typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
+
+
+/* **************************************************************
+*  Templates
+****************************************************************/
+/*
+  designed to be included
+  for type-specific functions (template emulation in C)
+  Objective is to write these functions only once, for improved maintenance
+*/
+
+/* safety checks */
+#ifndef FSE_FUNCTION_EXTENSION
+#  error "FSE_FUNCTION_EXTENSION must be defined"
+#endif
+#ifndef FSE_FUNCTION_TYPE
+#  error "FSE_FUNCTION_TYPE must be defined"
+#endif
+
+/* Function names */
+#define FSE_CAT(X,Y) X##Y
+#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
+#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
+
+
+/* Function templates */
+FSE_DTable* FSE_createDTable (unsigned tableLog)
+{
+    if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
+    return (FSE_DTable*)malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) );
+}
+
+void FSE_freeDTable (FSE_DTable* dt)
+{
+    free(dt);
+}
+
+size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
+{
+    FSE_DTableHeader DTableH;
+    void* const tdPtr = dt+1;   /* because dt is unsigned, 32-bits aligned on 32-bits */
+    FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr);
+    const U32 tableSize = 1 << tableLog;
+    const U32 tableMask = tableSize-1;
+    const U32 step = FSE_TABLESTEP(tableSize);
+    U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
+
+    U32 highThreshold = tableSize-1;
+    S16 const largeLimit= (S16)(1 << (tableLog-1));
+    U32 noLarge = 1;
+    U32 s;
+
+    /* Sanity Checks */
+    if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
+    if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
+
+    /* Init, lay down lowprob symbols */
+    DTableH.tableLog = (U16)tableLog;
+    for (s=0; s<=maxSymbolValue; s++) {
+        if (normalizedCounter[s]==-1) {
+            tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
+            symbolNext[s] = 1;
+        } else {
+            if (normalizedCounter[s] >= largeLimit) noLarge=0;
+            symbolNext[s] = normalizedCounter[s];
+    }   }
+
+    /* Spread symbols */
+    {   U32 position = 0;
+        for (s=0; s<=maxSymbolValue; s++) {
+            int i;
+            for (i=0; i<normalizedCounter[s]; i++) {
+                tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
+                position = (position + step) & tableMask;
+                while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
+        }   }
+
+        if (position!=0) return ERROR(GENERIC);   /* position must reach all cells once, otherwise normalizedCounter is incorrect */
+    }
+
+    /* Build Decoding table */
+    {   U32 u;
+        for (u=0; u<tableSize; u++) {
+            FSE_FUNCTION_TYPE symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol);
+
+            U16 nextState = symbolNext[symbol]++;
+            tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32 ((U32)nextState) );
+            tableDecode[u].newState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
+    }   }
+
+    DTableH.fastMode = (U16)noLarge;
+    memcpy(dt, &DTableH, sizeof(DTableH));
+    return 0;
+}
+
+
+
+#ifndef FSE_COMMONDEFS_ONLY
+/*-****************************************
+*  FSE helper functions
+******************************************/
+unsigned FSE_isError(size_t code) { return ERR_isError(code); }
+
+const char* FSE_getErrorName(size_t code) { return ERR_getErrorName(code); }
+
+
+/*-**************************************************************
+*  FSE NCount encoding-decoding
+****************************************************************/
+static short FSE_abs(short a) { return a<0 ? -a : a; }
+
+size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+                 const void* headerBuffer, size_t hbSize)
+{
+    const BYTE* const istart = (const BYTE*) headerBuffer;
+    const BYTE* const iend = istart + hbSize;
+    const BYTE* ip = istart;
+    int nbBits;
+    int remaining;
+    int threshold;
+    U32 bitStream;
+    int bitCount;
+    unsigned charnum = 0;
+    int previous0 = 0;
+
+    if (hbSize < 4) return ERROR(srcSize_wrong);
+    bitStream = MEM_readLE32(ip);
+    nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG;   /* extract tableLog */
+    if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
+    bitStream >>= 4;
+    bitCount = 4;
+    *tableLogPtr = nbBits;
+    remaining = (1<<nbBits)+1;
+    threshold = 1<<nbBits;
+    nbBits++;
+
+    while ((remaining>1) && (charnum<=*maxSVPtr)) {
+        if (previous0) {
+            unsigned n0 = charnum;
+            while ((bitStream & 0xFFFF) == 0xFFFF) {
+                n0+=24;
+                if (ip < iend-5) {
+                    ip+=2;
+                    bitStream = MEM_readLE32(ip) >> bitCount;
+                } else {
+                    bitStream >>= 16;
+                    bitCount+=16;
+            }   }
+            while ((bitStream & 3) == 3) {
+                n0+=3;
+                bitStream>>=2;
+                bitCount+=2;
+            }
+            n0 += bitStream & 3;
+            bitCount += 2;
+            if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
+            while (charnum < n0) normalizedCounter[charnum++] = 0;
+            if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
+                ip += bitCount>>3;
+                bitCount &= 7;
+                bitStream = MEM_readLE32(ip) >> bitCount;
+            }
+            else
+                bitStream >>= 2;
+        }
+        {   short const max = (short)((2*threshold-1)-remaining);
+            short count;
+
+            if ((bitStream & (threshold-1)) < (U32)max) {
+                count = (short)(bitStream & (threshold-1));
+                bitCount   += nbBits-1;
+            } else {
+                count = (short)(bitStream & (2*threshold-1));
+                if (count >= threshold) count -= max;
+                bitCount   += nbBits;
+            }
+
+            count--;   /* extra accuracy */
+            remaining -= FSE_abs(count);
+            normalizedCounter[charnum++] = count;
+            previous0 = !count;
+            while (remaining < threshold) {
+                nbBits--;
+                threshold >>= 1;
+            }
+
+            if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
+                ip += bitCount>>3;
+                bitCount &= 7;
+            } else {
+                bitCount -= (int)(8 * (iend - 4 - ip));
+                ip = iend - 4;
+            }
+            bitStream = MEM_readLE32(ip) >> (bitCount & 31);
+    }   }
+    if (remaining != 1) return ERROR(GENERIC);
+    *maxSVPtr = charnum-1;
+
+    ip += (bitCount+7)>>3;
+    if ((size_t)(ip-istart) > hbSize) return ERROR(srcSize_wrong);
+    return ip-istart;
+}
+
+
+
+
+/*-*******************************************************
+*  Decompression (Byte symbols)
+*********************************************************/
+size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
+{
+    void* ptr = dt;
+    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
+    void* dPtr = dt + 1;
+    FSE_decode_t* const cell = (FSE_decode_t*)dPtr;
+
+    DTableH->tableLog = 0;
+    DTableH->fastMode = 0;
+
+    cell->newState = 0;
+    cell->symbol = symbolValue;
+    cell->nbBits = 0;
+
+    return 0;
+}
+
+
+size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
+{
+    void* ptr = dt;
+    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
+    void* dPtr = dt + 1;
+    FSE_decode_t* const dinfo = (FSE_decode_t*)dPtr;
+    const unsigned tableSize = 1 << nbBits;
+    const unsigned tableMask = tableSize - 1;
+    const unsigned maxSymbolValue = tableMask;
+    unsigned s;
+
+    /* Sanity checks */
+    if (nbBits < 1) return ERROR(GENERIC);         /* min size */
+
+    /* Build Decoding Table */
+    DTableH->tableLog = (U16)nbBits;
+    DTableH->fastMode = 1;
+    for (s=0; s<=maxSymbolValue; s++) {
+        dinfo[s].newState = 0;
+        dinfo[s].symbol = (BYTE)s;
+        dinfo[s].nbBits = (BYTE)nbBits;
+    }
+
+    return 0;
+}
+
+FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
+          void* dst, size_t maxDstSize,
+    const void* cSrc, size_t cSrcSize,
+    const FSE_DTable* dt, const unsigned fast)
+{
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* op = ostart;
+    BYTE* const omax = op + maxDstSize;
+    BYTE* const olimit = omax-3;
+
+    BIT_DStream_t bitD;
+    FSE_DState_t state1;
+    FSE_DState_t state2;
+    size_t errorCode;
+
+    /* Init */
+    errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize);   /* replaced last arg by maxCompressed Size */
+    if (FSE_isError(errorCode)) return errorCode;
+
+    FSE_initDState(&state1, &bitD, dt);
+    FSE_initDState(&state2, &bitD, dt);
+
+#define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
+
+    /* 4 symbols per loop */
+    for ( ; (BIT_reloadDStream(&bitD)==BIT_DStream_unfinished) && (op<olimit) ; op+=4) {
+        op[0] = FSE_GETSYMBOL(&state1);
+
+        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            BIT_reloadDStream(&bitD);
+
+        op[1] = FSE_GETSYMBOL(&state2);
+
+        if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            { if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { op+=2; break; } }
+
+        op[2] = FSE_GETSYMBOL(&state1);
+
+        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            BIT_reloadDStream(&bitD);
+
+        op[3] = FSE_GETSYMBOL(&state2);
+    }
+
+    /* tail */
+    /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */
+    while (1) {
+        if (op>(omax-2)) return ERROR(dstSize_tooSmall);
+
+        *op++ = FSE_GETSYMBOL(&state1);
+
+        if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) {
+            *op++ = FSE_GETSYMBOL(&state2);
+            break;
+        }
+
+        if (op>(omax-2)) return ERROR(dstSize_tooSmall);
+
+        *op++ = FSE_GETSYMBOL(&state2);
+
+        if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) {
+            *op++ = FSE_GETSYMBOL(&state1);
+            break;
+    }   }
+
+    return op-ostart;
+}
+
+
+size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
+                            const void* cSrc, size_t cSrcSize,
+                            const FSE_DTable* dt)
+{
+    const void* ptr = dt;
+    const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
+    const U32 fastMode = DTableH->fastMode;
+
+    /* select fast mode (static) */
+    if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
+    return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
+}
+
+
+size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
+{
+    const BYTE* const istart = (const BYTE*)cSrc;
+    const BYTE* ip = istart;
+    short counting[FSE_MAX_SYMBOL_VALUE+1];
+    DTable_max_t dt;   /* Static analyzer seems unable to understand this table will be properly initialized later */
+    unsigned tableLog;
+    unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
+    size_t errorCode;
+
+    if (cSrcSize<2) return ERROR(srcSize_wrong);   /* too small input size */
+
+    /* normal FSE decoding mode */
+    errorCode = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
+    if (FSE_isError(errorCode)) return errorCode;
+    if (errorCode >= cSrcSize) return ERROR(srcSize_wrong);   /* too small input size */
+    ip += errorCode;
+    cSrcSize -= errorCode;
+
+    errorCode = FSE_buildDTable (dt, counting, maxSymbolValue, tableLog);
+    if (FSE_isError(errorCode)) return errorCode;
+
+    /* always return, even if it is an error code */
+    return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt);
+}
+
+
+
+#endif   /* FSE_COMMONDEFS_ONLY */
diff --git a/lib/huff0.c b/lib/decompress/huf_decompress.c
similarity index 65%
rename from lib/huff0.c
rename to lib/decompress/huf_decompress.c
index 505adcecab8e94d2658a1275f42b3ce3d2730598..bcd6f022d0099d170cf246db366fb69395a9f519 100644
--- a/lib/huff0.c
+++ b/lib/decompress/huf_decompress.c
@@ -1,5 +1,5 @@
 /* ******************************************************************
-   Huff0 : Huffman coder, part of New Generation Entropy library
+   Huffman decoder, part of New Generation Entropy library
    Copyright (C) 2013-2016, Yann Collet.
 
    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
@@ -28,7 +28,7 @@
    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
     You can contact the author at :
-    - FSE+Huff0 source repository : https://github.com/Cyan4973/FiniteStateEntropy
+    - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
     - Public forum : https://groups.google.com/forum/#!forum/lz4c
 ****************************************************************** */
 
@@ -62,22 +62,11 @@
 #include <stdlib.h>     /* malloc, free, qsort */
 #include <string.h>     /* memcpy, memset */
 #include <stdio.h>      /* printf (debug) */
-#include "huff0_static.h"
+#include "huf_static.h"
 #include "bitstream.h"
 #include "fse.h"        /* header compression */
 
 
-/* **************************************************************
-*  Constants
-****************************************************************/
-#define HUF_ABSOLUTEMAX_TABLELOG  16   /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
-#define HUF_MAX_TABLELOG  12           /* max configured tableLog (for static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */
-#define HUF_DEFAULT_TABLELOG  HUF_MAX_TABLELOG   /* tableLog by default, when not specified */
-#define HUF_MAX_SYMBOL_VALUE 255
-#if (HUF_MAX_TABLELOG > HUF_ABSOLUTEMAX_TABLELOG)
-#  error "HUF_MAX_TABLELOG is too large !"
-#endif
-
 
 /* **************************************************************
 *  Error Management
@@ -87,499 +76,9 @@ const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); }
 #define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
 
 
-/* *******************************************************
-*  Huff0 : Huffman block compression
-*********************************************************/
-struct HUF_CElt_s {
-  U16  val;
-  BYTE nbBits;
-};   /* typedef'd to HUF_CElt within huff0_static.h */
-
-typedef struct nodeElt_s {
-    U32 count;
-    U16 parent;
-    BYTE byte;
-    BYTE nbBits;
-} nodeElt;
-
-/*! HUF_writeCTable() :
-    `CTable` : huffman tree to save, using huff0 representation.
-    @return : size of saved CTable */
-size_t HUF_writeCTable (void* dst, size_t maxDstSize,
-                        const HUF_CElt* CTable, U32 maxSymbolValue, U32 huffLog)
-{
-    BYTE bitsToWeight[HUF_MAX_TABLELOG + 1];
-    BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1];
-    U32 n;
-    BYTE* op = (BYTE*)dst;
-    size_t size;
-
-     /* check conditions */
-    if (maxSymbolValue > HUF_MAX_SYMBOL_VALUE + 1)
-        return ERROR(GENERIC);
-
-    /* convert to weight */
-    bitsToWeight[0] = 0;
-    for (n=1; n<=huffLog; n++)
-        bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
-    for (n=0; n<maxSymbolValue; n++)
-        huffWeight[n] = bitsToWeight[CTable[n].nbBits];
-
-    size = FSE_compress(op+1, maxDstSize-1, huffWeight, maxSymbolValue);   /* don't need last symbol stat : implied */
-    if (HUF_isError(size)) return size;
-    if (size >= 128) return ERROR(GENERIC);   /* should never happen, since maxSymbolValue <= 255 */
-    if ((size <= 1) || (size >= maxSymbolValue/2)) {
-        if (size==1) {  /* RLE */
-            /* only possible case : serie of 1 (because there are at least 2) */
-            /* can only be 2^n or (2^n-1), otherwise not an huffman tree */
-            BYTE code;
-            switch(maxSymbolValue)
-            {
-            case 1: code = 0; break;
-            case 2: code = 1; break;
-            case 3: code = 2; break;
-            case 4: code = 3; break;
-            case 7: code = 4; break;
-            case 8: code = 5; break;
-            case 15: code = 6; break;
-            case 16: code = 7; break;
-            case 31: code = 8; break;
-            case 32: code = 9; break;
-            case 63: code = 10; break;
-            case 64: code = 11; break;
-            case 127: code = 12; break;
-            case 128: code = 13; break;
-            default : return ERROR(corruption_detected);
-            }
-            op[0] = (BYTE)(255-13 + code);
-            return 1;
-        }
-         /* Not compressible */
-        if (maxSymbolValue > (241-128)) return ERROR(GENERIC);   /* not implemented (not possible with current format) */
-        if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall);   /* not enough space within dst buffer */
-        op[0] = (BYTE)(128 /*special case*/ + 0 /* Not Compressible */ + (maxSymbolValue-1));
-        huffWeight[maxSymbolValue] = 0;   /* to be sure it doesn't cause issue in final combination */
-        for (n=0; n<maxSymbolValue; n+=2)
-            op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]);
-        return ((maxSymbolValue+1)/2) + 1;
-    }
-
-    /* normal header case */
-    op[0] = (BYTE)size;
-    return size+1;
-}
-
-
-static size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
-                            U32* nbSymbolsPtr, U32* tableLogPtr,
-                            const void* src, size_t srcSize);
-
-
-size_t HUF_readCTable (HUF_CElt* CTable, U32 maxSymbolValue, const void* src, size_t srcSize)
-{
-    BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1];
-    U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];   /* large enough for values from 0 to 16 */
-    U32 tableLog = 0;
-    size_t readSize;
-    U32 nbSymbols = 0;
-    //memset(huffWeight, 0, sizeof(huffWeight));   /* is not necessary, even though some analyzer complain ... */
-
-    /* get symbol weights */
-    readSize = HUF_readStats(huffWeight, HUF_MAX_SYMBOL_VALUE+1, rankVal, &nbSymbols, &tableLog, src, srcSize);
-    if (HUF_isError(readSize)) return readSize;
-
-    /* check result */
-    if (tableLog > HUF_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
-    if (nbSymbols > maxSymbolValue+1) return ERROR(maxSymbolValue_tooSmall);
-
-    /* Prepare base value per rank */
-    {   U32 n, nextRankStart = 0;
-        for (n=1; n<=tableLog; n++) {
-            U32 current = nextRankStart;
-            nextRankStart += (rankVal[n] << (n-1));
-            rankVal[n] = current;
-    }   }
-
-    /* fill nbBits */
-    { U32 n; for (n=0; n<nbSymbols; n++) {
-        const U32 w = huffWeight[n];
-        CTable[n].nbBits = (BYTE)(tableLog + 1 - w);
-    }}
-
-    /* fill val */
-    {   U16 nbPerRank[HUF_MAX_TABLELOG+1] = {0};
-        U16 valPerRank[HUF_MAX_TABLELOG+1] = {0};
-        { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[CTable[n].nbBits]++; }
-        /* determine stating value per rank */
-        {   U16 min = 0;
-            U32 n; for (n=HUF_MAX_TABLELOG; n>0; n--) {
-                valPerRank[n] = min;      /* get starting value within each rank */
-                min += nbPerRank[n];
-                min >>= 1;
-        }   }
-        /* assign value within rank, symbol order */
-        { U32 n; for (n=0; n<=maxSymbolValue; n++) CTable[n].val = valPerRank[CTable[n].nbBits]++; }
-    }
-
-    return readSize;
-}
-
-
-static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
-{
-    const U32 largestBits = huffNode[lastNonNull].nbBits;
-    if (largestBits <= maxNbBits) return largestBits;   /* early exit : no elt > maxNbBits */
-
-    /* there are several too large elements (at least >= 2) */
-    {   int totalCost = 0;
-        const U32 baseCost = 1 << (largestBits - maxNbBits);
-        U32 n = lastNonNull;
-
-        while (huffNode[n].nbBits > maxNbBits) {
-            totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
-            huffNode[n].nbBits = (BYTE)maxNbBits;
-            n --;
-        }  /* n stops at huffNode[n].nbBits <= maxNbBits */
-        while (huffNode[n].nbBits == maxNbBits) n--;   /* n end at index of smallest symbol using < maxNbBits */
-
-        /* renorm totalCost */
-        totalCost >>= (largestBits - maxNbBits);  /* note : totalCost is necessarily a multiple of baseCost */
-
-        /* repay normalized cost */
-        {   U32 const noSymbol = 0xF0F0F0F0;
-            U32 rankLast[HUF_MAX_TABLELOG+1];
-            int pos;
-
-            /* Get pos of last (smallest) symbol per rank */
-            memset(rankLast, 0xF0, sizeof(rankLast));
-            {   U32 currentNbBits = maxNbBits;
-                for (pos=n ; pos >= 0; pos--) {
-                    if (huffNode[pos].nbBits >= currentNbBits) continue;
-                    currentNbBits = huffNode[pos].nbBits;   /* < maxNbBits */
-                    rankLast[maxNbBits-currentNbBits] = pos;
-            }   }
-
-            while (totalCost > 0) {
-                U32 nBitsToDecrease = BIT_highbit32(totalCost) + 1;
-                for ( ; nBitsToDecrease > 1; nBitsToDecrease--) {
-                    U32 highPos = rankLast[nBitsToDecrease];
-                    U32 lowPos = rankLast[nBitsToDecrease-1];
-                    if (highPos == noSymbol) continue;
-                    if (lowPos == noSymbol) break;
-                    {   U32 const highTotal = huffNode[highPos].count;
-                        U32 const lowTotal = 2 * huffNode[lowPos].count;
-                        if (highTotal <= lowTotal) break;
-                }   }
-                /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */
-                while ((nBitsToDecrease<=HUF_MAX_TABLELOG) && (rankLast[nBitsToDecrease] == noSymbol))  /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */
-                    nBitsToDecrease ++;
-                totalCost -= 1 << (nBitsToDecrease-1);
-                if (rankLast[nBitsToDecrease-1] == noSymbol)
-                    rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease];   /* this rank is no longer empty */
-                huffNode[rankLast[nBitsToDecrease]].nbBits ++;
-                if (rankLast[nBitsToDecrease] == 0)    /* special case, reached largest symbol */
-                    rankLast[nBitsToDecrease] = noSymbol;
-                else {
-                    rankLast[nBitsToDecrease]--;
-                    if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease)
-                        rankLast[nBitsToDecrease] = noSymbol;   /* this rank is now empty */
-            }   }   /* while (totalCost > 0) */
-
-            while (totalCost < 0) {  /* Sometimes, cost correction overshoot */
-                if (rankLast[1] == noSymbol) {  /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */
-                    while (huffNode[n].nbBits == maxNbBits) n--;
-                    huffNode[n+1].nbBits--;
-                    rankLast[1] = n+1;
-                    totalCost++;
-                    continue;
-                }
-                huffNode[ rankLast[1] + 1 ].nbBits--;
-                rankLast[1]++;
-                totalCost ++;
-    }   }   }   /* there are several too large elements (at least >= 2) */
-
-    return maxNbBits;
-}
-
-
-typedef struct {
-    U32 base;
-    U32 current;
-} rankPos;
-
-static void HUF_sort(nodeElt* huffNode, const U32* count, U32 maxSymbolValue)
-{
-    rankPos rank[32];
-    U32 n;
-
-    memset(rank, 0, sizeof(rank));
-    for (n=0; n<=maxSymbolValue; n++) {
-        U32 r = BIT_highbit32(count[n] + 1);
-        rank[r].base ++;
-    }
-    for (n=30; n>0; n--) rank[n-1].base += rank[n].base;
-    for (n=0; n<32; n++) rank[n].current = rank[n].base;
-    for (n=0; n<=maxSymbolValue; n++) {
-        U32 const c = count[n];
-        U32 const r = BIT_highbit32(c+1) + 1;
-        U32 pos = rank[r].current++;
-        while ((pos > rank[r].base) && (c > huffNode[pos-1].count)) huffNode[pos]=huffNode[pos-1], pos--;
-        huffNode[pos].count = c;
-        huffNode[pos].byte  = (BYTE)n;
-    }
-}
-
-
-#define STARTNODE (HUF_MAX_SYMBOL_VALUE+1)
-size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits)
-{
-    nodeElt huffNode0[2*HUF_MAX_SYMBOL_VALUE+1 +1];
-    nodeElt* huffNode = huffNode0 + 1;
-    U32 n, nonNullRank;
-    int lowS, lowN;
-    U16 nodeNb = STARTNODE;
-    U32 nodeRoot;
-
-    /* safety checks */
-    if (maxNbBits == 0) maxNbBits = HUF_DEFAULT_TABLELOG;
-    if (maxSymbolValue > HUF_MAX_SYMBOL_VALUE) return ERROR(GENERIC);
-    memset(huffNode0, 0, sizeof(huffNode0));
-
-    /* sort, decreasing order */
-    HUF_sort(huffNode, count, maxSymbolValue);
-
-    /* init for parents */
-    nonNullRank = maxSymbolValue;
-    while(huffNode[nonNullRank].count == 0) nonNullRank--;
-    lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb;
-    huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count;
-    huffNode[lowS].parent = huffNode[lowS-1].parent = nodeNb;
-    nodeNb++; lowS-=2;
-    for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30);
-    huffNode0[0].count = (U32)(1U<<31);
-
-    /* create parents */
-    while (nodeNb <= nodeRoot) {
-        U32 n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
-        U32 n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
-        huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count;
-        huffNode[n1].parent = huffNode[n2].parent = nodeNb;
-        nodeNb++;
-    }
-
-    /* distribute weights (unlimited tree height) */
-    huffNode[nodeRoot].nbBits = 0;
-    for (n=nodeRoot-1; n>=STARTNODE; n--)
-        huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
-    for (n=0; n<=nonNullRank; n++)
-        huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
-
-    /* enforce maxTableLog */
-    maxNbBits = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits);
-
-    /* fill result into tree (val, nbBits) */
-    {   U16 nbPerRank[HUF_MAX_TABLELOG+1] = {0};
-        U16 valPerRank[HUF_MAX_TABLELOG+1] = {0};
-        if (maxNbBits > HUF_MAX_TABLELOG) return ERROR(GENERIC);   /* check fit into table */
-        for (n=0; n<=nonNullRank; n++)
-            nbPerRank[huffNode[n].nbBits]++;
-        /* determine stating value per rank */
-        {   U16 min = 0;
-            for (n=maxNbBits; n>0; n--) {
-                valPerRank[n] = min;      /* get starting value within each rank */
-                min += nbPerRank[n];
-                min >>= 1;
-        }   }
-        for (n=0; n<=maxSymbolValue; n++)
-            tree[huffNode[n].byte].nbBits = huffNode[n].nbBits;   /* push nbBits per symbol, symbol order */
-        for (n=0; n<=maxSymbolValue; n++)
-            tree[n].val = valPerRank[tree[n].nbBits]++;   /* assign value within rank, symbol order */
-    }
-
-    return maxNbBits;
-}
-
-static void HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
-{
-    BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
-}
-
-size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
-
-#define HUF_FLUSHBITS(s)  (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s))
-
-#define HUF_FLUSHBITS_1(stream) \
-    if (sizeof((stream)->bitContainer)*8 < HUF_MAX_TABLELOG*2+7) HUF_FLUSHBITS(stream)
-
-#define HUF_FLUSHBITS_2(stream) \
-    if (sizeof((stream)->bitContainer)*8 < HUF_MAX_TABLELOG*4+7) HUF_FLUSHBITS(stream)
-
-size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
-{
-    const BYTE* ip = (const BYTE*) src;
-    BYTE* const ostart = (BYTE*)dst;
-    BYTE* const oend = ostart + dstSize;
-    BYTE* op = ostart;
-    size_t n;
-    const unsigned fast = (dstSize >= HUF_BLOCKBOUND(srcSize));
-    BIT_CStream_t bitC;
-
-    /* init */
-    if (dstSize < 8) return 0;   /* not enough space to compress */
-    { size_t const errorCode = BIT_initCStream(&bitC, op, oend-op);
-      if (HUF_isError(errorCode)) return 0; }
-
-    n = srcSize & ~3;  /* join to mod 4 */
-    switch (srcSize & 3)
-    {
-        case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable);
-                 HUF_FLUSHBITS_2(&bitC);
-        case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable);
-                 HUF_FLUSHBITS_1(&bitC);
-        case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable);
-                 HUF_FLUSHBITS(&bitC);
-        case 0 :
-        default: ;
-    }
-
-    for (; n>0; n-=4) {  /* note : n&3==0 at this stage */
-        HUF_encodeSymbol(&bitC, ip[n- 1], CTable);
-        HUF_FLUSHBITS_1(&bitC);
-        HUF_encodeSymbol(&bitC, ip[n- 2], CTable);
-        HUF_FLUSHBITS_2(&bitC);
-        HUF_encodeSymbol(&bitC, ip[n- 3], CTable);
-        HUF_FLUSHBITS_1(&bitC);
-        HUF_encodeSymbol(&bitC, ip[n- 4], CTable);
-        HUF_FLUSHBITS(&bitC);
-    }
-
-    return BIT_closeCStream(&bitC);
-}
-
-
-size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
-{
-    size_t segmentSize = (srcSize+3)/4;   /* first 3 segments */
-    const BYTE* ip = (const BYTE*) src;
-    const BYTE* const iend = ip + srcSize;
-    BYTE* const ostart = (BYTE*) dst;
-    BYTE* const oend = ostart + dstSize;
-    BYTE* op = ostart;
-    size_t errorCode;
-
-    if (dstSize < 6 + 1 + 1 + 1 + 8) return 0;   /* minimum space to compress successfully */
-    if (srcSize < 12) return 0;   /* no saving possible : too small input */
-    op += 6;   /* jumpTable */
-
-    errorCode = HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable);
-    if (HUF_isError(errorCode)) return errorCode;
-    if (errorCode==0) return 0;
-    MEM_writeLE16(ostart, (U16)errorCode);
-
-    ip += segmentSize;
-    op += errorCode;
-    errorCode = HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable);
-    if (HUF_isError(errorCode)) return errorCode;
-    if (errorCode==0) return 0;
-    MEM_writeLE16(ostart+2, (U16)errorCode);
-
-    ip += segmentSize;
-    op += errorCode;
-    errorCode = HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable);
-    if (HUF_isError(errorCode)) return errorCode;
-    if (errorCode==0) return 0;
-    MEM_writeLE16(ostart+4, (U16)errorCode);
-
-    ip += segmentSize;
-    op += errorCode;
-    errorCode = HUF_compress1X_usingCTable(op, oend-op, ip, iend-ip, CTable);
-    if (HUF_isError(errorCode)) return errorCode;
-    if (errorCode==0) return 0;
-
-    op += errorCode;
-    return op-ostart;
-}
-
-
-static size_t HUF_compress_internal (
-                void* dst, size_t dstSize,
-                const void* src, size_t srcSize,
-                unsigned maxSymbolValue, unsigned huffLog,
-                unsigned singleStream)
-{
-    BYTE* const ostart = (BYTE*)dst;
-    BYTE* const oend = ostart + dstSize;
-    BYTE* op = ostart;
-
-    U32 count[HUF_MAX_SYMBOL_VALUE+1];
-    HUF_CElt CTable[HUF_MAX_SYMBOL_VALUE+1];
-    size_t errorCode;
-
-    /* checks & inits */
-    if (srcSize < 1) return 0;  /* Uncompressed - note : 1 means rle, so first byte must be correct */
-    if (dstSize < 1) return 0;  /* not compressible within dst budget */
-    if (srcSize > 128 * 1024) return ERROR(srcSize_wrong);   /* current block size limit */
-    if (huffLog > HUF_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
-    if (!maxSymbolValue) maxSymbolValue = HUF_MAX_SYMBOL_VALUE;
-    if (!huffLog) huffLog = HUF_DEFAULT_TABLELOG;
-
-    /* Scan input and build symbol stats */
-    errorCode = FSE_count (count, &maxSymbolValue, (const BYTE*)src, srcSize);
-    if (HUF_isError(errorCode)) return errorCode;
-    if (errorCode == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; }
-    if (errorCode <= (srcSize >> 7)+1) return 0;   /* Heuristic : not compressible enough */
-
-    /* Build Huffman Tree */
-    errorCode = HUF_buildCTable (CTable, count, maxSymbolValue, huffLog);
-    if (HUF_isError(errorCode)) return errorCode;
-    huffLog = (U32)errorCode;
-
-    /* Write table description header */
-    errorCode = HUF_writeCTable (op, dstSize, CTable, maxSymbolValue, huffLog);
-    if (HUF_isError(errorCode)) return errorCode;
-    if (errorCode + 12 >= srcSize) return 0;   /* not useful to try compression */
-    op += errorCode;
-
-    /* Compress */
-    if (singleStream)
-        errorCode = HUF_compress1X_usingCTable(op, oend - op, src, srcSize, CTable);   /* single segment */
-    else
-        errorCode = HUF_compress4X_usingCTable(op, oend - op, src, srcSize, CTable);
-    if (HUF_isError(errorCode)) return errorCode;
-    if (errorCode==0) return 0;
-    op += errorCode;
-
-    /* check compressibility */
-    if ((size_t)(op-ostart) >= srcSize-1)
-        return 0;
-
-    return op-ostart;
-}
-
-
-size_t HUF_compress1X (void* dst, size_t dstSize,
-                 const void* src, size_t srcSize,
-                 unsigned maxSymbolValue, unsigned huffLog)
-{
-    return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1);
-}
-
-size_t HUF_compress2 (void* dst, size_t dstSize,
-                const void* src, size_t srcSize,
-                unsigned maxSymbolValue, unsigned huffLog)
-{
-    return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0);
-}
-
-
-size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
-{
-    return HUF_compress2(dst, maxDstSize, src, (U32)srcSize, 255, HUF_DEFAULT_TABLELOG);
-}
-
 
 /* *******************************************************
-*  Huff0 : Huffman block decompression
+*  HUF : Huffman block decompression
 *********************************************************/
 typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX2;   /* single-symbol decoding */
 
@@ -587,77 +86,6 @@ typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4;  /* doubl
 
 typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
 
-/*! HUF_readStats() :
-    Read compact Huffman tree, saved by HUF_writeCTable().
-    `huffWeight` is destination buffer.
-    @return : size read from `src`
-*/
-static size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
-                            U32* nbSymbolsPtr, U32* tableLogPtr,
-                            const void* src, size_t srcSize)
-{
-    U32 weightTotal;
-    U32 tableLog;
-    const BYTE* ip = (const BYTE*) src;
-    size_t iSize = ip[0];
-    size_t oSize;
-
-    //memset(huffWeight, 0, hwSize);   /* is not necessary, even though some analyzer complain ... */
-
-    if (iSize >= 128)  { /* special header */
-        if (iSize >= (242)) {  /* RLE */
-            static U32 l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 };
-            oSize = l[iSize-242];
-            memset(huffWeight, 1, hwSize);
-            iSize = 0;
-        }
-        else {   /* Incompressible */
-            oSize = iSize - 127;
-            iSize = ((oSize+1)/2);
-            if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
-            if (oSize >= hwSize) return ERROR(corruption_detected);
-            ip += 1;
-            {   U32 n;
-                for (n=0; n<oSize; n+=2) {
-                    huffWeight[n]   = ip[n/2] >> 4;
-                    huffWeight[n+1] = ip[n/2] & 15;
-    }   }   }   }
-    else  {   /* header compressed with FSE (normal case) */
-        if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
-        oSize = FSE_decompress(huffWeight, hwSize-1, ip+1, iSize);   /* max (hwSize-1) values decoded, as last one is implied */
-        if (FSE_isError(oSize)) return oSize;
-    }
-
-    /* collect weight stats */
-    memset(rankStats, 0, (HUF_ABSOLUTEMAX_TABLELOG + 1) * sizeof(U32));
-    weightTotal = 0;
-    { U32 n; for (n=0; n<oSize; n++) {
-        if (huffWeight[n] >= HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
-        rankStats[huffWeight[n]]++;
-        weightTotal += (1 << huffWeight[n]) >> 1;
-    }}
-
-    /* get last non-null symbol weight (implied, total must be 2^n) */
-    tableLog = BIT_highbit32(weightTotal) + 1;
-    if (tableLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
-    /* determine last weight */
-    {   U32 const total = 1 << tableLog;
-        U32 const rest = total - weightTotal;
-        U32 const verif = 1 << BIT_highbit32(rest);
-        U32 const lastWeight = BIT_highbit32(rest) + 1;
-        if (verif != rest) return ERROR(corruption_detected);    /* last value must be a clean power of 2 */
-        huffWeight[oSize] = (BYTE)lastWeight;
-        rankStats[lastWeight]++;
-    }
-
-    /* check tree construction validity */
-    if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected);   /* by construction : at least 2 elts of rank 1, must be even */
-
-    /* results */
-    *nbSymbolsPtr = (U32)(oSize+1);
-    *tableLogPtr = tableLog;
-    return iSize+1;
-}
 
 
 /*-***************************/
diff --git a/lib/decompress/zbuff_decompress.c b/lib/decompress/zbuff_decompress.c
new file mode 100644
index 0000000000000000000000000000000000000000..614a79240dd24b8ec598000bc2d45230bf6a82d8
--- /dev/null
+++ b/lib/decompress/zbuff_decompress.c
@@ -0,0 +1,258 @@
+/*
+    Buffered version of Zstd compression library
+    Copyright (C) 2015-2016, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd homepage : http://www.zstd.net/
+*/
+
+
+/* *************************************
+*  Dependencies
+***************************************/
+#include <stdlib.h>
+#include "error_private.h"
+#include "zstd_internal.h"  /* MIN, ZSTD_blockHeaderSize */
+#include "zstd_static.h"    /* ZSTD_BLOCKSIZE_MAX */
+#include "zbuff_static.h"
+
+
+/* *************************************
+*  Constants
+***************************************/
+static size_t const ZBUFF_endFrameSize = ZSTD_BLOCKHEADERSIZE;
+
+
+/*-***************************************************************************
+*  Streaming decompression howto
+*
+*  A ZBUFF_DCtx object is required to track streaming operations.
+*  Use ZBUFF_createDCtx() and ZBUFF_freeDCtx() to create/release resources.
+*  Use ZBUFF_decompressInit() to start a new decompression operation,
+*   or ZBUFF_decompressInitDictionary() if decompression requires a dictionary.
+*  Note that ZBUFF_DCtx objects can be re-init multiple times.
+*
+*  Use ZBUFF_decompressContinue() repetitively to consume your input.
+*  *srcSizePtr and *dstCapacityPtr can be any size.
+*  The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr.
+*  Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again.
+*  The content of @dst will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters, or change @dst.
+*  @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to help latency),
+*            or 0 when a frame is completely decoded,
+*            or an error code, which can be tested using ZBUFF_isError().
+*
+*  Hint : recommended buffer sizes (not compulsory) : ZBUFF_recommendedDInSize() and ZBUFF_recommendedDOutSize()
+*  output : ZBUFF_recommendedDOutSize==128 KB block size is the internal unit, it ensures it's always possible to write a full block when decoded.
+*  input  : ZBUFF_recommendedDInSize == 128KB + 3;
+*           just follow indications from ZBUFF_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 .
+* *******************************************************************************/
+
+typedef enum { ZBUFFds_init, ZBUFFds_readHeader,
+               ZBUFFds_read, ZBUFFds_load, ZBUFFds_flush } ZBUFF_dStage;
+
+/* *** Resource management *** */
+struct ZBUFF_DCtx_s {
+    ZSTD_DCtx* zd;
+    ZSTD_frameParams fParams;
+    size_t blockSize;
+    char*  inBuff;
+    size_t inBuffSize;
+    size_t inPos;
+    char*  outBuff;
+    size_t outBuffSize;
+    size_t outStart;
+    size_t outEnd;
+    ZBUFF_dStage stage;
+};   /* typedef'd to ZBUFF_DCtx within "zstd_buffered.h" */
+
+
+ZBUFF_DCtx* ZBUFF_createDCtx(void)
+{
+    ZBUFF_DCtx* zbd = (ZBUFF_DCtx*)malloc(sizeof(ZBUFF_DCtx));
+    if (zbd==NULL) return NULL;
+    memset(zbd, 0, sizeof(*zbd));
+    zbd->zd = ZSTD_createDCtx();
+    zbd->stage = ZBUFFds_init;
+    return zbd;
+}
+
+size_t ZBUFF_freeDCtx(ZBUFF_DCtx* zbd)
+{
+    if (zbd==NULL) return 0;   /* support free on null */
+    ZSTD_freeDCtx(zbd->zd);
+    free(zbd->inBuff);
+    free(zbd->outBuff);
+    free(zbd);
+    return 0;
+}
+
+
+/* *** Initialization *** */
+
+size_t ZBUFF_decompressInitDictionary(ZBUFF_DCtx* zbd, const void* dict, size_t dictSize)
+{
+    zbd->stage = ZBUFFds_readHeader;
+    zbd->inPos = zbd->outStart = zbd->outEnd = 0;
+    return ZSTD_decompressBegin_usingDict(zbd->zd, dict, dictSize);
+}
+
+size_t ZBUFF_decompressInit(ZBUFF_DCtx* zbd)
+{
+    return ZBUFF_decompressInitDictionary(zbd, NULL, 0);
+}
+
+
+/* *** Decompression *** */
+
+size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbd,
+                                void* dst, size_t* dstCapacityPtr,
+                          const void* src, size_t* srcSizePtr)
+{
+    const char* const istart = (const char*)src;
+    const char* const iend = istart + *srcSizePtr;
+    const char* ip = istart;
+    char* const ostart = (char*)dst;
+    char* const oend = ostart + *dstCapacityPtr;
+    char* op = ostart;
+    U32 notDone = 1;
+
+    while (notDone) {
+        switch(zbd->stage)
+        {
+        case ZBUFFds_init :
+            return ERROR(init_missing);
+
+        case ZBUFFds_readHeader :
+            /* read header from src */
+            {   size_t const headerSize = ZSTD_getFrameParams(&(zbd->fParams), src, *srcSizePtr);
+                if (ZSTD_isError(headerSize)) return headerSize;
+                if (headerSize) {
+                    /* not enough input to decode header : needs headerSize > *srcSizePtr */
+                    *dstCapacityPtr = 0;
+                    *srcSizePtr = 0;
+                    return headerSize;
+            }   }
+
+            /* Frame header instruct buffer sizes */
+            {   size_t const blockSize = MIN(1 << zbd->fParams.windowLog, ZSTD_BLOCKSIZE_MAX);
+                zbd->blockSize = blockSize;
+                if (zbd->inBuffSize < blockSize) {
+                    free(zbd->inBuff);
+                    zbd->inBuffSize = blockSize;
+                    zbd->inBuff = (char*)malloc(blockSize);
+                    if (zbd->inBuff == NULL) return ERROR(memory_allocation);
+                }
+                {   size_t const neededOutSize = ((size_t)1 << zbd->fParams.windowLog) + blockSize;
+                    if (zbd->outBuffSize < neededOutSize) {
+                        free(zbd->outBuff);
+                        zbd->outBuffSize = neededOutSize;
+                        zbd->outBuff = (char*)malloc(neededOutSize);
+                        if (zbd->outBuff == NULL) return ERROR(memory_allocation);
+            }   }   }
+            zbd->stage = ZBUFFds_read;
+
+        case ZBUFFds_read:
+            {   size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zbd->zd);
+                if (neededInSize==0) {  /* end of frame */
+                    zbd->stage = ZBUFFds_init;
+                    notDone = 0;
+                    break;
+                }
+                if ((size_t)(iend-ip) >= neededInSize) {
+                    /* directly decode from src */
+                    size_t const decodedSize = ZSTD_decompressContinue(zbd->zd,
+                        zbd->outBuff + zbd->outStart, zbd->outBuffSize - zbd->outStart,
+                        ip, neededInSize);
+                    if (ZSTD_isError(decodedSize)) return decodedSize;
+                    ip += neededInSize;
+                    if (!decodedSize) break;   /* this was just a header */
+                    zbd->outEnd = zbd->outStart +  decodedSize;
+                    zbd->stage = ZBUFFds_flush;
+                    break;
+                }
+                if (ip==iend) { notDone = 0; break; }   /* no more input */
+                zbd->stage = ZBUFFds_load;
+            }
+
+        case ZBUFFds_load:
+            {   size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zbd->zd);
+                size_t const toLoad = neededInSize - zbd->inPos;   /* should always be <= remaining space within inBuff */
+                size_t loadedSize;
+                if (toLoad > zbd->inBuffSize - zbd->inPos) return ERROR(corruption_detected);   /* should never happen */
+                loadedSize = ZBUFF_limitCopy(zbd->inBuff + zbd->inPos, toLoad, ip, iend-ip);
+                ip += loadedSize;
+                zbd->inPos += loadedSize;
+                if (loadedSize < toLoad) { notDone = 0; break; }   /* not enough input, wait for more */
+                /* decode loaded input */
+                {   size_t const decodedSize = ZSTD_decompressContinue(zbd->zd,
+                        zbd->outBuff + zbd->outStart, zbd->outBuffSize - zbd->outStart,
+                        zbd->inBuff, neededInSize);
+                    if (ZSTD_isError(decodedSize)) return decodedSize;
+                    zbd->inPos = 0;   /* input is consumed */
+                    if (!decodedSize) { zbd->stage = ZBUFFds_read; break; }   /* this was just a header */
+                    zbd->outEnd = zbd->outStart +  decodedSize;
+                    zbd->stage = ZBUFFds_flush;
+                    // break; /* ZBUFFds_flush follows */
+            }   }
+
+        case ZBUFFds_flush:
+            {   size_t const toFlushSize = zbd->outEnd - zbd->outStart;
+                size_t const flushedSize = ZBUFF_limitCopy(op, oend-op, zbd->outBuff + zbd->outStart, toFlushSize);
+                op += flushedSize;
+                zbd->outStart += flushedSize;
+                if (flushedSize == toFlushSize) {
+                    zbd->stage = ZBUFFds_read;
+                    if (zbd->outStart + zbd->blockSize > zbd->outBuffSize)
+                        zbd->outStart = zbd->outEnd = 0;
+                    break;
+                }
+                /* cannot flush everything */
+                notDone = 0;
+                break;
+            }
+        default: return ERROR(GENERIC);   /* impossible */
+    }   }
+
+    /* result */
+    *srcSizePtr = ip-istart;
+    *dstCapacityPtr = op-ostart;
+    {   size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zbd->zd);
+        if (nextSrcSizeHint > ZSTD_blockHeaderSize) nextSrcSizeHint+= ZSTD_blockHeaderSize;   /* get following block header too */
+        nextSrcSizeHint -= zbd->inPos;   /* already loaded*/
+        return nextSrcSizeHint;
+    }
+}
+
+
+
+/* *************************************
+*  Tool functions
+***************************************/
+unsigned ZBUFF_isError(size_t errorCode) { return ERR_isError(errorCode); }
+const char* ZBUFF_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); }
+
+size_t ZBUFF_recommendedDInSize(void)  { return ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize /* block header size*/ ; }
+size_t ZBUFF_recommendedDOutSize(void) { return ZSTD_BLOCKSIZE_MAX; }
diff --git a/lib/zstd_decompress.c b/lib/decompress/zstd_decompress.c
similarity index 99%
rename from lib/zstd_decompress.c
rename to lib/decompress/zstd_decompress.c
index 5d3f44224dcd629a716e34e05b393e7cd73c9949..6515c68bfbeab12e473bcaafb870134c9d27a6ac 100644
--- a/lib/zstd_decompress.c
+++ b/lib/decompress/zstd_decompress.c
@@ -59,7 +59,7 @@
 #include "mem.h"         /* low level memory routines */
 #include "zstd_internal.h"
 #include "fse_static.h"
-#include "huff0_static.h"
+#include "huf_static.h"
 
 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
 #  include "zstd_legacy.h"
diff --git a/lib/divsufsort.c b/lib/dictBuilder/divsufsort.c
similarity index 100%
rename from lib/divsufsort.c
rename to lib/dictBuilder/divsufsort.c
diff --git a/lib/divsufsort.h b/lib/dictBuilder/divsufsort.h
similarity index 100%
rename from lib/divsufsort.h
rename to lib/dictBuilder/divsufsort.h
diff --git a/lib/zdict.c b/lib/dictBuilder/zdict.c
similarity index 99%
rename from lib/zdict.c
rename to lib/dictBuilder/zdict.c
index 122ac8cb26e13d249d81f2bc0e178115d8066a2a..2ebf2b49ca62046b6b03d260effc9e30bb4e3f7e 100644
--- a/lib/zdict.c
+++ b/lib/dictBuilder/zdict.c
@@ -61,7 +61,7 @@
 #include "mem.h"           /* read */
 #include "error_private.h"
 #include "fse.h"
-#include "huff0_static.h"
+#include "huf_static.h"
 #include "zstd_internal.h"
 #include "divsufsort.h"
 #include "zdict_static.h"
diff --git a/lib/zdict.h b/lib/dictBuilder/zdict.h
similarity index 100%
rename from lib/zdict.h
rename to lib/dictBuilder/zdict.h
diff --git a/lib/zdict_static.h b/lib/dictBuilder/zdict_static.h
similarity index 100%
rename from lib/zdict_static.h
rename to lib/dictBuilder/zdict_static.h
diff --git a/lib/legacy/zstd_v04.c b/lib/legacy/zstd_v04.c
index 57d724c27c31bb246590b2de1a0ec16c1b31eba6..58dbe71a51f25cb2cdcadaf2b7e645c279bec053 100644
--- a/lib/legacy/zstd_v04.c
+++ b/lib/legacy/zstd_v04.c
@@ -2004,11 +2004,6 @@ extern "C" {
 #endif
 
 
-/* ****************************************
-*  Dependency
-******************************************/
-#include "huff0.h"
-
 
 /* ****************************************
 *  Static allocation macros
diff --git a/programs/Makefile b/programs/Makefile
index 36ab3d864737820ea2fc1f5d7d5969c32b51806f..e6c4e8047eac1310b61dcfee761fccbab6b18590 100644
--- a/programs/Makefile
+++ b/programs/Makefile
@@ -32,9 +32,9 @@
 # ##########################################################################
 
 # Version numbers
-LIBVER_MAJOR_SCRIPT:=`sed -n '/define ZSTD_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ../lib/zstd.h`
-LIBVER_MINOR_SCRIPT:=`sed -n '/define ZSTD_VERSION_MINOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ../lib/zstd.h`
-LIBVER_PATCH_SCRIPT:=`sed -n '/define ZSTD_VERSION_RELEASE/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ../lib/zstd.h`
+LIBVER_MAJOR_SCRIPT:=`sed -n '/define ZSTD_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ../lib/common/zstd.h`
+LIBVER_MINOR_SCRIPT:=`sed -n '/define ZSTD_VERSION_MINOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ../lib/common/zstd.h`
+LIBVER_PATCH_SCRIPT:=`sed -n '/define ZSTD_VERSION_RELEASE/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ../lib/common/zstd.h`
 LIBVER_SCRIPT:= $(LIBVER_MAJOR_SCRIPT).$(LIBVER_MINOR_SCRIPT).$(LIBVER_PATCH_SCRIPT)
 LIBVER_MAJOR := $(shell echo $(LIBVER_MAJOR_SCRIPT))
 LIBVER_MINOR := $(shell echo $(LIBVER_MINOR_SCRIPT))
@@ -44,7 +44,7 @@ VERSION?= $(LIBVER)
 
 DESTDIR?=
 PREFIX ?= /usr/local
-CPPFLAGS= -I../lib -DZSTD_VERSION=\"$(VERSION)\"
+CPPFLAGS= -I../lib/common -DZSTD_VERSION=\"$(VERSION)\"
 CFLAGS ?= -O3  # -falign-loops=32   # not always beneficial
 CFLAGS += -std=c99 -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wstrict-aliasing=1 -Wswitch-enum -Wstrict-prototypes -Wundef
 FLAGS   = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(MOREFLAGS)
@@ -53,7 +53,11 @@ BINDIR  = $(PREFIX)/bin
 MANDIR  = $(PREFIX)/share/man/man1
 ZSTDDIR = ../lib
 
-ZSTD_FILES := $(ZSTDDIR)/huff0.c $(ZSTDDIR)/fse.c $(ZSTDDIR)/zstd_decompress.c $(ZSTDDIR)/zstd_compress.c
+ZSTDCOMP_FILES := $(ZSTDDIR)/compress/zstd_compress.c $(ZSTDDIR)/compress/fse_compress.c $(ZSTDDIR)/compress/huf_compress.c
+ZSTDDECOMP_FILES := $(ZSTDDIR)/decompress/zstd_decompress.c $(ZSTDDIR)/decompress/fse_decompress.c $(ZSTDDIR)/decompress/huf_decompress.c
+ZDICT_FILES := $(ZSTDDIR)/dictBuilder/zdict.c $(ZSTDDIR)/dictBuilder/divsufsort.c
+ZBUFF_FILES := $(ZSTDDIR)/compress/zbuff_compress.c $(ZSTDDIR)/decompress/zbuff_decompress.c
+ZSTD_FILES := $(ZSTDDECOMP_FILES) $(ZSTDCOMP_FILES)
 
 ifeq ($(ZSTD_LEGACY_SUPPORT), 0)
 CPPFLAGS  += -DZSTD_LEGACY_SUPPORT=0
@@ -85,11 +89,11 @@ default: zstd
 
 all: zstd zstd32 fullbench fullbench32 fuzzer fuzzer32 zbufftest zbufftest32 paramgrill datagen
 
-zstd  : $(ZSTD_FILES) $(ZSTD_FILES_LEGACY) $(ZSTDDIR)/zbuff.c $(ZSTDDIR)/zdict.c $(ZSTDDIR)/divsufsort.c \
+zstd  : $(ZSTD_FILES) $(ZSTD_FILES_LEGACY) $(ZBUFF_FILES) $(ZDICT_FILES) \
         zstdcli.c fileio.c bench.c xxhash.c datagen.c dibio.c
 	$(CC)      $(FLAGS) -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT) $^ -o $@$(EXT)
 
-zstd32: $(ZSTD_FILES) $(ZSTD_FILES_LEGACY) $(ZSTDDIR)/zbuff.c $(ZSTDDIR)/zdict.c $(ZSTDDIR)/divsufsort.c \
+zstd32: $(ZSTD_FILES) $(ZSTD_FILES_LEGACY) $(ZBUFF_FILES) $(ZDICT_FILES) \
         zstdcli.c fileio.c bench.c xxhash.c datagen.c dibio.c 
 	$(CC) -m32 $(FLAGS) -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT) $^ -o $@$(EXT)
 
@@ -107,18 +111,16 @@ zstd-pgo : clean zstd
 	rm zstd
 	$(MAKE) zstd MOREFLAGS=-fprofile-use
 
-zstd-frugal: $(ZSTD_FILES) $(ZSTDDIR)/zbuff.c zstdcli.c fileio.c
+zstd-frugal: $(ZSTD_FILES) $(ZBUFF_FILES) zstdcli.c fileio.c
 	$(CC)      $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT -DZSTD_LEGACY_SUPPORT=0 $^ -o zstd$(EXT)
 
 zstd-small: clean 
 	CFLAGS="-Os -s" $(MAKE) zstd-frugal 
 
-fullbench  : $(ZSTD_FILES) $(ZSTDDIR)/zbuff.c \
-        datagen.c fullbench.c
+fullbench  : $(ZSTD_FILES) $(ZBUFF_FILES) datagen.c fullbench.c
 	$(CC)      $(FLAGS) $^ -o $@$(EXT)
 
-fullbench32: $(ZSTD_FILES) $(ZSTDDIR)/zbuff.c \
-      datagen.c fullbench.c
+fullbench32: $(ZSTD_FILES) $(ZBUFF_FILES) datagen.c fullbench.c
 	$(CC) -m32 $(FLAGS) $^ -o $@$(EXT)
 
 fuzzer  : $(ZSTD_FILES) \
@@ -129,11 +131,11 @@ fuzzer32: $(ZSTD_FILES) \
       datagen.c xxhash.c fuzzer.c
 	$(CC) -m32 $(FLAGS) $^ -o $@$(EXT)
 
-zbufftest  : $(ZSTD_FILES) $(ZSTDDIR)/zbuff.c \
+zbufftest  : $(ZSTD_FILES) $(ZBUFF_FILES) \
       datagen.c xxhash.c zbufftest.c
 	$(CC)      $(FLAGS) $^ -o $@$(EXT)
 
-zbufftest32: $(ZSTD_FILES) $(ZSTDDIR)/zbuff.c \
+zbufftest32: $(ZSTD_FILES) $(ZBUFF_FILES) \
       datagen.c xxhash.c zbufftest.c
 	$(CC) -m32 $(FLAGS) $^ -o $@$(EXT)
 
diff --git a/programs/dibio.c b/programs/dibio.c
index 17f89586b9be0bff581f25f14f5f70e16dd6fe7c..9641ac299a00ec71ec612d86c8b2ea3b7747aaf0 100644
--- a/programs/dibio.c
+++ b/programs/dibio.c
@@ -52,7 +52,7 @@
 
 #include "mem.h"            /* read */
 #include "error_private.h"
-#include "zdict_static.h"
+#include "dibio.h"
 
 
 /*-*************************************
diff --git a/programs/dibio.h b/programs/dibio.h
index 0ccec4135daac52e02da2a64fdc598a6be23ab87..fd1f21df0c129882a6d8eaae99caef6eb34137e9 100644
--- a/programs/dibio.h
+++ b/programs/dibio.h
@@ -32,7 +32,7 @@
 /*-*************************************
 *  Dependencies
 ***************************************/
-#include "zdict_static.h"   /* ZDICT_params_t */
+#include "../lib/dictBuilder/zdict_static.h"   /* ZDICT_params_t */
 
 
 /*-*************************************
diff --git a/programs/fileio.c b/programs/fileio.c
index ec0d19f218a84a635b71d60e19f426fd7b79cdf9..edd917e815a4a49c1d487343d15b4d362aabc4b5 100644
--- a/programs/fileio.c
+++ b/programs/fileio.c
@@ -95,10 +95,6 @@
 /*-*************************************
 *  Constants
 ***************************************/
-#define KB *(1U<<10)
-#define MB *(1U<<20)
-#define GB *(1U<<30)
-
 #define _1BIT  0x01
 #define _2BITS 0x03
 #define _3BITS 0x07
@@ -106,9 +102,6 @@
 #define _6BITS 0x3F
 #define _8BITS 0xFF
 
-#define BIT6  0x40
-#define BIT7  0x80
-
 #define BLOCKSIZE      (128 KB)
 #define ROLLBUFFERSIZE (BLOCKSIZE*8*64)
 
@@ -135,7 +128,6 @@ void FIO_setNotificationLevel(unsigned level) { g_displayLevel=level; }
 static const unsigned refreshRate = 150;
 static clock_t g_time = 0;
 
-#define MAX(a,b)   ((a)>(b)?(a):(b))
 
 
 /*-*************************************