Skip to content
Snippets Groups Projects
Commit 4aaa36f7 authored by Nick Terrell's avatar Nick Terrell
Browse files

[regression] Add initial regression test framework

The regression tests run nightly or on the `regression`
branch for convenience. The results get uploaded as the
artifacts of the job. If they change, check the diff
printed in the job. If all is well, download the new
results and commit them to the repo.

This code will only run on a UNIX like platform. It
could be made to run on Windows, but I don't think that
it is necessary. It also uses C99.

* data: This module defines the data to run tests on.
  It downloads data from a URL into a cache directory,
  checks it against a checksum, and unpacks it. It also
  provides helpers for accessing the data.
* config: This module defines the configs to run tests
  with. A config is a set of API parameters and a set of
  CLI flags.
* result: This module is a helper for the method module. It
  defines the result type.
* method: This module defines the compression methods
  to test. It is what runs the regression test using the
  data and the config. It reports the total compressed
  size, or an error/skip.
* test: This is the test binary that runs the tests for
  every (data, config, method) tuple, and prints the
  results to the output file and stderr.
* results.csv: The results that the current commit is
  expected to produce.
parent 787532b9
No related branches found
No related tags found
No related merge requests found
...@@ -15,7 +15,8 @@ references: ...@@ -15,7 +15,8 @@ references:
sudo apt-get -y install \ sudo apt-get -y install \
gcc-multilib-powerpc-linux-gnu gcc-arm-linux-gnueabi \ gcc-multilib-powerpc-linux-gnu gcc-arm-linux-gnueabi \
libc6-dev-armel-cross gcc-aarch64-linux-gnu libc6-dev-arm64-cross \ libc6-dev-armel-cross gcc-aarch64-linux-gnu libc6-dev-arm64-cross \
libc6-dev-ppc64-powerpc-cross zstd gzip coreutils libc6-dev-ppc64-powerpc-cross zstd gzip coreutils \
libcurl4-openssl-dev
jobs: jobs:
# the first half of the jobs are in this test # the first half of the jobs are in this test
...@@ -82,6 +83,49 @@ jobs: ...@@ -82,6 +83,49 @@ jobs:
cp $ZSTD_VERSION.tar* $CIRCLE_ARTIFACTS cp $ZSTD_VERSION.tar* $CIRCLE_ARTIFACTS
- store_artifacts: - store_artifacts:
path: /tmp/circleci-artifacts path: /tmp/circleci-artifacts
# This step should only be run in a cron job
regression-test:
docker:
- image: circleci/buildpack-deps:bionic
environment:
CIRCLE_ARTIFACTS: /tmp/circleci-artifacts
steps:
- checkout
- *install-dependencies
# Restore the cached resources.
- restore_cache:
# We try our best to bust the cache when the data changes by hashing
# data.c. If that doesn't work, simply update the version number here
# and below. If we fail to bust the cache, the regression testing will
# still work, since it has its own stamp, but will need to redownload
# everything.
keys:
- regression-cache-{{ checksum "tests/regression/data.c" }}-v0
- run:
name: Regression Test
command: |
make -C programs zstd
make -C tests/regression test
mkdir -p $CIRCLE_ARTIFACTS
./tests/regression/test \
--cache tests/regression/cache \
--output $CIRCLE_ARTIFACTS/results.csv \
--zstd programs/zstd
echo "NOTE: The new results.csv is uploaded as an artifact to this job"
echo " If this fails, go to the Artifacts pane in CircleCI, "
echo " download /tmp/circleci-artifacts/results.csv, and if they "
echo " are still good, copy it into the repo and commit it."
echo "> diff tests/regression/results.csv $CIRCLE_ARTIFACTS/results.csv"
diff tests/regression/results.csv $CIRCLE_ARTIFACTS/results.csv
# Only save the cache on success (default), since if the failure happened
# before we stamp the data cache, we will have a bad cache for this key.
- save_cache:
key: regression-cache-{{ checksum "tests/regression/data.c" }}-v0
paths:
- tests/regression/cache
- store_artifacts:
path: /tmp/circleci-artifacts
workflows: workflows:
version: 2 version: 2
...@@ -96,6 +140,13 @@ workflows: ...@@ -96,6 +140,13 @@ workflows:
filters: filters:
tags: tags:
only: /.*/ only: /.*/
# Create a branch called regression and set it to dev to force a
# regression test run
- regression-test:
filters:
branches:
only:
- regression
# Only run on release tags. # Only run on release tags.
- publish-github-release: - publish-github-release:
requires: requires:
...@@ -106,6 +157,20 @@ workflows: ...@@ -106,6 +157,20 @@ workflows:
ignore: /.*/ ignore: /.*/
tags: tags:
only: /^v\d+\.\d+\.\d+$/ only: /^v\d+\.\d+\.\d+$/
nightly:
triggers:
- schedule:
cron: "0 0 * * *"
filters:
branches:
only:
- master
- dev
jobs:
# Run daily long regression tests
- regression-test
# Longer tests # Longer tests
#- make -C tests test-zstd-nolegacy && make clean #- make -C tests test-zstd-nolegacy && make clean
......
# ################################################################
# Copyright (c) 2015-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
# LICENSE file in the root directory of this source tree) and the GPLv2 (found
# in the COPYING file in the root directory of this source tree).
# ################################################################
# Default to an optimized build unless the caller overrides CFLAGS.
CFLAGS ?= -O3
# Compiler and linker flags for libcurl, as reported by curl-config.
CURL_CFLAGS := $(shell curl-config --cflags)
CURL_LDFLAGS := $(shell curl-config --libs)
# Locations of the zstd programs and library sources, relative to this directory.
PROGDIR := ../../programs
LIBDIR := ../../lib
ZSTD_CPPFLAGS := -I$(PROGDIR) -I$(LIBDIR) -I$(LIBDIR)/common
# Flags used to compile every object and to link the test binary.
REGRESSION_CFLAGS = $(CFLAGS) $(CURL_CFLAGS)
REGRESSION_CPPFLAGS = $(CPPFLAGS) $(ZSTD_CPPFLAGS)
REGRESSION_LDFLAGS = $(LDFLAGS) $(CURL_LDFLAGS)
# The default goal builds the test binary.
all: test
# xxhash.o and util.o are compiled from sources in the zstd tree.
xxhash.o: $(LIBDIR)/common/xxhash.c $(LIBDIR)/common/xxhash.h
$(CC) $(REGRESSION_CFLAGS) $(REGRESSION_CPPFLAGS) $< -c -o $@
util.o: $(PROGDIR)/util.c $(PROGDIR)/util.h
$(CC) $(REGRESSION_CFLAGS) $(REGRESSION_CPPFLAGS) $< -c -o $@
# Objects for the regression test modules in this directory.
data.o: data.c data.h $(PROGDIR)/util.h $(LIBDIR)/common/xxhash.h
$(CC) $(REGRESSION_CFLAGS) $(REGRESSION_CPPFLAGS) $< -c -o $@
config.o: config.c config.h levels.h
$(CC) $(REGRESSION_CFLAGS) $(REGRESSION_CPPFLAGS) $< -c -o $@
# No recipe: this only records the headers that method.h depends on.
method.h: data.h config.h result.h
method.o: method.c method.h
$(CC) $(REGRESSION_CFLAGS) $(REGRESSION_CPPFLAGS) $< -c -o $@
result.o: result.c result.h
$(CC) $(REGRESSION_CFLAGS) $(REGRESSION_CPPFLAGS) $< -c -o $@
test.o: test.c data.h config.h method.h
$(CC) $(REGRESSION_CFLAGS) $(REGRESSION_CPPFLAGS) $< -c -o $@
# Build the static library in LIBDIR with a recursive make and copy it here.
# NOTE(review): no prerequisites are listed for this target.
libzstd.a:
$(MAKE) -C $(LIBDIR) libzstd.a
cp $(LIBDIR)/libzstd.a .
# Link the test binary from all objects plus the static zstd library.
test: test.o data.o config.o util.o method.o result.o xxhash.o libzstd.a
$(CC) $^ $(REGRESSION_LDFLAGS) -o $@
# Remove the build products here and clean the library directory as well.
.PHONY: clean
clean:
$(MAKE) -C $(LIBDIR) clean
$(RM) *.o *.a test
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#include "config.h"
/*
 * Define a config for each fast level we want to test with.
 * FAST_LEVEL(x) defines level_fast<x>_param_values, which holds the single
 * parameter ZSTD_p_compressionLevel with the value -x cast to unsigned, and
 * the config level_fast<x>, named "level -<x>" with CLI arguments
 * "--fast=<x>".
 */
#define FAST_LEVEL(x) \
param_value_t const level_fast##x##_param_values[] = { \
{.param = ZSTD_p_compressionLevel, .value = (unsigned)-x}, \
}; \
config_t const level_fast##x = { \
.name = "level -" #x, \
.cli_args = "--fast=" #x, \
.param_values = PARAM_VALUES(level_fast##x##_param_values), \
};
/*
 * Define a config for each level we want to test with.
 * LEVEL(x) defines level_<x>_param_values, which holds the single parameter
 * ZSTD_p_compressionLevel with the value x, and the config level_<x>, named
 * "level <x>" with CLI arguments "-<x>".
 */
#define LEVEL(x) \
param_value_t const level_##x##_param_values[] = { \
{.param = ZSTD_p_compressionLevel, .value = (unsigned)x}, \
}; \
config_t const level_##x = { \
.name = "level " #x, \
.cli_args = "-" #x, \
.param_values = PARAM_VALUES(level_##x##_param_values), \
};
/*
 * Builds a param_values_t initializer from pv. The element count is computed
 * with sizeof, so pv must be an actual array, not a pointer.
 */
#define PARAM_VALUES(pv) \
{ .data = pv, .size = sizeof(pv) / sizeof((pv)[0]) }
/* Expand every entry of levels.h into its config definition. */
#include "levels.h"
#undef LEVEL
#undef FAST_LEVEL
/*
 * The list of every config. levels.h is included again with LEVEL and
 * FAST_LEVEL redefined so that each entry expands to the address of the
 * matching config object; the list ends with a NULL entry.
 */
static config_t const* g_configs[] = {
#define FAST_LEVEL(x) &level_fast##x,
#define LEVEL(x) &level_##x,
#include "levels.h"
#undef LEVEL
#undef FAST_LEVEL
NULL,
};
/* The exported, read-only view of g_configs. */
config_t const* const* configs = g_configs;
/**
 * Looks up the compression level stored in a config.
 *
 * Scans config->param_values and uses the first entry whose param is
 * ZSTD_p_compressionLevel.
 *
 * @param config The config to inspect.
 *
 * @returns The stored value converted to int (fast levels are stored as the
 *          unsigned representation of a negative number), or CONFIG_NO_LEVEL
 *          if the config has no compression level parameter.
 */
int config_get_level(config_t const* config) {
    param_values_t const params = config->param_values;
    for (size_t i = 0; i < params.size; ++i) {
        if (params.data[i].param == ZSTD_p_compressionLevel)
            return (int)params.data[i].value;
    }
    return CONFIG_NO_LEVEL;
}
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef CONFIG_H
#define CONFIG_H
#include <stddef.h>
#define ZSTD_STATIC_LINKING_ONLY
#include <zstd.h>
/** A single compression parameter together with the value to give it. */
typedef struct {
ZSTD_cParameter param; /**< The parameter to set. */
unsigned value; /**< The value for param. */
} param_value_t;
/** A sized, read-only array of parameter/value pairs. */
typedef struct {
size_t size; /**< The number of elements in data. */
param_value_t const* data; /**< The first element of the array. */
} param_values_t;
/**
 * The config tells the compression method what options to use.
 * Each config carries both a CLI form (cli_args) and an API form
 * (param_values) of the same settings.
 */
typedef struct {
const char* name; /**< Identifies the config in the results table */
/**
 * Optional arguments to pass to the CLI. If not set, CLI-based methods
 * will skip this config.
 */
char const* cli_args;
/**
 * Parameters to pass to the advanced API. If the advanced API isn't used,
 * the parameters will be derived from these.
 */
param_values_t param_values;
} config_t;
/**
 * Sentinel returned by config_get_level() when a config has no level.
 * NOTE(review): presumably chosen because it lies outside the range of valid
 * compression levels -- confirm against the zstd level limits.
 */
#define CONFIG_NO_LEVEL (-ZSTD_TARGETLENGTH_MAX - 1)
/**
 * Returns the compression level specified by the config, or CONFIG_NO_LEVEL if
 * no level is specified. Note that 0 is a valid compression level, meaning
 * default.
 *
 * @param config The config to inspect.
 */
int config_get_level(config_t const* config);
/**
 * The NULL-terminated list of configs.
 * Iterate until a NULL entry is reached.
 */
extern config_t const* const* configs;
#endif
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#include "data.h"
#include <assert.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <curl/curl.h>
#include "mem.h"
#include "util.h"
#define XXH_STATIC_LINKING_ONLY
#include "xxhash.h"
/**
 * Data objects
 *
 * silesia and silesia_tar are fetched from the same URL and therefore share
 * one checksum. They differ only in type: silesia is unpacked as a directory,
 * silesia_tar is kept as a single file.
 */
/* Builds the download URL of a file attached to the regression-data release. */
#define REGRESSION_RELEASE(x) \
"https://github.com/facebook/zstd/releases/download/regression-data/" x
data_t silesia = {
.url = REGRESSION_RELEASE("silesia.tar.zst"),
.name = "silesia",
.type = data_type_dir,
.xxhash64 = 0x67558ee5506918b4LL,
};
data_t silesia_tar = {
.url = REGRESSION_RELEASE("silesia.tar.zst"),
.name = "silesia.tar",
.type = data_type_file,
.xxhash64 = 0x67558ee5506918b4LL,
};
/* Mutable list of the data objects; data_init() fills in their paths. */
static data_t* g_data[] = {
&silesia,
&silesia_tar,
NULL,
};
/* The exported, read-only view of g_data. */
data_t const* const* data = (data_t const* const*)g_data;
/**
* data buffer helper functions (documented in header).
*/
/**
 * Allocates an empty buffer able to hold capacity bytes.
 *
 * On allocation failure the returned buffer has a NULL data pointer and a
 * capacity of zero. The size is always zero.
 */
data_buffer_t data_buffer_create(size_t const capacity) {
    data_buffer_t result = {};
    uint8_t* const mem = (uint8_t*)malloc(capacity);
    if (mem != NULL) {
        result.data = mem;
        result.capacity = capacity;
    }
    return result;
}
/**
 * Reads the whole of filename into a newly allocated buffer.
 *
 * The file size is queried first, a buffer of exactly that size is allocated,
 * and the file is then read in one call. Any failure is reported on stderr and
 * yields a zeroed buffer (data == NULL).
 */
data_buffer_t data_buffer_read(char const* filename) {
    data_buffer_t out = {};
    uint64_t const file_size = UTIL_getFileSize(filename);
    if (file_size == UTIL_FILESIZE_UNKNOWN) {
        fprintf(stderr, "unknown size for %s\n", filename);
        return out;
    }

    out.data = (uint8_t*)malloc(file_size);
    if (out.data == NULL) {
        fprintf(stderr, "malloc failed\n");
        return out;
    }
    out.capacity = file_size;

    int ok = 0;
    FILE* const in = fopen(filename, "rb");
    if (in == NULL) {
        fprintf(stderr, "file null\n");
    } else {
        out.size = fread(out.data, 1, out.capacity, in);
        fclose(in);
        if (out.size == out.capacity)
            ok = 1;
        else
            fprintf(stderr, "read %zu != %zu\n", out.size, out.capacity);
    }

    if (!ok) {
        /* Hand back an all-zero buffer so callers can test data for NULL. */
        free(out.data);
        memset(&out, 0, sizeof(out));
    }
    return out;
}
/**
 * Loads the unpacked file of a data object into a buffer.
 *
 * Only objects of type data_type_file can be loaded; any other type yields an
 * empty buffer (data == NULL).
 */
data_buffer_t data_buffer_get(data_t const* data) {
    if (data->type == data_type_file)
        return data_buffer_read(data->path);

    data_buffer_t const empty = {};
    return empty;
}
/**
 * Orders two buffers by content and then by length.
 *
 * The first min(size1, size2) bytes are compared with memcmp(). If they
 * differ, the memcmp() result is returned. Otherwise the shorter buffer
 * orders first.
 *
 * @returns A negative value, zero, or a positive value if buffer1 is less
 *          than, equal to, or greater than buffer2.
 */
int data_buffer_compare(data_buffer_t buffer1, data_buffer_t buffer2) {
    size_t const common =
        buffer1.size < buffer2.size ? buffer1.size : buffer2.size;
    /* An empty buffer may have a NULL data pointer, and memcmp() must not be
     * given NULL even for a length of zero, so skip the call in that case. */
    if (common != 0) {
        int const cmp = memcmp(buffer1.data, buffer2.data, common);
        if (cmp != 0)
            return cmp;
    }
    if (buffer1.size < buffer2.size)
        return -1;
    if (buffer1.size > buffer2.size)
        return 1;
    return 0;
}
/**
 * Releases the memory owned by buffer. A buffer whose data is NULL is
 * accepted, since free(NULL) does nothing.
 */
void data_buffer_free(data_buffer_t buffer) {
    uint8_t* const mem = buffer.data;
    free(mem);
}
/**
* Initialization and download functions.
*/
/* Copy of the cache directory given to data_init(); freed by data_finish(). */
static char* g_data_dir = NULL;
/**
 * Creates indir and all of its missing parent directories (mkdir -p).
 *
 * The path is walked one component at a time. Each prefix is created with
 * mkdir(); a prefix that already exists as a directory is accepted.
 *
 * @param indir The directory path to create. Must be a non-empty string.
 *
 * @returns 0 on success, EINVAL for a NULL or empty path, ENOMEM if the path
 *          could not be copied, or the errno of the failing mkdir().
 */
static int ensure_directory_exists(char const* indir) {
    /* An empty path has no first component to step over below. */
    if (indir == NULL || indir[0] == '\0')
        return EINVAL;

    char* const dir = strdup(indir);
    if (dir == NULL)
        return ENOMEM;

    int ret = 0;
    char* end = dir;
    do {
        /* Find the next directory level. */
        for (++end; *end != '\0' && *end != '/'; ++end)
            ;
        /* End the string there, make the directory, and restore the string. */
        char const save = *end;
        *end = '\0';
        int const isdir = UTIL_isDirectory(dir);
        int const mkdir_ret = mkdir(dir, S_IRWXU);
        /* Capture errno right away, before anything else can change it. */
        int const mkdir_errno = errno;
        *end = save;
        /* It is okay if the directory already exists. */
        if (mkdir_ret != 0 && !(mkdir_errno == EEXIST && isdir)) {
            ret = mkdir_errno;
            fprintf(stderr, "mkdir() failed\n");
            break;
        }
    } while (*end != '\0');

    free(dir);
    return ret;
}
/**
 * Joins three strings into one newly allocated, NUL-terminated string.
 *
 * @returns The joined string, which the caller must free(), or NULL if the
 *          allocation failed.
 */
static char* cat3(char const* str1, char const* str2, char const* str3) {
    size_t const len1 = strlen(str1);
    size_t const len2 = strlen(str2);
    size_t const len3 = strlen(str3);
    size_t const total = len1 + len2 + len3;

    char* const out = (char*)malloc(total + 1);
    if (out == NULL)
        return NULL;

    /* Append each piece at the running write position. */
    char* cursor = out;
    memcpy(cursor, str1, len1);
    cursor += len1;
    memcpy(cursor, str2, len2);
    cursor += len2;
    memcpy(cursor, str3, len3);
    cursor += len3;
    *cursor = '\0';

    assert(strlen(out) == total);
    return out;
}
/**
 * State needed by the curl callback.
 * It takes data from curl, hashes it, and writes it to the file.
 */
typedef struct {
FILE* file; /**< Pipe opened with popen() that receives the downloaded bytes. */
XXH64_state_t xxhash64; /**< Running hash of the bytes written so far. */
int error; /**< 0 if the state was created successfully, else an errno value. */
} curl_data_t;
/**
 * Create the curl state.
 *
 * Opens a pipe to a shell command that consumes the compressed download:
 * a file resource is decompressed straight to data->path, while a directory
 * resource is decompressed and extracted with tar into the cache directory.
 *
 * On failure the error member of the returned state is non-zero.
 */
static curl_data_t curl_data_create(data_t const* data) {
    curl_data_t state = {};
    XXH64_reset(&state.xxhash64, 0);
    assert(UTIL_isDirectory(g_data_dir));

    /* The two resource types differ only in the command and its target. */
    int const is_file = (data->type == data_type_file);
    char const* const prefix =
        is_file ? "zstd -dqfo '" : "zstd -dc | tar -x -C '";
    char const* const target = is_file ? data->path : g_data_dir;

    char* const command = cat3(prefix, target, "'");
    if (command == NULL) {
        state.error = ENOMEM;
        return state;
    }
    state.file = popen(command, "w");
    free(command);

    if (state.file == NULL)
        state.error = errno;
    return state;
}
/**
 * Free the curl state.
 *
 * Closes the pipe, if one was opened. A state whose creation failed has a
 * NULL file, which must not be passed to pclose().
 *
 * @returns The pclose() status, or 0 if there was no pipe to close.
 */
static int curl_data_free(curl_data_t cdata) {
    if (cdata.file == NULL)
        return 0;
    return pclose(cdata.file);
}
/**
 * curl callback. Updates the hash, and writes to the file.
 *
 * @param data  The bytes delivered by curl.
 * @param size  The size of one item.
 * @param count The number of items.
 * @param ptr   The curl_data_t registered as the write data.
 *
 * @returns The number of bytes consumed. libcurl treats any value other than
 *          size * count as a write error.
 */
static size_t curl_write(void* data, size_t size, size_t count, void* ptr) {
    curl_data_t* const cdata = (curl_data_t*)ptr;
    size_t const bytes = size * count;
    /* Write byte-wise so that the return value is a byte count. */
    size_t const written = fwrite(data, 1, bytes, cdata->file);
    /* Hash only what actually reached the pipe. */
    XXH64_update(&cdata->xxhash64, data, written);
    return written;
}
/**
 * Download a single data object.
 *
 * Streams data->url through the pipe created by curl_data_create(), then
 * verifies that the unpacked output exists and that the hash of the
 * downloaded bytes equals data->xxhash64.
 *
 * @param curl The curl handle to download with.
 * @param data The data object to fetch.
 *
 * @returns 0 on success, otherwise a non-zero error code.
 */
static int curl_download_datum(CURL* curl, data_t const* data) {
curl_data_t cdata = curl_data_create(data);
/* Every early exit below reports EFAULT unless it sets something else. */
int err = EFAULT;
if (cdata.error != 0) {
err = cdata.error;
goto out;
}
/* Download the data. */
if (curl_easy_setopt(curl, CURLOPT_URL, data->url) != 0)
goto out;
if (curl_easy_setopt(curl, CURLOPT_WRITEDATA, &cdata) != 0)
goto out;
if (curl_easy_perform(curl) != 0) {
fprintf(stderr, "downloading '%s' failed\n", data->url);
goto out;
}
/* check that the file exists. */
/* NOTE(review): these checks run before the pipe is closed below -- confirm
 * the unpacking command has always created its output by this point. */
if (data->type == data_type_file && !UTIL_isRegularFile(data->path)) {
fprintf(stderr, "output file '%s' does not exist\n", data->path);
goto out;
}
if (data->type == data_type_dir && !UTIL_isDirectory(data->path)) {
fprintf(stderr, "output directory '%s' does not exist\n", data->path);
goto out;
}
/* Check that the hash matches. */
if (XXH64_digest(&cdata.xxhash64) != data->xxhash64) {
fprintf(
stderr,
"checksum does not match: %llx != %llx\n",
(unsigned long long)XXH64_digest(&cdata.xxhash64),
(unsigned long long)data->xxhash64);
goto out;
}
err = 0;
out:
if (err != 0)
fprintf(stderr, "downloading '%s' failed\n", data->name);
/* NOTE(review): this is also reached when curl_data_create() failed, in
 * which case cdata.file may be NULL. */
int const close_err = curl_data_free(cdata);
/* A failure of the unpacking command only matters if nothing failed earlier. */
if (close_err != 0 && err == 0) {
fprintf(stderr, "failed to write data for '%s'\n", data->name);
err = close_err;
}
return err;
}
/**
 * Download all the data.
 *
 * Initializes libcurl, configures one easy handle that is shared by all
 * transfers, and downloads every object in the NULL-terminated list. The
 * first failure stops the loop.
 *
 * @param data The NULL-terminated list of data objects to fetch.
 *
 * @returns 0 on success, EFAULT on any failure.
 */
static int curl_download_data(data_t const* const* data) {
    assert(data != NULL);
    if (curl_global_init(CURL_GLOBAL_ALL) != 0)
        return EFAULT;

    int err = EFAULT;
    CURL* const curl = curl_easy_init();
    /* Global state was initialized above, so it must be torn down even when
     * no easy handle could be created. */
    if (curl == NULL)
        goto out_global;

    if (curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 1L) != 0)
        goto out_easy;
    if (curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L) != 0)
        goto out_easy;
    if (curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, curl_write) != 0)
        goto out_easy;

    for (; *data != NULL; ++data) {
        if (curl_download_datum(curl, *data) != 0)
            goto out_easy;
    }
    err = 0;

out_easy:
    curl_easy_cleanup(curl);
out_global:
    curl_global_cleanup();
    return err;
}
/**
 * Fill the path member variable of the data objects.
 *
 * Each path is "<dir>/<name>", allocated with cat3().
 *
 * @param data The NULL-terminated list of data objects to update.
 * @param dir  The cache directory.
 *
 * @returns 0 on success, or ENOMEM if a path could not be allocated. Paths
 *          assigned before the failure are left in place.
 */
static int data_create_paths(data_t* const* data, char const* dir) {
    assert(data != NULL);
    for (; *data != NULL; ++data) {
        data_t* const datum = *data;
        datum->path = cat3(dir, "/", datum->name);
        if (datum->path == NULL)
            return ENOMEM;
    }
    return 0;
}
/**
 * Releases the path string of every data object in the NULL-terminated list
 * and resets each path to NULL.
 */
static void data_free_paths(data_t* const* data) {
    assert(data != NULL);
    for (size_t idx = 0; data[idx] != NULL; ++idx) {
        data_t* const entry = data[idx];
        /* path is declared const; cast it away to hand it to free(). */
        free((void*)entry->path);
        entry->path = NULL;
    }
}
/* Name of the stamp file kept inside the cache directory. */
static char const kStampName[] = "STAMP";
/**
 * Feeds a 64-bit value into the hash state in little-endian byte order, so
 * that the resulting hash does not depend on the host endianness.
 */
static void xxh_update_le(XXH64_state_t* state, uint64_t data) {
    uint64_t const le = MEM_isLittleEndian() ? data : MEM_swap64(data);
    XXH64_update(state, &le, sizeof(le));
}
/**
 * Computes the stamp value for a NULL-terminated list of data objects.
 *
 * The name, expected checksum, and type of every object are hashed in list
 * order. The URL is left out on purpose, and the path is derived from the
 * name.
 */
static uint64_t stamp_hash(data_t const* const* data) {
    XXH64_state_t hasher;
    XXH64_reset(&hasher, 0);
    assert(data != NULL);
    for (size_t idx = 0; data[idx] != NULL; ++idx) {
        data_t const* const entry = data[idx];
        size_t const name_len = strlen(entry->name);
        XXH64_update(&hasher, entry->name, name_len);
        xxh_update_le(&hasher, entry->xxhash64);
        xxh_update_le(&hasher, entry->type);
    }
    return XXH64_digest(&hasher);
}
/**
 * Compares the stamp file in dir with the stamp expected for data.
 *
 * A message describing the outcome is printed to stderr, except when the
 * stamp path could not be allocated.
 *
 * @returns 1 if the stored stamp equals the expected one, otherwise 0.
 */
static int stamp_check(char const* dir, data_t const* const* data) {
    char* const stamp_path = cat3(dir, "/", kStampName);
    uint64_t const want = stamp_hash(data);
    FILE* in = NULL;
    int ok = 0;

    if (stamp_path == NULL) {
        /* Out of memory: treat as a mismatch without a message. */
    } else if (!UTIL_isRegularFile(stamp_path)) {
        fprintf(stderr, "stamp does not exist: recreating the data cache\n");
    } else if ((in = fopen(stamp_path, "rb")) == NULL) {
        fprintf(stderr, "could not open stamp: recreating the data cache\n");
    } else {
        XXH64_canonical_t stored;
        if (fread(&stored, sizeof(stored), 1, in) != 1) {
            fprintf(stderr, "invalid stamp: recreating the data cache\n");
        } else if (want == XXH64_hashFromCanonical(&stored)) {
            ok = 1;
            fprintf(stderr, "stamp matches: reusing the cached data\n");
        } else {
            fprintf(
                stderr, "stamp does not match: recreating the data cache\n");
        }
    }

    free(stamp_path);
    if (in != NULL)
        fclose(in);
    return ok;
}
/**
 * On success write a new stamp, on failure delete the old stamp.
 *
 * @param dir      The cache directory holding the stamp file.
 * @param data     The NULL-terminated list of data objects to stamp.
 * @param data_err The outcome of populating the cache; 0 means success.
 *
 * @returns 0 if a new stamp was written, ENOMEM if the stamp path could not
 *          be allocated, data_err if it is non-zero, or EIO if the stamp
 *          file could not be written.
 */
static int
stamp_write(char const* dir, data_t const* const* data, int const data_err) {
    char* const stamp = cat3(dir, "/", kStampName);
    if (stamp == NULL)
        return ENOMEM;

    int err = data_err;
    if (err == 0) {
        XXH64_canonical_t hash;
        XXH64_canonicalFromHash(&hash, stamp_hash(data));

        err = EIO;
        FILE* const stampfile = fopen(stamp, "wb");
        if (stampfile != NULL) {
            int const wrote =
                fwrite(&hash, sizeof(hash), 1, stampfile) == 1;
            /* fclose() flushes, so a failure here means the stamp may not be
             * on disk. Always close, even if the write failed. */
            int const closed = fclose(stampfile) == 0;
            if (wrote && closed)
                err = 0;
        }
    }

    if (err == 0) {
        fprintf(stderr, "stamped new data cache\n");
    } else {
        /* Remove any stale or partial stamp. Ignore errors. */
        unlink(stamp);
    }
    free(stamp);
    return err;
}
/**
 * Prepares the data cache in dir.
 *
 * Creates the directory, records it, derives the path of every data object,
 * and then either reuses the cache (when the stamp matches) or downloads
 * everything again and rewrites the stamp.
 *
 * @returns 0 on success, otherwise a non-zero error code.
 */
int data_init(char const* dir) {
    if (dir == NULL)
        return EINVAL;

    /* This must be first to simplify logic. */
    int const mkdir_err = ensure_directory_exists(dir);
    if (mkdir_err != 0)
        return mkdir_err;

    /* Save the cache directory. */
    g_data_dir = strdup(dir);
    if (g_data_dir == NULL)
        return ENOMEM;

    int const path_err = data_create_paths(g_data, dir);
    if (path_err != 0)
        return path_err;

    /* The stamp must be checked before the data cache is modified. Once the
     * download below has started, stamp_write() MUST be called so that the
     * STAMP reflects the new state of the cache. */
    if (stamp_check(dir, data))
        return 0;

    int const download_err = curl_download_data(data);

    /* This must be last, since it must know if the download succeeded. */
    stamp_write(dir, data, download_err);
    return download_err;
}
/**
 * Releases what data_init() allocated: the saved cache directory and the
 * path of every data object.
 */
void data_finish(void) {
    free(g_data_dir);
    g_data_dir = NULL;
    data_free_paths(g_data);
}
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef DATA_H
#define DATA_H
#include <stddef.h>
#include <stdint.h>
/** How a downloaded resource is unpacked and accessed. */
typedef enum {
data_type_file = 1, /**< This data is a file. *.zst */
data_type_dir = 2, /**< This data is a directory. *.tar.zst */
} data_type_t;
/** Describes one downloadable resource used as regression test input. */
typedef struct {
char const* url; /**< Where to get this resource. */
uint64_t xxhash64; /**< Hash of the url contents. */
char const* name; /**< The logical name of the resource (no extension). */
data_type_t type; /**< The type of this resource. */
char const* path; /**< The path of the unpacked resource (derived). */
size_t size; /**< NOTE(review): purpose not documented here -- confirm. */
} data_t;
/**
 * The NULL-terminated list of data objects.
 * Iterate until a NULL entry is reached.
 */
extern data_t const* const* data;
/**
 * Initializes the data module and downloads the data necessary.
 * Caches the downloads in dir. We add a stamp file in the directory after
 * a successful download. If a stamp file already exists, and matches our
 * current data stamp, we will use the cached data without downloading.
 *
 * @param dir The directory to cache the downloaded data into. It is created
 *            if it does not exist. Must not be NULL.
 *
 * @returns 0 on success, otherwise a non-zero error code.
 */
int data_init(char const* dir);
/**
 * Must be called at exit to free resources allocated by data_init().
 * The path member of every data object is reset to NULL.
 */
void data_finish(void);
/** A heap-allocated byte buffer, passed around by value. */
typedef struct {
uint8_t* data; /**< The allocation, or NULL if creation failed. */
size_t size; /**< The number of bytes of data currently in use. */
size_t capacity; /**< The number of bytes allocated for data. */
} data_buffer_t;
/**
 * Read the file that data points to into a buffer.
 * NOTE: data must be a file, not a directory. For any other type an empty
 * buffer is returned.
 *
 * @returns The buffer, whose data member is NULL on failure.
 */
data_buffer_t data_buffer_get(data_t const* data);
/**
 * Read the contents of filename into a buffer.
 * On success the size and capacity both equal the file size.
 *
 * @returns The buffer, whose data member is NULL on failure.
 */
data_buffer_t data_buffer_read(char const* filename);
/**
 * Create a buffer with the specified capacity.
 * The size of the new buffer is zero.
 *
 * @returns The buffer, whose data member is NULL on failure.
 */
data_buffer_t data_buffer_create(size_t capacity);
/**
 * Calls memcmp() on the first min(buffer1.size, buffer2.size) bytes of both
 * buffers. If those bytes are equal, the buffers are ordered by size.
 *
 * @returns The non-zero memcmp() result if the contents differ, otherwise
 *          -1, 0, or 1 if buffer1 is shorter than, as long as, or longer
 *          than buffer2.
 */
int data_buffer_compare(data_buffer_t buffer1, data_buffer_t buffer2);
/**
 * Frees an allocated buffer.
 * The buffer is passed by value, so the caller's copy still holds the stale
 * data pointer afterwards and must not be used again.
 */
void data_buffer_free(data_buffer_t buffer);
#endif
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
/* This file is a list; the includer decides what each entry expands to. */
#ifndef LEVEL
# error LEVEL(x) must be defined
#endif
#ifndef FAST_LEVEL
# error FAST_LEVEL(x) must be defined
#endif
/**
 * The levels are chosen to trigger every strategy in every source size,
 * as well as some fast levels and the default level.
 * If you change the compression levels, you should probably update these.
 *
 * FAST_LEVEL(x) entries name the fast levels and LEVEL(x) entries the regular
 * levels. There is deliberately no include guard, so the includer can expand
 * the list more than once with different macro definitions.
 */
FAST_LEVEL(5)
FAST_LEVEL(3)
FAST_LEVEL(1)
LEVEL(0)
LEVEL(1)
LEVEL(3)
LEVEL(4)
LEVEL(5)
LEVEL(6)
LEVEL(7)
LEVEL(9)
LEVEL(13)
LEVEL(16)
LEVEL(19)
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#include "method.h"
#include <stdio.h>
#include <stdlib.h>
#include <zstd.h>
/* Path of the zstd command line binary used by the CLI-based methods. */
static char const* g_zstdcli = NULL;
/*
 * Records the zstd CLI path. Only the pointer is stored, not a copy, so the
 * string must stay valid for as long as the methods are used.
 */
void method_set_zstdcli(char const* zstdcli) {
g_zstdcli = zstdcli;
}
/**
 * Macro to get a pointer of type, given ptr, which is a pointer to the member
 * variable with the given name, member.
 *
 *     method_state_t* base = ...;
 *     simple_state_t* state = container_of(base, simple_state_t, base);
 *
 * The member offset is subtracted in bytes, through a char pointer, BEFORE
 * the result is cast to type*. Casting first would scale the offset by
 * sizeof(type) and only give the right answer for a member at offset zero.
 */
#define container_of(ptr, type, member) \
    ((type*)((char*)(ptr) - offsetof(type, member)))
/**
 * State to reuse the same buffers between compression calls.
 * base is the member handed out to callers; the full state is recovered from
 * it with container_of().
 */
typedef struct {
method_state_t base;
data_buffer_t buffer; /**< The constant input data buffer. */
data_buffer_t compressed; /**< The compressed data buffer. */
data_buffer_t decompressed; /**< The decompressed data buffer. */
} simple_state_t;
/**
 * Creates the state for the simple method.
 *
 * Loads the input into memory and allocates a compression buffer sized by
 * ZSTD_compressBound() plus a decompression buffer the size of the input.
 * Buffer allocation failures are not reported here; simple_compress() checks
 * the buffers before using them.
 *
 * @returns A pointer to the base member of the new state, or NULL if the
 *          state itself could not be allocated.
 */
static method_state_t* simple_create(data_t const* data) {
    simple_state_t* const self = (simple_state_t*)calloc(1, sizeof(*self));
    if (self == NULL)
        return NULL;

    self->base.data = data;
    self->buffer = data_buffer_get(data);

    size_t const input_size = self->buffer.size;
    self->compressed = data_buffer_create(ZSTD_compressBound(input_size));
    self->decompressed = data_buffer_create(input_size);
    return &self->base;
}
/**
 * Destroys a state created by simple_create().
 *
 * Releases the input, compressed, and decompressed buffers owned by the
 * state, then the state itself. NULL is accepted and ignored.
 */
static void simple_destroy(method_state_t* base) {
    if (base == NULL)
        return;
    simple_state_t* state = container_of(base, simple_state_t, base);
    /* The buffers are owned by the state; free them before the state. */
    data_buffer_free(state->buffer);
    data_buffer_free(state->compressed);
    data_buffer_free(state->decompressed);
    free(state);
}
/**
 * Compresses the input with ZSTD_compress() at the level named by config,
 * decompresses the result, and checks that the data round trips.
 *
 * @returns The compressed size on success; a skip result for non-file data
 *          or a config without a level; an error result otherwise.
 */
static result_t simple_compress(method_state_t* base, config_t const* config) {
    if (base == NULL)
        return result_error(result_error_system_error);

    simple_state_t* const self = container_of(base, simple_state_t, base);

    /* This method only works on a single in-memory file. */
    if (base->data->type != data_type_file)
        return result_error(result_error_skip);

    int const buffers_ready = self->buffer.data != NULL &&
        self->compressed.data != NULL && self->decompressed.data != NULL;
    if (!buffers_ready)
        return result_error(result_error_system_error);

    /* If the config doesn't specify a level, skip. */
    int const level = config_get_level(config);
    if (level == CONFIG_NO_LEVEL)
        return result_error(result_error_skip);

    /* Compress. */
    size_t const csize = ZSTD_compress(
        self->compressed.data,
        self->compressed.capacity,
        self->buffer.data,
        self->buffer.size,
        level);
    self->compressed.size = csize;
    if (ZSTD_isError(csize))
        return result_error(result_error_compression_error);

    /* Decompress. */
    size_t const dsize = ZSTD_decompress(
        self->decompressed.data,
        self->decompressed.capacity,
        self->compressed.data,
        csize);
    self->decompressed.size = dsize;
    if (ZSTD_isError(dsize))
        return result_error(result_error_decompression_error);

    /* Check the round trip. */
    if (data_buffer_compare(self->buffer, self->decompressed) != 0)
        return result_error(result_error_round_trip_error);

    result_data_t payload;
    payload.total_size = csize;
    return result_data(payload);
}
/**
 * Generic state creation function for methods that need nothing beyond the
 * data pointer.
 *
 * @returns The new state, or NULL if the allocation failed.
 */
static method_state_t* method_state_create(data_t const* data) {
    method_state_t* const fresh = (method_state_t*)malloc(sizeof(*fresh));
    if (fresh != NULL)
        fresh->data = data;
    return fresh;
}
/**
 * Frees a state made by method_state_create(). NULL is accepted, since
 * free(NULL) does nothing.
 */
static void method_state_destroy(method_state_t* state) {
    void* const mem = state;
    free(mem);
}
/* Maximum number of bytes read from the output of `wc -c`. */
#define MAX_OUT 32
/**
 * Compresses the data with the zstd command line binary and measures the
 * total compressed size by piping the output through `wc -c`.
 *
 * @param state  The method state holding the data to compress.
 * @param config The config; it is skipped if it has no cli_args.
 *
 * @returns The size parsed from the `wc -c` output on success, a skip result
 *          if the config has no CLI arguments, or an error result.
 */
static result_t cli_file_compress(
method_state_t* state,
config_t const* config) {
if (config->cli_args == NULL)
return result_error(result_error_skip);
if (g_zstdcli == NULL)
return result_error(result_error_system_error);
/* '<zstd>' -r <args> '<file/dir>' | wc -c */
char cmd[1024];
size_t const cmd_size = snprintf(
cmd,
sizeof(cmd),
"'%s' -cqr %s '%s' | wc -c",
g_zstdcli,
config->cli_args,
state->data->path);
/* snprintf() returns the untruncated length, so >= means it did not fit. */
if (cmd_size >= sizeof(cmd)) {
fprintf(stderr, "command too large: %s\n", cmd);
return result_error(result_error_system_error);
}
FILE* zstd = popen(cmd, "r");
if (zstd == NULL) {
fprintf(stderr, "failed to popen command: %s\n", cmd);
return result_error(result_error_system_error);
}
/* Read the total compressed size. */
char out[MAX_OUT + 1];
size_t const out_size = fread(out, 1, MAX_OUT, zstd);
out[out_size] = '\0';
/* NOTE(review): the command is a pipeline ending in `wc -c`; verify that
 * this status reflects a failure of zstd itself and not only of wc. */
int const zstd_ret = pclose(zstd);
if (zstd_ret != 0) {
fprintf(stderr, "zstd failed with command: %s\n", cmd);
return result_error(result_error_compression_error);
}
/* A completely filled buffer means the output may have been cut off. */
if (out_size == MAX_OUT) {
fprintf(stderr, "wc -c produced more bytes than expected: %s\n", out);
return result_error(result_error_system_error);
}
result_data_t data;
data.total_size = atoll(out);
return result_data(data);
}
/* Compresses an in-memory file with ZSTD_compress() and checks the round trip. */
method_t const simple = {
.name = "simple",
.create = simple_create,
.compress = simple_compress,
.destroy = simple_destroy,
};
/* Compresses the file or directory by invoking the zstd CLI. */
method_t const cli_file = {
.name = "cli file",
.create = method_state_create,
.compress = cli_file_compress,
.destroy = method_state_destroy,
};
/* The list of every method, terminated by a NULL entry. */
static method_t const* g_methods[] = {
&simple,
&cli_file,
NULL,
};
/* The exported, read-only view of g_methods. */
method_t const* const* methods = g_methods;
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef METHOD_H
#define METHOD_H
#include <stddef.h>
#include "data.h"
#include "config.h"
#include "result.h"
/**
 * The base class for state that methods keep.
 * All derived method state classes must have a member of this type.
 */
typedef struct {
data_t const* data; /**< The data object this state was created for. */
} method_state_t;
/**
 * A method that compresses the data using config.
 * A method is a name plus three callbacks that create, use, and destroy a
 * per-data state.
 */
typedef struct {
char const* name; /**< The identifier for this method in the results. */
/**
 * Creates a state that must contain a member variable of method_state_t,
 * and returns a pointer to that member variable.
 *
 * This method can be used to do expensive work that only depends on the
 * data, like loading the data file into a buffer.
 */
method_state_t* (*create)(data_t const* data);
/**
 * Compresses the data in the state using the given config.
 *
 * @param state A pointer to the state returned by create().
 * @param config The config to compress with.
 *
 * @returns The total compressed size on success, or an error code.
 * A method may also return a skip result for a (data, config) pair it
 * does not handle.
 */
result_t (*compress)(method_state_t* state, config_t const* config);
/**
 * Frees the state.
 *
 * @param state A pointer to the state returned by create().
 */
void (*destroy)(method_state_t* state);
} method_t;
/**
 * Set the zstd cli path. Must be called before any methods are used.
 *
 * @param zstdcli The path of the zstd command line binary.
 */
void method_set_zstdcli(char const* zstdcli);
/**
 * A NULL-terminated list of methods.
 * Iterate until a NULL entry is reached.
 */
extern method_t const* const* methods;
#endif
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#include "result.h"
/**
 * Maps the error code of a result to a short, human readable string.
 *
 * @returns A string literal; it must not be freed. A value that is not one
 *          of the result_error_t enumerators yields "unknown error".
 */
char const* result_get_error_string(result_t result) {
    switch (result_get_error(result)) {
        case result_error_ok:
            return "okay";
        case result_error_skip:
            return "skip";
        case result_error_system_error:
            return "system error";
        case result_error_compression_error:
            return "compression error";
        case result_error_decompression_error:
            return "decompression error";
        case result_error_round_trip_error:
            return "round trip error";
    }
    /* There is no default case, so the compiler can still warn when a new
     * enumerator is not handled above. Return here so control never falls
     * off the end of the function. */
    return "unknown error";
}
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef RESULT_H
#define RESULT_H
#include <stddef.h>
/**
 * The error type enum.
 *
 * result_error_ok is the only value that result_is_error() does not report as
 * an error; result_error_skip is additionally recognised by result_is_skip().
 */
typedef enum {
result_error_ok, /**< No error. */
result_error_skip, /**< This method was skipped. */
result_error_system_error, /**< Some internal error happened. */
result_error_compression_error, /**< Compression failed. */
result_error_decompression_error, /**< Decompression failed. */
result_error_round_trip_error, /**< Data failed to round trip. */
} result_error_t;
/**
 * The success type.
 * Carried inside a result_t and retrieved with result_get_data().
 */
typedef struct {
size_t total_size; /**< The total compressed size. */
} result_data_t;
/**
 * The result type.
 * Do not access the member variables directly, use the helper functions.
 */
typedef struct {
result_error_t internal_error; /**< The error code, result_error_ok on success. */
result_data_t internal_data; /**< The success data, only meaningful when there is no error. */
} result_t;
/**
 * Create a result of the error type.
 *
 * @param error The error code to store in the result.
 */
static result_t result_error(result_error_t error);
/**
 * Create a result of the success type.
 *
 * @param data The data to store; the error code is set to result_error_ok.
 */
static result_t result_data(result_data_t data);
/**
 * Check if the result is an error or skip.
 *
 * @returns Non-zero if the error code is anything other than result_error_ok.
 */
static int result_is_error(result_t result);
/**
 * Check if the result error is skip.
 *
 * @returns Non-zero if the error code is result_error_skip.
 */
static int result_is_skip(result_t result);
/**
 * Get the result error or okay.
 *
 * @returns The stored error code, which is result_error_ok on success.
 */
static result_error_t result_get_error(result_t result);
/**
 * Get the result data. The result MUST be checked with result_is_error() first.
 */
static result_data_t result_get_data(result_t result);
/* Build a result carrying only an error code; every other member is zeroed. */
static result_t result_error(result_error_t error) {
    result_t result = {0};
    result.internal_error = error;
    return result;
}
/* Build a successful result: the error code is "ok" and the data is stored as given. */
static result_t result_data(result_data_t data) {
    result_t result;
    result.internal_error = result_error_ok;
    result.internal_data = data;
    return result;
}
/* Returns 1 for any error code other than "ok" (skip included), else 0. */
static int result_is_error(result_t result) {
    if (result_get_error(result) == result_error_ok)
        return 0;
    return 1;
}
/* Returns 1 only when the error code is "skip", else 0. */
static int result_is_skip(result_t result) {
    if (result_get_error(result) != result_error_skip)
        return 0;
    return 1;
}
/* Accessor for the stored error code ("ok" for successful results). */
static result_error_t result_get_error(result_t result) {
    result_error_t const error = result.internal_error;
    return error;
}
/**
 * Get a human-readable string describing the result's error code.
 * Unlike the static helpers in this header, this function has external
 * linkage and is defined out of line rather than in this header.
 */
char const* result_get_error_string(result_t result);
/* Accessor for the success data; callers must rule out errors first. */
static result_data_t result_get_data(result_t result) {
    result_data_t const data = result.internal_data;
    return data;
}
#endif
Data, Config, Method, Total compressed size
silesia.tar, level -5, simple, 106176430
silesia.tar, level -3, simple, 98476550
silesia.tar, level -1, simple, 87206767
silesia.tar, level 0, simple, 66996953
silesia.tar, level 1, simple, 73658303
silesia.tar, level 3, simple, 66996953
silesia.tar, level 4, simple, 65996020
silesia.tar, level 5, simple, 64421326
silesia.tar, level 6, simple, 62388673
silesia.tar, level 7, simple, 61159525
silesia.tar, level 9, simple, 60214921
silesia.tar, level 13, simple, 58428642
silesia.tar, level 16, simple, 56363759
silesia.tar, level 19, simple, 53274173
silesia, level -5, cli file, 106202112
silesia, level -3, cli file, 98518660
silesia, level -1, cli file, 87226203
silesia, level 0, cli file, 67049190
silesia, level 1, cli file, 73676282
silesia, level 3, cli file, 67049190
silesia, level 4, cli file, 66090040
silesia, level 5, cli file, 64503721
silesia, level 6, cli file, 62446177
silesia, level 7, cli file, 61217029
silesia, level 9, cli file, 60282841
silesia, level 13, cli file, 58480658
silesia, level 16, cli file, 56414170
silesia, level 19, cli file, 53365292
silesia.tar, level -5, cli file, 106250113
silesia.tar, level -3, cli file, 98550747
silesia.tar, level -1, cli file, 87227322
silesia.tar, level 0, cli file, 67111168
silesia.tar, level 1, cli file, 73694374
silesia.tar, level 3, cli file, 67111168
silesia.tar, level 4, cli file, 66154079
silesia.tar, level 5, cli file, 64546998
silesia.tar, level 6, cli file, 62458454
silesia.tar, level 7, cli file, 61231085
silesia.tar, level 9, cli file, 60310313
silesia.tar, level 13, cli file, 58517476
silesia.tar, level 16, cli file, 56448694
silesia.tar, level 19, cli file, 53444920
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#include <assert.h>
#include <getopt.h>
#include <stdio.h>
#include <string.h>
#include "config.h"
#include "data.h"
#include "method.h"
/**
 * Decide whether a name is unusable in the comma-separated results table.
 *
 * @returns 1 if the name is NULL or contains a comma, 0 otherwise.
 */
static int is_name_bad(char const* name) {
    if (name == NULL)
        return 1;
    /* A comma inside a name would split a results row into extra columns. */
    return strchr(name, ',') != NULL;
}
/**
 * Check if any of the method, data, or config names are bad, i.e. NULL or
 * containing a comma (see is_name_bad()).
 *
 * Reports the first bad name on stderr. A NULL name is printed as "(null)"
 * because passing a NULL pointer to a %s conversion is undefined behaviour.
 *
 * @returns 1 if a bad name was found, 0 if all names are usable.
 */
static int are_names_bad(void) {
    for (size_t method = 0; methods[method] != NULL; ++method) {
        char const* const name = methods[method]->name;
        if (is_name_bad(name)) {
            fprintf(
                stderr,
                "method name %s is bad\n",
                name != NULL ? name : "(null)");
            return 1;
        }
    }
    for (size_t datum = 0; data[datum] != NULL; ++datum) {
        char const* const name = data[datum]->name;
        if (is_name_bad(name)) {
            fprintf(
                stderr,
                "data name %s is bad\n",
                name != NULL ? name : "(null)");
            return 1;
        }
    }
    for (size_t config = 0; configs[config] != NULL; ++config) {
        char const* const name = configs[config]->name;
        if (is_name_bad(name)) {
            fprintf(
                stderr,
                "config name %s is bad\n",
                name != NULL ? name : "(null)");
            return 1;
        }
    }
    return 0;
}
/**
 * Helper macro to print to stderr and a file.
 *
 * The format and its arguments are expanded twice, once per fprintf() call,
 * so any argument expression is evaluated twice; do not pass expressions
 * with side effects.
 */
#define tprintf(file, ...) \
do { \
fprintf(file, __VA_ARGS__); \
fprintf(stderr, __VA_ARGS__); \
} while (0)
/**
 * Helper macro to flush stderr and a file.
 * The return values of both fflush() calls are ignored.
 */
#define tflush(file) \
do { \
fflush(file); \
fflush(stderr); \
} while (0)
/**
* Run all the regression tests and record the results table to results and
* stderr progressively.
*/
static int run_all(FILE* results) {
tprintf(results, "Data,\tConfig,\tMethod,\tTotal compressed size\n");
for (size_t method = 0; methods[method] != NULL; ++method) {
for (size_t datum = 0; data[datum] != NULL; ++datum) {
/* Create the state common to all configs */
method_state_t* state = methods[method]->create(data[datum]);
for (size_t config = 0; configs[config] != NULL; ++config) {
/* Print the result for the (method, data, config) tuple. */
result_t const result =
methods[method]->compress(state, configs[config]);
if (result_is_skip(result))
continue;
tprintf(
results,
"%s,\t%s,\t%s,\t",
data[datum]->name,
configs[config]->name,
methods[method]->name);
if (result_is_error(result)) {
tprintf(results, "%s\n", result_get_error_string(result));
} else {
tprintf(
results,
"%llu\n",
(unsigned long long)result_get_data(result).total_size);
}
tflush(results);
}
methods[method]->destroy(state);
}
}
return 0;
}
/**
* Option parsing using getopt.
* When you add a new option update: long_options, long_extras, and
* short_options.
*/
/**
 * Option variables filled by parse_args(). Each one stays NULL until its
 * option has been parsed from the command line.
 */
static char const* g_output = NULL;  /**< -o / --output: write the results here. */
static char const* g_diff = NULL;    /**< -d / --diff: compare the results to this file. */
static char const* g_cache = NULL;   /**< -c / --cache: the cache directory. */
static char const* g_zstdcli = NULL; /**< -z / --zstd: the zstd cli tool. */

/** Whether an option must be given, may be given, or only prints the help. */
typedef enum {
    required_option,
    optional_option,
    help_option,
} option_type;

/**
 * Extra state that we need to keep per-option that we can't store in getopt.
 */
struct option_extra {
    int id;               /**< The short option name, used as an id. */
    char const* help;     /**< The help message. */
    option_type opt_type; /**< The option type: required, optional, or help. */
    char const** value;   /**< The value to set or NULL if no_argument. */
};

/**
 * The options.
 *
 * getopt_long() requires the last element of the array to be filled with
 * zeros; without it an unrecognised long option makes getopt_long() read past
 * the end of the array. New options go before the terminator.
 */
static struct option long_options[] = {
    {"cache", required_argument, NULL, 'c'},
    {"diff", required_argument, NULL, 'd'},
    {"help", no_argument, NULL, 'h'},
    {"output", required_argument, NULL, 'o'},
    {"zstd", required_argument, NULL, 'z'},
    {NULL, 0, NULL, 0},
};

/** The number of real options, excluding the all-zero terminator. */
static size_t const nargs =
    sizeof(long_options) / sizeof(long_options[0]) - 1;

/** The extra info for the options. Must be in the same order as the options. */
static struct option_extra long_extras[] = {
    {'c', "the cache directory", required_option, &g_cache},
    {'d', "compare the results to this file", optional_option, &g_diff},
    {'h', "display this message", help_option, NULL},
    {'o', "write the results here", required_option, &g_output},
    {'z', "zstd cli tool", required_option, &g_zstdcli},
};

/** The short options. Must correspond to the options. */
static char const short_options[] = "c:d:ho:z:";

/**
 * Return the help string for the option type.
 *
 * @returns A string literal; never NULL, because the result is printed with
 *          a %s conversion.
 */
static char const* required_message(option_type opt_type) {
    switch (opt_type) {
        case required_option:
            return "[required]";
        case optional_option:
            return "[optional]";
        case help_option:
            return "";
        default:
            /* Unknown option type: a programming error. Return an empty
             * string so a build with NDEBUG does not pass NULL to printf. */
            assert(0);
            return "";
    }
}

/** Print the help for the program: one line per option, to stderr. */
static void print_help(void) {
    fprintf(stderr, "regression test runner\n");
    /* Uses the file-scope nargs, which excludes the terminator element. */
    for (size_t i = 0; i < nargs; ++i) {
        /* Short / long - help [option type] */
        fprintf(
            stderr,
            "-%c / --%s \t- %s %s\n",
            long_options[i].val,
            long_options[i].name,
            long_extras[i].help,
            required_message(long_extras[i].opt_type));
    }
}
/**
 * Parse the command line into the option variables.
 *
 * Prints the help and fails when -h is given, when an option is not
 * recognised, or when a required option is missing.
 *
 * @returns 0 on success, 1 on failure.
 */
static int parse_args(int argc, char** argv) {
    int option_index = 0;
    for (;;) {
        int const opt = getopt_long(
            argc, argv, short_options, long_options, &option_index);
        if (opt == -1)
            break;
        /* Find the value-carrying option with this id; nargs means none. */
        size_t match = nargs;
        for (size_t i = 0; i < nargs; ++i) {
            if (long_extras[i].value == NULL || long_extras[i].id != opt)
                continue;
            match = i;
            break;
        }
        if (match == nargs) {
            /* Help, an unknown option, or anything else without a value. */
            print_help();
            return 1;
        }
        *long_extras[match].value = optarg;
    }

    /* Report every required option that was never set. */
    int missing = 0;
    for (size_t i = 0; i < nargs; ++i) {
        int const unset_required =
            long_extras[i].opt_type == required_option
            && long_extras[i].value != NULL
            && *long_extras[i].value == NULL;
        if (!unset_required)
            continue;
        fprintf(
            stderr,
            "-%c / --%s is a required argument but is not set\n",
            long_options[i].val,
            long_options[i].name);
        missing = 1;
    }
    if (!missing)
        return 0;
    fprintf(stderr, "\n");
    print_help();
    return 1;
}
/**
 * Compare the freshly written results file with the expected results file.
 *
 * Both files are read into buffers and compared with data_buffer_compare();
 * the outcome is reported on stderr. Both buffers are freed on every path.
 *
 * @returns 0 if the contents match. Otherwise the non-zero comparison
 *          result, or 1 if either file could not be read.
 */
static int diff_results(char const* actual_file, char const* expected_file) {
    data_buffer_t const actual = data_buffer_read(actual_file);
    data_buffer_t const expected = data_buffer_read(expected_file);
    int ret = 1;
    if (actual.data == NULL) {
        fprintf(stderr, "failed to open results '%s' for diff\n", actual_file);
    } else if (expected.data == NULL) {
        fprintf(
            stderr,
            "failed to open previous results '%s' for diff\n",
            expected_file);
    } else {
        ret = data_buffer_compare(actual, expected);
        if (ret == 0) {
            fprintf(stderr, "actual results match expected results\n");
        } else {
            fprintf(
                stderr,
                "actual results '%s' does not match expected results '%s'\n",
                actual_file,
                expected_file);
        }
    }
    data_buffer_free(actual);
    data_buffer_free(expected);
    return ret;
}
/**
 * Entry point of the regression test runner.
 *
 * Parses the arguments, validates the names, initializes the modules, runs
 * every test into the output file, and optionally diffs the output against a
 * previous results file.
 *
 * @returns 0 on success, non-zero on any failure.
 */
int main(int argc, char** argv) {
    /* Parse args and validate modules. */
    int ret = parse_args(argc, argv);
    if (ret != 0)
        return ret;
    if (are_names_bad())
        return 1;

    /* Initialize modules. */
    method_set_zstdcli(g_zstdcli);
    ret = data_init(g_cache);
    if (ret != 0) {
        /* The return value is formatted with strerror(), i.e. it is treated
         * as an errno-style code. */
        fprintf(stderr, "data_init() failed with error=%s\n", strerror(ret));
        return 1;
    }

    /* Run the regression tests. */
    ret = 1;
    FILE* results = fopen(g_output, "w");
    if (results == NULL) {
        fprintf(stderr, "Failed to open the output file\n");
        goto out;
    }
    ret = run_all(results);
    /* fclose() flushes buffered output. If it fails the results file may be
     * incomplete, so the run must not be reported as successful. */
    if (fclose(results) != 0) {
        fprintf(stderr, "Failed to write the output file\n");
        ret = 1;
    }
    if (ret != 0)
        goto out;

    if (g_diff)
        /* Diff the new results with the previous results. */
        ret = diff_results(g_output, g_diff);

out:
    data_finish();
    return ret;
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment