Hello community, here is the log from the commit of package blosc for openSUSE:Factory checked in at 2020-12-21 12:35:10 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/blosc (Old) and /work/SRC/openSUSE:Factory/.blosc.new.5145 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Package is "blosc" Mon Dec 21 12:35:10 2020 rev:20 rq:856832 version:1.20.1 Changes: -------- --- /work/SRC/openSUSE:Factory/blosc/blosc.changes 2020-09-04 11:02:22.490728240 +0200 +++ /work/SRC/openSUSE:Factory/.blosc.new.5145/blosc.changes 2020-12-21 12:35:17.954949548 +0100 @@ -1,0 +2,8 @@ +Fri Dec 18 08:06:48 UTC 2020 - Martin Pluskal <mpluskal@suse.com> + +- Update to version 1.20.1 boo#1179914 CVE-2020-29367: + * More safety checks have been implemented so that potential flaws discovered by new fuzzers in OSS-Fuzzer are fixed now + * BloscLZ updated to 2.3.0. Expect better compression ratios for faster codecs. For details, see our new blog post: https://blosc.org/posts/beast-release/ + * Fixed the _xgetbv() collision. Thanks to Michał Górny (@mgorny). 
+ +------------------------------------------------------------------- Old: ---- blosc-1.19.0.tar.gz New: ---- blosc-1.20.1.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ blosc.spec ++++++ --- /var/tmp/diff_new_pack.8MrbQM/_old 2020-12-21 12:35:18.550950972 +0100 +++ /var/tmp/diff_new_pack.8MrbQM/_new 2020-12-21 12:35:18.550950972 +0100 @@ -19,7 +19,7 @@ %define major 1 %define libname lib%{name}%{major} Name: blosc -Version: 1.19.0 +Version: 1.20.1 Release: 0 Summary: A blocking, shuffling and lossless compression library License: MIT AND BSD-3-Clause AND BSD-2-Clause @@ -28,10 +28,10 @@ Source: https://github.com/Blosc/c-blosc/archive/v%{version}.tar.gz#/%{name}-%{version}.tar.gz BuildRequires: cmake BuildRequires: gcc-c++ -BuildRequires: libzstd-devel BuildRequires: pkgconfig BuildRequires: snappy-devel BuildRequires: pkgconfig(liblz4) +BuildRequires: pkgconfig(libzstd) BuildRequires: pkgconfig(zlib) %description @@ -100,12 +100,11 @@ %files -n %{libname} %doc LICENSES/*.txt -%doc ANNOUNCE.rst README.md README_THREADED.rst RELEASE_NOTES.rst THANKS.rst +%doc ANNOUNCE.rst README.md RELEASE_NOTES.rst THANKS.rst %{_libdir}/libblosc.so.%{major} %{_libdir}/libblosc.so.%{version} %files devel -%doc README_HEADER.rst %doc examples/ %{_includedir}/blosc.h %{_includedir}/blosc-export.h ++++++ blosc-1.19.0.tar.gz -> blosc-1.20.1.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/c-blosc-1.19.0/.github/workflows/cmake.yml new/c-blosc-1.20.1/.github/workflows/cmake.yml --- old/c-blosc-1.19.0/.github/workflows/cmake.yml 2020-06-05 11:09:21.000000000 +0200 +++ new/c-blosc-1.20.1/.github/workflows/cmake.yml 2020-09-08 17:23:32.000000000 +0200 @@ -143,7 +143,7 @@ run: | mkdir ${{ matrix.build-dir || '.not-used' }} cd ${{ matrix.build-dir || '.' }} - cmake ${{ matrix.build-src-dir || '.' 
}} ${{ matrix.cmake-args }} -DCMAKE_BUILD_TYPE=${{ matrix.build-config || 'Release' }} -DBUILD_SHARED_LIBS=OFF + cmake ${{ matrix.build-src-dir || '.' }} ${{ matrix.cmake-args }} -DCMAKE_BUILD_TYPE=${{ matrix.build-config || 'Release' }} -DBUILD_SHARED_LIBS=OFF -DBUILD_FUZZERS=ON env: CC: ${{ matrix.compiler }} CFLAGS: ${{ matrix.cflags }} diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/c-blosc-1.19.0/ANNOUNCE.rst new/c-blosc-1.20.1/ANNOUNCE.rst --- old/c-blosc-1.19.0/ANNOUNCE.rst 2020-06-05 11:09:21.000000000 +0200 +++ new/c-blosc-1.20.1/ANNOUNCE.rst 2020-09-08 17:23:32.000000000 +0200 @@ -1,19 +1,14 @@ =============================================================== - Announcing C-Blosc 1.19.0 + Announcing C-Blosc 1.20.1 A blocking, shuffling and lossless compression library for C =============================================================== What is new? ============ -The algorithm for choosing the blocksize automatically in fast codecs -(lz4 and blosclz) has been refined to provide better compression ratios -and better performance on modern CPUs (L2 cache sizes >= 256KB), while -staying reasonably fast on less powerful CPUs. - -Also, new versions for blosclz (2.1.0) and zstd (1.4.5) codecs have -been integrated. Expect better compression ratios and performance with -these new versions too. +This is a maintenance release. Vendored zlib 1.2.8 is now compatible +with Python 3.8 in recent Mac OSX. 
For details, see: +https://github.com/Blosc/python-blosc/issues/229 For more info, please see the release notes in: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/c-blosc-1.19.0/CMakeLists.txt new/c-blosc-1.20.1/CMakeLists.txt --- old/c-blosc-1.19.0/CMakeLists.txt 2020-06-05 11:09:21.000000000 +0200 +++ new/c-blosc-1.20.1/CMakeLists.txt 2020-09-08 17:23:32.000000000 +0200 @@ -9,6 +9,8 @@ # build the shared library version of the Blosc library # BUILD_TESTS: default ON # build test programs and generates the "test" target +# BUILD_FUZZERS: default ON +# build fuzz test programs and generates the "test" target # BUILD_BENCHMARKS: default ON # build the benchmark program # DEACTIVATE_SSE2: default OFF @@ -93,13 +95,15 @@ option(BUILD_SHARED "Build a shared library version of the blosc library." ON) option(BUILD_TESTS - "Build test programs form the blosc compression library" ON) + "Build test programs from the blosc compression library" ON) +option(BUILD_FUZZERS + "Build fuzzer programs from the blosc compression library" ${BUILD_STATIC}) option(BUILD_BENCHMARKS - "Build benchmark programs form the blosc compression library" ON) + "Build benchmark programs from the blosc compression library" ON) option(DEACTIVATE_SSE2 - "Do not attempt to build with SSE2 instructions" OFF) + "Do not attempt to build with SSE2 instructions" OFF) option(DEACTIVATE_AVX2 - "Do not attempt to build with AVX2 instructions" OFF) + "Do not attempt to build with AVX2 instructions" OFF) option(DEACTIVATE_LZ4 "Do not include support for the LZ4 library." OFF) option(DEACTIVATE_SNAPPY @@ -107,9 +111,9 @@ option(DEACTIVATE_ZLIB "Do not include support for the Zlib library." OFF) option(DEACTIVATE_ZSTD - "Do not include support for the Zstd library." OFF) + "Do not include support for the Zstd library." OFF) option(DEACTIVATE_SYMBOLS_CHECK - "Do not check for symbols in shared or static libraries." 
ON) + "Do not check for symbols in shared or static libraries." ON) option(PREFER_EXTERNAL_LZ4 "Find and use external LZ4 library instead of included sources." OFF) option(PREFER_EXTERNAL_ZLIB @@ -318,6 +322,14 @@ add_subdirectory(compat) endif(BUILD_TESTS) +if(BUILD_FUZZERS) + if(NOT BUILD_STATIC) + message(FATAL_ERROR "BUILD_FUZZERS requires BUILD_STATIC to be enabled.") + endif() + enable_testing() + add_subdirectory(tests/fuzz) +endif(BUILD_FUZZERS) + if(BUILD_BENCHMARKS) add_subdirectory(bench) endif(BUILD_BENCHMARKS) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/c-blosc-1.19.0/CODE_OF_CONDUCT.md new/c-blosc-1.20.1/CODE_OF_CONDUCT.md --- old/c-blosc-1.19.0/CODE_OF_CONDUCT.md 2020-06-05 11:09:21.000000000 +0200 +++ new/c-blosc-1.20.1/CODE_OF_CONDUCT.md 1970-01-01 01:00:00.000000000 +0100 @@ -1,5 +0,0 @@ -# Code of Conduct - -The Blosc community has adopted a Code of Conduct that we expect project participants to adhere to. -Please read the [full text](https://github.com/Blosc/CodeOfConduct/README.md) -so that you can understand what actions will and will not be tolerated. 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/c-blosc-1.19.0/README.md new/c-blosc-1.20.1/README.md --- old/c-blosc-1.19.0/README.md 2020-06-05 11:09:21.000000000 +0200 +++ new/c-blosc-1.20.1/README.md 2020-09-08 17:23:32.000000000 +0200 @@ -3,10 +3,9 @@ |--------|---------|-----| | Blosc Development Team | blosc@blosc.org | http://www.blosc.org | -| Gitter | GH Actions | NumFOCUS | -|--------|------------|----------| -| [![Gitter](https://badges.gitter.im/Blosc/c-blosc.svg)](https://gitter.im/Blosc/c-blosc?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) | [![CI CMake](https://github.com/Blosc/c-blosc/workflows/CI%20CMake/badge.svg)](https://github.com/Blosc/c-blosc/actions?query=workflow%3A%22CI+CMake%22) | [![Powered by NumFOCUS](https://img.shields.io/badge/powered%20by-NumFOCUS-orange.svg?style=flat&colorA=E1523D&colorB=007D8A)](http://numfocus.org) | - +| Gitter | GH Actions | NumFOCUS | Code of Conduct | +|--------|------------|----------|-----------------| +| [![Gitter](https://badges.gitter.im/Blosc/c-blosc.svg)](https://gitter.im/Blosc/c-blosc?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) | [![CI CMake](https://github.com/Blosc/c-blosc/workflows/CI%20CMake/badge.svg)](https://github.com/Blosc/c-blosc/actions?query=workflow%3A%22CI+CMake%22) | [![Powered by NumFOCUS](https://img.shields.io/badge/powered%20by-NumFOCUS-orange.svg?style=flat&colorA=E1523D&colorB=007D8A)](http://numfocus.org) | [![Contributor Covenant](https://img.shields.io/badge/Contributor%20Covenant-v2.0%20adopted-ff69b4.svg)](code_of_conduct.md) | ## What is it? 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/c-blosc-1.19.0/README_CHUNK_FORMAT.rst new/c-blosc-1.20.1/README_CHUNK_FORMAT.rst --- old/c-blosc-1.19.0/README_CHUNK_FORMAT.rst 1970-01-01 01:00:00.000000000 +0100 +++ new/c-blosc-1.20.1/README_CHUNK_FORMAT.rst 2020-09-08 17:23:32.000000000 +0200 @@ -0,0 +1,97 @@ +Blosc Chunk Format +================== + +The chunk is composed by a header and a blocks / splits section:: + + +---------+--------+---------+ + | header | blocks / splits | + +---------+--------+---------+ + +These are described below. + +The header section +------------------ + +Blosc (as of Version 1.0.0) has the following 16 byte header that stores +information about the compressed buffer:: + + |-0-|-1-|-2-|-3-|-4-|-5-|-6-|-7-|-8-|-9-|-A-|-B-|-C-|-D-|-E-|-F-| + ^ ^ ^ ^ | nbytes | blocksize | cbytes | + | | | | + | | | +--typesize + | | +------flags + | +----------versionlz + +--------------version + +Datatypes of the header entries +------------------------------- + +All entries are little endian. + +:version: + (``uint8``) Blosc format version. +:versionlz: + (``uint8``) Version of the internal compressor used. +:flags and compressor enumeration: + (``bitfield``) The flags of the buffer + + :bit 0 (``0x01``): + Whether the byte-shuffle filter has been applied or not. + :bit 1 (``0x02``): + Whether the internal buffer is a pure memcpy or not. + :bit 2 (``0x04``): + Whether the bit-shuffle filter has been applied or not. + :bit 3 (``0x08``): + Reserved, must be zero. + :bit 4 (``0x10``): + If set, the blocks will not be split in sub-blocks during compression. + :bit 5 (``0x20``): + Part of the enumeration for compressors. + :bit 6 (``0x40``): + Part of the enumeration for compressors. + :bit 7 (``0x80``): + Part of the enumeration for compressors. + + The last three bits form an enumeration that allows to use alternative + compressors. 
+ + :``0``: + ``blosclz`` + :``1``: + ``lz4`` or ``lz4hc`` + :``2``: + ``snappy`` + :``3``: + ``zlib`` + :``4``: + ``zstd`` + +:typesize: + (``uint8``) Number of bytes for the atomic type. +:nbytes: + (``uint32``) Uncompressed size of the buffer (this header is not included). +:blocksize: + (``uint32``) Size of internal blocks. +:cbytes: + (``uint32``) Compressed size of the buffer (including this header). + +The blocks / splits section +--------------------------- + +After the header comes the blocks / splits section. Blocks are equal-sized parts of the chunk, except for the last block, which can be shorter than or equal to the rest. + +At the beginning of the blocks section, there is a list of `int32_t bstarts` indicating where the different encoded blocks start (counting from the end of this `bstarts` section):: + + +=========+=========+========+=========+ + | bstart0 | bstart1 | ... | bstartN | + +=========+=========+========+=========+ + +Finally comes the actual list of compressed blocks / splits data streams. It turns out that a block may optionally (see bit 4 in `flags` above) be further split into so-called splits, which are the actual data streams that are transmitted to codecs for compression. If a block is not split, then the split is equivalent to a whole block. Before each split in the list, there is its compressed size, expressed as an `int32_t`:: + + +========+========+========+========+========+========+========+ + | csize0 | split0 | csize1 | split1 | ... | csizeN | splitN | + +========+========+========+========+========+========+========+ + + +*Note*: all the integers are stored in little endian. 
+ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/c-blosc-1.19.0/README_HEADER.rst new/c-blosc-1.20.1/README_HEADER.rst --- old/c-blosc-1.19.0/README_HEADER.rst 2020-06-05 11:09:21.000000000 +0200 +++ new/c-blosc-1.20.1/README_HEADER.rst 1970-01-01 01:00:00.000000000 +0100 @@ -1,65 +0,0 @@ -Blosc Header Format -=================== - -Blosc (as of Version 1.0.0) has the following 16 byte header that stores -information about the compressed buffer:: - - |-0-|-1-|-2-|-3-|-4-|-5-|-6-|-7-|-8-|-9-|-A-|-B-|-C-|-D-|-E-|-F-| - ^ ^ ^ ^ | nbytes | blocksize | ctbytes | - | | | | - | | | +--typesize - | | +------flags - | +----------versionlz - +--------------version - -Datatypes of the Header Entries -------------------------------- - -All entries are little endian. - -:version: - (``uint8``) Blosc format version. -:versionlz: - (``uint8``) Version of the internal compressor used. -:flags and compressor enumeration: - (``bitfield``) The flags of the buffer - - :bit 0 (``0x01``): - Whether the byte-shuffle filter has been applied or not. - :bit 1 (``0x02``): - Whether the internal buffer is a pure memcpy or not. - :bit 2 (``0x04``): - Whether the bit-shuffle filter has been applied or not. - :bit 3 (``0x08``): - Reserved, must be zero. - :bit 4 (``0x10``): - If set, the blocks will not be split in sub-blocks during compression. - :bit 5 (``0x20``): - Part of the enumeration for compressors. - :bit 6 (``0x40``): - Part of the enumeration for compressors. - :bit 7 (``0x80``): - Part of the enumeration for compressors. - - The last three bits form an enumeration that allows to use alternative - compressors. - - :``0``: - ``blosclz`` - :``1``: - ``lz4`` or ``lz4hc`` - :``2``: - ``snappy`` - :``3``: - ``zlib`` - :``4``: - ``zstd`` - -:typesize: - (``uint8``) Number of bytes for the atomic type. -:nbytes: - (``uint32``) Uncompressed size of the buffer. -:blocksize: - (``uint32``) Size of internal blocks. 
-:ctbytes: - (``uint32``) Compressed size of the buffer. diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/c-blosc-1.19.0/RELEASE_NOTES.rst new/c-blosc-1.20.1/RELEASE_NOTES.rst --- old/c-blosc-1.19.0/RELEASE_NOTES.rst 2020-06-05 11:09:21.000000000 +0200 +++ new/c-blosc-1.20.1/RELEASE_NOTES.rst 2020-09-08 17:23:32.000000000 +0200 @@ -2,6 +2,38 @@ Release notes for C-Blosc =========================== +Changes from 1.20.0 to 1.20.1 +============================= + +* Added `<unistd.h>` in vendored zlib 1.2.8 for compatibility with Python 3.8 + in recent Mac OSX. For details, see: + https://github.com/Blosc/python-blosc/issues/229 + +Changes from 1.19.1 to 1.20.0 +============================= + +* More safety checks have been implemented so that potential flaws + discovered by new fuzzers in OSS-Fuzzer are fixed now. Thanks to + Nathan Moinvaziri (@nmoinvaz). + +* BloscLZ updated to 2.3.0. Expect better compression ratios for faster + codecs. For details, see our new blog post: + https://blosc.org/posts/beast-release/ + +* Fixed the `_xgetbv()` collision. Thanks to Michał Górny (@mgorny). + +* The chunk format has been fully described so that 3rd party software + may come with a different implementation, but still compatible with + C-Blosc chunks. + + +Changes from 1.19.0 to 1.19.1 +============================= + +- pthread_create() errors are now handled and propagated back to the user. + See https://github.com/Blosc/c-blosc/pull/299. + + Changes from 1.18.1 to 1.19.0 ============================= @@ -15,6 +47,11 @@ Also, a new OSS-Fuzz workflow has been added for increased detection of possible vulnerabilities. Thanks to Nathan Moinvaziri. +- For small buffers that cannot be compressed (typically < 128 bytes), + `blosc_compress()` now returns 0 (cannot compress) instead of a negative + number (internal error). See https://github.com/Blosc/c-blosc/pull/294. + Thanks to @kalvdans for providing the initial patch. 
+ - blosclz codec updated to 2.1.0. Expect better compression ratios and performance in a wider variety of scenarios. diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/c-blosc-1.19.0/bench/plot-speeds.py new/c-blosc-1.20.1/bench/plot-speeds.py --- old/c-blosc-1.19.0/bench/plot-speeds.py 2020-06-05 11:09:21.000000000 +0200 +++ new/c-blosc-1.20.1/bench/plot-speeds.py 2020-09-08 17:23:32.000000000 +0200 @@ -29,16 +29,12 @@ tmp = line.split('-->')[1] parts = tmp.split(', ') nthreads, size, elsize, sbits, codec, shuffle = parts[:6] - safe = 'unsafe' - if len(parts) > 6: - safe = parts[6] nthreads, size, elsize, sbits = map(int, (nthreads, size, elsize, sbits)) - values["size"] = size * NCHUNKS / MB_ + values["size"] = size / MB_ values["elsize"] = elsize values["sbits"] = sbits values["codec"] = codec values["shuffle"] = shuffle - values["safe"] = safe # New run for nthreads (ratios, speedsw, speedsr) = ([], [], []) # Add a new entry for (ratios, speedw, speedr) @@ -47,21 +43,21 @@ elif line.startswith('memcpy(write):'): tmp = line.split(',')[1] memcpyw = float(tmp.split(' ')[1]) - values["memcpyw"].append(memcpyw) + values["memcpyw"].append(memcpyw / 1024) elif line.startswith('memcpy(read):'): tmp = line.split(',')[1] memcpyr = float(tmp.split(' ')[1]) - values["memcpyr"].append(memcpyr) + values["memcpyr"].append(memcpyr / 1024) elif line.startswith('comp(write):'): tmp = line.split(',')[1] speedw = float(tmp.split(' ')[1]) ratio = float(line.split(':')[-1]) - speedsw.append(speedw) + speedsw.append(speedw / 1024) ratios.append(ratio) elif line.startswith('decomp(read):'): tmp = line.split(',')[1] speedr = float(tmp.split(' ')[1]) - speedsr.append(speedr) + speedsr.append(speedr / 1024) if "OK" not in line: print("WARNING! 
OK not found in decomp line!") @@ -71,7 +67,7 @@ def show_plot(plots, yaxis, legends, gtitle, xmax=None, ymax=None): xlabel('Compresssion ratio') - ylabel('Speed (MB/s)') + ylabel('Speed (GB/s)') title(gtitle) xlim(0, xmax) ylim(0, ymax) @@ -190,7 +186,7 @@ if options.title: plot_title = options.title else: - plot_title += " (%(size).1f MB, %(elsize)d bytes, %(sbits)d bits), %(codec)s %(shuffle)s %(safe)s" % values + plot_title += " (%(size).1f MB, %(elsize)d bytes, %(sbits)d bits), %(codec)s %(shuffle)s" % values gtitle = plot_title @@ -219,7 +215,7 @@ mean = np.mean(values["memcpyr"]) message = "memcpy (read from memory)" plot_ = axhline(mean, linewidth=3, linestyle='-.', color='black') - text(4.0, mean+400, message) + text(4.0, mean+.4, message) plots.append(plot_) show_plot(plots, yaxis, legends, gtitle, xmax=int(options.xmax) if options.xmax else None, diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/c-blosc-1.19.0/blosc/CMakeLists.txt new/c-blosc-1.20.1/blosc/CMakeLists.txt --- old/c-blosc-1.19.0/blosc/CMakeLists.txt 2020-06-05 11:09:21.000000000 +0200 +++ new/c-blosc-1.20.1/blosc/CMakeLists.txt 2020-09-08 17:23:32.000000000 +0200 @@ -86,6 +86,7 @@ else(LZ4_FOUND) file(GLOB LZ4_FILES ${LZ4_LOCAL_DIR}/*.c) set(SOURCES ${SOURCES} ${LZ4_FILES}) + source_group("LZ4" FILES ${LZ4_FILES}) endif(LZ4_FOUND) endif(NOT DEACTIVATE_LZ4) @@ -95,6 +96,7 @@ else(SNAPPY_FOUND) file(GLOB SNAPPY_FILES ${SNAPPY_LOCAL_DIR}/*.cc) set(SOURCES ${SOURCES} ${SNAPPY_FILES}) + source_group("Snappy" FILES ${SNAPPY_FILES}) endif(SNAPPY_FOUND) endif(NOT DEACTIVATE_SNAPPY) @@ -104,6 +106,7 @@ else(ZLIB_FOUND) file(GLOB ZLIB_FILES ${ZLIB_LOCAL_DIR}/*.c) set(SOURCES ${SOURCES} ${ZLIB_FILES}) + source_group("Zlib" FILES ${ZLIB_FILES}) endif(ZLIB_FOUND) endif(NOT DEACTIVATE_ZLIB) @@ -116,6 +119,7 @@ ${ZSTD_LOCAL_DIR}/compress/*.c ${ZSTD_LOCAL_DIR}/decompress/*.c) set(SOURCES ${SOURCES} ${ZSTD_FILES}) + source_group("Zstd" FILES ${ZSTD_FILES}) endif 
(ZSTD_FOUND) endif (NOT DEACTIVATE_ZSTD) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/c-blosc-1.19.0/blosc/blosc.c new/c-blosc-1.20.1/blosc/blosc.c --- old/c-blosc-1.19.0/blosc/blosc.c 2020-06-05 11:09:21.000000000 +0200 +++ new/c-blosc-1.20.1/blosc/blosc.c 2020-09-08 17:23:32.000000000 +0200 @@ -423,10 +423,10 @@ char* output, size_t maxout, int clevel) { int cbytes; - if (input_length > (size_t)(2<<30)) - return -1; /* input larger than 1 GB is not supported */ + if (input_length > (size_t)(UINT32_C(2)<<30)) + return -1; /* input larger than 2 GB is not supported */ /* clevel for lz4hc goes up to 12, at least in LZ4 1.7.5 - * but levels larger than 9 does not buy much compression. */ + * but levels larger than 9 do not buy much compression. */ cbytes = LZ4_compress_HC(input, output, (int)input_length, (int)maxout, clevel); return cbytes; @@ -877,7 +877,9 @@ (void)rc; // just to avoid 'unused-variable' warning /* Check whether we need to restart threads */ - blosc_set_nthreads_(context); + if (blosc_set_nthreads_(context) < 0) { + return -1; + } /* Set sentinels */ context->thread_giveup_code = 1; @@ -1976,7 +1978,9 @@ /* Launch a new pool of threads */ if (context->numthreads > 1 && context->numthreads != context->threads_started) { blosc_release_threadpool(context); - init_threads(context); + if (init_threads(context) < 0) { + return -1; + } } /* We have now started the threads */ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/c-blosc-1.19.0/blosc/blosc.h new/c-blosc-1.20.1/blosc/blosc.h --- old/c-blosc-1.19.0/blosc/blosc.h 2020-06-05 11:09:21.000000000 +0200 +++ new/c-blosc-1.20.1/blosc/blosc.h 2020-09-08 17:23:32.000000000 +0200 @@ -18,14 +18,14 @@ /* Version numbers */ #define BLOSC_VERSION_MAJOR 1 /* for major interface/format changes */ -#define BLOSC_VERSION_MINOR 19 /* for minor interface/format changes */ -#define BLOSC_VERSION_RELEASE 0 /* for tweaks, 
bug-fixes, or development */ +#define BLOSC_VERSION_MINOR 20 /* for minor interface/format changes */ +#define BLOSC_VERSION_RELEASE 1 /* for tweaks, bug-fixes, or development */ -#define BLOSC_VERSION_STRING "1.19.0" /* string version. Sync with above! */ +#define BLOSC_VERSION_STRING "1.20.1" /* string version. Sync with above! */ #define BLOSC_VERSION_REVISION "$Rev$" /* revision version */ -#define BLOSC_VERSION_DATE "$Date:: 2020-06-05 #$" /* date version */ +#define BLOSC_VERSION_DATE "$Date:: 2020-09-08 #$" /* date version */ -#define BLOSCLZ_VERSION_STRING "2.1.0" /* the internal compressor version */ +#define BLOSCLZ_VERSION_STRING "2.3.0" /* the internal compressor version */ /* The *_FORMAT symbols should be just 1-byte long */ #define BLOSC_VERSION_FORMAT 2 /* Blosc format version, starting at 1 */ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/c-blosc-1.19.0/blosc/blosclz.c new/c-blosc-1.20.1/blosc/blosclz.c --- old/c-blosc-1.19.0/blosc/blosclz.c 2020-06-05 11:09:21.000000000 +0200 +++ new/c-blosc-1.20.1/blosc/blosclz.c 2020-09-08 17:23:32.000000000 +0200 @@ -14,19 +14,7 @@ #include <stdio.h> - -#if defined(_WIN32) && !defined(__MINGW32__) - #include <windows.h> - /* stdint.h only available in VS2010 (VC++ 16.0) and newer */ - #if defined(_MSC_VER) && _MSC_VER < 1600 - #include "win32/stdint-windows.h" - #else - #include <stdint.h> - #endif -#else - #include <stdint.h> -#endif /* _WIN32 */ - +#include <stdbool.h> #include "blosclz.h" #include "fastcopy.h" #include "blosc-common.h" @@ -36,11 +24,11 @@ * Give hints to the compiler for branch prediction optimization. 
*/ #if defined(__GNUC__) && (__GNUC__ > 2) -#define BLOSCLZ_EXPECT_CONDITIONAL(c) (__builtin_expect((c), 1)) -#define BLOSCLZ_UNEXPECT_CONDITIONAL(c) (__builtin_expect((c), 0)) +#define BLOSCLZ_LIKELY(c) (__builtin_expect((c), 1)) +#define BLOSCLZ_UNLIKELY(c) (__builtin_expect((c), 0)) #else -#define BLOSCLZ_EXPECT_CONDITIONAL(c) (c) -#define BLOSCLZ_UNEXPECT_CONDITIONAL(c) (c) +#define BLOSCLZ_LIKELY(c) (c) +#define BLOSCLZ_UNLIKELY(c) (c) #endif /* @@ -62,28 +50,13 @@ #define BLOSCLZ_READU32(p) *((const uint32_t*)(p)) #endif -#define HASH_LOG (12) +#define HASH_LOG (12U) // This is used in LZ4 and seems to work pretty well here too -#define HASH_FUNCTION(v, s, h) { \ - v = (s * 2654435761U) >> (32U - h); \ -} - - -#define LITERAL(ip, op, op_limit, anchor, copy) { \ - if (BLOSCLZ_UNEXPECT_CONDITIONAL(op + 2 > op_limit)) \ - goto out; \ - *op++ = *anchor++; \ - ip = anchor; \ - copy++; \ - if (BLOSCLZ_UNEXPECT_CONDITIONAL(copy == MAX_COPY)) { \ - copy = 0; \ - *op++ = MAX_COPY-1; \ - } \ +#define HASH_FUNCTION(v, s, h) { \ + v = (s * 2654435761U) >> (32U - h); \ } -#define IP_BOUNDARY 2 -#define BYTES_IN_CYCLE 512 #if defined(__AVX2__) static uint8_t *get_run_32(uint8_t *ip, const uint8_t *ip_bound, const uint8_t *ref) { @@ -191,6 +164,7 @@ } #else + static uint8_t *get_run(uint8_t *ip, const uint8_t *ip_bound, const uint8_t *ref) { uint8_t x = ip[-1]; int64_t value, value2; @@ -217,6 +191,7 @@ while ((ip < ip_bound) && (*ref++ == x)) ip++; return ip; } + #endif @@ -324,42 +299,236 @@ #endif +static uint8_t* get_run_or_match(uint8_t* ip, uint8_t* ip_bound, const uint8_t* ref, bool run) { + if (BLOSCLZ_UNLIKELY(run)) { +#if defined(__AVX2__) + ip = get_run_32(ip, ip_bound, ref); +#elif defined(__SSE2__) + ip = get_run_16(ip, ip_bound, ref); +#else + ip = get_run(ip, ip_bound, ref); +#endif + } + else { +#if defined(__AVX2__) + ip = get_match_32(ip, ip_bound, ref); +#elif defined(__SSE2__) + ip = get_match_16(ip, ip_bound, ref); +#else + ip = get_match(ip, ip_bound, 
ref); +#endif + } + + return ip; +} + + +#define LITERAL(ip, op, op_limit, anchor, copy) { \ + if (BLOSCLZ_UNLIKELY(op + 2 > op_limit)) \ + goto out; \ + *op++ = *anchor++; \ + ip = anchor; \ + copy++; \ + if (BLOSCLZ_UNLIKELY(copy == MAX_COPY)) { \ + copy = 0; \ + *op++ = MAX_COPY-1; \ + } \ +} + +#define LITERAL2(ip, oc, anchor, copy) { \ + oc++; anchor++; \ + ip = anchor; \ + copy++; \ + if (BLOSCLZ_UNLIKELY(copy == MAX_COPY)) { \ + copy = 0; \ + oc++; \ + } \ +} + +#define DISTANCE_SHORT(op, op_limit, len, distance) { \ + if (BLOSCLZ_UNLIKELY(op + 2 > op_limit)) \ + goto out; \ + *op++ = (uint8_t)((len << 5U) + (distance >> 8U)); \ + *op++ = (uint8_t)((distance & 255U)); \ +} + +#define DISTANCE_LONG(op, op_limit, len, distance) { \ + if (BLOSCLZ_UNLIKELY(op + 1 > op_limit)) \ + goto out; \ + *op++ = (uint8_t)((7U << 5U) + (distance >> 8U)); \ + for (len -= 7; len >= 255; len -= 255) { \ + if (BLOSCLZ_UNLIKELY(op + 1 > op_limit)) \ + goto out; \ + *op++ = 255; \ + } \ + if (BLOSCLZ_UNLIKELY(op + 2 > op_limit)) \ + goto out; \ + *op++ = (uint8_t)len; \ + *op++ = (uint8_t)((distance & 255U)); \ +} + +#define DISTANCE_SHORT_FAR(op, op_limit, len, distance) { \ + if (BLOSCLZ_UNLIKELY(op + 4 > op_limit)) \ + goto out; \ + *op++ = (uint8_t)((len << 5U) + 31); \ + *op++ = 255; \ + *op++ = (uint8_t)(distance >> 8U); \ + *op++ = (uint8_t)(distance & 255U); \ +} + +#define DISTANCE_LONG_FAR(op, op_limit, len, distance) { \ + if (BLOSCLZ_UNLIKELY(op + 1 > op_limit)) \ + goto out; \ + *op++ = (7U << 5U) + 31; \ + for (len -= 7; len >= 255; len -= 255) { \ + if (BLOSCLZ_UNLIKELY(op + 1 > op_limit)) \ + goto out; \ + *op++ = 255; \ + } \ + if (BLOSCLZ_UNLIKELY(op + 4 > op_limit)) \ + goto out; \ + *op++ = (uint8_t)len; \ + *op++ = 255; \ + *op++ = (uint8_t)(distance >> 8U); \ + *op++ = (uint8_t)(distance & 255U); \ +} + + +// Get the compressed size of a buffer. Useful for testing compression ratios for high clevels. 
+static int get_csize(uint8_t* ibase, int maxlen, bool force_3b_shift) { + uint8_t* ip = ibase; + int32_t oc = 0; + uint8_t* ip_bound = ibase + maxlen - 1; + uint8_t* ip_limit = ibase + maxlen - 12; + uint32_t htab[1U << (uint8_t)HASH_LOG]; + uint32_t hval; + uint32_t seq; + uint8_t copy; + + // Initialize the hash table to distances of 0 + for (unsigned i = 0; i < (1U << HASH_LOG); i++) { + htab[i] = 0; + } + + /* we start with literal copy */ + copy = 4; + oc += 5; + + /* main loop */ + while (BLOSCLZ_LIKELY(ip < ip_limit)) { + const uint8_t* ref; + unsigned distance; + uint8_t* anchor = ip; /* comparison starting-point */ + + /* find potential match */ + seq = BLOSCLZ_READU32(ip); + HASH_FUNCTION(hval, seq, HASH_LOG) + ref = ibase + htab[hval]; + + /* calculate distance to the match */ + distance = anchor - ref; + + /* update hash table */ + htab[hval] = (uint32_t) (anchor - ibase); + + if (distance == 0 || (distance >= MAX_FARDISTANCE)) { + LITERAL2(ip, oc, anchor, copy) + continue; + } + + /* is this a match? check the first 4 bytes */ + if (BLOSCLZ_UNLIKELY(BLOSCLZ_READU32(ref) == BLOSCLZ_READU32(ip))) { + ref += 4; + } + else { + /* no luck, copy as a literal */ + LITERAL2(ip, oc, anchor, copy) + continue; + } + + /* last matched byte */ + ip = anchor + 4; + + /* distance is biased */ + distance--; + + /* get runs or matches; zero distance means a run */ + ip = get_run_or_match(ip, ip_bound, ref, !distance); + + ip -= force_3b_shift ? 3 : 4; + unsigned len = (int)(ip - anchor); + // If match is close, let's reduce the minimum length to encode it + unsigned minlen = (distance < MAX_DISTANCE) ? 
3 : 4; + // Encoding short lengths is expensive during decompression + if (len < minlen) { + LITERAL2(ip, oc, anchor, copy) + continue; + } + + /* if we have'nt copied anything, adjust the output counter */ + if (!copy) + oc--; + /* reset literal counter */ + copy = 0; + + /* encode the match */ + if (distance < MAX_DISTANCE) { + if (len >= 7) { + oc += ((len - 7) / 255) + 1; + } + oc += 2; + } + else { + /* far away, but not yet in the another galaxy... */ + if (len >= 7) { + oc += ((len - 7) / 255) + 1; + } + oc += 4; + } + + /* update the hash at match boundary */ + seq = BLOSCLZ_READU32(ip); + HASH_FUNCTION(hval, seq, HASH_LOG) + htab[hval] = (uint32_t) (ip++ - ibase); + seq >>= 8U; + HASH_FUNCTION(hval, seq, HASH_LOG) + htab[hval] = (uint32_t) (ip++ - ibase); + /* assuming literal copy */ + oc++; + + } + + /* if we have copied something, adjust the copy length */ + if (!copy) + oc--; + + return (int)oc; +} + + int blosclz_compress(const int clevel, const void* input, int length, void* output, int maxout) { uint8_t* ibase = (uint8_t*)input; uint8_t* ip = ibase; - uint8_t* icycle = ibase; - uint8_t* ip_bound = ibase + length - IP_BOUNDARY; + uint8_t* ip_bound = ibase + length - 1; uint8_t* ip_limit = ibase + length - 12; uint8_t* op = (uint8_t*)output; - uint8_t* ocycle = op; uint8_t* op_limit; uint32_t htab[1U << (uint8_t)HASH_LOG]; uint32_t hval; uint32_t seq; uint8_t copy; - long skip_cycle = 0; - double cratio; - // Minimum cratios before issuing and _early giveup_ - // Remind that blosclz is not meant for cratios <= 2 (too costly to decompress) - double maxlength_[10] = {-1, .07, .1, .2, .4, .5, .5, .5, .5, .6}; - int32_t maxlength = (int32_t)(length * maxlength_[clevel]); - if (maxlength > (int32_t)maxout) { - maxlength = (int32_t)maxout; - } - op_limit = op + maxlength; - - // The maximum amount of cycles to skip match lookups - // A 0 means just _early giveup_ whereas > 0 use _entropy sensing_ too - long max_skip_cycles_[10] = {255, 0, 0, 0, 3, 2, 2, 1, 
1, 0}; - long max_skip_cycles = max_skip_cycles_[clevel]; - // The minimum compression ratio before skipping a number of cycles - double min_cratio_[10] = {-1, 0., 0., 0., 5., 4., 4., 3., 2., 1.}; - double min_cratio = min_cratio_[clevel]; + op_limit = op + maxout; + + // Minimum lengths for encoding + unsigned minlen_[10] = {0, 12, 12, 11, 10, 9, 8, 7, 6, 5}; + + // Minimum compression ratios for initiate encoding + double cratio_[10] = {0, 2, 2, 2, 2, 1.8, 1.6, 1.4, 1.2, 1.1}; uint8_t hashlog_[10] = {0, HASH_LOG - 2, HASH_LOG - 1, HASH_LOG, HASH_LOG, - HASH_LOG, HASH_LOG, HASH_LOG, HASH_LOG, HASH_LOG}; + HASH_LOG, HASH_LOG, HASH_LOG, HASH_LOG, HASH_LOG}; uint8_t hashlog = hashlog_[clevel]; // Initialize the hash table to distances of 0 for (unsigned i = 0; i < (1U << hashlog); i++) { @@ -371,50 +540,74 @@ return 0; } + /* When we go back in a match (shift), we obtain quite different compression properties. + * It looks like 4 is more useful in combination with bitshuffle and small typesizes + * (compress better and faster in e.g. `b2bench blosclz bitshuffle single 6 6291456 1 19`). + * Fallback to 4 because it provides more consistent results on small itemsizes. + * + * In this block we also check cratios for the beginning of the buffers and + * eventually discard those that are small (take too long to decompress). + * This process is called _entropy probing_. + */ + int ipshift = 4; + int maxlen; // maximum length for entropy probing + int csize_3b; + int csize_4b; + double cratio = 0; + switch (clevel) { + case 1: + case 2: + case 3: + maxlen = length / 8; + csize_4b = get_csize(ibase, maxlen, false); + cratio = (double)maxlen / csize_4b; + break; + case 4: + case 5: + case 6: + case 7: + case 8: + maxlen = length / 8; + csize_4b = get_csize(ibase, maxlen, false); + cratio = (double)maxlen / csize_4b; + break; + case 9: + // case 9 is special. 
we need to asses the optimal shift + maxlen = length / 8; + csize_3b = get_csize(ibase, maxlen, true); + csize_4b = get_csize(ibase, maxlen, false); + ipshift = (csize_3b < csize_4b) ? 3 : 4; + cratio = (csize_3b < csize_4b) ? ((double)maxlen / csize_3b) : ((double)maxlen / csize_4b); + break; + default: + break; + } + // discard probes with small compression ratios (too expensive) + if (cratio < cratio_ [clevel]) { + goto out; + } + /* we start with literal copy */ - copy = 2; + copy = 4; *op++ = MAX_COPY - 1; *op++ = *ip++; *op++ = *ip++; + *op++ = *ip++; + *op++ = *ip++; /* main loop */ - while (BLOSCLZ_EXPECT_CONDITIONAL(ip < ip_limit)) { + while (BLOSCLZ_LIKELY(ip < ip_limit)) { const uint8_t* ref; - uint32_t distance; - uint32_t len = 4; /* minimum match length */ + unsigned distance; uint8_t* anchor = ip; /* comparison starting-point */ - if (BLOSCLZ_EXPECT_CONDITIONAL(max_skip_cycles)) { - // Enter the entropy probing mode - if (skip_cycle) { - LITERAL(ip, op, op_limit, anchor, copy) - // Start a new cycle every 256 bytes - if (BLOSCLZ_UNEXPECT_CONDITIONAL(ip - icycle) >= BYTES_IN_CYCLE) { - skip_cycle--; - icycle = ip; - ocycle = op; - } - continue; - } - // Check whether we are doing well with compression ratios - if (BLOSCLZ_UNEXPECT_CONDITIONAL((op - ocycle) >= BYTES_IN_CYCLE)) { - cratio = (double) (ip - icycle) / (double) (op - ocycle); - if (cratio < min_cratio) { - skip_cycle = max_skip_cycles; - icycle = ip; - ocycle = op; - continue; - } - } - } - /* find potential match */ seq = BLOSCLZ_READU32(ip); HASH_FUNCTION(hval, seq, hashlog) ref = ibase + htab[hval]; /* calculate distance to the match */ - distance = (int32_t)(anchor - ref); + distance = anchor - ref; /* update hash table */ htab[hval] = (uint32_t) (anchor - ibase); @@ -425,39 +618,35 @@ } /* is this a match? 
check the first 4 bytes */ - if (BLOSCLZ_UNEXPECT_CONDITIONAL(BLOSCLZ_READU32(ref) == BLOSCLZ_READU32(ip))) { + if (BLOSCLZ_UNLIKELY(BLOSCLZ_READU32(ref) == BLOSCLZ_READU32(ip))) { ref += 4; - } - else { + } else { /* no luck, copy as a literal */ LITERAL(ip, op, op_limit, anchor, copy) continue; } /* last matched byte */ - ip = anchor + len; + ip = anchor + 4; /* distance is biased */ distance--; - if (BLOSCLZ_UNEXPECT_CONDITIONAL(!distance)) { - /* zero distance means a run */ -#if defined(__AVX2__) - ip = get_run_32(ip, ip_bound, ref); -#elif defined(__SSE2__) - ip = get_run_16(ip, ip_bound, ref); -#else - ip = get_run(ip, ip_bound, ref); -#endif - } - else { -#if defined(__AVX2__) - ip = get_match_32(ip, ip_bound + IP_BOUNDARY, ref); -#elif defined(__SSE2__) - ip = get_match_16(ip, ip_bound + IP_BOUNDARY, ref); -#else - ip = get_match(ip, ip_bound + IP_BOUNDARY, ref); -#endif + /* get runs or matches; zero distance means a run */ + ip = get_run_or_match(ip, ip_bound, ref, !distance); + + /* length is biased, '1' means a match of 3 bytes */ + ip -= ipshift; + + unsigned len = (int)(ip - anchor); + // If match is close, let's reduce the minimum length to encode it + unsigned minlen = (clevel == 9) ? 
ipshift : minlen_[clevel]; + + // Encoding short lengths is expensive during decompression + // Encode only for reasonable lengths (extensive experiments done) + if (len < minlen || (len <= 5 && distance >= MAX_DISTANCE)) { + LITERAL(ip, op, op_limit, anchor, copy) + continue; } /* if we have copied something, adjust the copy count */ @@ -467,46 +656,23 @@ else /* back, to overwrite the copy count */ op--; - /* reset literal counter */ copy = 0; - /* length is biased, '1' means a match of 3 bytes */ - ip -= 3; - len = (int32_t)(ip - anchor); - /* encode the match */ if (distance < MAX_DISTANCE) { if (len < 7) { - *op++ = (uint8_t)((len << 5U) + (distance >> 8U)); - *op++ = (uint8_t)((distance & 255U)); - } - else { - *op++ = (uint8_t)((7U << 5U) + (distance >> 8U)); - for (len -= 7; len >= 255; len -= 255) - *op++ = 255; - *op++ = (uint8_t)len; - *op++ = (uint8_t)((distance & 255U)); + DISTANCE_SHORT(op, op_limit, len, distance) + } else { + DISTANCE_LONG(op, op_limit, len, distance) } - } - else { + } else { /* far away, but not yet in the another galaxy... 
*/ + distance -= MAX_DISTANCE; if (len < 7) { - distance -= MAX_DISTANCE; - *op++ = (uint8_t)((len << 5U) + 31); - *op++ = 255; - *op++ = (uint8_t)(distance >> 8U); - *op++ = (uint8_t)(distance & 255U); - } - else { - distance -= MAX_DISTANCE; - *op++ = (7U << 5U) + 31; - for (len -= 7; len >= 255; len -= 255) - *op++ = 255; - *op++ = (uint8_t)len; - *op++ = 255; - *op++ = (uint8_t)(distance >> 8U); - *op++ = (uint8_t)(distance & 255U); + DISTANCE_SHORT_FAR(op, op_limit, len, distance) + } else { + DISTANCE_LONG_FAR(op, op_limit, len, distance) } } @@ -518,17 +684,18 @@ HASH_FUNCTION(hval, seq, hashlog) htab[hval] = (uint32_t) (ip++ - ibase); /* assuming literal copy */ - *op++ = MAX_COPY - 1; + if (BLOSCLZ_UNLIKELY(op + 1 > op_limit)) + goto out; + *op++ = MAX_COPY - 1; } /* left-over as literal copy */ - ip_bound++; - while (BLOSCLZ_UNEXPECT_CONDITIONAL(ip <= ip_bound)) { - if (BLOSCLZ_UNEXPECT_CONDITIONAL(op + 2 > op_limit)) goto out; + while (BLOSCLZ_UNLIKELY(ip <= ip_bound)) { + if (BLOSCLZ_UNLIKELY(op + 2 > op_limit)) goto out; *op++ = *ip++; copy++; - if (BLOSCLZ_UNEXPECT_CONDITIONAL(copy == MAX_COPY)) { + if (BLOSCLZ_UNLIKELY(copy == MAX_COPY)) { copy = 0; *op++ = MAX_COPY - 1; } @@ -547,7 +714,6 @@ out: return 0; - } // See https://habr.com/en/company/yandex/blog/457612/ @@ -605,6 +771,14 @@ } #endif +// LZ4 wildCopy which can reach excellent copy bandwidth (even if insecure) +static inline void wild_copy(uint8_t *out, const uint8_t* from, uint8_t* end) { + uint8_t* d = out; + const uint8_t* s = from; + uint8_t* const e = end; + + do { memcpy(d,s,8); d+=8; s+=8; } while (d<e); +} int blosclz_decompress(const void* input, int length, void* output, int maxout) { const uint8_t* ip = (const uint8_t*)input; @@ -612,23 +786,22 @@ uint8_t* op = (uint8_t*)output; uint32_t ctrl; uint8_t* op_limit = op + maxout; - if (BLOSCLZ_UNEXPECT_CONDITIONAL(length == 0)) { + if (BLOSCLZ_UNLIKELY(length == 0)) { return 0; } ctrl = (*ip++) & 31U; while (1) { - uint8_t* ref = op; 
- int32_t len = ctrl >> 5U; - int32_t ofs = (ctrl & 31U) << 8U; - if (ctrl >= 32) { + // match + int32_t len = (ctrl >> 5U) - 1 ; + int32_t ofs = (ctrl & 31U) << 8U; uint8_t code; - len--; - ref -= ofs; + const uint8_t* ref = op - ofs; + if (len == 7 - 1) { do { - if (BLOSCLZ_UNEXPECT_CONDITIONAL(ip + 1 >= ip_limit)) { + if (BLOSCLZ_UNLIKELY(ip + 1 >= ip_limit)) { return 0; } code = *ip++; @@ -636,17 +809,18 @@ } while (code == 255); } else { - if (BLOSCLZ_UNEXPECT_CONDITIONAL(ip + 1 >= ip_limit)) { + if (BLOSCLZ_UNLIKELY(ip + 1 >= ip_limit)) { return 0; } } code = *ip++; + len += 3; ref -= code; /* match from 16-bit distance */ - if (BLOSCLZ_UNEXPECT_CONDITIONAL(code == 255)) { - if (BLOSCLZ_EXPECT_CONDITIONAL(ofs == (31U << 8U))) { - if (BLOSCLZ_UNEXPECT_CONDITIONAL(ip + 1 >= ip_limit)) { + if (BLOSCLZ_UNLIKELY(code == 255)) { + if (ofs == (31U << 8U)) { + if (ip + 1 >= ip_limit) { return 0; } ofs = (*ip++) << 8U; @@ -655,29 +829,30 @@ } } - if (BLOSCLZ_UNEXPECT_CONDITIONAL(op + len + 3 > op_limit)) { + if (BLOSCLZ_UNLIKELY(op + len > op_limit)) { return 0; } - if (BLOSCLZ_UNEXPECT_CONDITIONAL(ref - 1 < (uint8_t*)output)) { + if (BLOSCLZ_UNLIKELY(ref - 1 < (uint8_t*)output)) { return 0; } - if (BLOSCLZ_EXPECT_CONDITIONAL(ip < ip_limit)) - ctrl = *ip++; - else - break; + if (BLOSCLZ_UNLIKELY(ip >= ip_limit)) break; + ctrl = *ip++; - if (ref == op) { + ref--; + if (ref == op - 1) { /* optimized copy for a run */ - uint8_t b = ref[-1]; - memset(op, b, len + 3); - op += len + 3; + memset(op, *ref, len); + op += len; + } + else if ((op - ref >= 8) && (op_limit - op >= len + 8)) { + // copy with an overlap not larger than 8 + wild_copy(op, ref, op + len); + op += len; } else { - /* copy from reference */ - ref--; - len += 3; + // general copy with any overlap #ifdef __AVX2__ if (op - ref <= 16) { // This is not faster on a combination of compilers (clang, gcc, icc) or machines, but @@ -686,7 +861,6 @@ } else { #endif - // We absolutely need a copy_match here op = 
copy_match(op, ref, (unsigned) len); #ifdef __AVX2__ } @@ -694,21 +868,23 @@ } } else { + // literal ctrl++; - if (BLOSCLZ_UNEXPECT_CONDITIONAL(op + ctrl > op_limit)) { + if (BLOSCLZ_UNLIKELY(op + ctrl > op_limit)) { return 0; } - if (BLOSCLZ_UNEXPECT_CONDITIONAL(ip + ctrl > ip_limit)) { + if (BLOSCLZ_UNLIKELY(ip + ctrl > ip_limit)) { return 0; } - // memcpy(op, ip, ctrl); op += ctrl; ip += ctrl; + memcpy(op, ip, ctrl); op += ctrl; ip += ctrl; // On GCC-6, fastcopy this is still faster than plain memcpy // However, using recent CLANG/LLVM 9.0, there is almost no difference // in performance. - op = fastcopy(op, ip, (unsigned) ctrl); ip += ctrl; + // And starting on CLANG/LLVM 10 and GCC 9, memcpy is generally faster. + // op = fastcopy(op, ip, (unsigned) ctrl); ip += ctrl; - if (BLOSCLZ_UNEXPECT_CONDITIONAL(ip >= ip_limit)) break; + if (BLOSCLZ_UNLIKELY(ip >= ip_limit)) break; ctrl = *ip++; } } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/c-blosc-1.19.0/blosc/shuffle.c new/c-blosc-1.20.1/blosc/shuffle.c --- old/c-blosc-1.19.0/blosc/shuffle.c 2020-06-05 11:09:21.000000000 +0200 +++ new/c-blosc-1.20.1/blosc/shuffle.c 2020-09-08 17:23:32.000000000 +0200 @@ -195,6 +195,10 @@ return ((uint64_t)edx << 32) | eax; } +#else + +#define blosc_internal_xgetbv _xgetbv + #endif // !(defined(_IMMINTRIN_H_INCLUDED) && (BLOSC_GCC_VERSION >= 900)) #endif /* defined(_MSC_FULL_VER) */ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/c-blosc-1.19.0/code_of_conduct.md new/c-blosc-1.20.1/code_of_conduct.md --- old/c-blosc-1.19.0/code_of_conduct.md 1970-01-01 01:00:00.000000000 +0100 +++ new/c-blosc-1.20.1/code_of_conduct.md 2020-09-08 17:23:32.000000000 +0200 @@ -0,0 +1,5 @@ +# Code of Conduct + +The Blosc community has adopted a Code of Conduct that we expect project participants to adhere to. 
+Please read the [full text](https://github.com/Blosc/community/blob/master/code_of_conduct.md) +so that you can understand what actions will and will not be tolerated. diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/c-blosc-1.19.0/internal-complibs/zlib-1.2.8/gzguts.h new/c-blosc-1.20.1/internal-complibs/zlib-1.2.8/gzguts.h --- old/c-blosc-1.19.0/internal-complibs/zlib-1.2.8/gzguts.h 2020-06-05 11:09:21.000000000 +0200 +++ new/c-blosc-1.20.1/internal-complibs/zlib-1.2.8/gzguts.h 2020-09-08 17:23:32.000000000 +0200 @@ -3,6 +3,10 @@ * For conditions of distribution and use, see copyright notice in zlib.h */ +#ifndef _WIN32 + #include <unistd.h> +#endif + #ifdef _LARGEFILE64_SOURCE # ifndef _LARGEFILE_SOURCE # define _LARGEFILE_SOURCE 1 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/c-blosc-1.19.0/tests/fuzz/CMakeLists.txt new/c-blosc-1.20.1/tests/fuzz/CMakeLists.txt --- old/c-blosc-1.19.0/tests/fuzz/CMakeLists.txt 1970-01-01 01:00:00.000000000 +0100 +++ new/c-blosc-1.20.1/tests/fuzz/CMakeLists.txt 2020-09-08 17:23:32.000000000 +0200 @@ -0,0 +1,62 @@ +# flags +link_directories(${PROJECT_BINARY_DIR}/blosc) + +# look for fuzzing lib and link with it if found +if(CMAKE_C_COMPILER_ID STREQUAL "Clang") + enable_language(CXX) + + if(DEFINED ENV{LIB_FUZZING_ENGINE}) + set(FUZZING_ENGINE $ENV{LIB_FUZZING_ENGINE}) + set(FUZZING_ENGINE_FOUND TRUE) + else() + find_library(FUZZING_ENGINE "FuzzingEngine") + endif() +endif() + +# If fuzzing lib not found then create standalone fuzz runner +if(NOT FUZZING_ENGINE_FOUND) + set(FUZZER_SRC standalone.c) +else() + set(FUZZER_SRC) +endif() + +# sources +file(GLOB SOURCES fuzz_*.c) + +# targets and tests +foreach(source ${SOURCES}) + get_filename_component(target ${source} NAME_WE) + + # Enable support for testing accelerated shuffles + if(COMPILER_SUPPORT_SSE2) + # Define a symbol so tests for SSE2 shuffle/unshuffle will be compiled in. 
+ set_property( + SOURCE ${source} + APPEND PROPERTY COMPILE_DEFINITIONS SHUFFLE_SSE2_ENABLED) + endif(COMPILER_SUPPORT_SSE2) +# if(COMPILER_SUPPORT_AVX2) +# # Define a symbol so tests for AVX2 shuffle/unshuffle will be compiled in. +# set_property( +# SOURCE ${source} +# APPEND PROPERTY COMPILE_DEFINITIONS SHUFFLE_AVX2_ENABLED) +# endif(COMPILER_SUPPORT_AVX2) + + add_executable(${target} ${source} ${FUZZER_SRC}) + + # OSS-Fuzz expect fuzzers to end with _fuzzer + string(REPLACE "fuzz_" "" output_name ${target}) + set_target_properties(${target} PROPERTIES OUTPUT_NAME ${output_name}_fuzzer) + + if(FUZZING_ENGINE_FOUND) + set_target_properties(${target} PROPERTIES LINKER_LANGUAGE CXX) + target_link_libraries(${target} ${FUZZING_ENGINE}) + endif() + + target_link_libraries(${target} blosc_static) + add_dependencies(${target} blosc_static) + + # run standalone fuzzer against each file + file(GLOB COMPAT_FILES ${PROJECT_SOURCE_DIR}/compat/*.cdata) + add_test(NAME ${target} COMMAND ${target} ${COMPAT_FILES}) + +endforeach(source) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/c-blosc-1.19.0/tests/fuzz/fuzz_compress.c new/c-blosc-1.20.1/tests/fuzz/fuzz_compress.c --- old/c-blosc-1.19.0/tests/fuzz/fuzz_compress.c 1970-01-01 01:00:00.000000000 +0100 +++ new/c-blosc-1.20.1/tests/fuzz/fuzz_compress.c 2020-09-08 17:23:32.000000000 +0200 @@ -0,0 +1,65 @@ +#include <stdint.h> +#include <stdlib.h> + +#include "blosc.h" + +#ifdef __cplusplus +extern "C" { +#endif + +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + const char *compressors[] = { "blosclz", "lz4", "lz4hc", "snappy", "zlib", "zstd" }; + int level = 9, filter = BLOSC_BITSHUFFLE, cindex = 0, i = 0; + size_t nbytes, cbytes, blocksize; + void *output, *input; + + blosc_set_nthreads(1); + + if (size > 0) + level = data[0] % (9 + 1); + if (size > 1) + filter = data[1] % (BLOSC_BITSHUFFLE + 1); + if (size > 2) + cindex = data[2]; + + /* Find next available 
compressor */ + while (blosc_set_compressor(compressors[cindex % 6]) == -1 && i < 6) { + cindex++, i++; + } + if (i == 6) { + /* No compressors available */ + return 0; + } + + if (size > 3 && data[3] % 7 == 0) + blosc_set_blocksize(4096); + + if (size > 4) + blosc_set_splitmode(data[4] % BLOSC_FORWARD_COMPAT_SPLIT + 1); + + output = malloc(size + 1); + if (output == NULL) + return 0; + + if (blosc_compress(level, filter, 1, size, data, output, size) == 0) { + /* Cannot compress src buffer into dest */ + free(output); + return 0; + } + + blosc_cbuffer_sizes(output, &nbytes, &cbytes, &blocksize); + + input = malloc(cbytes); + if (input != NULL) { + blosc_decompress(output, input, cbytes); + free(input); + } + + free(output); + + return 0; +} + +#ifdef __cplusplus +} +#endif diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/c-blosc-1.19.0/tests/fuzz/fuzz_decompress.c new/c-blosc-1.20.1/tests/fuzz/fuzz_decompress.c --- old/c-blosc-1.19.0/tests/fuzz/fuzz_decompress.c 1970-01-01 01:00:00.000000000 +0100 +++ new/c-blosc-1.20.1/tests/fuzz/fuzz_decompress.c 2020-09-08 17:23:32.000000000 +0200 @@ -0,0 +1,41 @@ +#include <stdint.h> +#include <stdlib.h> + +#include "blosc.h" + +#ifdef __cplusplus +extern "C" { +#endif + +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + size_t nbytes, cbytes, blocksize; + void *output; + + if (size < BLOSC_MIN_HEADER_LENGTH) { + return 0; + } + + blosc_cbuffer_sizes(data, &nbytes, &cbytes, &blocksize); + if (cbytes != size) { + return 0; + } + if (nbytes == 0) { + return 0; + } + + if (blosc_cbuffer_validate(data, size, &nbytes) != 0) { + /* Unexpected nbytes specified in blosc header */ + return 0; + } + + output = malloc(cbytes); + if (output != NULL) { + blosc_decompress(data, output, cbytes); + free(output); + } + return 0; +} + +#ifdef __cplusplus +} +#endif diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' 
old/c-blosc-1.19.0/tests/fuzz/standalone.c new/c-blosc-1.20.1/tests/fuzz/standalone.c --- old/c-blosc-1.19.0/tests/fuzz/standalone.c 1970-01-01 01:00:00.000000000 +0100 +++ new/c-blosc-1.20.1/tests/fuzz/standalone.c 2020-09-08 17:23:32.000000000 +0200 @@ -0,0 +1,44 @@ +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> + +extern int LLVMFuzzerTestOneInput(const unsigned char *data, size_t size); + +int main(int argc, char **argv) { + int i; + fprintf(stderr, "Running %d inputs\n", argc - 1); + + for (i = 1; i < argc; i++) { + size_t len, err, n_read = 0; + unsigned char *buf; + FILE *f = NULL; + + f = fopen(argv[i], "rb+"); + if (f == NULL) { + /* Failed to open this file: it may be a directory. */ + fprintf(stderr, "Skipping: %s\n", argv[i]); + continue; + } + fprintf(stderr, "Running: %s %s\n", argv[0], argv[i]); + + fseek(f, 0, SEEK_END); + len = ftell(f); + fseek(f, 0, SEEK_SET); + + buf = (unsigned char *)malloc(len); + if (buf != NULL) { + n_read = fread(buf, 1, len, f); + assert(n_read == len); + LLVMFuzzerTestOneInput(buf, len); + free(buf); + } + + err = fclose(f); + assert(err == 0); + (void)err; + + fprintf(stderr, "Done: %s: (%d bytes)\n", argv[i], (int)n_read); + } + + return 0; +}