commit blosc for openSUSE:Factory

21 Dec 2020

Hello community,

here is the log from the commit of package blosc for openSUSE:Factory checked in at 2020-12-21 12:35:10
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/blosc (Old)
 and      /work/SRC/openSUSE:Factory/.blosc.new.5145 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Package is "blosc"

Mon Dec 21 12:35:10 2020 rev:20 rq:856832 version:1.20.1

Changes:
--------

--- /work/SRC/openSUSE:Factory/blosc/blosc.changes	2020-09-04 11:02:22.490728240 +0200
+++ /work/SRC/openSUSE:Factory/.blosc.new.5145/blosc.changes	2020-12-21 12:35:17.954949548 +0100
@@ -1,0 +2,8 @@
+Fri Dec 18 08:06:48 UTC 2020 - Martin Pluskal <mpluskal@suse.com>
+
+- Update to version 1.20.1 boo#1179914 CVE-2020-29367:
+  * More safety checks have been implemented so that potential flaws discovered by new fuzzers in OSS-Fuzzer are fixed now
+  * BloscLZ updated to 2.3.0. Expect better compression ratios for faster codecs. For details, see our new blog post: https://blosc.org/posts/beast-release/
+  * Fixed the _xgetbv() collision. Thanks to Michał Górny (@mgorny).
+
+-------------------------------------------------------------------

Old:
----
  blosc-1.19.0.tar.gz

New:
----
  blosc-1.20.1.tar.gz

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ blosc.spec ++++++
--- /var/tmp/diff_new_pack.8MrbQM/_old	2020-12-21 12:35:18.550950972 +0100
+++ /var/tmp/diff_new_pack.8MrbQM/_new	2020-12-21 12:35:18.550950972 +0100
@@ -19,7 +19,7 @@
 %define major   1
 %define libname lib%{name}%{major}
 Name:           blosc
-Version:        1.19.0
+Version:        1.20.1
 Release:        0
 Summary:        A blocking, shuffling and lossless compression library
 License:        MIT AND BSD-3-Clause AND BSD-2-Clause
@@ -28,10 +28,10 @@
 Source:         https://github.com/Blosc/c-blosc/archive/v%{version}.tar.gz#/%{name}-%{version}.tar.gz
 BuildRequires:  cmake
 BuildRequires:  gcc-c++
-BuildRequires:  libzstd-devel
 BuildRequires:  pkgconfig
 BuildRequires:  snappy-devel
 BuildRequires:  pkgconfig(liblz4)
+BuildRequires:  pkgconfig(libzstd)
 BuildRequires:  pkgconfig(zlib)
 
 %description
@@ -100,12 +100,11 @@
 
 %files -n %{libname}
 %doc LICENSES/*.txt
-%doc ANNOUNCE.rst README.md README_THREADED.rst RELEASE_NOTES.rst THANKS.rst
+%doc ANNOUNCE.rst README.md RELEASE_NOTES.rst THANKS.rst
 %{_libdir}/libblosc.so.%{major}
 %{_libdir}/libblosc.so.%{version}
 
 %files devel
-%doc README_HEADER.rst
 %doc examples/
 %{_includedir}/blosc.h
 %{_includedir}/blosc-export.h

++++++ blosc-1.19.0.tar.gz -> blosc-1.20.1.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/c-blosc-1.19.0/.github/workflows/cmake.yml new/c-blosc-1.20.1/.github/workflows/cmake.yml
--- old/c-blosc-1.19.0/.github/workflows/cmake.yml	2020-06-05 11:09:21.000000000 +0200
+++ new/c-blosc-1.20.1/.github/workflows/cmake.yml	2020-09-08 17:23:32.000000000 +0200
@@ -143,7 +143,7 @@
       run: |
         mkdir ${{ matrix.build-dir || '.not-used' }}
         cd ${{ matrix.build-dir || '.' }}
-        cmake ${{ matrix.build-src-dir || '.' }} ${{ matrix.cmake-args }} -DCMAKE_BUILD_TYPE=${{ matrix.build-config || 'Release' }} -DBUILD_SHARED_LIBS=OFF
+        cmake ${{ matrix.build-src-dir || '.' }} ${{ matrix.cmake-args }} -DCMAKE_BUILD_TYPE=${{ matrix.build-config || 'Release' }} -DBUILD_SHARED_LIBS=OFF -DBUILD_FUZZERS=ON
       env:
         CC: ${{ matrix.compiler }}
         CFLAGS: ${{ matrix.cflags }}
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/c-blosc-1.19.0/ANNOUNCE.rst new/c-blosc-1.20.1/ANNOUNCE.rst
--- old/c-blosc-1.19.0/ANNOUNCE.rst	2020-06-05 11:09:21.000000000 +0200
+++ new/c-blosc-1.20.1/ANNOUNCE.rst	2020-09-08 17:23:32.000000000 +0200
@@ -1,19 +1,14 @@
 ===============================================================
- Announcing C-Blosc 1.19.0
+ Announcing C-Blosc 1.20.1
  A blocking, shuffling and lossless compression library for C
 ===============================================================
 
 What is new?
 ============
 
-The algorithm for choosing the blocksize automatically in fast codecs
-(lz4 and blosclz) has been refined to provide better compression ratios
-and better performance on modern CPUs (L2 cache sizes >= 256KB), while
-staying reasonably fast on less powerful CPUs.
-
-Also, new versions for blosclz (2.1.0) and zstd (1.4.5) codecs have
-been integrated.  Expect better compression ratios and performance with
-these new versions too.
+This is a maintenance release.  Vendored zlib 1.2.8 is now compatible
+with Python 3.8 in recent Mac OSX.  For details, see:
+https://github.com/Blosc/python-blosc/issues/229
 
 For more info, please see the release notes in:
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/c-blosc-1.19.0/CMakeLists.txt new/c-blosc-1.20.1/CMakeLists.txt
--- old/c-blosc-1.19.0/CMakeLists.txt	2020-06-05 11:09:21.000000000 +0200
+++ new/c-blosc-1.20.1/CMakeLists.txt	2020-09-08 17:23:32.000000000 +0200
@@ -9,6 +9,8 @@
 #       build the shared library version of the Blosc library
 #   BUILD_TESTS: default ON
 #       build test programs and generates the "test" target
+#   BUILD_FUZZERS: default follows BUILD_STATIC (requires BUILD_STATIC)
+#       build fuzz test programs and generates the "test" target
 #   BUILD_BENCHMARKS: default ON
 #       build the benchmark program
 #   DEACTIVATE_SSE2: default OFF
@@ -93,13 +95,15 @@
 option(BUILD_SHARED
     "Build a shared library version of the blosc library." ON)
 option(BUILD_TESTS
-    "Build test programs form the blosc compression library" ON)
+    "Build test programs from the blosc compression library" ON)
+option(BUILD_FUZZERS
+    "Build fuzzer programs from the blosc compression library" ${BUILD_STATIC})
 option(BUILD_BENCHMARKS
-    "Build benchmark programs form the blosc compression library" ON)
+    "Build benchmark programs from the blosc compression library" ON)
 option(DEACTIVATE_SSE2
-        "Do not attempt to build with SSE2 instructions" OFF)
+    "Do not attempt to build with SSE2 instructions" OFF)
 option(DEACTIVATE_AVX2
-        "Do not attempt to build with AVX2 instructions" OFF)
+    "Do not attempt to build with AVX2 instructions" OFF)
 option(DEACTIVATE_LZ4
     "Do not include support for the LZ4 library." OFF)
 option(DEACTIVATE_SNAPPY
@@ -107,9 +111,9 @@
 option(DEACTIVATE_ZLIB
     "Do not include support for the Zlib library." OFF)
 option(DEACTIVATE_ZSTD
-        "Do not include support for the Zstd library." OFF)
+    "Do not include support for the Zstd library." OFF)
 option(DEACTIVATE_SYMBOLS_CHECK
-        "Do not check for symbols in shared or static libraries." ON)
+    "Do not check for symbols in shared or static libraries." ON)
 option(PREFER_EXTERNAL_LZ4
     "Find and use external LZ4 library instead of included sources." OFF)
 option(PREFER_EXTERNAL_ZLIB
@@ -318,6 +322,14 @@
     add_subdirectory(compat)
 endif(BUILD_TESTS)
 
+if(BUILD_FUZZERS)
+    if(NOT BUILD_STATIC)
+        message(FATAL_ERROR "BUILD_FUZZERS requires BUILD_STATIC to be enabled.")
+    endif()
+    enable_testing()
+    add_subdirectory(tests/fuzz)
+endif(BUILD_FUZZERS)
+
 if(BUILD_BENCHMARKS)
     add_subdirectory(bench)
 endif(BUILD_BENCHMARKS)
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/c-blosc-1.19.0/CODE_OF_CONDUCT.md new/c-blosc-1.20.1/CODE_OF_CONDUCT.md
--- old/c-blosc-1.19.0/CODE_OF_CONDUCT.md	2020-06-05 11:09:21.000000000 +0200
+++ new/c-blosc-1.20.1/CODE_OF_CONDUCT.md	1970-01-01 01:00:00.000000000 +0100
@@ -1,5 +0,0 @@
-# Code of Conduct
-
-The Blosc community has adopted a Code of Conduct that we expect project participants to adhere to.
-Please read the [full text](https://github.com/Blosc/CodeOfConduct/README.md)
-so that you can understand what actions will and will not be tolerated.
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/c-blosc-1.19.0/README.md new/c-blosc-1.20.1/README.md
--- old/c-blosc-1.19.0/README.md	2020-06-05 11:09:21.000000000 +0200
+++ new/c-blosc-1.20.1/README.md	2020-09-08 17:23:32.000000000 +0200
@@ -3,10 +3,9 @@
 |--------|---------|-----|
 | Blosc Development Team | blosc@blosc.org | http://www.blosc.org | 
 
-| Gitter | GH Actions | NumFOCUS |
-|--------|------------|----------|
-| [![Gitter](https://badges.gitter.im/Blosc/c-blosc.svg)](https://gitter.im/Blosc/c-blosc?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) | [![CI CMake](https://github.com/Blosc/c-blosc/workflows/CI%20CMake/badge.svg)](https://github.com/Blosc/c-blosc/actions?query=workflow%3A%22CI+CMake%22) | [![Powered by NumFOCUS](https://img.shields.io/badge/powered%20by-NumFOCUS-orange.svg?style=flat&colorA=E1523D&colorB=007D8A)](http://numfocus.org) |
-
+| Gitter | GH Actions | NumFOCUS | Code of Conduct |
+|--------|------------|----------|-----------------|
+| [![Gitter](https://badges.gitter.im/Blosc/c-blosc.svg)](https://gitter.im/Blosc/c-blosc?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) | [![CI CMake](https://github.com/Blosc/c-blosc/workflows/CI%20CMake/badge.svg)](https://github.com/Blosc/c-blosc/actions?query=workflow%3A%22CI+CMake%22) | [![Powered by NumFOCUS](https://img.shields.io/badge/powered%20by-NumFOCUS-orange.svg?style=flat&colorA=E1523D&colorB=007D8A)](http://numfocus.org) | [![Contributor Covenant](https://img.shields.io/badge/Contributor%20Covenant-v2.0%20adopted-ff69b4.svg)](code_of_conduct.md) |
 
 ## What is it?
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/c-blosc-1.19.0/README_CHUNK_FORMAT.rst new/c-blosc-1.20.1/README_CHUNK_FORMAT.rst
--- old/c-blosc-1.19.0/README_CHUNK_FORMAT.rst	1970-01-01 01:00:00.000000000 +0100
+++ new/c-blosc-1.20.1/README_CHUNK_FORMAT.rst	2020-09-08 17:23:32.000000000 +0200
@@ -0,0 +1,97 @@
+Blosc Chunk Format
+==================
+
+The chunk is composed of a header and a blocks / splits section::
+
+    +---------+--------+---------+
+    |  header | blocks / splits  |
+    +---------+--------+---------+
+
+These are described below.
+
+The header section
+------------------
+
+Blosc (as of Version 1.0.0) has the following 16 byte header that stores
+information about the compressed buffer::
+
+    |-0-|-1-|-2-|-3-|-4-|-5-|-6-|-7-|-8-|-9-|-A-|-B-|-C-|-D-|-E-|-F-|
+      ^   ^   ^   ^ |     nbytes    |   blocksize   |    cbytes     |
+      |   |   |   |
+      |   |   |   +--typesize
+      |   |   +------flags
+      |   +----------versionlz
+      +--------------version
+
+Datatypes of the header entries
+-------------------------------
+
+All entries are little endian.
+
+:version:
+    (``uint8``) Blosc format version.
+:versionlz:
+    (``uint8``) Version of the internal compressor used.
+:flags and compressor enumeration:
+    (``bitfield``) The flags of the buffer
+
+    :bit 0 (``0x01``):
+        Whether the byte-shuffle filter has been applied or not.
+    :bit 1 (``0x02``):
+        Whether the internal buffer is a pure memcpy or not.
+    :bit 2 (``0x04``):
+        Whether the bit-shuffle filter has been applied or not.
+    :bit 3 (``0x08``):
+        Reserved, must be zero.
+    :bit 4 (``0x10``):
+        If set, the blocks will not be split in sub-blocks during compression.
+    :bit 5 (``0x20``):
+        Part of the enumeration for compressors.
+    :bit 6 (``0x40``):
+        Part of the enumeration for compressors.
+    :bit 7 (``0x80``):
+        Part of the enumeration for compressors.
+
+    The last three bits form an enumeration that allows to use alternative
+    compressors.
+
+    :``0``:
+        ``blosclz``
+    :``1``:
+        ``lz4`` or ``lz4hc``
+    :``2``:
+        ``snappy``
+    :``3``:
+        ``zlib``
+    :``4``:
+        ``zstd``
+
+:typesize:
+    (``uint8``) Number of bytes for the atomic type.
+:nbytes:
+    (``uint32``) Uncompressed size of the buffer (this header is not included).
+:blocksize:
+    (``uint32``) Size of internal blocks.
+:cbytes:
+    (``uint32``) Compressed size of the buffer (including this header).
+
+The blocks / splits section
+---------------------------
+
+After the header comes the blocks / splits section.  Blocks are equal-sized parts of the chunk, except for the last block, which can be shorter than or equal to the rest.
+
+At the beginning of the blocks section, there is a list of `int32_t bstarts` indicating where the different encoded blocks start (counting from the end of this `bstarts` section)::
+
+    +=========+=========+========+=========+
+    | bstart0 | bstart1 |   ...  | bstartN |
+    +=========+=========+========+=========+
+
+Finally comes the actual list of compressed blocks / splits data streams.  A block may optionally (see bit 4 in `flags` above) be further divided into so-called splits, which are the actual data streams that are transmitted to codecs for compression.  If a block is not split, then the split is equivalent to a whole block.  Before each split in the list, there is its compressed size, expressed as an `int32_t`::
+
+    +========+========+========+========+========+========+========+
+    | csize0 | split0 | csize1 | split1 |   ...  | csizeN | splitN |
+    +========+========+========+========+========+========+========+
+
+
+*Note*: all the integers are stored in little endian.
+
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/c-blosc-1.19.0/README_HEADER.rst new/c-blosc-1.20.1/README_HEADER.rst
--- old/c-blosc-1.19.0/README_HEADER.rst	2020-06-05 11:09:21.000000000 +0200
+++ new/c-blosc-1.20.1/README_HEADER.rst	1970-01-01 01:00:00.000000000 +0100
@@ -1,65 +0,0 @@
-Blosc Header Format
-===================
-
-Blosc (as of Version 1.0.0) has the following 16 byte header that stores
-information about the compressed buffer::
-
-    |-0-|-1-|-2-|-3-|-4-|-5-|-6-|-7-|-8-|-9-|-A-|-B-|-C-|-D-|-E-|-F-|
-      ^   ^   ^   ^ |     nbytes    |   blocksize   |    ctbytes    |
-      |   |   |   |
-      |   |   |   +--typesize
-      |   |   +------flags
-      |   +----------versionlz
-      +--------------version
-
-Datatypes of the Header Entries
--------------------------------
-
-All entries are little endian.
-
-:version:
-    (``uint8``) Blosc format version.
-:versionlz:
-    (``uint8``) Version of the internal compressor used.
-:flags and compressor enumeration:
-    (``bitfield``) The flags of the buffer
-
-    :bit 0 (``0x01``):
-        Whether the byte-shuffle filter has been applied or not.
-    :bit 1 (``0x02``):
-        Whether the internal buffer is a pure memcpy or not.
-    :bit 2 (``0x04``):
-        Whether the bit-shuffle filter has been applied or not.
-    :bit 3 (``0x08``):
-        Reserved, must be zero.
-    :bit 4 (``0x10``):
-        If set, the blocks will not be split in sub-blocks during compression.
-    :bit 5 (``0x20``):
-        Part of the enumeration for compressors.
-    :bit 6 (``0x40``):
-        Part of the enumeration for compressors.
-    :bit 7 (``0x80``):
-        Part of the enumeration for compressors.
-
-    The last three bits form an enumeration that allows to use alternative
-    compressors.
-
-    :``0``:
-        ``blosclz``
-    :``1``:
-        ``lz4`` or ``lz4hc``
-    :``2``:
-        ``snappy``
-    :``3``:
-        ``zlib``
-    :``4``:
-        ``zstd``
-
-:typesize:
-    (``uint8``) Number of bytes for the atomic type.
-:nbytes:
-    (``uint32``) Uncompressed size of the buffer.
-:blocksize:
-    (``uint32``) Size of internal blocks.
-:ctbytes:
-    (``uint32``) Compressed size of the buffer.
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/c-blosc-1.19.0/RELEASE_NOTES.rst new/c-blosc-1.20.1/RELEASE_NOTES.rst
--- old/c-blosc-1.19.0/RELEASE_NOTES.rst	2020-06-05 11:09:21.000000000 +0200
+++ new/c-blosc-1.20.1/RELEASE_NOTES.rst	2020-09-08 17:23:32.000000000 +0200
@@ -2,6 +2,38 @@
  Release notes for C-Blosc
 ===========================
 
+Changes from 1.20.0 to 1.20.1
+=============================
+
+* Added `<unistd.h>` in vendored zlib 1.2.8 for compatibility with Python 3.8
+  in recent Mac OSX.  For details, see:
+  https://github.com/Blosc/python-blosc/issues/229
+
+Changes from 1.19.1 to 1.20.0
+=============================
+
+* More safety checks have been implemented so that potential flaws
+  discovered by new fuzzers in OSS-Fuzzer are fixed now.  Thanks to
+  Nathan Moinvaziri (@nmoinvaz).
+
+* BloscLZ updated to 2.3.0. Expect better compression ratios for faster
+  codecs.  For details, see our new blog post:
+  https://blosc.org/posts/beast-release/
+
+* Fixed the `_xgetbv()` collision. Thanks to Michał Górny (@mgorny).
+
+* The chunk format has been fully described so that 3rd party software
+  may come with a different implementation, but still compatible with
+  C-Blosc chunks.
+
+
+Changes from 1.19.0 to 1.19.1
+=============================
+
+- pthread_create() errors are now handled and propagated back to the user.
+  See https://github.com/Blosc/c-blosc/pull/299.
+
+
 Changes from 1.18.1 to 1.19.0
 =============================
 
@@ -15,6 +47,11 @@
   Also, a new OSS-Fuzz workflow has been added for increased detection
   of possible vulnerabilities.  Thanks to Nathan Moinvaziri.
 
+- For small buffers that cannot be compressed (typically < 128 bytes),
+  `blosc_compress()` now returns 0 (cannot compress) instead of a negative
+  number (internal error).  See https://github.com/Blosc/c-blosc/pull/294.
+  Thanks to @kalvdans for providing the initial patch.
+
 - blosclz codec updated to 2.1.0.  Expect better compression ratios and
   performance in a wider variety of scenarios.
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/c-blosc-1.19.0/bench/plot-speeds.py new/c-blosc-1.20.1/bench/plot-speeds.py
--- old/c-blosc-1.19.0/bench/plot-speeds.py	2020-06-05 11:09:21.000000000 +0200
+++ new/c-blosc-1.20.1/bench/plot-speeds.py	2020-09-08 17:23:32.000000000 +0200
@@ -29,16 +29,12 @@
             tmp = line.split('-->')[1]
             parts = tmp.split(', ')
             nthreads, size, elsize, sbits, codec, shuffle = parts[:6]
-            safe = 'unsafe'
-            if len(parts) > 6:
-                safe = parts[6]
             nthreads, size, elsize, sbits = map(int, (nthreads, size, elsize, sbits))
-            values["size"] = size * NCHUNKS / MB_
+            values["size"] = size / MB_
             values["elsize"] = elsize
             values["sbits"] = sbits
             values["codec"] = codec
             values["shuffle"] = shuffle
-            values["safe"] = safe
             # New run for nthreads
             (ratios, speedsw, speedsr) = ([], [], [])
             # Add a new entry for (ratios, speedw, speedr)
@@ -47,21 +43,21 @@
         elif line.startswith('memcpy(write):'):
             tmp = line.split(',')[1]
             memcpyw = float(tmp.split(' ')[1])
-            values["memcpyw"].append(memcpyw)
+            values["memcpyw"].append(memcpyw / 1024)
         elif line.startswith('memcpy(read):'):
             tmp = line.split(',')[1]
             memcpyr = float(tmp.split(' ')[1])
-            values["memcpyr"].append(memcpyr)
+            values["memcpyr"].append(memcpyr / 1024)
         elif line.startswith('comp(write):'):
             tmp = line.split(',')[1]
             speedw = float(tmp.split(' ')[1])
             ratio = float(line.split(':')[-1])
-            speedsw.append(speedw)
+            speedsw.append(speedw / 1024)
             ratios.append(ratio)
         elif line.startswith('decomp(read):'):
             tmp = line.split(',')[1]
             speedr = float(tmp.split(' ')[1])
-            speedsr.append(speedr)
+            speedsr.append(speedr / 1024)
             if "OK" not in line:
                 print("WARNING!  OK not found in decomp line!")
 
@@ -71,7 +67,7 @@
 
 def show_plot(plots, yaxis, legends, gtitle, xmax=None, ymax=None):
     xlabel('Compresssion ratio')
-    ylabel('Speed (MB/s)')
+    ylabel('Speed (GB/s)')
     title(gtitle)
     xlim(0, xmax)
     ylim(0, ymax)
@@ -190,7 +186,7 @@
     if options.title:
         plot_title = options.title
     else:
-        plot_title += " (%(size).1f MB, %(elsize)d bytes, %(sbits)d bits), %(codec)s %(shuffle)s %(safe)s" % values
+        plot_title += " (%(size).1f MB, %(elsize)d bytes, %(sbits)d bits), %(codec)s %(shuffle)s" % values
 
     gtitle = plot_title
 
@@ -219,7 +215,7 @@
         mean = np.mean(values["memcpyr"])
         message = "memcpy (read from memory)"
     plot_ = axhline(mean, linewidth=3, linestyle='-.', color='black')
-    text(4.0, mean+400, message)
+    text(4.0, mean+.4, message)
     plots.append(plot_)
     show_plot(plots, yaxis, legends, gtitle,
               xmax=int(options.xmax) if options.xmax else None,
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/c-blosc-1.19.0/blosc/CMakeLists.txt new/c-blosc-1.20.1/blosc/CMakeLists.txt
--- old/c-blosc-1.19.0/blosc/CMakeLists.txt	2020-06-05 11:09:21.000000000 +0200
+++ new/c-blosc-1.20.1/blosc/CMakeLists.txt	2020-09-08 17:23:32.000000000 +0200
@@ -86,6 +86,7 @@
     else(LZ4_FOUND)
         file(GLOB LZ4_FILES ${LZ4_LOCAL_DIR}/*.c)
         set(SOURCES ${SOURCES} ${LZ4_FILES})
+        source_group("LZ4" FILES ${LZ4_FILES})
     endif(LZ4_FOUND)
 endif(NOT DEACTIVATE_LZ4)
 
@@ -95,6 +96,7 @@
     else(SNAPPY_FOUND)
         file(GLOB SNAPPY_FILES ${SNAPPY_LOCAL_DIR}/*.cc)
         set(SOURCES ${SOURCES} ${SNAPPY_FILES})
+        source_group("Snappy" FILES ${SNAPPY_FILES})
     endif(SNAPPY_FOUND)
 endif(NOT DEACTIVATE_SNAPPY)
 
@@ -104,6 +106,7 @@
     else(ZLIB_FOUND)
         file(GLOB ZLIB_FILES ${ZLIB_LOCAL_DIR}/*.c)
         set(SOURCES ${SOURCES} ${ZLIB_FILES})
+        source_group("Zlib" FILES ${ZLIB_FILES})
     endif(ZLIB_FOUND)
 endif(NOT DEACTIVATE_ZLIB)
 
@@ -116,6 +119,7 @@
         ${ZSTD_LOCAL_DIR}/compress/*.c
         ${ZSTD_LOCAL_DIR}/decompress/*.c)
         set(SOURCES ${SOURCES} ${ZSTD_FILES})
+        source_group("Zstd" FILES ${ZSTD_FILES})
     endif (ZSTD_FOUND)
 endif (NOT DEACTIVATE_ZSTD)
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/c-blosc-1.19.0/blosc/blosc.c new/c-blosc-1.20.1/blosc/blosc.c
--- old/c-blosc-1.19.0/blosc/blosc.c	2020-06-05 11:09:21.000000000 +0200
+++ new/c-blosc-1.20.1/blosc/blosc.c	2020-09-08 17:23:32.000000000 +0200
@@ -423,10 +423,10 @@
                                char* output, size_t maxout, int clevel)
 {
   int cbytes;
-  if (input_length > (size_t)(2<<30))
-    return -1;   /* input larger than 1 GB is not supported */
+  if (input_length > (size_t)(UINT32_C(2)<<30))
+    return -1;   /* input larger than 2 GB is not supported */
   /* clevel for lz4hc goes up to 12, at least in LZ4 1.7.5
-   * but levels larger than 9 does not buy much compression. */
+   * but levels larger than 9 do not buy much compression. */
   cbytes = LZ4_compress_HC(input, output, (int)input_length, (int)maxout,
                            clevel);
   return cbytes;
@@ -877,7 +877,9 @@
   (void)rc;  // just to avoid 'unused-variable' warning
 
   /* Check whether we need to restart threads */
-  blosc_set_nthreads_(context);
+  if (blosc_set_nthreads_(context) < 0) {
+    return -1;
+  }
 
   /* Set sentinels */
   context->thread_giveup_code = 1;
@@ -1976,7 +1978,9 @@
   /* Launch a new pool of threads */
   if (context->numthreads > 1 && context->numthreads != context->threads_started) {
     blosc_release_threadpool(context);
-    init_threads(context);
+    if (init_threads(context) < 0) {
+      return -1;
+    }
   }
 
   /* We have now started the threads */
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/c-blosc-1.19.0/blosc/blosc.h new/c-blosc-1.20.1/blosc/blosc.h
--- old/c-blosc-1.19.0/blosc/blosc.h	2020-06-05 11:09:21.000000000 +0200
+++ new/c-blosc-1.20.1/blosc/blosc.h	2020-09-08 17:23:32.000000000 +0200
@@ -18,14 +18,14 @@
 
 /* Version numbers */
 #define BLOSC_VERSION_MAJOR    1    /* for major interface/format changes  */
-#define BLOSC_VERSION_MINOR    19   /* for minor interface/format changes  */
-#define BLOSC_VERSION_RELEASE  0    /* for tweaks, bug-fixes, or development */
+#define BLOSC_VERSION_MINOR    20   /* for minor interface/format changes  */
+#define BLOSC_VERSION_RELEASE  1    /* for tweaks, bug-fixes, or development */
 
-#define BLOSC_VERSION_STRING   "1.19.0"  /* string version.  Sync with above! */
+#define BLOSC_VERSION_STRING   "1.20.1"  /* string version.  Sync with above! */
 #define BLOSC_VERSION_REVISION "$Rev$"   /* revision version */
-#define BLOSC_VERSION_DATE     "$Date:: 2020-06-05 #$"    /* date version */
+#define BLOSC_VERSION_DATE     "$Date:: 2020-09-08 #$"    /* date version */
 
-#define BLOSCLZ_VERSION_STRING "2.1.0"   /* the internal compressor version */
+#define BLOSCLZ_VERSION_STRING "2.3.0"   /* the internal compressor version */
 
 /* The *_FORMAT symbols should be just 1-byte long */
 #define BLOSC_VERSION_FORMAT    2   /* Blosc format version, starting at 1 */
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/c-blosc-1.19.0/blosc/blosclz.c new/c-blosc-1.20.1/blosc/blosclz.c
--- old/c-blosc-1.19.0/blosc/blosclz.c	2020-06-05 11:09:21.000000000 +0200
+++ new/c-blosc-1.20.1/blosc/blosclz.c	2020-09-08 17:23:32.000000000 +0200
@@ -14,19 +14,7 @@
 
 
 #include <stdio.h>
-
-#if defined(_WIN32) && !defined(__MINGW32__)
-  #include <windows.h>
-  /* stdint.h only available in VS2010 (VC++ 16.0) and newer */
-  #if defined(_MSC_VER) && _MSC_VER < 1600
-    #include "win32/stdint-windows.h"
-  #else
-    #include <stdint.h>
-  #endif
-#else
-  #include <stdint.h>
-#endif  /* _WIN32 */
-
+#include <stdbool.h>
 #include "blosclz.h"
 #include "fastcopy.h"
 #include "blosc-common.h"
@@ -36,11 +24,11 @@
  * Give hints to the compiler for branch prediction optimization.
  */
 #if defined(__GNUC__) && (__GNUC__ > 2)
-#define BLOSCLZ_EXPECT_CONDITIONAL(c)    (__builtin_expect((c), 1))
-#define BLOSCLZ_UNEXPECT_CONDITIONAL(c)  (__builtin_expect((c), 0))
+#define BLOSCLZ_LIKELY(c)    (__builtin_expect((c), 1))
+#define BLOSCLZ_UNLIKELY(c)  (__builtin_expect((c), 0))
 #else
-#define BLOSCLZ_EXPECT_CONDITIONAL(c)    (c)
-#define BLOSCLZ_UNEXPECT_CONDITIONAL(c)  (c)
+#define BLOSCLZ_LIKELY(c)    (c)
+#define BLOSCLZ_UNLIKELY(c)  (c)
 #endif
 
 /*
@@ -62,28 +50,13 @@
   #define BLOSCLZ_READU32(p) *((const uint32_t*)(p))
 #endif
 
-#define HASH_LOG (12)
+#define HASH_LOG (12U)
 
 // This is used in LZ4 and seems to work pretty well here too
-#define HASH_FUNCTION(v, s, h) {                          \
-  v = (s * 2654435761U) >> (32U - h);  \
-}
-
-
-#define LITERAL(ip, op, op_limit, anchor, copy) {        \
-  if (BLOSCLZ_UNEXPECT_CONDITIONAL(op + 2 > op_limit))   \
-    goto out;                                            \
-  *op++ = *anchor++;                                     \
-  ip = anchor;                                           \
-  copy++;                                                \
-  if (BLOSCLZ_UNEXPECT_CONDITIONAL(copy == MAX_COPY)) {  \
-    copy = 0;                                            \
-    *op++ = MAX_COPY-1;                                  \
-  }                                                      \
+#define HASH_FUNCTION(v, s, h) {      \
+  v = (s * 2654435761U) >> (32U - h); \
 }
 
-#define IP_BOUNDARY 2
-#define BYTES_IN_CYCLE 512
 
 #if defined(__AVX2__)
 static uint8_t *get_run_32(uint8_t *ip, const uint8_t *ip_bound, const uint8_t *ref) {
@@ -191,6 +164,7 @@
 }
 
 #else
+
 static uint8_t *get_run(uint8_t *ip, const uint8_t *ip_bound, const uint8_t *ref) {
   uint8_t x = ip[-1];
   int64_t value, value2;
@@ -217,6 +191,7 @@
   while ((ip < ip_bound) && (*ref++ == x)) ip++;
   return ip;
 }
+
 #endif
 
 
@@ -324,42 +299,236 @@
 #endif
 
 
+static uint8_t* get_run_or_match(uint8_t* ip, uint8_t* ip_bound, const uint8_t* ref, bool run) {
+  if (BLOSCLZ_UNLIKELY(run)) {
+#if defined(__AVX2__)
+    ip = get_run_32(ip, ip_bound, ref);
+#elif defined(__SSE2__)
+    ip = get_run_16(ip, ip_bound, ref);
+#else
+    ip = get_run(ip, ip_bound, ref);
+#endif
+  }
+  else {
+#if defined(__AVX2__)
+    ip = get_match_32(ip, ip_bound, ref);
+#elif defined(__SSE2__)
+    ip = get_match_16(ip, ip_bound, ref);
+#else
+    ip = get_match(ip, ip_bound, ref);
+#endif
+  }
+
+  return ip;
+}
+
+
+#define LITERAL(ip, op, op_limit, anchor, copy) {       \
+  if (BLOSCLZ_UNLIKELY(op + 2 > op_limit))              \
+    goto out;                                           \
+  *op++ = *anchor++;                                    \
+  ip = anchor;                                          \
+  copy++;                                               \
+  if (BLOSCLZ_UNLIKELY(copy == MAX_COPY)) {             \
+    copy = 0;                                           \
+    *op++ = MAX_COPY-1;                                 \
+  }                                                     \
+}
+
+#define LITERAL2(ip, oc, anchor, copy) {                \
+  oc++; anchor++;                                       \
+  ip = anchor;                                          \
+  copy++;                                               \
+  if (BLOSCLZ_UNLIKELY(copy == MAX_COPY)) {             \
+    copy = 0;                                           \
+    oc++;                                               \
+  }                                                     \
+}
+
+#define DISTANCE_SHORT(op, op_limit, len, distance) {   \
+  if (BLOSCLZ_UNLIKELY(op + 2 > op_limit))              \
+    goto out;                                           \
+  *op++ = (uint8_t)((len << 5U) + (distance >> 8U));    \
+  *op++ = (uint8_t)((distance & 255U));                 \
+}
+
+#define DISTANCE_LONG(op, op_limit, len, distance) {    \
+  if (BLOSCLZ_UNLIKELY(op + 1 > op_limit))              \
+    goto out;                                           \
+  *op++ = (uint8_t)((7U << 5U) + (distance >> 8U));     \
+  for (len -= 7; len >= 255; len -= 255) {              \
+    if (BLOSCLZ_UNLIKELY(op + 1 > op_limit))            \
+      goto out;                                         \
+    *op++ = 255;                                        \
+  }                                                     \
+  if (BLOSCLZ_UNLIKELY(op + 2 > op_limit))              \
+    goto out;                                           \
+  *op++ = (uint8_t)len;                                 \
+  *op++ = (uint8_t)((distance & 255U));                 \
+}
+
+#define DISTANCE_SHORT_FAR(op, op_limit, len, distance) {   \
+  if (BLOSCLZ_UNLIKELY(op + 4 > op_limit))                  \
+    goto out;                                               \
+  *op++ = (uint8_t)((len << 5U) + 31);                      \
+  *op++ = 255;                                              \
+  *op++ = (uint8_t)(distance >> 8U);                        \
+  *op++ = (uint8_t)(distance & 255U);                       \
+}
+
+#define DISTANCE_LONG_FAR(op, op_limit, len, distance) {    \
+  if (BLOSCLZ_UNLIKELY(op + 1 > op_limit))                  \
+    goto out;                                               \
+  *op++ = (7U << 5U) + 31;                                  \
+  for (len -= 7; len >= 255; len -= 255) {                  \
+    if (BLOSCLZ_UNLIKELY(op + 1 > op_limit))                \
+      goto out;                                             \
+    *op++ = 255;                                            \
+  }                                                         \
+  if (BLOSCLZ_UNLIKELY(op + 4 > op_limit))                  \
+    goto out;                                               \
+  *op++ = (uint8_t)len;                                     \
+  *op++ = 255;                                              \
+  *op++ = (uint8_t)(distance >> 8U);                        \
+  *op++ = (uint8_t)(distance & 255U);                       \
+}
+
+
+// Get the compressed size of a buffer.  Useful for testing compression ratios for high clevels.
+static int get_csize(uint8_t* ibase, int maxlen, bool force_3b_shift) {
+  uint8_t* ip = ibase;
+  int32_t oc = 0;
+  uint8_t* ip_bound = ibase + maxlen - 1;
+  uint8_t* ip_limit = ibase + maxlen - 12;
+  uint32_t htab[1U << (uint8_t)HASH_LOG];
+  uint32_t hval;
+  uint32_t seq;
+  uint8_t copy;
+
+  // Initialize the hash table to distances of 0
+  for (unsigned i = 0; i < (1U << HASH_LOG); i++) {
+    htab[i] = 0;
+  }
+
+  /* we start with literal copy */
+  copy = 4;
+  oc += 5;
+
+  /* main loop */
+  while (BLOSCLZ_LIKELY(ip < ip_limit)) {
+    const uint8_t* ref;
+    unsigned distance;
+    uint8_t* anchor = ip;    /* comparison starting-point */
+
+    /* find potential match */
+    seq = BLOSCLZ_READU32(ip);
+    HASH_FUNCTION(hval, seq, HASH_LOG)
+    ref = ibase + htab[hval];
+
+    /* calculate distance to the match */
+    distance = anchor - ref;
+
+    /* update hash table */
+    htab[hval] = (uint32_t) (anchor - ibase);
+
+    if (distance == 0 || (distance >= MAX_FARDISTANCE)) {
+      LITERAL2(ip, oc, anchor, copy)
+      continue;
+    }
+
+    /* is this a match? check the first 4 bytes */
+    if (BLOSCLZ_UNLIKELY(BLOSCLZ_READU32(ref) == BLOSCLZ_READU32(ip))) {
+      ref += 4;
+    }
+    else {
+      /* no luck, copy as a literal */
+      LITERAL2(ip, oc, anchor, copy)
+      continue;
+    }
+
+    /* last matched byte */
+    ip = anchor + 4;
+
+    /* distance is biased */
+    distance--;
+
+    /* get runs or matches; zero distance means a run */
+    ip = get_run_or_match(ip, ip_bound, ref, !distance);
+
+    ip -= force_3b_shift ? 3 : 4;
+    unsigned len = (int)(ip - anchor);
+    // If match is close, let's reduce the minimum length to encode it
+    unsigned minlen = (distance < MAX_DISTANCE) ? 3 : 4;
+    // Encoding short lengths is expensive during decompression
+    if (len < minlen) {
+      LITERAL2(ip, oc, anchor, copy)
+      continue;
+    }
+
+    /* if we haven't copied anything, adjust the output counter */
+    if (!copy)
+      oc--;
+    /* reset literal counter */
+    copy = 0;
+
+    /* encode the match */
+    if (distance < MAX_DISTANCE) {
+      if (len >= 7) {
+        oc += ((len - 7) / 255) + 1;
+      }
+      oc += 2;
+    }
+    else {
+      /* far away, but not yet in another galaxy... */
+      if (len >= 7) {
+        oc += ((len - 7) / 255) + 1;
+      }
+      oc += 4;
+    }
+
+    /* update the hash at match boundary */
+    seq = BLOSCLZ_READU32(ip);
+    HASH_FUNCTION(hval, seq, HASH_LOG)
+    htab[hval] = (uint32_t) (ip++ - ibase);
+    seq >>= 8U;
+    HASH_FUNCTION(hval, seq, HASH_LOG)
+    htab[hval] = (uint32_t) (ip++ - ibase);
+    /* assuming literal copy */
+    oc++;
+
+  }
+
+  /* if no literal is pending, take back the byte counted for the next copy length */
+  if (!copy)
+    oc--;
+
+  return (int)oc;
+}
+
+
 int blosclz_compress(const int clevel, const void* input, int length,
                      void* output, int maxout) {
   uint8_t* ibase = (uint8_t*)input;
   uint8_t* ip = ibase;
-  uint8_t* icycle = ibase;
-  uint8_t* ip_bound = ibase + length - IP_BOUNDARY;
+  uint8_t* ip_bound = ibase + length - 1;
   uint8_t* ip_limit = ibase + length - 12;
   uint8_t* op = (uint8_t*)output;
-  uint8_t* ocycle = op;
   uint8_t* op_limit;
   uint32_t htab[1U << (uint8_t)HASH_LOG];
   uint32_t hval;
   uint32_t seq;
   uint8_t copy;
-  long skip_cycle = 0;
-  double cratio;
 
-  // Minimum cratios before issuing and _early giveup_
-  // Remind that blosclz is not meant for cratios <= 2 (too costly to decompress)
-  double maxlength_[10] = {-1, .07, .1, .2, .4, .5, .5, .5, .5, .6};
-  int32_t maxlength = (int32_t)(length * maxlength_[clevel]);
-  if (maxlength > (int32_t)maxout) {
-    maxlength = (int32_t)maxout;
-  }
-  op_limit = op + maxlength;
-
-  // The maximum amount of cycles to skip match lookups
-  // A 0 means just _early giveup_ whereas > 0 use _entropy sensing_ too
-  long max_skip_cycles_[10] = {255, 0, 0, 0, 3, 2, 2, 1, 1, 0};
-  long max_skip_cycles = max_skip_cycles_[clevel];
-  // The minimum compression ratio before skipping a number of cycles
-  double min_cratio_[10] = {-1, 0., 0., 0., 5., 4., 4., 3., 2., 1.};
-  double min_cratio = min_cratio_[clevel];
+  op_limit = op + maxout;
+
+  // Minimum lengths for encoding
+  unsigned minlen_[10] = {0, 12, 12, 11, 10, 9, 8, 7, 6, 5};
+
+  // Minimum compression ratios required to initiate encoding
+  double cratio_[10] = {0, 2, 2, 2, 2, 1.8, 1.6, 1.4, 1.2, 1.1};
 
   uint8_t hashlog_[10] = {0, HASH_LOG - 2, HASH_LOG - 1, HASH_LOG, HASH_LOG,
-                           HASH_LOG, HASH_LOG, HASH_LOG, HASH_LOG, HASH_LOG};
+                          HASH_LOG, HASH_LOG, HASH_LOG, HASH_LOG, HASH_LOG};
   uint8_t hashlog = hashlog_[clevel];
   // Initialize the hash table to distances of 0
   for (unsigned i = 0; i < (1U << hashlog); i++) {
@@ -371,50 +540,74 @@
     return 0;
   }
 
+  /* When we go back in a match (shift), we obtain quite different compression properties.
+   * It looks like 4 is more useful in combination with bitshuffle and small typesizes
+   * (compress better and faster in e.g. `b2bench blosclz bitshuffle single 6 6291456 1 19`).
+   * Fallback to 4 because it provides more consistent results on small itemsizes.
+   *
+   * In this block we also check cratios for the beginning of the buffers and
+   * eventually discard those that are small (take too long to decompress).
+   * This process is called _entropy probing_.
+   */
+  int ipshift = 4;
+  int maxlen;  // maximum length for entropy probing
+  int csize_3b;
+  int csize_4b;
+  double cratio = 0;
+  switch (clevel) {
+    case 1:
+    case 2:
+    case 3:
+      maxlen = length / 8;
+      csize_4b = get_csize(ibase, maxlen, false);
+      cratio = (double)maxlen / csize_4b;
+      break;
+    case 4:
+    case 5:
+    case 6:
+    case 7:
+    case 8:
+      maxlen = length / 8;
+      csize_4b = get_csize(ibase, maxlen, false);
+      cratio = (double)maxlen / csize_4b;
+      break;
+    case 9:
+      // case 9 is special.  we need to assess the optimal shift
+      maxlen = length / 8;
+      csize_3b = get_csize(ibase, maxlen, true);
+      csize_4b = get_csize(ibase, maxlen, false);
+      ipshift = (csize_3b < csize_4b) ? 3 : 4;
+      cratio = (csize_3b < csize_4b) ? ((double)maxlen / csize_3b) : ((double)maxlen / csize_4b);
+      break;
+    default:
+      break;
+  }
+  // discard probes with small compression ratios (too expensive)
+  if (cratio < cratio_ [clevel]) {
+    goto out;
+  }
+
   /* we start with literal copy */
-  copy = 2;
+  copy = 4;
   *op++ = MAX_COPY - 1;
   *op++ = *ip++;
   *op++ = *ip++;
+  *op++ = *ip++;
+  *op++ = *ip++;
 
   /* main loop */
-  while (BLOSCLZ_EXPECT_CONDITIONAL(ip < ip_limit)) {
+  while (BLOSCLZ_LIKELY(ip < ip_limit)) {
     const uint8_t* ref;
-    uint32_t distance;
-    uint32_t len = 4;         /* minimum match length */
+    unsigned distance;
     uint8_t* anchor = ip;    /* comparison starting-point */
 
-    if (BLOSCLZ_EXPECT_CONDITIONAL(max_skip_cycles)) {
-      // Enter the entropy probing mode
-      if (skip_cycle) {
-        LITERAL(ip, op, op_limit, anchor, copy)
-        // Start a new cycle every 256 bytes
-        if (BLOSCLZ_UNEXPECT_CONDITIONAL(ip - icycle) >= BYTES_IN_CYCLE) {
-          skip_cycle--;
-          icycle = ip;
-          ocycle = op;
-        }
-        continue;
-      }
-      // Check whether we are doing well with compression ratios
-      if (BLOSCLZ_UNEXPECT_CONDITIONAL((op - ocycle) >= BYTES_IN_CYCLE)) {
-        cratio = (double) (ip - icycle) / (double) (op - ocycle);
-        if (cratio < min_cratio) {
-          skip_cycle = max_skip_cycles;
-          icycle = ip;
-          ocycle = op;
-          continue;
-        }
-      }
-    }
-
     /* find potential match */
     seq = BLOSCLZ_READU32(ip);
     HASH_FUNCTION(hval, seq, hashlog)
     ref = ibase + htab[hval];
 
     /* calculate distance to the match */
-    distance = (int32_t)(anchor - ref);
+    distance = anchor - ref;
 
     /* update hash table */
     htab[hval] = (uint32_t) (anchor - ibase);
@@ -425,39 +618,35 @@
     }
 
     /* is this a match? check the first 4 bytes */
-    if (BLOSCLZ_UNEXPECT_CONDITIONAL(BLOSCLZ_READU32(ref) == BLOSCLZ_READU32(ip))) {
+    if (BLOSCLZ_UNLIKELY(BLOSCLZ_READU32(ref) == BLOSCLZ_READU32(ip))) {
       ref += 4;
-    }
-    else {
+    } else {
       /* no luck, copy as a literal */
       LITERAL(ip, op, op_limit, anchor, copy)
       continue;
     }
 
     /* last matched byte */
-    ip = anchor + len;
+    ip = anchor + 4;
 
     /* distance is biased */
     distance--;
 
-    if (BLOSCLZ_UNEXPECT_CONDITIONAL(!distance)) {
-      /* zero distance means a run */
-#if defined(__AVX2__)
-      ip = get_run_32(ip, ip_bound, ref);
-#elif defined(__SSE2__)
-      ip = get_run_16(ip, ip_bound, ref);
-#else
-      ip = get_run(ip, ip_bound, ref);
-#endif
-    }
-    else {
-#if defined(__AVX2__)
-      ip = get_match_32(ip, ip_bound + IP_BOUNDARY, ref);
-#elif defined(__SSE2__)
-      ip = get_match_16(ip, ip_bound + IP_BOUNDARY, ref);
-#else
-      ip = get_match(ip, ip_bound + IP_BOUNDARY, ref);
-#endif
+    /* get runs or matches; zero distance means a run */
+    ip = get_run_or_match(ip, ip_bound, ref, !distance);
+
+    /* length is biased, '1' means a match of 3 bytes */
+    ip -= ipshift;
+
+    unsigned len = (int)(ip - anchor);
+    // Minimum match length worth encoding: the shift at clevel 9, a per-level table value otherwise
+    unsigned minlen = (clevel == 9) ? ipshift : minlen_[clevel];
+
+    // Encoding short lengths is expensive during decompression
+    // Encode only for reasonable lengths (extensive experiments done)
+    if (len < minlen || (len <= 5 && distance >= MAX_DISTANCE)) {
+      LITERAL(ip, op, op_limit, anchor, copy)
+      continue;
     }
 
     /* if we have copied something, adjust the copy count */
@@ -467,46 +656,23 @@
     else
       /* back, to overwrite the copy count */
       op--;
-
     /* reset literal counter */
     copy = 0;
 
-    /* length is biased, '1' means a match of 3 bytes */
-    ip -= 3;
-    len = (int32_t)(ip - anchor);
-
     /* encode the match */
     if (distance < MAX_DISTANCE) {
       if (len < 7) {
-        *op++ = (uint8_t)((len << 5U) + (distance >> 8U));
-        *op++ = (uint8_t)((distance & 255U));
-      }
-      else {
-        *op++ = (uint8_t)((7U << 5U) + (distance >> 8U));
-        for (len -= 7; len >= 255; len -= 255)
-          *op++ = 255;
-        *op++ = (uint8_t)len;
-        *op++ = (uint8_t)((distance & 255U));
+        DISTANCE_SHORT(op, op_limit, len, distance)
+      } else {
+        DISTANCE_LONG(op, op_limit, len, distance)
       }
-    }
-    else {
+    } else {
       /* far away, but not yet in the another galaxy... */
+      distance -= MAX_DISTANCE;
       if (len < 7) {
-        distance -= MAX_DISTANCE;
-        *op++ = (uint8_t)((len << 5U) + 31);
-        *op++ = 255;
-        *op++ = (uint8_t)(distance >> 8U);
-        *op++ = (uint8_t)(distance & 255U);
-      }
-      else {
-        distance -= MAX_DISTANCE;
-        *op++ = (7U << 5U) + 31;
-        for (len -= 7; len >= 255; len -= 255)
-          *op++ = 255;
-        *op++ = (uint8_t)len;
-        *op++ = 255;
-        *op++ = (uint8_t)(distance >> 8U);
-        *op++ = (uint8_t)(distance & 255U);
+        DISTANCE_SHORT_FAR(op, op_limit, len, distance)
+      } else {
+        DISTANCE_LONG_FAR(op, op_limit, len, distance)
       }
     }
 
@@ -518,17 +684,18 @@
     HASH_FUNCTION(hval, seq, hashlog)
     htab[hval] = (uint32_t) (ip++ - ibase);
     /* assuming literal copy */
-    *op++ = MAX_COPY - 1;
 
+    if (BLOSCLZ_UNLIKELY(op + 1 > op_limit))
+      goto out;
+    *op++ = MAX_COPY - 1;
   }
 
   /* left-over as literal copy */
-  ip_bound++;
-  while (BLOSCLZ_UNEXPECT_CONDITIONAL(ip <= ip_bound)) {
-    if (BLOSCLZ_UNEXPECT_CONDITIONAL(op + 2 > op_limit)) goto out;
+  while (BLOSCLZ_UNLIKELY(ip <= ip_bound)) {
+    if (BLOSCLZ_UNLIKELY(op + 2 > op_limit)) goto out;
     *op++ = *ip++;
     copy++;
-    if (BLOSCLZ_UNEXPECT_CONDITIONAL(copy == MAX_COPY)) {
+    if (BLOSCLZ_UNLIKELY(copy == MAX_COPY)) {
       copy = 0;
       *op++ = MAX_COPY - 1;
     }
@@ -547,7 +714,6 @@
 
   out:
   return 0;
-
 }
 
 // See https://habr.com/en/company/yandex/blog/457612/
@@ -605,6 +771,14 @@
 }
 #endif
 
+// LZ4 wildCopy which can reach excellent copy bandwidth (even if insecure)
+static inline void wild_copy(uint8_t *out, const uint8_t* from, uint8_t* end) {
+  uint8_t* d = out;
+  const uint8_t* s = from;
+  uint8_t* const e = end;
+
+  do { memcpy(d,s,8); d+=8; s+=8; } while (d<e);
+}
 
 int blosclz_decompress(const void* input, int length, void* output, int maxout) {
   const uint8_t* ip = (const uint8_t*)input;
@@ -612,23 +786,22 @@
   uint8_t* op = (uint8_t*)output;
   uint32_t ctrl;
   uint8_t* op_limit = op + maxout;
-  if (BLOSCLZ_UNEXPECT_CONDITIONAL(length == 0)) {
+  if (BLOSCLZ_UNLIKELY(length == 0)) {
     return 0;
   }
   ctrl = (*ip++) & 31U;
 
   while (1) {
-    uint8_t* ref = op;
-    int32_t len = ctrl >> 5U;
-    int32_t ofs = (ctrl & 31U) << 8U;
-
     if (ctrl >= 32) {
+      // match
+      int32_t len = (ctrl >> 5U) - 1 ;
+      int32_t ofs = (ctrl & 31U) << 8U;
       uint8_t code;
-      len--;
-      ref -= ofs;
+      const uint8_t* ref = op - ofs;
+
       if (len == 7 - 1) {
         do {
-          if (BLOSCLZ_UNEXPECT_CONDITIONAL(ip + 1 >= ip_limit)) {
+          if (BLOSCLZ_UNLIKELY(ip + 1 >= ip_limit)) {
             return 0;
           }
           code = *ip++;
@@ -636,17 +809,18 @@
         } while (code == 255);
       }
       else {
-        if (BLOSCLZ_UNEXPECT_CONDITIONAL(ip + 1 >= ip_limit)) {
+        if (BLOSCLZ_UNLIKELY(ip + 1 >= ip_limit)) {
           return 0;
         }
       }
       code = *ip++;
+      len += 3;
       ref -= code;
 
       /* match from 16-bit distance */
-      if (BLOSCLZ_UNEXPECT_CONDITIONAL(code == 255)) {
-        if (BLOSCLZ_EXPECT_CONDITIONAL(ofs == (31U << 8U))) {
-          if (BLOSCLZ_UNEXPECT_CONDITIONAL(ip + 1 >= ip_limit)) {
+      if (BLOSCLZ_UNLIKELY(code == 255)) {
+        if (ofs == (31U << 8U)) {
+          if (ip + 1 >= ip_limit) {
             return 0;
           }
           ofs = (*ip++) << 8U;
@@ -655,29 +829,30 @@
         }
       }
 
-      if (BLOSCLZ_UNEXPECT_CONDITIONAL(op + len + 3 > op_limit)) {
+      if (BLOSCLZ_UNLIKELY(op + len > op_limit)) {
         return 0;
       }
 
-      if (BLOSCLZ_UNEXPECT_CONDITIONAL(ref - 1 < (uint8_t*)output)) {
+      if (BLOSCLZ_UNLIKELY(ref - 1 < (uint8_t*)output)) {
         return 0;
       }
 
-      if (BLOSCLZ_EXPECT_CONDITIONAL(ip < ip_limit))
-        ctrl = *ip++;
-      else
-        break;
+      if (BLOSCLZ_UNLIKELY(ip >= ip_limit)) break;
+      ctrl = *ip++;
 
-      if (ref == op) {
+      ref--;
+      if (ref == op - 1) {
         /* optimized copy for a run */
-        uint8_t b = ref[-1];
-        memset(op, b, len + 3);
-        op += len + 3;
+        memset(op, *ref, len);
+        op += len;
+      }
+      else if ((op - ref >= 8) && (op_limit - op >= len + 8)) {
+        // ref is at least 8 bytes behind op and the output has 8 bytes of slack: copy in 8-byte chunks
+        wild_copy(op, ref, op + len);
+        op += len;
       }
       else {
-        /* copy from reference */
-        ref--;
-        len += 3;
+        // general copy with any overlap
 #ifdef __AVX2__
         if (op - ref <= 16) {
           // This is not faster on a combination of compilers (clang, gcc, icc) or machines, but
@@ -686,7 +861,6 @@
         }
         else {
 #endif
-          // We absolutely need a copy_match here
           op = copy_match(op, ref, (unsigned) len);
 #ifdef __AVX2__
         }
@@ -694,21 +868,23 @@
       }
     }
     else {
+      // literal
       ctrl++;
-      if (BLOSCLZ_UNEXPECT_CONDITIONAL(op + ctrl > op_limit)) {
+      if (BLOSCLZ_UNLIKELY(op + ctrl > op_limit)) {
         return 0;
       }
-      if (BLOSCLZ_UNEXPECT_CONDITIONAL(ip + ctrl > ip_limit)) {
+      if (BLOSCLZ_UNLIKELY(ip + ctrl > ip_limit)) {
         return 0;
       }
 
-      // memcpy(op, ip, ctrl); op += ctrl; ip += ctrl;
+      memcpy(op, ip, ctrl); op += ctrl; ip += ctrl;
       // On GCC-6, fastcopy this is still faster than plain memcpy
       // However, using recent CLANG/LLVM 9.0, there is almost no difference
       // in performance.
-      op = fastcopy(op, ip, (unsigned) ctrl); ip += ctrl;
+      // And starting on CLANG/LLVM 10 and GCC 9, memcpy is generally faster.
+      // op = fastcopy(op, ip, (unsigned) ctrl); ip += ctrl;
 
-      if (BLOSCLZ_UNEXPECT_CONDITIONAL(ip >= ip_limit)) break;
+      if (BLOSCLZ_UNLIKELY(ip >= ip_limit)) break;
       ctrl = *ip++;
     }
   }
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/c-blosc-1.19.0/blosc/shuffle.c new/c-blosc-1.20.1/blosc/shuffle.c
--- old/c-blosc-1.19.0/blosc/shuffle.c	2020-06-05 11:09:21.000000000 +0200
+++ new/c-blosc-1.20.1/blosc/shuffle.c	2020-09-08 17:23:32.000000000 +0200
@@ -195,6 +195,10 @@
   return ((uint64_t)edx << 32) | eax;
 }
 
+#else
+
+#define blosc_internal_xgetbv _xgetbv
+
 #endif  // !(defined(_IMMINTRIN_H_INCLUDED) && (BLOSC_GCC_VERSION >= 900))
 #endif  /* defined(_MSC_FULL_VER) */
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/c-blosc-1.19.0/code_of_conduct.md new/c-blosc-1.20.1/code_of_conduct.md
--- old/c-blosc-1.19.0/code_of_conduct.md	1970-01-01 01:00:00.000000000 +0100
+++ new/c-blosc-1.20.1/code_of_conduct.md	2020-09-08 17:23:32.000000000 +0200
@@ -0,0 +1,5 @@
+# Code of Conduct
+
+The Blosc community has adopted a Code of Conduct that we expect project participants to adhere to.
+Please read the [full text](https://github.com/Blosc/community/blob/master/code_of_conduct.md)
+so that you can understand what actions will and will not be tolerated.
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/c-blosc-1.19.0/internal-complibs/zlib-1.2.8/gzguts.h new/c-blosc-1.20.1/internal-complibs/zlib-1.2.8/gzguts.h
--- old/c-blosc-1.19.0/internal-complibs/zlib-1.2.8/gzguts.h	2020-06-05 11:09:21.000000000 +0200
+++ new/c-blosc-1.20.1/internal-complibs/zlib-1.2.8/gzguts.h	2020-09-08 17:23:32.000000000 +0200
@@ -3,6 +3,10 @@
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
+#ifndef _WIN32
+  #include <unistd.h>
+#endif
+
 #ifdef _LARGEFILE64_SOURCE
 #  ifndef _LARGEFILE_SOURCE
 #    define _LARGEFILE_SOURCE 1
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/c-blosc-1.19.0/tests/fuzz/CMakeLists.txt new/c-blosc-1.20.1/tests/fuzz/CMakeLists.txt
--- old/c-blosc-1.19.0/tests/fuzz/CMakeLists.txt	1970-01-01 01:00:00.000000000 +0100
+++ new/c-blosc-1.20.1/tests/fuzz/CMakeLists.txt	2020-09-08 17:23:32.000000000 +0200
@@ -0,0 +1,62 @@
+# flags
+link_directories(${PROJECT_BINARY_DIR}/blosc)
+
+# look for fuzzing lib and link with it if found
+if(CMAKE_C_COMPILER_ID STREQUAL "Clang")
+    enable_language(CXX)
+
+    if(DEFINED ENV{LIB_FUZZING_ENGINE})
+        set(FUZZING_ENGINE $ENV{LIB_FUZZING_ENGINE})
+        set(FUZZING_ENGINE_FOUND TRUE)
+    else()
+        find_library(FUZZING_ENGINE "FuzzingEngine")
+    endif()
+endif()
+
+# If fuzzing lib not found then create standalone fuzz runner
+if(NOT FUZZING_ENGINE_FOUND)
+    set(FUZZER_SRC standalone.c)
+else()
+    set(FUZZER_SRC)
+endif()
+
+# sources
+file(GLOB SOURCES fuzz_*.c)
+
+# targets and tests
+foreach(source ${SOURCES})
+    get_filename_component(target ${source} NAME_WE)
+
+    # Enable support for testing accelerated shuffles
+    if(COMPILER_SUPPORT_SSE2)
+        # Define a symbol so tests for SSE2 shuffle/unshuffle will be compiled in.
+        set_property(
+            SOURCE ${source}
+            APPEND PROPERTY COMPILE_DEFINITIONS SHUFFLE_SSE2_ENABLED)
+    endif(COMPILER_SUPPORT_SSE2)
+#    if(COMPILER_SUPPORT_AVX2)
+#        # Define a symbol so tests for AVX2 shuffle/unshuffle will be compiled in.
+#        set_property(
+#            SOURCE ${source}
+#            APPEND PROPERTY COMPILE_DEFINITIONS SHUFFLE_AVX2_ENABLED)
+#    endif(COMPILER_SUPPORT_AVX2)
+
+    add_executable(${target} ${source} ${FUZZER_SRC})
+
+    # OSS-Fuzz expects fuzzers to end with _fuzzer
+    string(REPLACE "fuzz_" "" output_name ${target})
+    set_target_properties(${target} PROPERTIES OUTPUT_NAME ${output_name}_fuzzer)
+
+    if(FUZZING_ENGINE_FOUND)
+        set_target_properties(${target} PROPERTIES LINKER_LANGUAGE CXX)
+        target_link_libraries(${target} ${FUZZING_ENGINE})
+    endif()
+
+    target_link_libraries(${target} blosc_static)
+    add_dependencies(${target} blosc_static)
+
+    # run standalone fuzzer against each file
+    file(GLOB COMPAT_FILES ${PROJECT_SOURCE_DIR}/compat/*.cdata)
+    add_test(NAME ${target} COMMAND ${target} ${COMPAT_FILES})
+
+endforeach(source)
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/c-blosc-1.19.0/tests/fuzz/fuzz_compress.c new/c-blosc-1.20.1/tests/fuzz/fuzz_compress.c
--- old/c-blosc-1.19.0/tests/fuzz/fuzz_compress.c	1970-01-01 01:00:00.000000000 +0100
+++ new/c-blosc-1.20.1/tests/fuzz/fuzz_compress.c	2020-09-08 17:23:32.000000000 +0200
@@ -0,0 +1,65 @@
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "blosc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+  const char *compressors[] = { "blosclz", "lz4", "lz4hc", "snappy", "zlib", "zstd" };
+  int level = 9, filter = BLOSC_BITSHUFFLE, cindex = 0, i = 0;
+  size_t nbytes, cbytes, blocksize;
+  void *output, *input;
+
+  blosc_set_nthreads(1);
+
+  if (size > 0)
+    level = data[0] % (9 + 1);
+  if (size > 1)
+    filter = data[1] % (BLOSC_BITSHUFFLE + 1);
+  if (size > 2)
+    cindex = data[2];
+
+  /* Find next available compressor */
+  while (blosc_set_compressor(compressors[cindex % 6]) == -1 && i < 6) {
+    cindex++, i++;
+  }
+  if (i == 6) {
+    /* No compressors available */
+    return 0;
+  }
+
+  if (size > 3 && data[3] % 7 == 0)
+    blosc_set_blocksize(4096);
+
+  if (size > 4)
+    blosc_set_splitmode(data[4] % BLOSC_FORWARD_COMPAT_SPLIT + 1);
+
+  output = malloc(size + 1);
+  if (output == NULL)
+    return 0;
+
+  if (blosc_compress(level, filter, 1, size, data, output, size) == 0) {
+    /* Cannot compress src buffer into dest */
+    free(output);
+    return 0;
+  }
+
+  blosc_cbuffer_sizes(output, &nbytes, &cbytes, &blocksize);
+
+  input = malloc(cbytes);
+  if (input != NULL) {
+    blosc_decompress(output, input, cbytes);
+    free(input);
+  }
+
+  free(output);
+
+  return 0;
+}
+
+#ifdef __cplusplus
+}
+#endif
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/c-blosc-1.19.0/tests/fuzz/fuzz_decompress.c new/c-blosc-1.20.1/tests/fuzz/fuzz_decompress.c
--- old/c-blosc-1.19.0/tests/fuzz/fuzz_decompress.c	1970-01-01 01:00:00.000000000 +0100
+++ new/c-blosc-1.20.1/tests/fuzz/fuzz_decompress.c	2020-09-08 17:23:32.000000000 +0200
@@ -0,0 +1,41 @@
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "blosc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+  size_t nbytes, cbytes, blocksize;
+  void *output;
+
+  if (size < BLOSC_MIN_HEADER_LENGTH) {
+    return 0;
+  }
+
+  blosc_cbuffer_sizes(data, &nbytes, &cbytes, &blocksize);
+  if (cbytes != size) {
+    return 0;
+  }
+  if (nbytes == 0) {
+    return 0;
+  }
+  
+  if (blosc_cbuffer_validate(data, size, &nbytes) != 0) {
+    /* Unexpected nbytes specified in blosc header */
+    return 0;
+  }
+
+  output = malloc(cbytes);
+  if (output != NULL) {
+    blosc_decompress(data, output, cbytes);
+    free(output);
+  }
+  return 0;
+}
+
+#ifdef __cplusplus
+}
+#endif
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/c-blosc-1.19.0/tests/fuzz/standalone.c new/c-blosc-1.20.1/tests/fuzz/standalone.c
--- old/c-blosc-1.19.0/tests/fuzz/standalone.c	1970-01-01 01:00:00.000000000 +0100
+++ new/c-blosc-1.20.1/tests/fuzz/standalone.c	2020-09-08 17:23:32.000000000 +0200
@@ -0,0 +1,44 @@
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+extern int LLVMFuzzerTestOneInput(const unsigned char *data, size_t size);
+
+int main(int argc, char **argv) {
+  int i;
+  fprintf(stderr, "Running %d inputs\n", argc - 1);
+
+  for (i = 1; i < argc; i++) {
+    size_t len, err, n_read = 0;
+    unsigned char *buf;
+    FILE *f = NULL;
+
+    f = fopen(argv[i], "rb+");
+    if (f == NULL) {
+      /* Failed to open this file: it may be a directory. */
+      fprintf(stderr, "Skipping: %s\n", argv[i]);
+      continue;
+    }
+    fprintf(stderr, "Running: %s %s\n", argv[0], argv[i]);
+
+    fseek(f, 0, SEEK_END);
+    len = ftell(f);
+    fseek(f, 0, SEEK_SET);
+
+    buf = (unsigned char *)malloc(len);
+    if (buf != NULL) {
+      n_read = fread(buf, 1, len, f);
+      assert(n_read == len);
+      LLVMFuzzerTestOneInput(buf, len);
+      free(buf);
+    }
+
+    err = fclose(f);
+    assert(err == 0);
+    (void)err;
+
+    fprintf(stderr, "Done:    %s: (%d bytes)\n", argv[i], (int)n_read);
+  }
+
+  return 0;
+}

    

User for buildservice source handling

tags

participants (1)