Hello community,
here is the log from the commit of package tensorflow for openSUSE:Factory checked in at 2019-07-22 12:20:01
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/tensorflow (Old)
and /work/SRC/openSUSE:Factory/.tensorflow.new.4126 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "tensorflow"
Mon Jul 22 12:20:01 2019 rev:5 rq:716088 version:1.13.1
Changes:
--------
--- /work/SRC/openSUSE:Factory/tensorflow/tensorflow.changes 2019-01-21 10:51:01.095970939 +0100
+++ /work/SRC/openSUSE:Factory/.tensorflow.new.4126/tensorflow.changes 2019-07-22 12:20:09.555668006 +0200
@@ -1,0 +2,93 @@
+Wed Jul 17 08:18:34 UTC 2019 - Christian Goll
+
+- fixed installation location of shared library
+
+-------------------------------------------------------------------
+Mon Jul 8 14:04:17 UTC 2019 - Christian Goll
+
+- removed the bazel mirror from as many source links as possible
+- added support-new-bazel.patch, which supports newer upcoming bazel
+  versions
+
+-------------------------------------------------------------------
+Tue Jun 4 14:16:10 UTC 2019 - Guillaume GARDET
+
+- Fix build for lite flavor:
+ * tensorflow-fix_lite.patch
+
+-------------------------------------------------------------------
+Wed May 29 16:11:36 UTC 2019 - Guillaume GARDET
+
+- Call ldconfig for devel package in post/postun
+
+-------------------------------------------------------------------
+Mon May 27 15:00:28 UTC 2019 - Guillaume GARDET
+
+- Fix aarch64 build with upstream patch:
+ * tensorflow-make_aws_sdk_work_on_aarch64.patch
+
+-------------------------------------------------------------------
+Mon May 27 04:08:54 UTC 2019 - Guillaume GARDET
+
+- Add Lite flavor
+
+-------------------------------------------------------------------
+Fri Apr 26 08:27:55 UTC 2019 - Christian Goll
+
+- updated to 1.13.1 fixes boo#1133490
+
+-------------------------------------------------------------------
+Fri Mar 29 13:06:28 UTC 2019 - Guillaume GARDET
+
+- Update _constraints to avoid OOM errors
+
+-------------------------------------------------------------------
+Fri Mar 29 08:18:09 UTC 2019 - Guillaume GARDET
+
+- Build and package libtensorflow_cc and libtensorflow_framework
+
+-------------------------------------------------------------------
+Tue Mar 19 15:40:25 UTC 2019 - Christian Goll
+
+- added fix_mvapich_mpi_bzl.patch which fixes detection of
+ mvapich2 mpi library
+- fixed python3 build
+
+-------------------------------------------------------------------
+Tue Mar 12 20:33:56 UTC 2019 - Adrian Schröter
+
+- update to version 1.13.1
+ * Major Features and Improvements
+ * TensorFlow Lite has moved from contrib to core. This means that Python modules are under tf.lite and source code is now under tensorflow/lite rather than tensorflow/contrib/lite.
+ * TensorFlow GPU binaries are now built against CUDA 10 and TensorRT 5.0.
+ * Support for Python3.7 on all operating systems.
+ * Moved NCCL to core.
+- drop merged patch mpilibpath_configure_py.patch
+- drop obsolete python3.7 patches
+- disabled jemalloc for now
+
+-------------------------------------------------------------------
+Tue Feb 12 08:39:57 UTC 2019 - cgoll@suse.com
+
+- enabled aws and googlecloud support
+ * removed no_aws_and_googlecloud.patch
+
+-------------------------------------------------------------------
+Mon Feb 11 16:27:20 UTC 2019 - Christian Goll
+
+- Fixed build issues with python 3.7, which introduced the patches
+ * python3_7_compatibility.patch backported from upstream
+ * python3.7_unicode.patch fixes a minor function call
+ * python3.7_async_keyword.patch avoids the new keyword async
+
+-------------------------------------------------------------------
+Thu Jan 31 11:44:21 UTC 2019 - Bernhard Wiedemann
+
+- Fix build with python 3.7
+
+-------------------------------------------------------------------
+Fri Jan 18 16:45:48 UTC 2019 - Guillaume GARDET
+
+- Build and package libtensorflow.so as some packages may link to it
+
+-------------------------------------------------------------------
Old:
----
mpilibpath_configure_py.patch
no_aws_and_googlecloud.patch
protobuf_v3.6.0.tar.gz
re2-2018-04-01.tar.gz
tensorflow-1.10.0.tar.gz
New:
----
816a4ae622e964763ca0862d9dbd19324a1eaf45.tar.gz
aws-sdk-cpp-1.3.15.tar.gz
bazel-toolchains.tar.gz
fft.tgz
fix_mvapich_mpi_bzl.patch
google-cloud-cpp.tar.gz
google-flatbuffers-1.10.0~pre.tar.gz
google-nsync-1.20.1.tar.gz
grpc.tar.gz
kafka-v0.11.5.tar.gz
keras-applications-1.0.6.tar.gz
keras-preprocessing-1.0.9.tar.gz
license.rst.txt
master.zip
nanopb.tar.gz
protobuf_v3.6.1.2.tar.gz
re2-2018-10-01.tar.gz
release-1.8.0.tar.gz
rules_docker.tar.gz
support-new-bazel.patch
tensorflow-1.13.1.tar.gz
tensorflow-fix_lite.patch
tensorflow-make_aws_sdk_work_on_aarch64.patch
unicode-org-icu.tar.gz
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Other differences:
------------------
++++++ tensorflow.spec ++++++
--- /var/tmp/diff_new_pack.3GoBTM/_old 2019-07-22 12:20:14.283666766 +0200
+++ /var/tmp/diff_new_pack.3GoBTM/_new 2019-07-22 12:20:14.287666765 +0200
@@ -18,18 +18,29 @@
#
%define pname tensorflow
-%define vers 1.10.0
-%define _vers 1_10_10
-%define python_ver_hack python3.6
+%define vers 1.13.1
+%define _vers 1_13_1
+%define python_ver_hack python3.[0-9]
%global flavor @BUILD_FLAVOR@%{nil}
+# Build tensorflow, not Tensorflow-lite
+%define is_lite 0
+
%if "%{flavor}" == "standard"
%bcond_with cuda
%bcond_with mpi
%bcond_with opencl
%endif
+%if "%{flavor}" == "lite"
+%define is_lite 1
+%bcond_with cuda
+%bcond_with mpi
+%bcond_with opencl
+%define package_suffix -lite
+%endif
+
%if "%{flavor}" == "hpc"
%bcond_with cuda
%bcond_with mpi
@@ -129,41 +140,48 @@
%{!?compiler_family:%global compiler_family gnu}
%{hpc_init -c %compiler_family %{?with_mpi:-m %mpi_flavor} %{?c_f_ver:-v %{c_f_ver}} %{?mpi_ver:-V %{mpi_ver}} %{?ext:-e %{ext}}}
%{?with_mpi:%global hpc_module_pname p%{pname}}
+%define python_flavor python3
%define package_name %{hpc_package_name %_vers}
%define libname(l:s:) lib%{pname}%{-l*}%{hpc_package_name_tail %{?_vers}}
%define package_python_sitearch %hpc_python_sitearch
%define package_python_sitelib %{hpc_prefix}/lib64/%{python_ver_hack}/site-packages/
%define package_prefix %hpc_prefix
%define package_bindir %hpc_bindir
+%define package_libdir %hpc_libdir
%else
%define package_name %pname%{?package_suffix}
%define package_python_sitearch %{python3_sitearch}
%define package_python_sitelib %{python3_sitelib}
%define package_prefix %_prefix
%define package_bindir %_bindir
+%define package_libdir %_libdir
%define libname(l:s:) lib%{pname}%{!-l:%{-s:-}}%{-l*}%{-s*}%{?package_suffix}
%endif
Name: %{package_name}
Version: %vers
Release: 0
-#Release: 1%{?config_dependant}%{?dist}
Summary: A framework used for deep learning
License: Apache-2.0 AND BSD-2-Clause AND BSD-3-Clause AND FSFUL AND MIT AND MPL-2.0 AND OpenSSL AND Python-2.0
Group: Development/Languages/Python
Url: https://www.tensorflow.org/
Source0: https://github.com/tensorflow/tensorflow/archive/v%{version}.tar.gz#/tensorflow-%{version}.tar.gz
Source1: tensorflow-rpmlintrc
+# IMPORTANT
+# although some of the following libraries are available in factory they could
+# not be used as
+# * explicit versions are needed which differ from the factory ones
+# * bazel and the obs version have different symbols due to hidden compiler flags
# License10: Apache-2.0
Source10: https://github.com/bazelbuild/rules_closure/archive/dbb96841cc0a5fb2664c3782...
# License11: BSD-3-Clause
-Source11: https://mirror.bazel.build/github.com/google/protobuf/archive/v3.6.0.tar.gz#...
+Source11: https://github.com/protocolbuffers/protobuf/archive/v3.6.1.2.tar.gz#/protobu...
# License12: Python-2.0
Source12: https://pypi.python.org/packages/bc/cc/3cdb0a02e7e96f6c70bd971bc8a90b8463fda...
# License13: BSD-3-Clause
Source13: https://github.com/google/double-conversion/archive/3992066a95b823efc8ccc1ba...
# License14: BSD-3-Clause
-Source14: https://mirror.bazel.build/pypi.python.org/packages/5c/78/ff794fcae2ce8aa632...
+Source14: https://pypi.python.org/packages/5c/78/ff794fcae2ce8aa6323e789d1f8b3b7765f60...
# License15: MIT
Source15: https://github.com/google/farmhash/archive/816a4ae622e964763ca0862d9dbd19324...
# License16: Apache-2.0
@@ -173,37 +191,73 @@
# License18: BSD-3-Clause
Source18: https://github.com/hfp/libxsmm/archive/1.9.tar.gz#/libxsmm_1.9.tar.gz
# License19: Apache-2.0
-Source19: https://github.com/abseil/abseil-cpp/archive/9613678332c976568272c8f4a78631a...
-# License20: BSD-2-Clause
-# License21: OpenSSL and ISC and Intel
-Source20: https://github.com/google/boringssl/archive/a0fb951d2a26a8ee746b52f3ba81ab01...
-# License22: Apache-2.0
+Source19: https://github.com/abseil/abseil-cpp/archive/389ec3f906f018661a5308458d623d0...
+# License20: OpenSSL and ISC and Intel
+Source20: https://github.com/google/boringssl/archive/7f634429a04abc48e2eb041c81c52358...
+# License21: Apache-2.0
Source21: https://github.com/googleapis/googleapis/archive/f81082ea1e2f85c43649bee26e0...
# License23: Apache-2.0
-Source22: https://mirror.bazel.build/github.com/google/flatbuffers/archive/v1.9.0.tar....
-# License24: BSD-3-Clause
+Source22: https://github.com/google/flatbuffers/archive/v1.9.0.tar.gz#/flatbuffers_v1....
+# License23: BSD-3-Clause
Source23: https://github.com/NVlabs/cub/archive/1.8.0.zip#/cub_1.8.0.zip
-# License25: Apache-2.0
+# License24: Apache-2.0
Source24: https://github.com/google/highwayhash/archive/fd3d9af80465e4383162e4a7c5e2f4...
-# License28: Apache-2.0
+# License25: Apache-2.0
Source25: https://github.com/abseil/abseil-py/archive/pypi-v0.2.2.tar.gz#/abseil-pypi-...
-# License29: MPL-2.0
+# License26: MPL-2.0
# NOTE: tensorflow only uses MPL-2.0 part of eigen
-Source26: https://mirror.bazel.build/bitbucket.org/eigen/eigen/get/fd6845384b86.tar.gz...
-# License30: BSD-2-Clause
-Source27: https://mirror.bazel.build/github.com/intel/ARM_NEON_2_x86_SSE/archive/0f77d...
+Source26: https://bitbucket.org/eigen/eigen/get/9f48e814419e.tar.gz#/eigen.tar.gz
+# License27: BSD-2-Clause
+Source27: https://github.com/intel/ARM_NEON_2_x86_SSE/archive/1200fe90bb174a6224a525ee...
Source28: https://mirror.bazel.build/docs.python.org/2.7/_sources/license.txt#/python-...
-# License32: MIT
+# License29: MIT
Source29: https://github.com/open-source-parsers/jsoncpp/archive/1.8.4.tar.gz#/json-cp...
-# License33: FSFUL
+# License30: FSFUL
Source30: http://www.kurims.kyoto-u.ac.jp/~ooura/fft.tgz#/fft.tar.gz
-# Source34: Apache-2.0
+# License31: Apache-2.0
Source31: https://github.com/grpc/grpc/archive/v1.13.0.tar.gz#/grpc-v1.13.0.gz
-# Source35: BSD-3.0
-Source32: https://mirror.bazel.build/github.com/google/re2/archive/2018-04-01.tar.gz#/...
-# patch the libray search path in configure.py
-Patch0: mpilibpath_configure_py.patch
-Patch1: no_aws_and_googlecloud.patch
+# License32: BSD-3.0
+Source32: https://github.com/google/re2/archive/2018-10-01.tar.gz#/re2-2018-10-01.tar....
+# License33: Apache-2.0
+Source33: https://github.com/aws/aws-sdk-cpp/archive/1.3.15.tar.gz#/aws-sdk-cpp-1.3.15...
+# License34: BSD-3-Clause and Intel
+Source34: https://github.com/edenhill/librdkafka/archive/v0.11.5.tar.gz#/kafka-v0.11.5...
+# The factory protobuf library has other symbols due to hidden compiler flags
+# License35: Apache-2.0
+Source35: https://github.com/GoogleCloudPlatform/google-cloud-cpp/archive/v0.4.0.tar.g...
+# License36: Apache-2.0
+Source36: https://github.com/nlopezgi/bazel-toolchains/archive/3f8c58fe530fedc446de046...
+# License37: Apache-2.0
+Source37: https://github.com/bazelbuild/rules_docker/archive/a9bb1dab84cdf46e34d1b34b5...
+# License38: MIT
+Source38: https://github.com/keras-team/keras-preprocessing/archive/1.0.9.tar.gz#/kera...
+# License39: MIT
+Source39: https://github.com/keras-team/keras-applications/archive/1.0.6.tar.gz#/keras...
+# License40: MIT
+Source40: https://github.com/google/nsync/archive/1.20.1.tar.gz#/google-nsync-1.20.1.t...
+# License41: Apache-2.0
+# something between 1.16.1 and 1.18~pre
+Source41: https://github.com/grpc/grpc/archive/69b6c047bc767b4d80e7af4d00ccb7c45b683da...
+# License42: Apache-2.0
+Source42: https://github.com/google/flatbuffers/archive/1f5eae5d6a135ff6811724f6c57f91...
+# License43: BSD and ICU License
+Source43: https://github.com/unicode-org/icu/archive/release-62-1.tar.gz#/unicode-org-...
+# License44: BSD like
+Source44: https://github.com/nanopb/nanopb/archive/f8ac463766281625ad710900479130c7fcb...
+# License45: Python license itself, do need as sha256b have to match so could not use system one
+Source45: https://mirror.bazel.build/docs.python.org/2.7/_sources/license.rst.txt
+# Deps sources for Tensorflow-Lite (use same eigen, gemmlowp and abseil_cpp packages as non lite version)
+Source100: https://github.com/google/googletest/archive/release-1.8.0.tar.gz
+Source101: https://github.com/intel/ARM_NEON_2_x86_SSE/archive/master.zip
+Source102: http://github.com/google/farmhash/archive/816a4ae622e964763ca0862d9dbd19324a...
+# Source103: http://mirror.tensorflow.org/github.com/google/flatbuffers/archive/v1.11.0.t...
+Source104: http://www.kurims.kyoto-u.ac.jp/~ooura/fft.tgz
+Patch1: support-new-bazel.patch
+Patch2: fix_mvapich_mpi_bzl.patch
+# PATCH-FIX-UPSTREAM https://github.com/tensorflow/tensorflow/pull/22856
+Patch3: tensorflow-make_aws_sdk_work_on_aarch64.patch
+# PATCH-FIX-OPENSUSE - Use installed flatbuffers lib for Tensorflow-Lite
+Patch4: tensorflow-fix_lite.patch
Requires: python3
Requires: python3-abseil
@@ -219,11 +273,11 @@
%else
Provides: python3-tensorflow
%endif
-BuildRequires: bazel
+BuildRequires: bazel == 0.19.2
BuildRequires: curl
%if %{with cuda}
Requires: cuda-9.0
-BuildRequires cuda-9.0
+BuildRequires: cuda-9.0
%endif
%if %{with opencl}
Requires: Mesa-libOpenCL
@@ -232,11 +286,14 @@
%endif
BuildRequires: curl-devel
BuildRequires: fdupes
+%if %{is_lite}
+BuildRequires: flatbuffers-devel
+%endif
BuildRequires: fftw3-devel
BuildRequires: gcc-c++
BuildRequires: giflib-devel
#BuildRequires: grpc-devel >= 1.12
-BuildRequires: jemalloc-devel
+#BuildRequires: jemalloc-devel
BuildRequires: libjpeg-turbo
%if 0%{?suse_version} < 1550
BuildRequires: libjpeg62-turbo
@@ -251,6 +308,9 @@
BuildRequires: pcre-devel
BuildRequires: python3
BuildRequires: python3-Cython
+BuildRequires: python3-Keras-Applications
+BuildRequires: python3-Keras-Preprocessing
+BuildRequires: python3-astor
BuildRequires: python3-base
BuildRequires: python3-devel
BuildRequires: python3-mock
@@ -266,20 +326,18 @@
BuildRequires: unzip
BuildRequires: zlib-devel
%if %{with hpc}
+%hpc_requires
BuildRequires: %{compiler_family}%{?c_f_ver}-compilers-hpc-macros-devel
+BuildRequires: lua-lmod
+BuildRequires: suse-hpc
%if %{with mpi}
BuildRequires: %{mpi_flavor}%{?mpi_vers}-%{compiler_family}%{?c_f_ver}-hpc-macros-devel
%endif
-BuildRequires: lua-lmod
-BuildRequires: suse-hpc
-%hpc_requires
%endif
# just use rpmlint
-# there are some serious compiler warnings, regearding no-return-in-nonvoid-function
-BuildRequires: -post-build-checks
-
-BuildRoot: %{_tmppath}/%{name}-%{version}-build
+# there are some serious compiler warnings, regarding no-return-in-nonvoid-function
+#!BuildRequires: -post-build-checks
%if "%flavor" == ""
ExclusiveArch: do_not_build
@@ -363,40 +421,90 @@
%makebazelcache %{SOURCE30}
%makebazelcache %{SOURCE31}
%makebazelcache %{SOURCE32}
+%makebazelcache %{SOURCE33}
+%makebazelcache %{SOURCE34}
+%makebazelcache %{SOURCE35}
+%makebazelcache %{SOURCE36}
+%makebazelcache %{SOURCE37}
+%makebazelcache %{SOURCE38}
+%makebazelcache %{SOURCE39}
+%makebazelcache %{SOURCE40}
+%makebazelcache %{SOURCE41}
+%makebazelcache %{SOURCE42}
+%makebazelcache %{SOURCE43}
+%makebazelcache %{SOURCE44}
+%makebazelcache %{SOURCE45}
# unpack tensorflow
%setup -q -c -n tensorflow-%{version}
%sanitize_dir
pwd
-%patch0 -p 1
%patch1 -p 1
+%patch2 -p 1
+%patch3 -p 1
+%patch4 -p 1
echo $MPI_DIR
+%if %{is_lite}
+mkdir tensorflow/lite/tools/make/downloads/
+pushd tensorflow/lite/tools/make/downloads/
+# eigen, gemmlowp and abseil_cpp
+cp %{SOURCE26} %{SOURCE17} %{SOURCE19} .
+mkdir tmp
+tar xzf eigen.tar.gz -C tmp && mv tmp/* eigen
+unzip gemmlowp.zip -d tmp && mv tmp/* gemmlowp
+tar xzf %{SOURCE100} -C tmp && mv tmp/* fgoogletest
+tar xzf abseil-cpp.tar.gz -C tmp && mv tmp/* absl
+unzip %{SOURCE101} -d neon_2_sse
+tar xzf %{SOURCE102} -C tmp && mv tmp/* farmhash
+# We use installed flatbuffers
+# tar xzf %{SOURCE103} -C tmp && mv tmp/* flatbuffers
+tar xzf %{SOURCE104} -C tmp && mv tmp/* fft2d
+# sed fixes from tensorflow/lite/tools/make/download_dependencies.sh
+sed -i -e 's#static uint32x4_t p4ui_CONJ_XOR = vld1q_u32( conj_XOR_DATA );#static uint32x4_t p4ui_CONJ_XOR; // = vld1q_u32( conj_XOR_DATA ); - Removed by script#' \
+ "./eigen/Eigen/src/Core/arch/NEON/Complex.h"
+sed -i -e 's#static uint32x2_t p2ui_CONJ_XOR = vld1_u32( conj_XOR_DATA );#static uint32x2_t p2ui_CONJ_XOR;// = vld1_u32( conj_XOR_DATA ); - Removed by scripts#' \
+ "./eigen/Eigen/src/Core/arch/NEON/Complex.h"
+sed -i -e 's#static uint64x2_t p2ul_CONJ_XOR = vld1q_u64( p2ul_conj_XOR_DATA );#static uint64x2_t p2ul_CONJ_XOR;// = vld1q_u64( p2ul_conj_XOR_DATA ); - Removed by script#' \
+ "./eigen/Eigen/src/Core/arch/NEON/Complex.h"
+find -name fixedpoint.h
+popd
+%endif
+
%build
-%limit_build -m 1600
+%limit_build -m 4000
+
+%if %{is_lite}
+make %{?_smp_mflags} -f tensorflow/lite/tools/make/Makefile \
+ $(pwd)/tensorflow/lite/tools/make/gen/linux_$(uname -m)/lib/libtensorflow-lite.a \
+ $(pwd)/tensorflow/lite/tools/make/gen/linux_$(uname -m)/bin/minimal
+# Build of benchmark-lib.a is broken
+%else
%if %{with hpc}
%hpc_setup
module load gnu
%if %{with mpi}
module load %mpi_flavor
+export MPI_HOME=${MPI_HOME:-$MPI_DIR}
%endif #mpi
%endif #hpc
export TEST_TMPDIR=%{bazeldir}
-export PYTHON_LIB_PATH=/usr/lib64/python3.6/site-packages
+export PYTHON_LIB_PATH=%{python3_sitearch}
export PYTHON_BIN_PATH=/usr/bin/python3
export CC_OPT_FLAGS=-O2
export TF_NEED_JEMALLOC=0
export TF_NEED_GCP=0
-export TF_NEED_HDFS=0
-export TF_NEED_S3=0
+export TF_NEED_HDFS=1
+export TF_NEED_S3=1
export TF_ENABLE_XLA=0
export TF_NEED_VERBS=0
export TF_NEED_OPENCL=0
-export TF_SYSTEM_LIBS="nasm,jpeg,png_archive,org_sqlite,gif_archive,six_archive,astor_archive,termcolor_archive,pcre,swig,curl,lmdb,zlib_archive,snappy,cython,jemalloc"
+export TF_NEED_ROCM=0
+export TF_SYSTEM_LIBS="nasm,jpeg,png_archive,org_sqlite,gif_archive,six_archive,astor_archive,termcolor_archive,pcre,swig,curl,lmdb,zlib_archive,snappy,cython"
#export TF_SYSTEM_LIBS="com_googlesource_code_re2,nasm,jpeg,png_archive,org_sqlite,gif_archive,six_archive,astor_archive,termcolor_archive,pcre,swig,curl,grpc,lmdb,zlib_archive,snappy,cython,jemalloc"
%if %{with cuda}
export TF_NEED_CUDA=1
@@ -430,8 +538,21 @@
%{?copts} --jobs %{?jobs} \
//tensorflow/tools/pip_package:build_pip_package
bazel-bin/tensorflow/tools/pip_package/build_pip_package %{_topdir}/%{name}-%{version}
+bazel build -c opt //tensorflow:libtensorflow.so
+bazel build -c opt //tensorflow:libtensorflow_cc.so
+%endif
%install
+
+%if %{is_lite}
+pushd tensorflow/lite/tools/make/gen/linux_*/
+install -D bin/minimal %{buildroot}%{_bindir}/tflite_minimal
+install -D lib/libtensorflow-lite.a %{buildroot}%{_libdir}/libtensorflow-lite.a
+popd
+install -D tensorflow/lite/schema/schema_generated.h %{buildroot}%{_includedir}/tensorflow/lite/schema/schema_generated.h
+install -D tensorflow/lite/schema/schema.fbs %{buildroot}%{_includedir}/tensorflow/lite/schema/schema.fbs
+%else
+
pip install %{_topdir}/%{name}-%{version}/*whl --root=%{buildroot}%{?hpc_prefix} \
--no-warn-script-location --no-index --no-deps
# remove spurious executeable bits
@@ -444,6 +565,10 @@
rm -r lib
cd -
%endif
+# install libtensorflow*.so
+install -D bazel-bin/tensorflow/libtensorflow.so %{buildroot}%{package_libdir}/libtensorflow.so
+install -D bazel-bin/tensorflow/libtensorflow_cc.so %{buildroot}%{package_libdir}/libtensorflow_cc.so
+install -D bazel-bin/tensorflow/libtensorflow_framework.so %{buildroot}%{package_libdir}/libtensorflow_framework.so
# remove external libs
%fdupes -s %{buildroot}%{?hpc_prefix}
find %{buildroot} -name \*.h -type f -exec chmod 644 {} +
@@ -492,6 +617,21 @@
EOF
%endif
+# %%{is_lite}
+%endif
+
+%post -n %{package_name}-devel -p /sbin/ldconfig
+%postun -n %{package_name}-devel -p /sbin/ldconfig
+
+# Lite version is very different so package it separately
+%if %{is_lite}
+%files
+%{package_bindir}/*
+%files -n %{package_name}-devel
+%{package_libdir}/libtensorflow-lite.a
+%dir %{_includedir}/tensorflow/lite/schema/
+%{_includedir}/tensorflow/lite/schema/*
+%else # not lite build
%files
%defattr(-,root,root,-)
%{package_python_sitearch}/*
@@ -504,7 +644,10 @@
%endif
%files -n %{package_name}-devel
%{package_python_sitelib}/tensorflow/include
+%{package_libdir}/libtensorflow*.so
%files -n %{package_name}-doc
%{package_python_sitelib}/tensorflow/examples
+%endif
+
%changelog
++++++ protobuf_v3.6.0.tar.gz -> 816a4ae622e964763ca0862d9dbd19324a1eaf45.tar.gz ++++++
++++ 821455 lines of diff (skipped)
++++++ _constraints ++++++
--- /var/tmp/diff_new_pack.3GoBTM/_old 2019-07-22 12:20:15.227666518 +0200
+++ /var/tmp/diff_new_pack.3GoBTM/_new 2019-07-22 12:20:15.231666517 +0200
@@ -1,7 +1,7 @@
<constraints>
<hardware>
<memory>
- <size unit="M">8192</size>
+ <size unit="G">10</size>
</memory>
<disk>
<size unit="G">10</size>
++++++ _multibuild ++++++
--- /var/tmp/diff_new_pack.3GoBTM/_old 2019-07-22 12:20:15.247666513 +0200
+++ /var/tmp/diff_new_pack.3GoBTM/_new 2019-07-22 12:20:15.255666511 +0200
@@ -1,5 +1,6 @@
<multibuild>
<package>standard</package>
+ <package>lite</package>
<package>hpc</package>
<package>hpc-openmpi2</package>
<package>hpc-mvapich2</package>
++++++ abseil-cpp.tar.gz ++++++
++++ 65018 lines of diff (skipped)
++++++ arm_neon_2_x86_sse.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ARM_NEON_2_x86_SSE-0f77d9d182265259b135dad949230ecbf1a2633d/NEON_2_SSE.h new/ARM_NEON_2_x86_SSE-1200fe90bb174a6224a525ee60148671a786a71f/NEON_2_SSE.h
--- old/ARM_NEON_2_x86_SSE-0f77d9d182265259b135dad949230ecbf1a2633d/NEON_2_SSE.h 2017-05-30 09:44:55.000000000 +0200
+++ new/ARM_NEON_2_x86_SSE-1200fe90bb174a6224a525ee60148671a786a71f/NEON_2_SSE.h 2018-04-04 09:24:16.000000000 +0200
@@ -1,6 +1,6 @@
//created by Victoria Zhislina, the Senior Application Engineer, Intel Corporation, victoria.zhislina@intel.com
-//*** Copyright (C) 2012-2016 Intel Corporation. All rights reserved.
+//*** Copyright (C) 2012-2017 Intel Corporation. All rights reserved.
//IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
@@ -36,21 +36,21 @@
//performance overhead and the necessity to use the EMMS instruction (_mm_empty())for mmx-x87 floating point switching
//*****************************************************************************************
-//!!!!!!!!!!!!!! To use this file just include it in your project that uses ARM NEON intinsics instead of "arm_neon.h" and complile it as usual
-//!!!!!!!!!!!!!! but please pay attention at #define USE_SSE4 below - you might need to define it manualy for newest Intel Atom platforms for greater performance.
+//!!!!!!!!!!!!!! To use this file just include it in your project that uses ARM NEON intinsics instead of "arm_neon.h" and compile it as usual
+//!!!!!!!!!!!!!! but please pay attention at #define USE_SSE4 below - you might need to define it manualy for newest Intel Atom or any Intel Core platforms for greater performance.
#ifndef NEON2SSE_H
#define NEON2SSE_H
/*********************************************************************************************************************/
//!!!!!!!!!!!!!!
+//if USE_SSE4 is defined, some functions use SSE4 instructions instead of earlier SSE versions, when undefined - SIMD up to SSSE3 are used
+//For older devices without SSE4 support it should be undefined, for newer devices - defined, probably manualy if your compiler doesn't set __SSE4_2__ predefine
#ifndef USE_SSE4
#if defined(__SSE4_2__)
#define USE_SSE4
#endif
#endif
-//if USE_SSE4 is defined, some functions use SSE4 instructions instead of earlier SSE versions, when undefined - SIMD up to SSSE3 are used
-//For older devices without SSE4 support it should be undefined, for newer devices - defined, probably manualy if your compiler doesn't set __SSE4_2__ predefine
/*********************************************************************************************************************/
#include //SSE
@@ -62,6 +62,7 @@
#include //SSE4.2
#endif
+#include
//*************** functions and data attributes, compiler dependent *********************************
//***********************************************************************************
@@ -150,6 +151,9 @@
typedef __m128 float16x4_t; //not supported by IA, for compartibility
typedef __m128 float16x8_t; //not supported by IA, for compartibility
+typedef __m64_128 float64x1_t;
+typedef __m128d float64x2_t;
+
typedef __m128i int8x16_t;
typedef __m128i int16x8_t;
typedef __m128i int32x4_t;
@@ -174,6 +178,9 @@
typedef float __fp16;
#endif
+typedef double float64_t;
+
+
typedef uint8_t poly8_t;
typedef uint16_t poly16_t;
@@ -861,6 +868,9 @@
uint16x8_t vmaxq_u16(uint16x8_t a, uint16x8_t b); // VMAX.U16 q0,q0,q0
uint32x4_t vmaxq_u32(uint32x4_t a, uint32x4_t b); // VMAX.U32 q0,q0,q0
float32x4_t vmaxq_f32(float32x4_t a, float32x4_t b); // VMAX.F32 q0,q0,q0
+
+float64x2_t vmaxq_f64(float64x2_t a, float64x2_t b); // VMAX.F64 q0,q0,q0
+
//vmin -> Vr[i] := (Va[i] >= Vb[i]) ? Vb[i] : Va[i]
int8x8_t vmin_s8(int8x8_t a, int8x8_t b); // VMIN.S8 d0,d0,d0
int16x4_t vmin_s16(int16x4_t a, int16x4_t b); // VMIN.S16 d0,d0,d0
@@ -876,6 +886,9 @@
uint16x8_t vminq_u16(uint16x8_t a, uint16x8_t b); // VMIN.U16 q0,q0,q0
uint32x4_t vminq_u32(uint32x4_t a, uint32x4_t b); // VMIN.U32 q0,q0,q0
float32x4_t vminq_f32(float32x4_t a, float32x4_t b); // VMIN.F32 q0,q0,q0
+
+float64x2_t vminq_f64(float64x2_t a, float64x2_t b); // VMIN.F64 q0,q0,q0
+
//Pairwise addition
//Pairwise add
int8x8_t vpadd_s8(int8x8_t a, int8x8_t b); // VPADD.I8 d0,d0,d0
@@ -1225,6 +1238,9 @@
float32x2_t vld1_f32(__transfersize(2) float32_t const * ptr); // VLD1.32 {d0}, [r0]
poly8x8_t vld1_p8(__transfersize(8) poly8_t const * ptr); // VLD1.8 {d0}, [r0]
poly16x4_t vld1_p16(__transfersize(4) poly16_t const * ptr); // VLD1.16 {d0}, [r0]
+
+float64x2_t vld1q_f64(__transfersize(4) float64_t const * ptr); // VLD1.64 {d0, d1}, [r0]
+
//Load a single lane from memory
uint8x16_t vld1q_lane_u8(__transfersize(1) uint8_t const * ptr, uint8x16_t vec, __constrange(0,15) int lane); //VLD1.8 {d0[0]}, [r0]
uint16x8_t vld1q_lane_u16(__transfersize(1) uint16_t const * ptr, uint16x8_t vec, __constrange(0,7) int lane); // VLD1.16 {d0[0]}, [r0]
@@ -1755,6 +1771,7 @@
uint32x2_t vcvt_n_u32_f32(float32x2_t a, __constrange(1,32) int b); // VCVT.U32.F32 d0, d0, #32
int32x4_t vcvtq_n_s32_f32(float32x4_t a, __constrange(1,32) int b); // VCVT.S32.F32 q0, q0, #32
uint32x4_t vcvtq_n_u32_f32(float32x4_t a, __constrange(1,32) int b); // VCVT.U32.F32 q0, q0, #32
+int32x4_t vcvtnq_s32_f32(float32x4_t a); // VCVTN.S32.F32 q0, q0
//Convert to float
float32x2_t vcvt_f32_s32(int32x2_t a); // VCVT.F32.S32 d0, d0
float32x2_t vcvt_f32_u32(uint32x2_t a); // VCVT.F32.U32 d0, d0
@@ -2003,6 +2020,10 @@
int16x8_t vabsq_s16(int16x8_t a); // VABS.S16 q0,q0
int32x4_t vabsq_s32(int32x4_t a); // VABS.S32 q0,q0
float32x4_t vabsq_f32(float32x4_t a); // VABS.F32 q0,q0
+
+int64x2_t vabsq_s64(int64x2_t a); // VABS.S64 q0,q0
+float64x2_t vabsq_f64(float64x2_t a); // VABS.F64 q0,q0
+
//Saturating absolute: Vd[i] = sat(|Va[i]|)
int8x8_t vqabs_s8(int8x8_t a); // VQABS.S8 d0,d0
int16x4_t vqabs_s16(int16x4_t a); // VQABS.S16 d0,d0
@@ -2246,16 +2267,26 @@
poly8x16x2_t vuzpq_p8(poly8x16_t a, poly8x16_t b); // VUZP.8 q0,q0
poly16x8x2_t vuzpq_p16(poly16x8_t a, poly16x8_t b); // VUZP.16 q0,q0
+float32x4_t vrndnq_f32(float32x4_t a); // VRND.F32 q0,q0
+
+float64x2_t vrndnq_f64(float64x2_t a); // VRND.F64 q0,q0
+
+//Sqrt
+float32x4_t vsqrtq_f32(float32x4_t a); // VSQRT.F32 q0,q0
+
+float64x2_t vsqrtq_f64(float64x2_t a); // VSQRT.F64 q0,q0
+
+
//^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
// the following macros solve the problem of the "immediate parameters requirement" for some x86 intrinsics.
// we need it to compile the code unless the "Intrinsic parameter must be an immediate value" error is our goal
//
-#if ( ((defined(_MSC_VER)|| defined (__INTEL_COMPILER)) && defined DEBUG ) || defined(__GNUC__) && !defined(__llvm__) )
+#if ( defined (__INTEL_COMPILER) || defined (__GNUC__) && !defined(__llvm__) )
#define _MM_ALIGNR_EPI8 _mm_alignr_epi8
- #define _MM_EXTRACT_EPI16 _mm_extract_epi16
+#define _MM_EXTRACT_EPI16 (int16_t) _mm_extract_epi16
#define _MM_INSERT_EPI16 _mm_insert_epi16
#ifdef USE_SSE4
#define _MM_EXTRACT_EPI8 _mm_extract_epi8
@@ -2328,7 +2359,7 @@
_NEON2SSE_SWITCH8(_mm_insert_epi16, vec, LANE, _NEON2SSE_COMMA p)
}
- _NEON2SSE_INLINE int _MM_EXTRACT_EPI16(__m128i vec, const int LANE)
+ _NEON2SSE_INLINE int16_t _MM_EXTRACT_EPI16(__m128i vec, const int LANE)
{
_NEON2SSE_SWITCH8(_mm_extract_epi16, vec, LANE,)
}
@@ -3117,7 +3148,7 @@
{
//no signed average in x86 SIMD, go to unsigned
__m128i c128, au, bu, sum;
- c128 = _mm_set1_epi8(0x80); //-128
+ c128 = _mm_set1_epi8((int8_t)0x80); //-128
au = _mm_sub_epi8(a, c128); //add 128
bu = _mm_sub_epi8(b, c128); //add 128
sum = _mm_avg_epu8(au, bu);
@@ -3129,7 +3160,7 @@
{
//no signed average in x86 SIMD, go to unsigned
__m128i cx8000, au, bu, sum;
- cx8000 = _mm_set1_epi16(0x8000); // - 32768
+ cx8000 = _mm_set1_epi16((int16_t)0x8000); // - 32768
au = _mm_sub_epi16(a, cx8000); //add 32768
bu = _mm_sub_epi16(b, cx8000); //add 32768
sum = _mm_avg_epu16(au, bu);
@@ -4747,7 +4778,7 @@
{
// //need to deal with the possibility of internal overflow
__m128i c128, au,bu;
- c128 = _mm_set1_epi8 (128);
+ c128 = _mm_set1_epi8((int8_t)128);
au = _mm_add_epi8( a, c128);
bu = _mm_add_epi8( b, c128);
return vhsubq_u8(au,bu);
@@ -4758,7 +4789,7 @@
{
//need to deal with the possibility of internal overflow
__m128i c8000, au,bu;
- c8000 = _mm_set1_epi16(0x8000);
+ c8000 = _mm_set1_epi16((int16_t)0x8000);
au = _mm_add_epi16( a, c8000);
bu = _mm_add_epi16( b, c8000);
return vhsubq_u16(au,bu);
@@ -5192,7 +5223,7 @@
return _mm_cmpeq_epi16(cmp, a); //a>=b
#else
__m128i c8000, as, bs, m1, m2;
- c8000 = _mm_set1_epi16 (0x8000);
+ c8000 = _mm_set1_epi16 ((int16_t)0x8000);
as = _mm_sub_epi16(a,c8000);
bs = _mm_sub_epi16(b,c8000);
m1 = _mm_cmpgt_epi16(as, bs);
@@ -5428,7 +5459,7 @@
{
//no unsigned chars comparison, only signed available,so need the trick
__m128i c128, as, bs;
- c128 = _mm_set1_epi8 (128);
+ c128 = _mm_set1_epi8 ((int8_t)128);
as = _mm_sub_epi8(a,c128);
bs = _mm_sub_epi8(b,c128);
return _mm_cmpgt_epi8 (as, bs);
@@ -5439,7 +5470,7 @@
{
//no unsigned short comparison, only signed available,so need the trick
__m128i c8000, as, bs;
- c8000 = _mm_set1_epi16 (0x8000);
+ c8000 = _mm_set1_epi16 ((int16_t)0x8000);
as = _mm_sub_epi16(a,c8000);
bs = _mm_sub_epi16(b,c8000);
return _mm_cmpgt_epi16 ( as, bs);
@@ -6137,6 +6168,11 @@
float32x4_t vmaxq_f32(float32x4_t a, float32x4_t b); // VMAX.F32 q0,q0,q0
#define vmaxq_f32 _mm_max_ps
+
+float64x2_t vmaxq_f64(float64x2_t a, float64x2_t b); // VMAX.F64 q0,q0,q0
+#define vmaxq_f64 _mm_max_pd
+
+
//*************** Minimum: vmin -> Vr[i] := (Va[i] >= Vb[i]) ? Vb[i] : Va[i] ********************************
//***********************************************************************************************************
int8x8_t vmin_s8(int8x8_t a, int8x8_t b); // VMIN.S8 d0,d0,d0
@@ -6221,6 +6257,11 @@
float32x4_t vminq_f32(float32x4_t a, float32x4_t b); // VMIN.F32 q0,q0,q0
#define vminq_f32 _mm_min_ps
+
+float64x2_t vminq_f64(float64x2_t a, float64x2_t b); // VMIN.F64 q0,q0,q0
+#define vminq_f64 _mm_min_pd
+
+
//************* Pairwise addition operations. **************************************
//************************************************************************************
//Pairwise add - adds adjacent pairs of elements of two vectors, and places the results in the destination vector
@@ -6283,7 +6324,7 @@
uint16x4_t res64;
__m128i c32767, cfffe, as, bs, res;
c32767 = _mm_set1_epi16 (32767);
- cfffe = _mm_set1_epi16 (0xfffe);
+ cfffe = _mm_set1_epi16 ((int16_t)0xfffe);
as = _mm_sub_epi16 (_pM128i(a), c32767);
bs = _mm_sub_epi16 (_pM128i(b), c32767);
res = _mm_hadd_epi16 (as, bs);
@@ -8355,7 +8396,7 @@
// manual saturation solution looks more optimal than 32 bits conversion one
__m128i cb, c8000, a_signed, saturation_mask, shift_res;
cb = _mm_set1_epi16((1 << (16 - b)) - 1 - 0x8000 );
- c8000 = _mm_set1_epi16 (0x8000);
+ c8000 = _mm_set1_epi16 ((int16_t)0x8000);
//no unsigned shorts comparison in SSE, only signed available, so need the trick
a_signed = _mm_sub_epi16(a, c8000); //go to signed
saturation_mask = _mm_cmpgt_epi16 (a_signed, cb);
@@ -9196,7 +9237,7 @@
// it loads a 32-byte block aligned on a 16-byte boundary and extracts the 16 bytes corresponding to the unaligned access
//If the ptr is aligned then could use __m128i _mm_load_si128 ((__m128i*) ptr) instead;
#define LOAD_SI128(ptr) \
- ( ((unsigned long)(ptr) & 15) == 0 ) ? _mm_load_si128((__m128i*)(ptr)) : _mm_loadu_si128((__m128i*)(ptr))
+ ( ((uintptr_t)(ptr) & 15) == 0 ) ? _mm_load_si128((__m128i*)(ptr)) : _mm_loadu_si128((__m128i*)(ptr))
uint8x16_t vld1q_u8(__transfersize(16) uint8_t const * ptr); // VLD1.8 {d0, d1}, [r0]
#define vld1q_u8 LOAD_SI128
@@ -9233,7 +9274,7 @@
float32x4_t vld1q_f32(__transfersize(4) float32_t const * ptr); // VLD1.32 {d0, d1}, [r0]
_NEON2SSE_INLINE float32x4_t vld1q_f32(__transfersize(4) float32_t const * ptr)
{
- if( (((unsigned long)(ptr)) & 15 ) == 0 ) //16 bits aligned
+ if( (((uintptr_t)(ptr)) & 15 ) == 0 ) //16 bits aligned
return _mm_load_ps(ptr);
else
return _mm_loadu_ps(ptr);
@@ -9288,6 +9329,17 @@
poly16x4_t vld1_p16(__transfersize(4) poly16_t const * ptr); // VLD1.16 {d0}, [r0]
#define vld1_p16 vld1_u16
+
+float64x2_t vld1q_f64(__transfersize(4) float64_t const * ptr); // VLD1.64 {d0, d1}, [r0] //NOTE(review): __transfersize(4) looks copy-pasted from the f32 variant; a float64x2_t loads 2 doubles - confirm it should be (2)
+_NEON2SSE_INLINE float64x2_t vld1q_f64(__transfersize(4) float64_t const * ptr)
+{
+ if ((((uintptr_t)(ptr)) & 15) == 0) //16-byte aligned -> use the faster aligned load
+ return _mm_load_pd(ptr);
+ else
+ return _mm_loadu_pd(ptr);
+}
+
+
//***********************************************************************************************************
//******* Lane load functions - insert the data at vector's given position (lane) *************************
//***********************************************************************************************************
@@ -9522,7 +9574,7 @@
// If ptr is 16bit aligned and you need to store data without cache pollution then use void _mm_stream_si128 ((__m128i*)ptr, val);
//here we assume the case of NOT 16bit aligned ptr possible. If it is aligned we could to use _mm_store_si128 like shown in the following macro
#define STORE_SI128(ptr, val) \
- (((unsigned long)(ptr) & 15) == 0 ) ? _mm_store_si128 ((__m128i*)(ptr), val) : _mm_storeu_si128 ((__m128i*)(ptr), val);
+ (((uintptr_t)(ptr) & 15) == 0 ) ? _mm_store_si128 ((__m128i*)(ptr), val) : _mm_storeu_si128 ((__m128i*)(ptr), val);
void vst1q_u8(__transfersize(16) uint8_t * ptr, uint8x16_t val); // VST1.8 {d0, d1}, [r0]
#define vst1q_u8 STORE_SI128
@@ -9554,7 +9606,7 @@
void vst1q_f32(__transfersize(4) float32_t * ptr, float32x4_t val); // VST1.32 {d0, d1}, [r0]
_NEON2SSE_INLINE void vst1q_f32(__transfersize(4) float32_t * ptr, float32x4_t val)
{
- if( ((unsigned long)(ptr) & 15) == 0 ) //16 bits aligned
+ if( ((uintptr_t)(ptr) & 15) == 0 ) //16 bits aligned
_mm_store_ps (ptr, val);
else
_mm_storeu_ps (ptr, val);
@@ -9639,22 +9691,22 @@
//***********Store a lane of a vector into memory (extract given lane) *********************
//******************************************************************************************
void vst1q_lane_u8(__transfersize(1) uint8_t * ptr, uint8x16_t val, __constrange(0,15) int lane); // VST1.8 {d0[0]}, [r0]
-#define vst1q_lane_u8(ptr, val, lane) *(ptr) = _MM_EXTRACT_EPI8 (val, lane)
+#define vst1q_lane_u8(ptr, val, lane) *(ptr) = (uint8_t) _MM_EXTRACT_EPI8 (val, lane)
void vst1q_lane_u16(__transfersize(1) uint16_t * ptr, uint16x8_t val, __constrange(0,7) int lane); // VST1.16 {d0[0]}, [r0]
-#define vst1q_lane_u16(ptr, val, lane) *(ptr) = _MM_EXTRACT_EPI16 (val, lane)
+#define vst1q_lane_u16(ptr, val, lane) *(ptr) = (uint16_t) _MM_EXTRACT_EPI16 (val, lane)
void vst1q_lane_u32(__transfersize(1) uint32_t * ptr, uint32x4_t val, __constrange(0,3) int lane); // VST1.32 {d0[0]}, [r0]
-#define vst1q_lane_u32(ptr, val, lane) *(ptr) = _MM_EXTRACT_EPI32 (val, lane)
+#define vst1q_lane_u32(ptr, val, lane) *(ptr) = (uint32_t) _MM_EXTRACT_EPI32 (val, lane)
void vst1q_lane_u64(__transfersize(1) uint64_t * ptr, uint64x2_t val, __constrange(0,1) int lane); // VST1.64 {d0}, [r0]
-#define vst1q_lane_u64(ptr, val, lane) *(ptr) = _MM_EXTRACT_EPI64 (val, lane)
+#define vst1q_lane_u64(ptr, val, lane) *(ptr) = (uint64_t) _MM_EXTRACT_EPI64 (val, lane)
void vst1q_lane_s8(__transfersize(1) int8_t * ptr, int8x16_t val, __constrange(0,15) int lane); // VST1.8 {d0[0]}, [r0]
-#define vst1q_lane_s8(ptr, val, lane) *(ptr) = _MM_EXTRACT_EPI8 (val, lane)
+#define vst1q_lane_s8(ptr, val, lane) *(ptr) = (int8_t) _MM_EXTRACT_EPI8 (val, lane)
void vst1q_lane_s16(__transfersize(1) int16_t * ptr, int16x8_t val, __constrange(0,7) int lane); // VST1.16 {d0[0]}, [r0]
-#define vst1q_lane_s16(ptr, val, lane) *(ptr) = _MM_EXTRACT_EPI16 (val, lane)
+#define vst1q_lane_s16(ptr, val, lane) *(ptr) = (int16_t) _MM_EXTRACT_EPI16 (val, lane)
void vst1q_lane_s32(__transfersize(1) int32_t * ptr, int32x4_t val, __constrange(0,3) int lane); // VST1.32 {d0[0]}, [r0]
#define vst1q_lane_s32(ptr, val, lane) *(ptr) = _MM_EXTRACT_EPI32 (val, lane)
@@ -11881,22 +11933,22 @@
#define vget_lane_f32(vec, lane) vec.m64_f32[lane]
uint8_t vgetq_lane_u8(uint8x16_t vec, __constrange(0,15) int lane); // VMOV.U8 r0, d0[0]
-#define vgetq_lane_u8 _MM_EXTRACT_EPI8
+#define vgetq_lane_u8 (uint8_t) _MM_EXTRACT_EPI8
uint16_t vgetq_lane_u16(uint16x8_t vec, __constrange(0,7) int lane); // VMOV.s16 r0, d0[0]
-#define vgetq_lane_u16 _MM_EXTRACT_EPI16
+#define vgetq_lane_u16 (uint16_t) _MM_EXTRACT_EPI16
uint32_t vgetq_lane_u32(uint32x4_t vec, __constrange(0,3) int lane); // VMOV.32 r0, d0[0]
-#define vgetq_lane_u32 _MM_EXTRACT_EPI32
+#define vgetq_lane_u32 (uint32_t) _MM_EXTRACT_EPI32
int8_t vgetq_lane_s8(int8x16_t vec, __constrange(0,15) int lane); // VMOV.S8 r0, d0[0]
-#define vgetq_lane_s8 vgetq_lane_u8
+#define vgetq_lane_s8 _MM_EXTRACT_EPI8
int16_t vgetq_lane_s16(int16x8_t vec, __constrange(0,7) int lane); // VMOV.S16 r0, d0[0]
-#define vgetq_lane_s16 vgetq_lane_u16
+#define vgetq_lane_s16 _MM_EXTRACT_EPI16
int32_t vgetq_lane_s32(int32x4_t vec, __constrange(0,3) int lane); // VMOV.32 r0, d0[0]
-#define vgetq_lane_s32 vgetq_lane_u32
+#define vgetq_lane_s32 _MM_EXTRACT_EPI32
poly8_t vgetq_lane_p8(poly8x16_t vec, __constrange(0,15) int lane); // VMOV.U8 r0, d0[0]
#define vgetq_lane_p8 vgetq_lane_u8
@@ -11920,10 +11972,10 @@
int64_t vgetq_lane_s64(int64x2_t vec, __constrange(0,1) int lane); // VMOV r0,r0,d0
-#define vgetq_lane_s64 (int64_t) vgetq_lane_u64
+#define vgetq_lane_s64 _MM_EXTRACT_EPI64
uint64_t vgetq_lane_u64(uint64x2_t vec, __constrange(0,1) int lane); // VMOV r0,r0,d0
-#define vgetq_lane_u64 _MM_EXTRACT_EPI64
+#define vgetq_lane_u64 (uint64_t) _MM_EXTRACT_EPI64
// ***************** Set lanes within a vector ********************************************
// **************************************************************************************
@@ -12725,6 +12777,13 @@
return vcvtq_u32_f32(_mm_mul_ps(a,cconst128));
}
+
+int32x4_t vcvtnq_s32_f32(float32x4_t a); // VCVTN.S32.F32 q0, q0
+_NEON2SSE_INLINE int32x4_t vcvtnq_s32_f32(float32x4_t a)
+{
+ return _mm_cvtps_epi32(a); //rounds per MXCSR; the default round-to-nearest-even matches VCVTN - assumes MXCSR has not been changed by the caller
+}
+
//***************** Convert to float *************************
//*************************************************************
float32x2_t vcvt_f32_s32(int32x2_t a); // VCVT.F32.S32 d0, d0
@@ -14562,6 +14621,22 @@
return _mm_and_ps (a, *(__m128*)c7fffffff);
}
+#ifdef _NEON2SSE_64BIT
+int64x2_t vabsq_s64(int64x2_t a); // VABS.S64 q0,q0
+_NEON2SSE_INLINE int64x2_t vabsq_s64(int64x2_t a) // VABS.S64 q0,q0
+{
+ __m128i sign = _mm_srai_epi32 (_mm_shuffle_epi32 (a, 0xf5), 31); //0xf5 copies the high 32-bit half of each 64-bit lane; >>31 broadcasts its sign bit across the lane
+ return _mm_sub_epi64 (_mm_xor_si128 (a, sign), sign); //abs(a) = (a ^ sign) - sign: identity when sign==0, two's-complement negate when sign==-1
+}
+
+float64x2_t vabsq_f64(float64x2_t a); // VABS.F64 q0,q0
+_NEON2SSE_INLINE float64x2_t vabsq_f64(float64x2_t a) // VABS.F64 q0,q0
+{
+ _NEON2SSE_ALIGN_16 int64_t mask[2] = {0x7fffffffffffffffLL, 0x7fffffffffffffffLL}; //all bits set except each double's sign bit
+ return _mm_and_pd (a, *(__m128d*)mask); //clearing the sign bit yields |a| per lane
+}
+#endif
+
//****** Saturating absolute: Vd[i] = sat(|Va[i]|) *********************
//**********************************************************************
//For signed-integer data types, the absolute value of the most negative value is not representable by the data type, saturation takes place
@@ -14596,7 +14671,7 @@
_NEON2SSE_INLINE int8x16_t vqabsq_s8(int8x16_t a) // VQABS.S8 q0,q0
{
__m128i c_128, abs, abs_cmp;
- c_128 = _mm_set1_epi8 (0x80); //-128
+ c_128 = _mm_set1_epi8 ((int8_t)0x80); //-128
abs = _mm_abs_epi8 (a);
abs_cmp = _mm_cmpeq_epi8 (abs, c_128);
return _mm_xor_si128 (abs, abs_cmp);
@@ -14606,7 +14681,7 @@
_NEON2SSE_INLINE int16x8_t vqabsq_s16(int16x8_t a) // VQABS.S16 q0,q0
{
__m128i c_32768, abs, abs_cmp;
- c_32768 = _mm_set1_epi16 (0x8000); //-32768
+ c_32768 = _mm_set1_epi16 ((int16_t)0x8000); //-32768
abs = _mm_abs_epi16 (a);
abs_cmp = _mm_cmpeq_epi16 (abs, c_32768);
return _mm_xor_si128 (abs, abs_cmp);
@@ -14919,7 +14994,7 @@
{
__m128i cff, c80, c1, a_mask, a_neg, a_pos, a_comb;
cff = _mm_cmpeq_epi8 (a,a); //0xff
- c80 = _mm_set1_epi8(0x80);
+ c80 = _mm_set1_epi8((int8_t)0x80);
c1 = _mm_set1_epi8(1);
a_mask = _mm_and_si128(a, c80);
a_mask = _mm_cmpeq_epi8(a_mask, c80); //0xff if negative input and 0 if positive
@@ -16589,4 +16664,46 @@
uint32x4_t vreinterpretq_u32_p8 (poly8x16_t t);
#define vreinterpretq_u32_p8
+//************* Round ******************
+float32x4_t vrndnq_f32(float32x4_t a);
+#ifdef USE_SSE4
+#define vrndnq_f32(a) _mm_round_ps(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)
+#else
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING( float32x4_t vrndnq_f32(float32x4_t a), _NEON2SSE_REASON_SLOW_SERIAL)
+{
+ int i;
+ _NEON2SSE_ALIGN_16 float32_t res[4];
+ _mm_store_ps(res, a);
+ for(i = 0; i<4; i++) {
+ res[i] = nearbyintf(res[i]); //float variant is correct for float32_t; rounds per current FP mode (default to-nearest-even, matching FRINTN)
+ }
+ return _mm_load_ps(res);
+}
+#endif
+
+
+float64x2_t vrndnq_f64(float64x2_t a);
+#ifdef USE_SSE4
+#define vrndnq_f64(a) _mm_round_pd(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)
+#else
+_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(float64x2_t vrndnq_f64(float64x2_t a), _NEON2SSE_REASON_SLOW_SERIAL)
+{
+ _NEON2SSE_ALIGN_16 float64_t res[2];
+ _mm_store_pd(res, a);
+ res[0] = nearbyint(res[0]); //must be the double-precision nearbyint, not nearbyintf: the float variant narrows each double to float, losing precision for |x| >= 2^24
+ res[1] = nearbyint(res[1]);
+ return _mm_load_pd(res);
+}
+#endif
+
+
+
+//************* Sqrt ******************
+float32x4_t vsqrtq_f32(float32x4_t a); //per-lane single-precision square root
+#define vsqrtq_f32 _mm_sqrt_ps
+
+float64x2_t vsqrtq_f64(float64x2_t a); //per-lane double-precision square root
+#define vsqrtq_f64 _mm_sqrt_pd
+
+
#endif /* NEON2SSE_H */
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ARM_NEON_2_x86_SSE-0f77d9d182265259b135dad949230ecbf1a2633d/ReadMe.md new/ARM_NEON_2_x86_SSE-1200fe90bb174a6224a525ee60148671a786a71f/ReadMe.md
--- old/ARM_NEON_2_x86_SSE-0f77d9d182265259b135dad949230ecbf1a2633d/ReadMe.md 2017-05-30 09:44:55.000000000 +0200
+++ new/ARM_NEON_2_x86_SSE-1200fe90bb174a6224a525ee60148671a786a71f/ReadMe.md 2018-04-04 09:24:16.000000000 +0200
@@ -6,4 +6,6 @@
To take advantage of this file just include it in your project that uses ARM NEON intinsics instead of "arm_neon.h", compile it as usual and enjoy the result.
+For significant performance improvement in some cases you might need to define USE_SSE4 in your project settings. Otherwise, SIMD instructions only up to SSSE3 will be used.
+
For more information and license please read the NEON_2_SSE.h content.
++++++ boring_ssl.tar.gz ++++++
/work/SRC/openSUSE:Factory/tensorflow/boring_ssl.tar.gz /work/SRC/openSUSE:Factory/.tensorflow.new.4126/boring_ssl.tar.gz differ: char 13, line 1
++++++ eigen.tar.gz ++++++
++++ 72880 lines of diff (skipped)
++++++ fix_mvapich_mpi_bzl.patch ++++++
diff --git a/third_party/mpi/mpi.bzl b/third_party/mpi/mpi.bzl
index 3a48335..1cd43f8 100644
--- a/third_party/mpi/mpi.bzl
+++ b/third_party/mpi/mpi.bzl
@@ -2,7 +2,7 @@
#based on the configuration options return one or the other
def mpi_hdr():
- MPI_LIB_IS_OPENMPI = True
+ MPI_LIB_IS_OPENMPI=True
hdrs = []
if MPI_LIB_IS_OPENMPI:
hdrs = ["mpi.h", "mpi_portable_platform.h"] #When using OpenMPI
++++++ license.rst.txt ++++++
++++ 903 lines (skipped)
++++++ protobuf_v3.6.0.tar.gz -> protobuf_v3.6.1.2.tar.gz ++++++
++++ 4997 lines of diff (skipped)
++++++ re2-2018-04-01.tar.gz -> re2-2018-10-01.tar.gz ++++++
++++ 2612 lines of diff (skipped)
++++++ support-new-bazel.patch ++++++
--- a/configure.py.orig 2019-03-12 21:43:27.333211414 +0100
+++ a/configure.py 2019-03-12 21:43:50.225119652 +0100
@@ -1554,7 +1554,7 @@
# environment variables.
environ_cp = dict(os.environ)
- check_bazel_version('0.19.0', '0.21.0')
+ check_bazel_version('0.19.0', '0.22.0')
reset_tf_configure_bazelrc()
++++++ tensorflow-1.10.0.tar.gz -> tensorflow-1.13.1.tar.gz ++++++
/work/SRC/openSUSE:Factory/tensorflow/tensorflow-1.10.0.tar.gz /work/SRC/openSUSE:Factory/.tensorflow.new.4126/tensorflow-1.13.1.tar.gz differ: char 12, line 1
++++++ tensorflow-fix_lite.patch ++++++
--- tensorflow-1.13.1/tensorflow/lite/tools/make/Makefile.orig 2019-06-04 13:13:08.329080620 +0200
+++ tensorflow-1.13.1/tensorflow/lite/tools/make/Makefile 2019-06-04 16:05:13.325963284 +0200
@@ -38,11 +38,12 @@ INCLUDES := \
-I$(OBJDIR)
# This is at the end so any globally-installed frameworks like protobuf don't
# override local versions in the source tree.
-INCLUDES += -I/usr/local/include
+INCLUDES += -I/usr/include
# These are the default libraries needed, but they can be added to or
# overridden by the platform-specific settings in target makefiles.
LIBS := \
+-lflatbuffers \
-lstdc++ \
-lpthread \
-lm \
++++++ tensorflow-make_aws_sdk_work_on_aarch64.patch ++++++
From 3f88ddb71ba49d343a5db1304c296e78ddeb2575 Mon Sep 17 00:00:00 2001
From: Koan-Sin Tan
Date: Wed, 10 Oct 2018 02:34:02 +0000
Subject: [PATCH] [aarch64] make aws sdk work on aarch64
`bazel build //tensorflow/tools/pip_package:build_pip_package'
requires AWS SDK by default. but platform part was not built
on aarch64
---
tensorflow/BUILD | 6 ++++++
third_party/aws/BUILD.bazel | 3 +++
2 files changed, 9 insertions(+)
diff --git a/tensorflow/BUILD b/tensorflow/BUILD
index 9b62a504525d..8486922e00b0 100644
--- a/tensorflow/BUILD
+++ b/tensorflow/BUILD
@@ -163,6 +163,12 @@ config_setting(
visibility = ["//visibility:public"],
)
+config_setting(
+ name = "linux_aarch64",
+ values = {"cpu": "aarch64"},
+ visibility = ["//visibility:public"],
+)
+
config_setting(
name = "linux_x86_64",
values = {"cpu": "k8"},
diff --git a/third_party/aws/BUILD.bazel b/third_party/aws/BUILD.bazel
index 5426f79e4650..66baa8fdf3b7 100644
--- a/third_party/aws/BUILD.bazel
+++ b/third_party/aws/BUILD.bazel
@@ -12,6 +12,9 @@ load("@org_tensorflow//third_party:common.bzl", "template_rule")
cc_library(
name = "aws",
srcs = select({
+ "@org_tensorflow//tensorflow:linux_aarch64": glob([
+ "aws-cpp-sdk-core/source/platform/linux-shared/*.cpp",
+ ]),
"@org_tensorflow//tensorflow:linux_x86_64": glob([
"aws-cpp-sdk-core/source/platform/linux-shared/*.cpp",
]),