Hello community,
here is the log from the commit of package python-torch for openSUSE:Factory checked in at 2020-06-30 21:56:34
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/python-torch (Old)
and /work/SRC/openSUSE:Factory/.python-torch.new.3060 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-torch"
Tue Jun 30 21:56:34 2020 rev:4 rq:817740 version:1.5.1
Changes:
--------
--- /work/SRC/openSUSE:Factory/python-torch/python-torch.changes 2020-05-01 11:15:56.756185758 +0200
+++ /work/SRC/openSUSE:Factory/.python-torch.new.3060/python-torch.changes 2020-06-30 21:56:38.490791599 +0200
@@ -1,0 +2,36 @@
+Tue Jun 23 15:28:57 UTC 2020 - Christian Goll
+
+- updated to new stable release 1.5.1 which has the following changes:
+ This release includes several major new API additions and improvements. These
+ include new APIs for autograd allowing for easy computation of hessians and
+ jacobians, a significant update to the C++ frontend, ‘channels last’ memory
+ format for more performant computer vision models, a stable release of the
+ distributed RPC framework used for model parallel training, and a new API
+ that allows for the creation of Custom C++ Classes that was inspired by
+ PyBind. Additionally torch_xla 1.5 is now available and tested with the
+ PyTorch 1.5 release providing a mature Cloud TPU experience.
+ * see releases.html for detailed information
+- added patches:
+ * fix-call-of-onnxInitGraph.patch for API mismatch in onnx
+ * fix-mov-operand-for-gcc.patch for aarch64 operands
+
+- removed sources:
+ * cpuinfo-89fe1695edf9ee14c22f815f24bac45577a4f135.tar.gz
+ * gloo-7c541247a6fa49e5938e304ab93b6da661823d0f.tar.gz
+ * onnx-fea8568cac61a482ed208748fdc0e1a8e47f62f5.tar.gz
+ * psimd-90a938f30ba414ada2f4b00674ee9631d7d85e19.tar.gz
+ * pthreadpool-13da0b4c21d17f94150713366420baaf1b5a46f4.tar.gz
+- added sources:
+ * cpuinfo-0e6bde92b343c5fbcfe34ecd41abf9515d54b4a7.tar.gz
+ * gloo-113bde13035594cafdca247be953610b53026553.tar.gz
+ * onnx-9fdae4c68960a2d44cd1cc871c74a6a9d469fa1f.tar.gz
+ * psimd-10b4ffc6ea9e2e11668f86969586f88bc82aaefa.tar.gz
+ * pthreadpool-d465747660ecf9ebbaddf8c3db37e4a13d0c9103.tar.gz
+
+-------------------------------------------------------------------
+Tue Jun 23 09:25:06 UTC 2020 - Christian Goll
+
+- updated to bugfix release 1.4.1 and added _multibuild file so
+ that cuda versions can be built on the command line
+
+-------------------------------------------------------------------
Old:
----
cpuinfo-89fe1695edf9ee14c22f815f24bac45577a4f135.tar.gz
gloo-7c541247a6fa49e5938e304ab93b6da661823d0f.tar.gz
onnx-fea8568cac61a482ed208748fdc0e1a8e47f62f5.tar.gz
psimd-90a938f30ba414ada2f4b00674ee9631d7d85e19.tar.gz
pthreadpool-13da0b4c21d17f94150713366420baaf1b5a46f4.tar.gz
pytorch-1.4.0.tar.gz
New:
----
XNNPACK-7493bfb9d412e59529bcbced6a902d44cfa8ea1c.tar.gz
_multibuild
cpuinfo-0e6bde92b343c5fbcfe34ecd41abf9515d54b4a7.tar.gz
fix-call-of-onnxInitGraph.patch
fix-mov-operand-for-gcc.patch
gloo-113bde13035594cafdca247be953610b53026553.tar.gz
onnx-9fdae4c68960a2d44cd1cc871c74a6a9d469fa1f.tar.gz
psimd-10b4ffc6ea9e2e11668f86969586f88bc82aaefa.tar.gz
pthreadpool-d465747660ecf9ebbaddf8c3db37e4a13d0c9103.tar.gz
pytorch-1.5.1.tar.gz
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Other differences:
------------------
++++++ python-torch.spec ++++++
--- /var/tmp/diff_new_pack.a452ro/_old 2020-06-30 21:56:39.978796203 +0200
+++ /var/tmp/diff_new_pack.a452ro/_new 2020-06-30 21:56:39.982796216 +0200
@@ -21,8 +21,19 @@
%define skip_python2 1
%define pname torch
+%global flavor @BUILD_FLAVOR@%{nil}
+
+%if "%{flavor}" == "standard"
+%bcond_with cuda
+%endif
+
+%if "%{flavor}" == "cuda-10-2"
+%bcond_without cuda
+%define cudaver 10-2
+%endif
+
Name: python-torch
-Version: 1.4.0
+Version: 1.5.1
Release: 0
Summary: Deep learning framework aka pytorch/Caffe2
License: BSD-2-Clause AND BSD-3-Clause AND MIT AND Zlib AND BSL-1.0 AND Apache-2.0
@@ -31,21 +42,21 @@
Source0: https://github.com/pytorch/pytorch/archive/v%{version}.tar.gz#/%{srcname}-%{version}.tar.gz
Source1: releases.html
#License10: BSD-3-Clause
-Source10: https://github.com/facebookincubator/gloo/archive/7c541247a6fa49e5938e304ab9...
+Source10: https://github.com/facebookincubator/gloo/archive/113bde13035594cafdca247be9...
#License12: BSD-2-Clause
-Source12: https://github.com/pytorch/cpuinfo/archive/89fe1695edf9ee14c22f815f24bac4557...
+Source12: https://github.com/pytorch/cpuinfo/archive/0e6bde92b343c5fbcfe34ecd41abf9515...
#License13: BSL-1.0
Source13: https://github.com/zdevito/sleef/archive/7f523de651585fe25cade462efccca647dc...
#License14: BSD-3-Clause
Source14: https://github.com/pybind/pybind11/archive/25abf7efba0b2990f5a6dfb0a31bc65c0...
# License15: MIT
-Source15: https://github.com/onnx/onnx/archive/fea8568cac61a482ed208748fdc0e1a8e47f62f...
+Source15: https://github.com/onnx/onnx/archive/9fdae4c68960a2d44cd1cc871c74a6a9d469fa1...
#License16: BSD-2-Clause
-Source16: https://github.com/Maratyszcza/pthreadpool/archive/13da0b4c21d17f94150713366...
+Source16: https://github.com/Maratyszcza/pthreadpool/archive/d465747660ecf9ebbaddf8c3d...
# License17: MIT
Source17: https://github.com/Maratyszcza/FXdiv/archive/b742d1143724d646cd0f914646f1240...
# License18: MIT
-Source18: https://github.com/Maratyszcza/psimd/archive/90a938f30ba414ada2f4b00674ee963...
+Source18: https://github.com/Maratyszcza/psimd/archive/10b4ffc6ea9e2e11668f86969586f88...
# License19: MIT
Source19: https://github.com/Maratyszcza/FP16/archive/febbb1c163726b5db24bed55cc9dc425...
#License20: Apache-2.0
@@ -54,9 +65,13 @@
Source21: https://github.com/houseroad/foxi/archive/97fe555430a857581b9b826ecd955e4f0a...
# License22: MIT
Source22: https://github.com/pytorch/QNNPACK/archive/7d2a4e9931a82adc3814275b6219a03e2...
+# License: BSD-3-Clause
+Source23: https://github.com/google/XNNPACK/archive/7493bfb9d412e59529bcbced6a902d44cf...
Patch0: removed-peachpy-depedency.patch
Patch1: skip-third-party-check.patch
+Patch2: fix-call-of-onnxInitGraph.patch
+Patch3: fix-mov-operand-for-gcc.patch
# A python call to cmake fails with a return code of 1 on this arch, disable it for now.
ExcludeArch: %ix86
@@ -96,6 +111,20 @@
BuildRequires: protobuf-c
BuildRequires: protobuf-devel
BuildRequires: snappy-devel
+%if %{with cuda}
+BuildRequires: cuda-compiler-%cudaver
+BuildRequires: cuda-cudart-dev-%cudaver
+BuildRequires: cuda-libraries-dev-%cudaver
+BuildRequires: cuda-misc-headers-%cudaver
+BuildRequires: cuda-nsight-%cudaver
+BuildRequires: cuda-toolkit-%cudaver
+%if 0%{?suse_version} > 1500
+BuildRequires: gcc7
+BuildRequires: gcc7-c++
+%endif
+BuildRequires: libcudnn7-devel
+BuildRequires: libnccl-devel
+%endif
BuildRoot: %{_tmppath}/%{name}-%{version}-build
Requires: python-future
Requires: python-leveldb
@@ -106,6 +135,10 @@
Provides: python-caffe2 = %version
Provides: python-pytorch = %version
+%if "%flavor" == ""
+ExclusiveArch: do_not_build
+%endif
+
%python_subpackages
%description
@@ -176,39 +209,42 @@
%make_depend_src %{SOURCE20} gemmlowp/gemmlowp
%make_depend_src %{SOURCE21}
%make_depend_src %{SOURCE22}
-# link system eigen to right place
-rmdir eigen
-ln -s /usr/include/eigen3 eigen
-cd ..
+%make_depend_src %{SOURCE23}
%build
-#export CC=gcc-7
-#export CXX=g++-7
-export USE_NNPACK=0
-export USE_CUDNN=0
-export USE_TEST=0
-export USE_LEVELDB=ON
-export USE_LMDB=ON
-export USE_FBGEMM=0
-export USE_SYSTEM_LIB="tbb,fbgemm,fbgemm/third_party/asmjit,onnx/third_party/benchmark"
-export BUILD_CUSTOM_PROTOBUF=OFF
-export BUILD_TEST=0
+%define buildvars \
+ export USE_NNPACK=OFF \
+ %if %{with cuda} \
+ export USE_CUDNN=ON \
+ export USE_SYSTEM_NCCL=ON \
+ export PATH="/usr/local/cuda-10.1/bin:$PATH" \
+ export CPLUS_INCLUDE_PATH="/usr/local/cuda-10.1/include" \
+ export C_INCLUDE_PATH="/usr/local/cuda-10.1/include" \
+ export LD_LIBRARY_PATH="/usr/local/cuda-10.1/lib" \
+ export NCCL_INCLUDE_DIR="/usr/include/" \
+ %if 0%{?suse_version} > 1500 \
+ export CC=gcc-7 \
+ export CXX=g++-7 \
+ %endif \
+ %else \
+ export USE_CUDNN=OFF \
+ %endif \
+ export USE_TEST=OFF \
+ export USE_LEVELDB=ON \
+ export USE_LMDB=ON \
+ export USE_FBGEMM=OFF \
+ export USE_SYSTEM_LIB="tbb,fbgemm,fbgemm/third_party/asmjit,onnx/third_party/benchmark" \
+ export USE_SYSTEM_EIGEN_INSTALL=ON \
+ export BUILD_CUSTOM_PROTOBUF=OFF \
+ export BUILD_TEST=OFF \
+ export MAX_JOBS=%{?jobs} \
+
+%buildvars
%limit_build -m 2000
-export MAX_JOBS=%{?jobs}
%python_build
%install
-export USE_NNPACK=0
-export USE_CUDNN=0
-export USE_TEST=0
-export USE_LEVELDB=ON
-export USE_LMDB=ON
-export USE_FBGEMM=0
-export USE_SYSTEM_LIB="tbb,fbgemm,fbgemm/third_party/asmjit,onnx/third_party/benchmark"
-export BUILD_CUSTOM_PROTOBUF=OFF
-export BUILD_TEST=1
-%limit_build -m 2000
-export MAX_JOBS=%{?jobs}
+%buildvars
%python_install
%python_expand %fdupes %{buildroot}%{$python_sitearch}
++++++ _multibuild ++++++
<multibuild>
<package>standard</package>
</multibuild>
++++++ cpuinfo-89fe1695edf9ee14c22f815f24bac45577a4f135.tar.gz -> cpuinfo-0e6bde92b343c5fbcfe34ecd41abf9515d54b4a7.tar.gz ++++++
++++ 2375 lines of diff (skipped)
++++++ fix-call-of-onnxInitGraph.patch ++++++
From 872d5e67e06e8fbde32d31dd91e07fc137677d9d Mon Sep 17 00:00:00 2001
From: Christian Goll
Date: Tue, 23 Jun 2020 16:55:25 +0200
Subject: [PATCH] fix call of onnxInitGraph() Removed max_seq_size_ as
onnxInitGraph() does not need this argument any more
---
caffe2/opt/onnxifi_op.h | 1 -
1 file changed, 1 deletion(-)
diff --git a/caffe2/opt/onnxifi_op.h b/caffe2/opt/onnxifi_op.h
index c45ad7c9f8..b5df81ef03 100644
--- a/caffe2/opt/onnxifi_op.h
+++ b/caffe2/opt/onnxifi_op.h
@@ -256,7 +256,6 @@ class OnnxifiOp final : public Operator<Context> {
weight_descs.size(),
weight_descs.data(),
&graph,
- static_cast(max_seq_size_),
defered_blob_reader),
ONNXIFI_STATUS_SUCCESS);
--
2.25.0
++++++ fix-mov-operand-for-gcc.patch ++++++
From 5c318611978a7f9add5b889ad70e4af5b10a9c00 Mon Sep 17 00:00:00 2001
From: Edward Swarthout
Date: Sat, 8 Feb 2020 12:53:07 -0600
Subject: [PATCH] QNNPACK: q8gemm/8x8-dq-aarch64-neon.S fix mov operand for gcc
Unlike clang, GNU assembler does not support 4s on neon mov, so use 16b.
Fixes:
8x8-dq-aarch64-neon.S: Assembler messages:
8x8-dq-aarch64-neon.S:657:
Error: operand mismatch -- `mov V8.4s,V9.4s'
Info: did you mean this?
Info: mov v8.8b, v9.8b
Info: other valid variant(s):
Info: mov v8.16b, v9.16b
Signed-off-by: Edward Swarthout
---
.../cpu/qnnpack/src/q8gemm/8x8-dq-aarch64-neon.S | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/aten/src/ATen/native/quantized/cpu/qnnpack/src/q8gemm/8x8-dq-aarch64-neon.S b/aten/src/ATen/native/quantized/cpu/qnnpack/src/q8gemm/8x8-dq-aarch64-neon.S
index 7dc861110186a..60ad8d1d4b340 100644
--- a/aten/src/ATen/native/quantized/cpu/qnnpack/src/q8gemm/8x8-dq-aarch64-neon.S
+++ b/aten/src/ATen/native/quantized/cpu/qnnpack/src/q8gemm/8x8-dq-aarch64-neon.S
@@ -659,14 +659,14 @@ BEGIN_FUNCTION pytorch_q8gemm_dq_ukernel_8x8__aarch64_neon
SUB x1, x1, 4
- MOV V8.4s, V9.4s
- MOV v10.4s, v11.4s
- MOV v12.4s, V13.4s
- MOV V14.4s, V15.4s
- MOV V16.4s, V17.4s
- MOV V18.4s, V19.4s
- MOV V20.4s, V21.4s
- MOV V22.4s, V23.4s
+ MOV V8.16b, V9.16b
+ MOV v10.16b, v11.16b
+ MOV v12.16b, V13.16b
+ MOV V14.16b, V15.16b
+ MOV V16.16b, V17.16b
+ MOV V18.16b, V19.16b
+ MOV V20.16b, V21.16b
+ MOV V22.16b, V23.16b
5:
CMP x1, 2
++++++ gloo-7c541247a6fa49e5938e304ab93b6da661823d0f.tar.gz -> gloo-113bde13035594cafdca247be953610b53026553.tar.gz ++++++
++++ 2209 lines of diff (skipped)
++++++ onnx-fea8568cac61a482ed208748fdc0e1a8e47f62f5.tar.gz -> onnx-9fdae4c68960a2d44cd1cc871c74a6a9d469fa1f.tar.gz ++++++
++++ 9774 lines of diff (skipped)
++++++ psimd-90a938f30ba414ada2f4b00674ee9631d7d85e19.tar.gz -> psimd-10b4ffc6ea9e2e11668f86969586f88bc82aaefa.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/psimd-90a938f30ba414ada2f4b00674ee9631d7d85e19/LICENSE new/psimd-10b4ffc6ea9e2e11668f86969586f88bc82aaefa/LICENSE
--- old/psimd-90a938f30ba414ada2f4b00674ee9631d7d85e19/LICENSE 2018-09-06 18:11:46.000000000 +0200
+++ new/psimd-10b4ffc6ea9e2e11668f86969586f88bc82aaefa/LICENSE 2019-12-26 20:22:39.000000000 +0100
@@ -2,6 +2,7 @@
Copyright (c) 2017 Facebook Inc.
Copyright (c) 2014-2017 Georgia Institute of Technology
+Copyright 2019 Google LLC
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/psimd-90a938f30ba414ada2f4b00674ee9631d7d85e19/include/psimd.h new/psimd-10b4ffc6ea9e2e11668f86969586f88bc82aaefa/include/psimd.h
--- old/psimd-90a938f30ba414ada2f4b00674ee9631d7d85e19/include/psimd.h 2018-09-06 18:11:46.000000000 +0200
+++ new/psimd-10b4ffc6ea9e2e11668f86969586f88bc82aaefa/include/psimd.h 2019-12-26 20:22:39.000000000 +0100
@@ -295,20 +295,84 @@
return *((const psimd_f32*) address);
}
+ PSIMD_INTRINSIC psimd_s8 psimd_load_splat_s8(const void* address) {
+ return psimd_splat_s8(*((const int8_t*) address));
+ }
+
+ PSIMD_INTRINSIC psimd_u8 psimd_load_splat_u8(const void* address) {
+ return psimd_splat_u8(*((const uint8_t*) address));
+ }
+
+ PSIMD_INTRINSIC psimd_s16 psimd_load_splat_s16(const void* address) {
+ return psimd_splat_s16(*((const int16_t*) address));
+ }
+
+ PSIMD_INTRINSIC psimd_u16 psimd_load_splat_u16(const void* address) {
+ return psimd_splat_u16(*((const uint16_t*) address));
+ }
+
+ PSIMD_INTRINSIC psimd_s32 psimd_load_splat_s32(const void* address) {
+ return psimd_splat_s32(*((const int32_t*) address));
+ }
+
+ PSIMD_INTRINSIC psimd_u32 psimd_load_splat_u32(const void* address) {
+ return psimd_splat_u32(*((const uint32_t*) address));
+ }
+
+ PSIMD_INTRINSIC psimd_f32 psimd_load_splat_f32(const void* address) {
+ return psimd_splat_f32(*((const float*) address));
+ }
+
+ PSIMD_INTRINSIC psimd_s32 psimd_load1_s32(const void* address) {
+ return (psimd_s32) { *((const int32_t*) address), 0, 0, 0 };
+ }
+
+ PSIMD_INTRINSIC psimd_u32 psimd_load1_u32(const void* address) {
+ return (psimd_u32) { *((const uint32_t*) address), 0, 0, 0 };
+ }
+
PSIMD_INTRINSIC psimd_f32 psimd_load1_f32(const void* address) {
return (psimd_f32) { *((const float*) address), 0.0f, 0.0f, 0.0f };
}
+ PSIMD_INTRINSIC psimd_s32 psimd_load2_s32(const void* address) {
+ const int32_t* address_s32 = (const int32_t*) address;
+ return (psimd_s32) { address_s32[0], address_s32[1], 0, 0 };
+ }
+
+ PSIMD_INTRINSIC psimd_u32 psimd_load2_u32(const void* address) {
+ const uint32_t* address_u32 = (const uint32_t*) address;
+ return (psimd_u32) { address_u32[0], address_u32[1], 0, 0 };
+ }
+
PSIMD_INTRINSIC psimd_f32 psimd_load2_f32(const void* address) {
const float* address_f32 = (const float*) address;
return (psimd_f32) { address_f32[0], address_f32[1], 0.0f, 0.0f };
}
+ PSIMD_INTRINSIC psimd_s32 psimd_load3_s32(const void* address) {
+ const int32_t* address_s32 = (const int32_t*) address;
+ return (psimd_s32) { address_s32[0], address_s32[1], address_s32[2], 0 };
+ }
+
+ PSIMD_INTRINSIC psimd_u32 psimd_load3_u32(const void* address) {
+ const uint32_t* address_u32 = (const uint32_t*) address;
+ return (psimd_u32) { address_u32[0], address_u32[1], address_u32[2], 0 };
+ }
+
PSIMD_INTRINSIC psimd_f32 psimd_load3_f32(const void* address) {
const float* address_f32 = (const float*) address;
return (psimd_f32) { address_f32[0], address_f32[1], address_f32[2], 0.0f };
}
+ PSIMD_INTRINSIC psimd_s32 psimd_load4_s32(const void* address) {
+ return psimd_load_s32(address);
+ }
+
+ PSIMD_INTRINSIC psimd_u32 psimd_load4_u32(const void* address) {
+ return psimd_load_u32(address);
+ }
+
PSIMD_INTRINSIC psimd_f32 psimd_load4_f32(const void* address) {
return psimd_load_f32(address);
}
@@ -403,16 +467,50 @@
*((psimd_f32*) address) = value;
}
+ PSIMD_INTRINSIC void psimd_store1_s32(void* address, psimd_s32 value) {
+ *((int32_t*) address) = value[0];
+ }
+
+ PSIMD_INTRINSIC void psimd_store1_u32(void* address, psimd_u32 value) {
+ *((uint32_t*) address) = value[0];
+ }
+
PSIMD_INTRINSIC void psimd_store1_f32(void* address, psimd_f32 value) {
*((float*) address) = value[0];
}
+ PSIMD_INTRINSIC void psimd_store2_s32(void* address, psimd_s32 value) {
+ int32_t* address_s32 = (int32_t*) address;
+ address_s32[0] = value[0];
+ address_s32[1] = value[1];
+ }
+
+ PSIMD_INTRINSIC void psimd_store2_u32(void* address, psimd_u32 value) {
+ uint32_t* address_u32 = (uint32_t*) address;
+ address_u32[0] = value[0];
+ address_u32[1] = value[1];
+ }
+
PSIMD_INTRINSIC void psimd_store2_f32(void* address, psimd_f32 value) {
float* address_f32 = (float*) address;
address_f32[0] = value[0];
address_f32[1] = value[1];
}
+ PSIMD_INTRINSIC void psimd_store3_s32(void* address, psimd_s32 value) {
+ int32_t* address_s32 = (int32_t*) address;
+ address_s32[0] = value[0];
+ address_s32[1] = value[1];
+ address_s32[2] = value[2];
+ }
+
+ PSIMD_INTRINSIC void psimd_store3_u32(void* address, psimd_u32 value) {
+ uint32_t* address_u32 = (uint32_t*) address;
+ address_u32[0] = value[0];
+ address_u32[1] = value[1];
+ address_u32[2] = value[2];
+ }
+
PSIMD_INTRINSIC void psimd_store3_f32(void* address, psimd_f32 value) {
float* address_f32 = (float*) address;
address_f32[0] = value[0];
@@ -420,6 +518,14 @@
address_f32[2] = value[2];
}
+ PSIMD_INTRINSIC void psimd_store4_s32(void* address, psimd_s32 value) {
+ psimd_store_s32(address, value);
+ }
+
+ PSIMD_INTRINSIC void psimd_store4_u32(void* address, psimd_u32 value) {
+ psimd_store_u32(address, value);
+ }
+
PSIMD_INTRINSIC void psimd_store4_f32(void* address, psimd_f32 value) {
psimd_store_f32(address, value);
}
@@ -553,65 +659,103 @@
#endif
}
+ /* Quasi-Fused Multiply-Add */
+ PSIMD_INTRINSIC psimd_f32 psimd_qfma_f32(psimd_f32 a, psimd_f32 b, psimd_f32 c) {
+ #if defined(__aarch64__) || defined(__ARM_NEON__) && defined(__ARM_FEATURE_FMA)
+ return (psimd_f32) vfmaq_f32((float32x4_t) a, (float32x4_t) b, (float32x4_t) c);
+ #elif (defined(__x86_64__) || defined(__i386__) || defined(__i686__)) && defined(__FMA__)
+ return (psimd_f32) _mm_fmadd_ps((__m128) c, (__m128) a, (__m128) b);
+ #elif (defined(__x86_64__) || defined(__i386__) || defined(__i686__)) && defined(__FMA4__)
+ return (psimd_f32) _mm_macc_ps((__m128) c, (__m128) a, (__m128) b);
+ #elif defined(__wasm__) && defined(__wasm_simd128__) && defined(__clang__)
+ return (psimd_f32) __builtin_wasm_qfma_f32x4(a, b, c);
+ #else
+ return a + b * c;
+ #endif
+ }
+
+ PSIMD_INTRINSIC psimd_f32 psimd_div_f32(psimd_f32 a, psimd_f32 b) {
+ return a / b;
+ }
+
/* Vector and */
PSIMD_INTRINSIC psimd_f32 psimd_andmask_f32(psimd_s32 mask, psimd_f32 v) {
return (psimd_f32) (mask & (psimd_s32) v);
}
+ /* Vector and-not */
+ PSIMD_INTRINSIC psimd_f32 psimd_andnotmask_f32(psimd_s32 mask, psimd_f32 v) {
+ return (psimd_f32) (~mask & (psimd_s32) v);
+ }
+
/* Vector blend */
PSIMD_INTRINSIC psimd_s8 psimd_blend_s8(psimd_s8 mask, psimd_s8 a, psimd_s8 b) {
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
return (psimd_s8) vbslq_s8((uint8x16_t) mask, (int8x16_t) a, (int8x16_t) b);
+ #elif defined(__wasm__) && defined(__wasm_simd128__) && defined(__clang__)
+ return (psimd_s8) __builtin_wasm_bitselect(a, b, mask);
#else
return (mask & a) | (~mask & b);
#endif
}
- PSIMD_INTRINSIC psimd_u8 psimd_blend_u8(psimd_u8 mask, psimd_u8 a, psimd_u8 b) {
+ PSIMD_INTRINSIC psimd_u8 psimd_blend_u8(psimd_s8 mask, psimd_u8 a, psimd_u8 b) {
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
return (psimd_u8) vbslq_u8((uint8x16_t) mask, (uint8x16_t) a, (uint8x16_t) b);
+ #elif defined(__wasm__) && defined(__wasm_simd128__) && defined(__clang__)
+ return (psimd_u8) __builtin_wasm_bitselect(a, b, mask);
#else
- return (mask & a) | (~mask & b);
+ return (psimd_u8) ((mask & (psimd_s8) a) | (~mask & (psimd_s8) b));
#endif
}
PSIMD_INTRINSIC psimd_s16 psimd_blend_s16(psimd_s16 mask, psimd_s16 a, psimd_s16 b) {
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
return (psimd_s16) vbslq_s16((uint16x8_t) mask, (int16x8_t) a, (int16x8_t) b);
+ #elif defined(__wasm__) && defined(__wasm_simd128__) && defined(__clang__)
+ return (psimd_s16) __builtin_wasm_bitselect(a, b, mask);
#else
return (mask & a) | (~mask & b);
#endif
}
- PSIMD_INTRINSIC psimd_u16 psimd_blend_u16(psimd_u16 mask, psimd_u16 a, psimd_u16 b) {
+ PSIMD_INTRINSIC psimd_u16 psimd_blend_u16(psimd_s16 mask, psimd_u16 a, psimd_u16 b) {
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
return (psimd_u16) vbslq_u16((uint16x8_t) mask, (uint16x8_t) a, (uint16x8_t) b);
+ #elif defined(__wasm__) && defined(__wasm_simd128__) && defined(__clang__)
+ return (psimd_u16) __builtin_wasm_bitselect(a, b, mask);
#else
- return (mask & a) | (~mask & b);
+ return (psimd_u16) ((mask & (psimd_s16) a) | (~mask & (psimd_s16) b));
#endif
}
PSIMD_INTRINSIC psimd_s32 psimd_blend_s32(psimd_s32 mask, psimd_s32 a, psimd_s32 b) {
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
return (psimd_s32) vbslq_s32((uint32x4_t) mask, (int32x4_t) a, (int32x4_t) b);
+ #elif defined(__wasm__) && defined(__wasm_simd128__) && defined(__clang__)
+ return (psimd_s32) __builtin_wasm_bitselect(a, b, mask);
#else
return (mask & a) | (~mask & b);
#endif
}
- PSIMD_INTRINSIC psimd_u32 psimd_blend_u32(psimd_u32 mask, psimd_u32 a, psimd_u32 b) {
+ PSIMD_INTRINSIC psimd_u32 psimd_blend_u32(psimd_s32 mask, psimd_u32 a, psimd_u32 b) {
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
return (psimd_u32) vbslq_u32((uint32x4_t) mask, (uint32x4_t) a, (uint32x4_t) b);
+ #elif defined(__wasm__) && defined(__wasm_simd128__) && defined(__clang__)
+ return (psimd_u32) __builtin_wasm_bitselect(a, b, mask);
#else
- return (mask & a) | (~mask & b);
+ return (psimd_u32) ((mask & (psimd_s32) a) | (~mask & (psimd_s32) b));
#endif
}
PSIMD_INTRINSIC psimd_f32 psimd_blend_f32(psimd_s32 mask, psimd_f32 a, psimd_f32 b) {
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
return (psimd_f32) vbslq_f32((uint32x4_t) mask, (float32x4_t) a, (float32x4_t) b);
+ #elif defined(__wasm__) && defined(__wasm_simd128__) && defined(__clang__)
+ return (psimd_f32) __builtin_wasm_bitselect(a, b, mask);
#else
- return (psimd_f32) psimd_blend_s32(mask, (psimd_s32) a, (psimd_s32) b);
+ return (psimd_f32) ((mask & (psimd_s32) a) | (~mask & (psimd_s32) b));
#endif
}
@@ -621,7 +765,7 @@
}
PSIMD_INTRINSIC psimd_u8 psimd_signblend_u8(psimd_s8 x, psimd_u8 a, psimd_u8 b) {
- return psimd_blend_u8((psimd_u8) (x >> psimd_splat_s8(7)), a, b);
+ return psimd_blend_u8((x >> psimd_splat_s8(7)), a, b);
}
PSIMD_INTRINSIC psimd_s16 psimd_signblend_s16(psimd_s16 x, psimd_s16 a, psimd_s16 b) {
@@ -629,7 +773,7 @@
}
PSIMD_INTRINSIC psimd_u16 psimd_signblend_u16(psimd_s16 x, psimd_u16 a, psimd_u16 b) {
- return psimd_blend_u16((psimd_u16) (x >> psimd_splat_s16(15)), a, b);
+ return psimd_blend_u16((x >> psimd_splat_s16(15)), a, b);
}
PSIMD_INTRINSIC psimd_s32 psimd_signblend_s32(psimd_s32 x, psimd_s32 a, psimd_s32 b) {
@@ -637,7 +781,7 @@
}
PSIMD_INTRINSIC psimd_u32 psimd_signblend_u32(psimd_s32 x, psimd_u32 a, psimd_u32 b) {
- return psimd_blend_u32((psimd_u32) (x >> psimd_splat_s32(31)), a, b);
+ return psimd_blend_u32((x >> psimd_splat_s32(31)), a, b);
}
PSIMD_INTRINSIC psimd_f32 psimd_signblend_f32(psimd_f32 x, psimd_f32 a, psimd_f32 b) {
@@ -648,7 +792,7 @@
/* Vector absolute value */
PSIMD_INTRINSIC psimd_f32 psimd_abs_f32(psimd_f32 v) {
const psimd_s32 mask = (psimd_s32) psimd_splat_f32(-0.0f);
- return (psimd_f32) ((psimd_s32) v & mask);
+ return (psimd_f32) ((psimd_s32) v & ~mask);
}
/* Vector negation */
@@ -709,6 +853,8 @@
PSIMD_INTRINSIC psimd_f32 psimd_max_f32(psimd_f32 a, psimd_f32 b) {
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
return (psimd_f32) vmaxq_f32((float32x4_t) a, (float32x4_t) b);
+ #elif defined(__wasm__) && defined(__wasm_simd128__) && defined(__clang__)
+ return __builtin_wasm_max_f32x4(a, b);
#else
return psimd_blend_f32(a > b, a, b);
#endif
@@ -766,6 +912,8 @@
PSIMD_INTRINSIC psimd_f32 psimd_min_f32(psimd_f32 a, psimd_f32 b) {
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
return (psimd_f32) vminq_f32((float32x4_t) a, (float32x4_t) b);
+ #elif defined(__wasm__) && defined(__wasm_simd128__) && defined(__clang__)
+ return __builtin_wasm_min_f32x4(a, b);
#else
return psimd_blend_f32(a < b, a, b);
#endif
++++++ pthreadpool-13da0b4c21d17f94150713366420baaf1b5a46f4.tar.gz -> pthreadpool-d465747660ecf9ebbaddf8c3db37e4a13d0c9103.tar.gz ++++++
++++ 5728 lines of diff (skipped)
++++++ pytorch-1.4.0.tar.gz -> pytorch-1.5.1.tar.gz ++++++
/work/SRC/openSUSE:Factory/python-torch/pytorch-1.4.0.tar.gz /work/SRC/openSUSE:Factory/.python-torch.new.3060/pytorch-1.5.1.tar.gz differ: char 13, line 1
++++++ releases.html ++++++
++++ 4843 lines (skipped)
++++ between /work/SRC/openSUSE:Factory/python-torch/releases.html
++++ and /work/SRC/openSUSE:Factory/.python-torch.new.3060/releases.html