Script 'mail_helper' called by obssrc Hello community, here is the log from the commit of package xsimd for openSUSE:Factory checked in at 2024-07-11 20:29:23 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/xsimd (Old) and /work/SRC/openSUSE:Factory/.xsimd.new.17339 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Package is "xsimd" Thu Jul 11 20:29:23 2024 rev:8 rq:1186600 version:13.0.0 Changes: -------- --- /work/SRC/openSUSE:Factory/xsimd/xsimd.changes 2024-05-29 19:35:35.335917186 +0200 +++ /work/SRC/openSUSE:Factory/.xsimd.new.17339/xsimd.changes 2024-07-11 20:29:31.776822818 +0200 @@ -1,0 +2,7 @@ +Thu Jul 4 20:21:59 UTC 2024 - Christophe Marin <christophe@krop.fr> + +- Add upstream changes fixing some krita issues: + * 0001-Fix-xsimd-available_architectures-.has-for-sve-and-r.patch + * 0002-Fix-detection-of-SSE-AVX-AVX512-when-they-are-explic.patch + +------------------------------------------------------------------- New: ---- 0001-Fix-xsimd-available_architectures-.has-for-sve-and-r.patch 0002-Fix-detection-of-SSE-AVX-AVX512-when-they-are-explic.patch BETA DEBUG BEGIN: New:- Add upstream changes fixing some krita issues: * 0001-Fix-xsimd-available_architectures-.has-for-sve-and-r.patch * 0002-Fix-detection-of-SSE-AVX-AVX512-when-they-are-explic.patch New: * 0001-Fix-xsimd-available_architectures-.has-for-sve-and-r.patch * 0002-Fix-detection-of-SSE-AVX-AVX512-when-they-are-explic.patch BETA DEBUG END: ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ xsimd.spec ++++++ --- /var/tmp/diff_new_pack.vLxnmb/_old 2024-07-11 20:29:34.844936200 +0200 +++ /var/tmp/diff_new_pack.vLxnmb/_new 2024-07-11 20:29:34.844936200 +0200 @@ -24,6 +24,9 @@ Group: Development/Libraries/C and C++ URL: https://xsimd.readthedocs.io/en/latest/ Source0: 
https://github.com/xtensor-stack/xsimd/archive/refs/tags/%{version}.tar.gz#/%{name}-%{version}.tar.gz +# PATCH-FIX-UPSTREAM -- https://mail.kde.org/pipermail/distributions/2024-July/001511.html +Patch0: 0001-Fix-xsimd-available_architectures-.has-for-sve-and-r.patch +Patch1: 0002-Fix-detection-of-SSE-AVX-AVX512-when-they-are-explic.patch BuildRequires: cmake BuildRequires: doctest-devel BuildRequires: doxygen ++++++ 0001-Fix-xsimd-available_architectures-.has-for-sve-and-r.patch ++++++ From 4f91d4a44eb9476572cf49a96cbe658eb871f47c Mon Sep 17 00:00:00 2001 From: Dmitry Kazakov <dimula73@gmail.com> Date: Fri, 14 Jun 2024 10:19:55 +0200 Subject: [PATCH 1/2] Fix xsimd::available_architectures().has() for sve and rvv archs Ideally the patch CPU detection code should also check if the length of SVE and RVV is actually supported by the current CPU implementation (i.e. ZCR_Elx.LEN register for SVE and something else for RVV), but I don't have such CPUs/emulators handy, so I cannot add such checks. Given that xsimd::available_architectures().has() is a new feature of XSIMD13 and the length check has never been present in XSIMD, this bug is not a regression at least. 
The patch also adds a unittest that reproduces the error the patch fixes --- include/xsimd/config/xsimd_cpuid.hpp | 12 ++++++++++-- test/test_arch.cpp | 15 +++++++++++++++ 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/include/xsimd/config/xsimd_cpuid.hpp b/include/xsimd/config/xsimd_cpuid.hpp index f22089b..30a9da2 100644 --- a/include/xsimd/config/xsimd_cpuid.hpp +++ b/include/xsimd/config/xsimd_cpuid.hpp @@ -42,6 +42,10 @@ namespace xsimd #define ARCH_FIELD_EX(arch, field_name) \ unsigned field_name; \ XSIMD_INLINE bool has(::xsimd::arch) const { return this->field_name; } + +#define ARCH_FIELD_EX_REUSE(arch, field_name) \ + XSIMD_INLINE bool has(::xsimd::arch) const { return this->field_name; } + #define ARCH_FIELD(name) ARCH_FIELD_EX(name, name) ARCH_FIELD(sse2) @@ -72,8 +76,12 @@ namespace xsimd ARCH_FIELD(neon) ARCH_FIELD(neon64) ARCH_FIELD_EX(i8mm<::xsimd::neon64>, i8mm_neon64) - ARCH_FIELD(sve) - ARCH_FIELD(rvv) + ARCH_FIELD_EX(detail::sve<512>, sve) + ARCH_FIELD_EX_REUSE(detail::sve<256>, sve) + ARCH_FIELD_EX_REUSE(detail::sve<128>, sve) + ARCH_FIELD_EX(detail::rvv<512>, rvv) + ARCH_FIELD_EX_REUSE(detail::rvv<256>, rvv) + ARCH_FIELD_EX_REUSE(detail::rvv<128>, rvv) ARCH_FIELD(wasm) #undef ARCH_FIELD diff --git a/test/test_arch.cpp b/test/test_arch.cpp index b420733..f1f50d5 100644 --- a/test/test_arch.cpp +++ b/test/test_arch.cpp @@ -38,6 +38,16 @@ struct check_supported } }; +struct check_cpu_has_intruction_set +{ + template <class Arch> + void operator()(Arch arch) const + { + static_assert(std::is_same<decltype(xsimd::available_architectures().has(arch)), bool>::value, + "cannot test instruction set availability on CPU"); + } +}; + struct check_available { template <class Arch> @@ -71,6 +81,11 @@ TEST_CASE("[multi arch support]") xsimd::supported_architectures::for_each(check_supported {}); } + SUBCASE("xsimd::available_architectures::has") + { + xsimd::all_architectures::for_each(check_cpu_has_intruction_set {}); + } + 
SUBCASE("xsimd::default_arch::name") { constexpr char const* name = xsimd::default_arch::name(); -- 2.45.2 ++++++ 0002-Fix-detection-of-SSE-AVX-AVX512-when-they-are-explic.patch ++++++ From c2974c874e14557490eab76d2eebf9f8b9eb88f1 Mon Sep 17 00:00:00 2001 From: Dmitry Kazakov <dimula73@gmail.com> Date: Tue, 28 May 2024 22:21:08 +0200 Subject: [PATCH 2/2] Fix detection of SSE/AVX/AVX512 when they are explicitly disabled by OS Some CPU vulnerability mitigations may disable AVX functionality on the hardware level via the XCR0 register. We should check that manually to verify that OS actually allows us to use this feature. See https://bugs.kde.org/show_bug.cgi?id=484622 Fix #1025 --- include/xsimd/config/xsimd_cpuid.hpp | 91 ++++++++++++++++++++++------ 1 file changed, 72 insertions(+), 19 deletions(-) diff --git a/include/xsimd/config/xsimd_cpuid.hpp b/include/xsimd/config/xsimd_cpuid.hpp index 30a9da2..8021fce 100644 --- a/include/xsimd/config/xsimd_cpuid.hpp +++ b/include/xsimd/config/xsimd_cpuid.hpp @@ -122,6 +122,35 @@ namespace xsimd #endif #elif defined(__x86_64__) || defined(__i386__) || defined(_M_AMD64) || defined(_M_IX86) + + auto get_xcr0_low = []() noexcept + { + uint32_t xcr0; + +#if defined(_MSC_VER) && _MSC_VER >= 1400 + + xcr0 = (uint32_t)_xgetbv(0); + +#elif defined(__GNUC__) + + __asm__( + "xorl %%ecx, %%ecx\n" + "xgetbv\n" + : "=a"(xcr0) + : +#if defined(__i386__) + : "ecx", "edx" +#else + : "rcx", "rdx" +#endif + ); + +#else /* _MSC_VER < 1400 */ +#error "_MSC_VER < 1400 is not supported" +#endif /* _MSC_VER && _MSC_VER >= 1400 */ + return xcr0; + }; + auto get_cpuid = [](int reg[4], int level, int count = 0) noexcept { @@ -156,19 +185,43 @@ namespace xsimd get_cpuid(regs1, 0x1); - sse2 = regs1[3] >> 26 & 1; - sse3 = regs1[2] >> 0 & 1; - ssse3 = regs1[2] >> 9 & 1; - sse4_1 = regs1[2] >> 19 & 1; - sse4_2 = regs1[2] >> 20 & 1; - fma3_sse42 = regs1[2] >> 12 & 1; + // OS can explicitly disable the usage of SSE/AVX extensions + // by setting an 
appropriate flag in XCR0 register + // + // https://docs.kernel.org/admin-guide/hw-vuln/gather_data_sampling.html + + unsigned sse_state_os_enabled = 1; + unsigned avx_state_os_enabled = 1; + unsigned avx512_state_os_enabled = 1; + + // OSXSAVE: A value of 1 indicates that the OS has set CR4.OSXSAVE[bit + // 18] to enable XSETBV/XGETBV instructions to access XCR0 and + // to support processor extended state management using + // XSAVE/XRSTOR. + bool osxsave = regs1[2] >> 27 & 1; + if (osxsave) + { + + uint32_t xcr0 = get_xcr0_low(); + + sse_state_os_enabled = xcr0 >> 1 & 1; + avx_state_os_enabled = xcr0 >> 2 & sse_state_os_enabled; + avx512_state_os_enabled = xcr0 >> 6 & avx_state_os_enabled; + } + + sse2 = regs1[3] >> 26 & sse_state_os_enabled; + sse3 = regs1[2] >> 0 & sse_state_os_enabled; + ssse3 = regs1[2] >> 9 & sse_state_os_enabled; + sse4_1 = regs1[2] >> 19 & sse_state_os_enabled; + sse4_2 = regs1[2] >> 20 & sse_state_os_enabled; + fma3_sse42 = regs1[2] >> 12 & sse_state_os_enabled; - avx = regs1[2] >> 28 & 1; + avx = regs1[2] >> 28 & avx_state_os_enabled; fma3_avx = avx && fma3_sse42; int regs8[4]; get_cpuid(regs8, 0x80000001); - fma4 = regs8[2] >> 16 & 1; + fma4 = regs8[2] >> 16 & avx_state_os_enabled; // sse4a = regs[2] >> 6 & 1; @@ -176,23 +229,23 @@ namespace xsimd int regs7[4]; get_cpuid(regs7, 0x7); - avx2 = regs7[1] >> 5 & 1; + avx2 = regs7[1] >> 5 & avx_state_os_enabled; int regs7a[4]; get_cpuid(regs7a, 0x7, 0x1); - avxvnni = regs7a[0] >> 4 & 1; + avxvnni = regs7a[0] >> 4 & avx_state_os_enabled; fma3_avx2 = avx2 && fma3_sse42; - avx512f = regs7[1] >> 16 & 1; - avx512cd = regs7[1] >> 28 & 1; - avx512dq = regs7[1] >> 17 & 1; - avx512bw = regs7[1] >> 30 & 1; - avx512er = regs7[1] >> 27 & 1; - avx512pf = regs7[1] >> 26 & 1; - avx512ifma = regs7[1] >> 21 & 1; - avx512vbmi = regs7[2] >> 1 & 1; - avx512vnni_bw = regs7[2] >> 11 & 1; + avx512f = regs7[1] >> 16 & avx512_state_os_enabled; + avx512cd = regs7[1] >> 28 & avx512_state_os_enabled; + avx512dq = 
regs7[1] >> 17 & avx512_state_os_enabled; + avx512bw = regs7[1] >> 30 & avx512_state_os_enabled; + avx512er = regs7[1] >> 27 & avx512_state_os_enabled; + avx512pf = regs7[1] >> 26 & avx512_state_os_enabled; + avx512ifma = regs7[1] >> 21 & avx512_state_os_enabled; + avx512vbmi = regs7[2] >> 1 & avx512_state_os_enabled; + avx512vnni_bw = regs7[2] >> 11 & avx512_state_os_enabled; avx512vnni_vbmi = avx512vbmi && avx512vnni_bw; #endif } -- 2.45.2