Hello community,
here is the log from the commit of package libatlas3 for openSUSE:Factory checked in at 2015-08-11 08:26:22
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/libatlas3 (Old)
and /work/SRC/openSUSE:Factory/.libatlas3.new (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "libatlas3"
Changes:
--------
--- /work/SRC/openSUSE:Factory/libatlas3/libatlas3.changes 2015-06-09 12:25:13.000000000 +0200
+++ /work/SRC/openSUSE:Factory/.libatlas3.new/libatlas3.changes 2015-08-11 08:26:23.000000000 +0200
@@ -1,0 +2,40 @@
+Sun Aug 9 13:01:20 UTC 2015 - p.drouand@gmail.com
+
+- Update to version 3.10.2
+ * Fixed all errataed bugs:
+ + Failure to init workspace can cause NaNs in SYRK
+ + Complex row-major Q-type factorizations produce bad TAU
+ + Failure to cast causes integer overflow on 64-bit platforms
+ + Missing IBM S390 assembly file
+ * Fixed Make.bin to have threaded latime built to do parallel cache flushing
+ * Extended extract string lengths as patched by SAGE folks
+ * Backported fixes & some arch support to configure framework, including
+ host of Itanium and UST1 stuff provided by SAGE folks
+ NOTE: 3.10.2 is terribly out of date, and was released only because the
+ threading rewrite is taking too long. If possible, you should use a
+ developer release after testing that it works for your particular
+ platform. In particular, developer releases are *much* faster for any
+ x86 that uses AVX or later SIMD ISA, or any machine with ncores >= 8.
+ The developer release also supports ARM architectures better (though
+ performance is not hugely better if you can get stable installed).
+
+-------------------------------------------------------------------
+Wed Aug 5 13:05:41 UTC 2015 - normand@linux.vnet.ibm.com
+
+- For ppc64/ppc64le architectures:
+ Add support of Power8 cpu
+ Do not support lvx files for ppc64le (temporarily)
+ In spec, create power8 archive files if they do not exist yet
+ POWER864VSX from POWER764VSX and
+ POWER864LEVSX from POWER764LEVSX
+ removed patch:
+ xlf.command.not.found.patch
+ libatlas.ppc64le-abiv2.patch
+ new patches:
+ issue_64.patch
+ atlas.3.10.1-ppc64le_abiv2.patch
+ atlas-new_archdef_for_ppc64le.patch
+ atlas.3.10.1-add_power8_cpu.patch
+ atlas.3.10.2-ppc64le_do_not_use_files_with_lvx.patch
+
+-------------------------------------------------------------------
Old:
----
atlas3.10.1.tar.bz2
libatlas.ppc64le-abiv2.patch
xlf.command.not.found.patch
New:
----
atlas-new_archdef_for_ppc64le.patch
atlas.3.10.1-add_power8_cpu.patch
atlas.3.10.1-ppc64le_abiv2.patch
atlas.3.10.2-ppc64le_do_not_use_files_with_lvx.patch
atlas3.10.2.tar.bz2
issue_64.patch
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Other differences:
------------------
++++++ libatlas3.spec ++++++
--- /var/tmp/diff_new_pack.sslbli/_old 2015-08-11 08:26:24.000000000 +0200
+++ /var/tmp/diff_new_pack.sslbli/_new 2015-08-11 08:26:24.000000000 +0200
@@ -19,7 +19,7 @@
%define enable_native_atlas 0
Name: libatlas3
-Version: 3.10.1
+Version: 3.10.2
Release: 0
Summary: Automatically Tuned Linear Algebra Software
License: BSD-3-Clause and GPL-2.0
@@ -33,12 +33,16 @@
Source5: %name-rpmlintrc
Patch0: atlas-suse-shared.patch
Patch1: atlas-hack.patch
-# for ppc64le
-# http://sourceforge.net/p/math-atlas/mailman/message/32471499/
+# for ppc64 ppc64le
+# https://bugzilla.redhat.com/show_bug.cgi?id=1080073#c40
Patch10: getdoublearr.stripwhite.patch
-Patch11: xlf.command.not.found.patch
+Patch11: issue_64.patch
Patch12: initialize_malloc_memory.invtrsm.wms.oct23.patch
-Patch13: libatlas.ppc64le-abiv2.patch
+Patch13: atlas.3.10.1-ppc64le_abiv2.patch
+Patch14: atlas-new_archdef_for_ppc64le.patch
+Patch15: atlas.3.10.1-add_power8_cpu.patch
+# for ppc64le tempo patch
+Patch16: atlas.3.10.2-ppc64le_do_not_use_files_with_lvx.patch
BuildRoot: %{_tmppath}/%{name}-%{version}-build
BuildRequires: gcc-fortran
@@ -194,16 +198,51 @@
%ifarch x86_64 i586
%patch1
%endif
-%ifarch ppc64le
+%ifarch ppc64le ppc64
%patch10 -p1
%patch11 -p1
%patch12 -p1
%patch13 -p1
+%patch14 -p1
+%patch15 -p1
+%endif
+%ifarch ppc64le
+%patch16 -p1
%endif
cp %{SOURCE2} doc
cp %{SOURCE3} %{SOURCE4} CONFIG/ARCHS/
+# if Power8 archdef do not exist yet
+# then use the Power7 one that may be the same.
+# do that for BE and LE:
+%ifarch ppc64 ppc64le
+P8archdef='POWER864VSX.tar.bz2'
+P7archdef='POWER764VSX.tar.bz2'
+if [ ! -e CONFIG/ARCHS/${P8archdef} ]; then
+ cp CONFIG/ARCHS/${P7archdef} /tmp/
+ pushd /tmp
+ tar -xjf ${P7archdef}
+ rm -rf POWER864VSX
+ mv POWER764VSX POWER864VSX
+ tar -cjf ${P8archdef} POWER864VSX
+ popd
+ mv /tmp/${P8archdef} CONFIG/ARCHS/
+fi
+P8archdef='POWER864LEVSX.tar.bz2'
+P7archdef='POWER764LEVSX.tar.bz2'
+if [ ! -e CONFIG/ARCHS/${P8archdef} ]; then
+ cp CONFIG/ARCHS/${P7archdef} /tmp/
+ pushd /tmp
+ tar -xjf ${P7archdef}
+ rm -rf POWER864LEVSX
+ mv POWER764LEVSX POWER864LEVSX
+ tar -cjf ${P8archdef} POWER864LEVSX
+ popd
+ mv /tmp/${P8archdef} CONFIG/ARCHS/
+fi
+%endif
+
%build
for type in %{types}; do
if [ "$type" = "base" ]; then
@@ -239,12 +278,6 @@
sed -i 's#-m64#-m32#g' Make.inc
%endif
-# use the provided archdef file for ppc64le
-# and force its usage in INSTFLAGS.
-%ifarch ppc64le
- sed -i 's#\(ARCH = POWER.64\)VSX#\1LEVSX#' Make.inc
- sed -i 's#\(INSTFLAGS =.*\) -a 0#\1 -a 1#' Make.inc
-%endif
make build %{?_smp_mflags}
cd lib
make shared %{?_smp_mflags}
++++++ atlas-new_archdef_for_ppc64le.patch ++++++
Subject: atlas new archdef for ppc64le
From: Michel Normand
Date: Sun, 13 Jun 2014 18:02:47 +0200
Need to define different archdef names
for ppc64 (that is Big Endian) and ppc64le (that is Little Endian).
This is already done upstream in atlas 3.11.30 with issue
https://sourceforge.net/p/math-atlas/patches/66/
Required at least as long as I need the bypass of
atlas.3.10.2-ppc64le_do_not_use_files_with_lvx.patch
Signed-off-by: Michel Normand
---
CONFIG/src/SpewMakeInc.c | 4 ++++
1 file changed, 4 insertions(+)
Index: ATLAS/CONFIG/src/SpewMakeInc.c
===================================================================
--- ATLAS.orig/CONFIG/src/SpewMakeInc.c
+++ ATLAS/CONFIG/src/SpewMakeInc.c
@@ -542,6 +542,10 @@ int main(int nargs, char **args)
fprintf(fpout, "# -------------------------------------------------\n");
fprintf(fpout, " ARCH = %s", machnam[mach]);
fprintf(fpout, "%d", ptrbits);
+ /* for ppc64le archi add 'LE' characters */
+ #if defined(__powerpc64__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+ fprintf(fpout, "%s", "LE");
+ #endif
if (ISAX)
fprintf(fpout, "%s", ISAXNAM[ISAX]);
if (!USEIEEE)
++++++ atlas.3.10.1-add_power8_cpu.patch ++++++
From: Michel Normand
Subject: atlas.3.10.1 add power8 cpu
Date: Thu, 18 Sep 2014 15:13:24 +0200
atlas.3.10.1 add Power8 cpu
tracked upstream by issue 67
https://sourceforge.net/p/math-atlas/patches/67/
Signed-off-by: Michel Normand
---
CONFIG/ARCHS/Make.ext | 7 +++++++
CONFIG/include/atlconf.h | 6 +++---
CONFIG/src/atlcomp.txt | 6 ++++++
CONFIG/src/backend/archinfo_aix.c | 2 ++
CONFIG/src/backend/archinfo_linux.c | 1 +
include/atlas_pca.h | 2 +-
6 files changed, 20 insertions(+), 4 deletions(-)
Index: ATLAS/CONFIG/ARCHS/Make.ext
===================================================================
--- ATLAS.orig/CONFIG/ARCHS/Make.ext
+++ ATLAS/CONFIG/ARCHS/Make.ext
@@ -33,6 +33,7 @@ files = AMD64K10h32SSE3.tar.bz2 AMD64K10
MIPSR1xK64.tar.bz2 Makefile P432SSE2.tar.bz2 P4E32SSE3.tar.bz2 \
P4E64SSE3.tar.bz2 PIII32SSE1.tar.bz2 POWER432.tar.bz2 \
POWER464.tar.bz2 POWER564.tar.bz2 POWER764VSX.tar.bz2 \
+ POWER864VSX.tar.bz2 \
PPCG432AltiVec.tar.bz2 PPCG532AltiVec.tar.bz2 PPCG564AltiVec.tar.bz2 \
PPRO32.tar.bz2 USIII32.tar.bz2 USIII64.tar.bz2 USIV32.tar.bz2 \
USIV64.tar.bz2 UST232.tar.bz2 UST264.tar.bz2 atlas_test1.1.3.tar.bz2 \
@@ -302,6 +303,12 @@ POWER764VSX.tar.bz2 : $(basdr)/POWER764V
/tmp/POWER764VSX.tar POWER764VSX
bzip2 /tmp/POWER764VSX.tar
mv /tmp/POWER764VSX.tar.bz2 ./.
+POWER864VSX.tar.bz2 : $(basdr)/POWER864VSX
+ - rm -f /tmp/POWER864VSX.tar /tmp/POWER864VSX.tar.bz2
+ cd $(basdr) ; tar --dereference --exclude 'CVS' -c -f \
+ /tmp/POWER864VSX.tar POWER864VSX
+ bzip2 /tmp/POWER864VSX.tar
+ mv /tmp/POWER864VSX.tar.bz2 ./.
IBMz1032.tar.bz2 : $(basdr)/IBMz1032
- rm -f /tmp/IBMz1032.tar /tmp/IBMz1032.tar.bz2
cd $(basdr) ; tar --dereference --exclude 'CVS' -c -f \
Index: ATLAS/CONFIG/include/atlconf.h
===================================================================
--- ATLAS.orig/CONFIG/include/atlconf.h
+++ ATLAS/CONFIG/include/atlconf.h
@@ -18,10 +18,10 @@ enum OSTYPE {OSOther=0, OSLinux, OSSunOS
enum ARCHFAM {AFOther=0, AFPPC, AFSPARC, AFALPHA, AFX86, AFIA64, AFMIPS,
AFARM, AFS390};
-#define NMACH 47
+#define NMACH 48
static char *machnam[NMACH] =
{"UNKNOWN", "POWER3", "POWER4", "POWER5", "PPCG4", "PPCG5",
- "POWER6", "POWER7", "IBMz9", "IBMz10", "IBMz196",
+ "POWER6", "POWER7", "POWER8", "IBMz9", "IBMz10", "IBMz196",
"x86x87", "x86SSE1", "x86SSE2", "x86SSE3",
"P5", "P5MMX", "PPRO", "PII", "PIII", "PM", "CoreSolo",
"CoreDuo", "Core2Solo", "Core2", "Corei1", "Corei2", "Atom", "P4", "P4E",
@@ -30,7 +30,7 @@ static char *machnam[NMACH] =
"USI", "USII", "USIII", "USIV", "UST2", "UnknownUS",
"MIPSR1xK", "MIPSICE9", "ARMv7"};
enum MACHTYPE {MACHOther, IbmPwr3, IbmPwr4, IbmPwr5, PPCG4, PPCG5,
- IbmPwr6, IbmPwr7,
+ IbmPwr6, IbmPwr7, IbmPwr8,
IbmZ9, IbmZ10, IbmZ196, /* s390(x) in Linux */
x86x87, x86SSE1, x86SSE2, x86SSE3, /* generic targets */
IntP5, IntP5MMX, IntPPRO, IntPII, IntPIII, IntPM, IntCoreS,
Index: ATLAS/CONFIG/src/atlcomp.txt
===================================================================
--- ATLAS.orig/CONFIG/src/atlcomp.txt
+++ ATLAS/CONFIG/src/atlcomp.txt
@@ -186,6 +186,10 @@ MACH=PPCG5 OS=ALL LVL=1000 COMPS=dmc,icc
'gcc' '-mpowerpc64 -maltivec -mabi=altivec -mcpu=970 -mtune=970 -O2'
MACH=PPCG5 OS=ALL LVL=1000 COMPS=skc
'gcc' '-mpowerpc64 -maltivec -mabi=altivec -mcpu=970 -mtune=970 -O2 -mvrsave'
+MACH=POWER8 OS=ALL LVL=1010 COMPS=icc,smc,dmc,skc,dkc,xcc,gcc
+ 'gcc' '-O2 -mvsx -mcpu=power8 -mtune=power8 -m64 -mvrsave -funroll-all-loops'
+MACH=POWER8 OS=ALL LVL=1010 COMPS=f77
+ 'gfortran' '-O2 -mvsx -mcpu=power8 -mtune=power8 -m64 -mvrsave -funroll-all-loops'
MACH=POWER7 OS=ALL LVL=1010 COMPS=icc,smc,dmc,skc,dkc,xcc,gcc
'gcc' '-O2 -mvsx -mcpu=power7 -mtune=power7 -m64 -mvrsave -funroll-all-loops'
MACH=POWER7 OS=ALL LVL=1010 COMPS=f77
@@ -206,6 +210,8 @@ MACH=POWER4 OS=ALL LVL=1010 COMPS=icc,dm
'gcc' '-mcpu=power4 -mtune=power4 -O3 -fno-schedule-insns -fno-rerun-loop-opt'
MACH=POWER4 OS=ALL LVL=1010 COMPS=f77
'xlf' '-qtune=pwr4 -qarch=pwr4 -O3 -qmaxmem=-1 -qfloat=hsflt'
+MACH=POWER8 OS=ALL LVL=1010 COMPS=f77
+ 'xlf' '-qtune=pwr8 -qarch=pwr8 -O3 -qmaxmem=-1 -qfloat=hsflt'
#
# IBM System z or zEnterprise.
# These compiler flags given by IBM; -O3 -funroll-loops are chosen because
Index: ATLAS/CONFIG/src/backend/archinfo_linux.c
===================================================================
--- ATLAS.orig/CONFIG/src/backend/archinfo_linux.c
+++ ATLAS/CONFIG/src/backend/archinfo_linux.c
@@ -77,6 +77,7 @@ enum MACHTYPE ProbeArch()
else if (strstr(res, "7455")) mach = PPCG4;
else if (strstr(res, "PPC970FX")) mach = PPCG5;
else if (strstr(res, "PPC970MP")) mach = PPCG5;
+ else if (strstr(res, "POWER8")) mach = IbmPwr8;
else if (strstr(res, "POWER7")) mach = IbmPwr7;
else if (strstr(res, "POWER6")) mach = IbmPwr6;
else if (strstr(res, "POWER5")) mach = IbmPwr5;
Index: ATLAS/include/atlas_pca.h
===================================================================
--- ATLAS.orig/include/atlas_pca.h
+++ ATLAS/include/atlas_pca.h
@@ -26,7 +26,7 @@
#endif
#elif defined(ATL_ARCH_POWER3) || defined(ATL_ARCH_POWER4) || \
defined(ATL_ARCH_POWER5) || defined(ATL_ARCH_POWER6) || \
- defined(ATL_ARCH_POWER7)
+ defined(ATL_ARCH_POWER7) || defined(ATL_ARCH_POWER8)
#ifdef __GNUC__
#define ATL_membarrier __asm__ __volatile__ ("dcs")
/* #define ATL_USEPCA 1 */
Index: ATLAS/CONFIG/src/backend/archinfo_aix.c
===================================================================
--- ATLAS.orig/CONFIG/src/backend/archinfo_aix.c
+++ ATLAS/CONFIG/src/backend/archinfo_aix.c
@@ -67,6 +67,8 @@ enum MACHTYPE ProbeArch()
{
if (strstr(res, "PowerPC_POWER5"))
mach = IbmPwr5;
+ else if (strstr(res, "PowerPC_POWER8"))
+ mach = IbmPwr8;
else if (strstr(res, "PowerPC_POWER7"))
mach = IbmPwr7;
else if (strstr(res, "PowerPC_POWER6"))
++++++ atlas.3.10.1-ppc64le_abiv2.patch ++++++
From: Michel Normand
Subject: atlas.ppc64le abiv2
Date: Mon, 14 Apr 2014 18:03:06 +0200
References: http://sourceforge.net/p/math-atlas/mailman/message/32471499/
atlas.ppc64le abiv2
* do not use opd section for ABI V2
* define TOC in r2 in function call
TODO: may be not required everywhere.
based on work of Guy and Thierry
TODO: still have to work on stack FSIZE
TODO: for ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c
need to better understand the change about ld pC0
Signed-off-by: Michel Normand
---
CONFIG/src/backend/probe_AltiVec.S | 2 +-
CONFIG/src/backend/probe_VSX.S | 2 +-
src/threads/ATL_DecAtomicCount_ppc.S | 2 +-
src/threads/ATL_ResetAtomicCount_ppc.S | 2 +-
tune/blas/gemm/CASES/ATL_cmm4x4x128_av.c | 9 ++++++++-
tune/blas/gemm/CASES/ATL_dmm4x4x2pf_av.c | 9 ++++++++-
tune/blas/gemm/CASES/ATL_dmm4x4x32_ppc.c | 9 ++++++++-
tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c | 20 ++++++++++++++++++--
tune/blas/gemm/CASES/ATL_smm4x4x128_av.c | 23 ++++++++++++++++++++++-
9 files changed, 68 insertions(+), 10 deletions(-)
Index: ATLAS/CONFIG/src/backend/probe_AltiVec.S
===================================================================
--- ATLAS.orig/CONFIG/src/backend/probe_AltiVec.S
+++ ATLAS/CONFIG/src/backend/probe_AltiVec.S
@@ -6,7 +6,7 @@
*
*/
.text
-#if defined(ATL_USE64BITS) && defined (ATL_OS_Linux)
+#if defined(ATL_USE64BITS) && defined (ATL_OS_Linux) && _CALL_ELF != 2
.align 2
.globl ATL_asmdecor(do_vsum)
.section ".opd","aw"
Index: ATLAS/CONFIG/src/backend/probe_VSX.S
===================================================================
--- ATLAS.orig/CONFIG/src/backend/probe_VSX.S
+++ ATLAS/CONFIG/src/backend/probe_VSX.S
@@ -6,7 +6,7 @@
*
*/
.text
-#if defined(ATL_USE64BITS) && defined (ATL_OS_Linux)
+#if defined(ATL_USE64BITS) && defined (ATL_OS_Linux) && _CALL_ELF != 2
.align 2
.globl ATL_asmdecor(do_vsum)
.section ".opd","aw"
Index: ATLAS/src/threads/ATL_DecAtomicCount_ppc.S
===================================================================
--- ATLAS.orig/src/threads/ATL_DecAtomicCount_ppc.S
+++ ATLAS/src/threads/ATL_DecAtomicCount_ppc.S
@@ -4,7 +4,7 @@
.globl _ATL_DecAtomicCount
_ATL_DecAtomicCount:
#else
- #if defined(ATL_USE64BITS)
+ #if defined(ATL_USE64BITS) && _CALL_ELF != 2
/*
* Official Program Descripter section, seg fault w/o it on Linux/PPC64
*/
Index: ATLAS/src/threads/ATL_ResetAtomicCount_ppc.S
===================================================================
--- ATLAS.orig/src/threads/ATL_ResetAtomicCount_ppc.S
+++ ATLAS/src/threads/ATL_ResetAtomicCount_ppc.S
@@ -4,7 +4,7 @@
.globl _ATL_ResetAtomicCount
_ATL_ResetAtomicCount:
#else
- #if defined(ATL_USE64BITS)
+ #if defined(ATL_USE64BITS) && _CALL_ELF != 2
/*
* Official Program Descripter section, seg fault w/o it on Linux/PPC64
*/
Index: ATLAS/tune/blas/gemm/CASES/ATL_cmm4x4x128_av.c
===================================================================
--- ATLAS.orig/tune/blas/gemm/CASES/ATL_cmm4x4x128_av.c
+++ ATLAS/tune/blas/gemm/CASES/ATL_cmm4x4x128_av.c
@@ -181,7 +181,7 @@ void ATL_USERMM(const int M, const int N
.globl Mjoin(_,ATL_USERMM)
Mjoin(_,ATL_USERMM):
#else
- #if defined(ATL_USE64BITS)
+ #if defined(ATL_USE64BITS) && _CALL_ELF != 2
/*
* Official Program Descripter section, seg fault w/o it on Linux/PPC64
*/
@@ -258,8 +258,15 @@ ATL_USERMM:
eqv r0, r0, r0 /* all 1s */
ATL_WriteVRSAVE(r0) /* signal we use all vector regs */
#if defined (ATL_USE64BITS)
+#if _CALL_ELF == 2
+/* ABIv2 */
+ ld pC0, FSIZE+104(r1)
+ ld ldc, FSIZE+112(r1)
+#else
+/* ABIv1 */
ld pC0, FSIZE+120(r1)
ld ldc, FSIZE+128(r1)
+#endif
#elif defined(ATL_AS_OSX_PPC)
lwz pC0, FSIZE+60(r1)
lwz ldc, FSIZE+64(r1)
Index: ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x2pf_av.c
===================================================================
--- ATLAS.orig/tune/blas/gemm/CASES/ATL_dmm4x4x2pf_av.c
+++ ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x2pf_av.c
@@ -279,7 +279,7 @@ void ATL_USERMM(const int M, const int N
#endif
.text
#ifdef ATL_GAS_LINUX_PPC
- #if defined(ATL_USE64BITS)
+ #if defined(ATL_USE64BITS) && _CALL_ELF != 2
/*
* No idea what this does, but seg fault without it (I think it is
* partially resp for making code callable from both static & PIC code)
@@ -405,8 +405,15 @@ Mjoin(_,ATL_USERMM):
*/
#ifdef ATL_GAS_LINUX_PPC
#ifdef ATL_USE64BITS
+ #if _CALL_ELF == 2
+ /* ABIv2 */
+ ld pC0, 104(r1)
+ ld incCn, 112(r1)
+ #else
+ /* ABIv1 */
ld pC0, 120(r1)
ld incCn, 128(r1)
+ #endif
#else
lwz incCn, FSIZE+8(r1)
#endif
Index: ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x32_ppc.c
===================================================================
--- ATLAS.orig/tune/blas/gemm/CASES/ATL_dmm4x4x32_ppc.c
+++ ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x32_ppc.c
@@ -268,7 +268,7 @@ Mjoin(.,ATL_USERMM):
.globl Mjoin(_,ATL_USERMM)
Mjoin(_,ATL_USERMM):
#else
- #if defined(ATL_USE64BITS)
+ #if defined(ATL_USE64BITS) && _CALL_ELF != 2
/*
* Official Program Descripter section, seg fault w/o it on Linux/PPC64
*/
@@ -324,8 +324,15 @@ ATL_USERMM:
#endif
#ifdef ATL_USE64BITS
+#if _CALL_ELF == 2
+/* ABIv2 */
+ ld pC0, 104(r1)
+ ld incCn, 112(r1)
+#else
+/* ABIv1 */
ld pC0, 120(r1)
ld incCn, 128(r1)
+#endif
#elif defined(ATL_AS_OSX_PPC) || defined(ATL_AS_AIX_PPC)
lwz pC0, 68(r1)
lwz incCn, 72(r1)
Index: ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c
===================================================================
--- ATLAS.orig/tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c
+++ ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c
@@ -170,13 +170,21 @@ void ATL_USERMM(const int M, const int N
const TYPE beta, TYPE *C, const int ldc)
(r10) 8(r1)
*******************************************************************************
-64 bit ABIs:
+64 bit ABIv1s:
r3 r4 r5 r6/f1
void ATL_USERMM(const int M, const int N, const int K, const TYPE alpha,
r7 r8 r9 r10
const TYPE *A, const int lda, const TYPE *B, const int ldb,
f2 120(r1) 128(r1)
const TYPE beta, TYPE *C, const int ldc)
+
+64 bit ABIv2s:
+ r3 r4 r5 r6/f1
+void ATL_USERMM(const int M, const int N, const int K, const TYPE alpha,
+ r7 r8 r9 r10
+ const TYPE *A, const int lda, const TYPE *B, const int ldb,
+ f2 104(r1) 112(r1)
+ const TYPE beta, TYPE *C, const int ldc)
#endif
#ifdef ATL_AS_AIX_PPC
.csect .text[PR]
@@ -202,7 +210,7 @@ Mjoin(.,ATL_USERMM):
.globl Mjoin(_,ATL_USERMM)
Mjoin(_,ATL_USERMM):
#else
- #if defined(ATL_USE64BITS)
+ #if defined(ATL_USE64BITS) && _CALL_ELF != 2
/*
* Official Program Descripter section, seg fault w/o it on Linux/PPC64
*/
@@ -257,9 +265,17 @@ ATL_USERMM:
#endif
#endif
+
#if defined (ATL_USE64BITS)
+#if _CALL_ELF == 2
+/* ABIv2 */
+ ld pC0, 104(r1)
+ ld incCn, 112(r1)
+#else
+/* ABIv1 */
ld pC0, 120(r1)
ld incCn, 128(r1)
+#endif
#elif defined(ATL_AS_OSX_PPC) || defined(ATL_AS_AIX_PPC)
lwz pC0, 68(r1)
lwz incCn, 72(r1)
Index: ATLAS/tune/blas/gemm/CASES/ATL_smm4x4x128_av.c
===================================================================
--- ATLAS.orig/tune/blas/gemm/CASES/ATL_smm4x4x128_av.c
+++ ATLAS/tune/blas/gemm/CASES/ATL_smm4x4x128_av.c
@@ -196,7 +196,7 @@ void ATL_USERMM(const int M, const int N
.globl Mjoin(_,ATL_USERMM)
Mjoin(_,ATL_USERMM):
#else
- #if defined(ATL_USE64BITS)
+ #if defined(ATL_USE64BITS) && _CALL_ELF != 2
/*
* Official Program Descripter section, seg fault w/o it on Linux/PPC64
*/
@@ -221,8 +221,15 @@ ATL_USERMM:
* kernel instead
*/
#if defined (ATL_USE64BITS)
+#if _CALL_ELF == 2
+/* ABIv2 */
+ ld r10, 104(r1)
+ ld r5, 112(r1)
+#else
+/* ABIv1 */
ld r10, 120(r1)
ld r5, 128(r1)
+#endif
#elif defined(ATL_AS_OSX_PPC)
lwz r10, 60(r1)
lwz r5, 64(r1)
@@ -285,8 +292,15 @@ ATL_USERMM:
eqv r0, r0, r0 /* all 1s */
ATL_WriteVRSAVE(r0) /* signal we use all vector regs */
#if defined (ATL_USE64BITS)
+#if _CALL_ELF == 2
+ /* ABIv2 */
+ ld pC0, FSIZE+104(r1)
+ ld ldc, FSIZE+112(r1)
+#else
+ /* ABIv1 */
ld pC0, FSIZE+120(r1)
ld ldc, FSIZE+128(r1)
+#endif
#elif defined(ATL_AS_OSX_PPC)
lwz pC0, FSIZE+60(r1)
lwz ldc, FSIZE+64(r1)
@@ -4258,8 +4272,15 @@ UNALIGNED_C:
eqv r0, r0, r0 /* all 1s */
ATL_WriteVRSAVE(r0) /* signal we use all vector regs */
#if defined (ATL_USE64BITS)
+#if _CALL_ELF == 2
+ /* ABIv2 */
+ ld pC0, FSIZE+104(r1)
+ ld ldc, FSIZE+112(r1)
+#else
+ /* ABIv1 */
ld pC0, FSIZE+120(r1)
ld ldc, FSIZE+128(r1)
+#endif
#elif defined(ATL_AS_OSX_PPC)
lwz pC0, FSIZE+60(r1)
lwz ldc, FSIZE+64(r1)
++++++ atlas.3.10.2-ppc64le_do_not_use_files_with_lvx.patch ++++++
From: Michel Normand
Subject: atlas.3.10.2 ppc64le do not use files with lvx
Date: Tue, 12 Aug 2014 16:07:06 +0200
ppc64le do not use files with lvx
This is a temporary patch as long as the related files
are not ported yet to ppc64 little-endian.
Warning: patch to be applied only for ppc64le architecture
and will also need atlas-new_archdef_for_ppc64le.patch
Signed-off-by: Michel Normand
---
tune/blas/gemm/CASES/ccases.flg | 6 +-----
tune/blas/gemm/CASES/dcases.flg | 8 +-------
tune/blas/gemm/CASES/dcases.vnb | 4 ----
tune/blas/gemm/CASES/scases.flg | 9 +--------
tune/blas/gemm/CASES/scases.vnb | 3 ---
tune/blas/gemm/CASES/zcases.flg | 8 +-------
6 files changed, 4 insertions(+), 34 deletions(-)
Index: ATLAS/tune/blas/gemm/CASES/ccases.flg
===================================================================
--- ATLAS.orig/tune/blas/gemm/CASES/ccases.flg
+++ ATLAS/tune/blas/gemm/CASES/ccases.flg
@@ -1,5 +1,5 @@
<ID> <flag> <mb> <nb> <kb> <muladd> <lat> <mu> <nu> <ku> <rout> "<Contributer>"
-24
+22
304 192 4 3 8 0 4 4 3 8 ATL_mm4x3x8p.c "R. Clint Whaley" \
gcc
-mcpu=ultrasparc -mtune=ultrasparc -fomit-frame-pointer -O
@@ -48,13 +48,9 @@ gcc
328 480 8 8 2 1 1 8 8 2 ATL_mm8x8x2.c "R. Clint Whaley" \
gcc
-fomit-frame-pointer -O2 -fno-tree-loop-optimize
-329 192 4 4 4 1 16 4 4 4 ATL_cmm4x4x128_av.c "R. Clint Whaley" \
-gcc
--x assembler-with-cpp
331 192 4 4 1 1 1 4 4 1 ATL_smm4x4xURx_mips.c "R. Clint Whaley" \
gcc
-x assembler-with-cpp -mips4
-332 192 8 2 4 1 0 8 2 4 ATL_smm8x2x4_av.c "IBM"
333 448 4 4 2 1 1 4 4 2 ATL_smm4x4x2pf_arm.c "R. Clint Whaley" \
gcc
-x assembler-with-cpp -mfpu=vfpv3
Index: ATLAS/tune/blas/gemm/CASES/scases.flg
===================================================================
--- ATLAS.orig/tune/blas/gemm/CASES/scases.flg
+++ ATLAS/tune/blas/gemm/CASES/scases.flg
@@ -1,5 +1,5 @@
<ID> <flag> <mb> <nb> <kb> <muladd> <lat> <mu> <nu> <ku> <rout> "<Contributer>"
-25
+22
304 192 4 3 8 0 4 4 3 8 ATL_mm4x3x8p.c "R. Clint Whaley" \
gcc
-mcpu=ultrasparc -mtune=ultrasparc -fomit-frame-pointer -O
@@ -48,16 +48,9 @@ gcc
328 480 8 8 2 1 1 8 8 2 ATL_mm8x8x2.c "R. Clint Whaley" \
gcc
-fomit-frame-pointer -O2 -fno-tree-loop-optimize
-329 192 4 4 4 1 16 4 4 4 ATL_smm4x4x128_av.c "R. Clint Whaley" \
-gcc
--x assembler-with-cpp
-330 200 92 92 92 1 16 92 92 92 ATL_smm4x4x128_av.c "R. Clint Whaley" \
-gcc
--x assembler-with-cpp
331 192 4 4 1 1 1 4 4 1 ATL_smm4x4xURx_mips.c "R. Clint Whaley" \
gcc
-x assembler-with-cpp -mips4
-332 192 8 2 4 1 0 8 2 4 ATL_smm8x2x4_av.c "IBM"
333 448 4 4 2 1 1 4 4 2 ATL_smm4x4x2pf_arm.c "R. Clint Whaley" \
gcc
-x assembler-with-cpp -mfpu=vfpv3
Index: ATLAS/tune/blas/gemm/CASES/scases.vnb
===================================================================
--- ATLAS.orig/tune/blas/gemm/CASES/scases.vnb
+++ ATLAS/tune/blas/gemm/CASES/scases.vnb
@@ -31,9 +31,6 @@
# Defaults: TA='t', TB='n', SSE=0, X87=0, LDBOT=1, RTKU=0, AOUTER=0,
# KBMAX=KU, KBMIN=KU, BETAN1=0, RTMN=1
#
-ID=1 ROUT='ATL_smm4x4x128_av.c' AUTH='R. Clint Whaley' MU=4 NU=4 KU=4 \
- LDKB=1 LDBOT=1 KBMIN=4 KBMAX=128 ASM=GAS_PPC \
- COMP='gcc' FLAGS='-x assembler-with-cpp'
ID=2 ROUT='ATL_smm4x4x16_av.c' AUTH='R. Clint Whaley' MU=4 NU=4 KU=16 \
LDKB=1 LDBOT=0 KBMIN=16 KBMAX=2048 ASM=GAS_SPARC \
COMP='gcc' FLAGS='-x assembler-with-cpp'
Index: ATLAS/tune/blas/gemm/CASES/dcases.flg
===================================================================
--- ATLAS.orig/tune/blas/gemm/CASES/dcases.flg
+++ ATLAS/tune/blas/gemm/CASES/dcases.flg
@@ -1,5 +1,5 @@
<ID> <flag> <mb> <nb> <kb> <muladd> <lat> <mu> <nu> <ku> <rout> "<Contributer>"
-32
+30
306 192 4 3 8 0 4 4 3 8 ATL_mm4x3x8p.c "R. Clint Whaley" \
gcc
-mcpu=ultrasparc -mtune=ultrasparc -fomit-frame-pointer -O -fno-schedule-insns -fno-schedule-insns2
@@ -79,12 +79,6 @@ gcc
336 192 4 4 1 1 1 4 4 1 ATL_dmm4x4xURx_mips.c "R. Clint Whaley" \
gcc
-x assembler-with-cpp -mips4
-337 192 4 4 1 1 16 4 4 1 ATL_dmm4x4x80_ppc.c "Whaley & Castaldo" \
-gcc
--x assembler-with-cpp
-338 192 8 4 2 1 0 8 4 2 ATL_dmm8x4x2_vsx.c "IBM" \
-gcc
--O3 -mvsx
339 448 4 4 2 1 1 4 4 2 ATL_dmm4x4x2pf_arm.c "R. Clint Whaley" \
gcc
-x assembler-with-cpp -mfpu=vfpv3
Index: ATLAS/tune/blas/gemm/CASES/dcases.vnb
===================================================================
--- ATLAS.orig/tune/blas/gemm/CASES/dcases.vnb
+++ ATLAS/tune/blas/gemm/CASES/dcases.vnb
@@ -53,10 +53,6 @@ ID=6 ROUT='ATL_dmm4x1x90_x87.c' AUTH='R
ID=7 ROUT='ATL_dmm8x1x120_sse2.c' AUTH='R. Clint Whaley' \
MU=8 NU=1 KU=1 KBMAX=512 ASM=GAS_x8664 BETAN1=1 \
COMP='gcc' FLAGS='-m64 -x assembler-with-cpp'
-ID=70 ROUT='ATL_dmm4x4x80_ppc.c' AUTH='R. Clint Whaley' TA='T', TB='N' \
- MU=4 NU=4 KU=1 KBMIN=1 KBMAX=80 ASM=GAS_PPC BETAN1=0 LDBOT=0 \
- LDAB=0 LDISKB=1 RTN=1 RTM=1 RTK=0 \
- COMP='gcc' FLAGS='-x assembler-with-cpp'
ID=80 ROUT='ATL_dmm4x4x16r8_US.c' AUTH='R. Clint Whaley' TA='T', TB='N' \
MU=4 NU=4 KU=24 KBMIN=24 KBMAX=512 ASM=GAS_SPARC BETAN1=0 \
LDAB=0 RTK=1 RTN=1 RTM=1 LDBOT=0 LDISKB=1 LDAB=1 \
Index: ATLAS/tune/blas/gemm/CASES/zcases.flg
===================================================================
--- ATLAS.orig/tune/blas/gemm/CASES/zcases.flg
+++ ATLAS/tune/blas/gemm/CASES/zcases.flg
@@ -1,5 +1,5 @@
<ID> <flag> <mb> <nb> <kb> <muladd> <lat> <mu> <nu> <ku> <rout> "<Contributer>"
-31
+29
306 192 4 3 8 0 4 4 3 8 ATL_mm4x3x8p.c "R. Clint Whaley" \
gcc
-mcpu=ultrasparc -mtune=ultrasparc -fomit-frame-pointer -O -fno-schedule-insns -fno-schedule-insns2
@@ -76,12 +76,6 @@ gcc
336 192 4 4 1 1 1 4 4 1 ATL_dmm4x4xURx_mips.c "R. Clint Whaley" \
gcc
-x assembler-with-cpp -mips4
-337 192 4 4 1 1 16 4 4 1 ATL_dmm4x4x80_ppc.c "Whaley & Castaldo" \
-gcc
--x assembler-with-cpp
-338 192 8 4 2 1 0 8 4 2 ATL_dmm8x4x2_vsx.c "IBM" \
-gcc
--O3 -mvsx
339 448 4 4 2 1 1 4 4 2 ATL_dmm4x4x2pf_arm.c "R. Clint Whaley" \
gcc
-x assembler-with-cpp -mfpu=vfpv3
++++++ atlas3.10.1.tar.bz2 -> atlas3.10.2.tar.bz2 ++++++
++++ 10483 lines of diff (skipped)
++++++ issue_64.patch ++++++
From: Michel Normand
Subject: issue 64
Date: Mon, 07 Jul 2014 17:15:03 +0200
issue 64, patch as suggested by Clint
but not tested by myself.
Signed-off-by: Michel Normand
---
tune/blas/level3/invtrsm.c | 3 +++
1 file changed, 3 insertions(+)
Index: ATLAS/tune/blas/level3/invtrsm.c
===================================================================
--- ATLAS.orig/tune/blas/level3/invtrsm.c
+++ ATLAS/tune/blas/level3/invtrsm.c
@@ -257,6 +257,9 @@ static void MakeHEDiagDom
int j;
const int lda2=(lda SHIFT), ldap1=((lda+1)SHIFT);
+ /* as per issue 64 */
+ Mjoin(PATL,gegen)(N, N, A, lda, N*N+lda);
+
if (Order == CblasRowMajor)
{
if (Uplo == CblasLower) Uplo = CblasUpper;