Hello community,
here is the log from the commit of package gcc41
checked in at Thu Feb 8 23:01:45 CET 2007.
--------
--- gcc41/cross-alpha-gcc-icecream-backend.changes 2007-01-29 20:14:46.000000000 +0100
+++ /mounts/work_src_done/STABLE/gcc41/cross-alpha-gcc-icecream-backend.changes 2007-02-07 10:23:25.621288000 +0100
@@ -1,0 +2,12 @@
+Tue Feb 6 12:06:14 CET 2007 - rguenther@suse.de
+
+- Add patch for PR20218, visibility fixes.
+- Add patch for wrong parentheses warnings.
+- Filter -ffortify and -fstack-protector from RPM_OPT_FLAGS.
+
+-------------------------------------------------------------------
+Tue Feb 6 12:07:00 CET 2007 - jw@suse.de
+
+- typo in binutils check fixed.
+
+-------------------------------------------------------------------
@@ -4 +16 @@
-- cross-avr now checks the binutils changelog rather than an
+- cross-avr now checks the binutils changelog rather than an
@@ -7,0 +20,8 @@
+Mon Jan 22 18:18:23 CET 2007 - rguenther@suse.de
+
+- Fix building ada with gcc42 in beta.
+- Include ada in testing.
+- Do not package libffi.la.
+- Add patches for AMD Family 10 and Power6 support.
+
+-------------------------------------------------------------------
cross-arm-gcc-icecream-backend.changes: same change
cross-avr-gcc.changes: same change
cross-hppa-gcc-icecream-backend.changes: same change
cross-i386-gcc-icecream-backend.changes: same change
cross-ia64-gcc-icecream-backend.changes: same change
cross-ppc64-gcc-icecream-backend.changes: same change
cross-ppc-gcc-icecream-backend.changes: same change
cross-s390-gcc-icecream-backend.changes: same change
cross-s390x-gcc-icecream-backend.changes: same change
cross-x86_64-gcc-icecream-backend.changes: same change
gcc41.changes: same change
libgcj41.changes: same change
New:
----
fix-ada-build.diff
gcc-4.1.0-power6.diff
gcc-amdfam10-suse-10.patch
gcc-amdfam10-suse-11.patch
gcc-amdfam10-suse-12.patch
gcc-amdfam10-suse-1.patch
gcc-amdfam10-suse-20.patch
gcc-amdfam10-suse-2.patch
gcc-amdfam10-suse-3.patch
gcc-amdfam10-suse-4.patch
gcc-amdfam10-suse-5.patch
gcc-amdfam10-suse-6.patch
gcc-amdfam10-suse-7.patch
gcc-amdfam10-suse-8.patch
gcc-amdfam10-suse-9.patch
parentheses-mathematical-5.diff
pr20218.patch
pr30113.patch
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Other differences:
------------------
++++++ cross-alpha-gcc-icecream-backend.spec ++++++
--- /var/tmp/diff_new_pack.ch3703/_old 2007-02-08 22:57:00.000000000 +0100
+++ /var/tmp/diff_new_pack.ch3703/_new 2007-02-08 22:57:00.000000000 +0100
@@ -30,7 +30,7 @@
URL: http://gcc.gnu.org/
License: BSD License and BSD-like, GNU General Public License (GPL)
Version: 4.1.2_20070115
-Release: 2
+Release: 4
%define gcc_version %(echo %version | sed 's/_.*//')
%define snapshot_date %(echo %version | sed 's/[34]\.[0-4]\.[0-6]//' | sed 's/_/-/')
%define binsuffix -4.1
@@ -72,7 +72,10 @@
Patch97: nov189571-2.diff
Patch98: nvl199137.patch
Patch100: pr28755.patch
+Patch101: pr30113.patch
Patch105: pr29166.diff
+Patch106: fix-ada-build.diff
+Patch107: pr20218.patch
# A set of patches from the RH srpm
Patch51: gcc41-ia64-stack-protector.patch
Patch55: gcc41-java-slow_pthread_self.patch
@@ -94,6 +97,23 @@
Patch69: gcc41-ssse3.patch
Patch70: pr27880.diff
Patch72: Wprofile-mismatch.patch
+Patch73: parentheses-mathematical-5.diff
+# Greyhound (aka AMD Family 10h) support
+Patch200: gcc-amdfam10-suse-1.patch
+Patch201: gcc-amdfam10-suse-2.patch
+Patch202: gcc-amdfam10-suse-3.patch
+Patch203: gcc-amdfam10-suse-4.patch
+Patch204: gcc-amdfam10-suse-5.patch
+Patch205: gcc-amdfam10-suse-6.patch
+Patch206: gcc-amdfam10-suse-7.patch
+Patch207: gcc-amdfam10-suse-8.patch
+Patch208: gcc-amdfam10-suse-9.patch
+Patch209: gcc-amdfam10-suse-10.patch
+Patch210: gcc-amdfam10-suse-11.patch
+Patch211: gcc-amdfam10-suse-12.patch
+Patch212: gcc-amdfam10-suse-20.patch
+# POWER6 support
+Patch300: gcc-4.1.0-power6.diff
%if "%{gcc_target_arch}" == "avr"
# Joerg Wunsch places his cross-avr-gcc-patches there:
# http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/avr-gcc/files
@@ -167,7 +187,10 @@
%patch97
%patch98 -p1
%patch100
+%patch101
%patch105
+%patch106
+%patch107
%patch51
%patch55
%patch57
@@ -189,6 +212,21 @@
%patch69
%patch70
%patch72
+%patch73
+%patch200
+%patch201
+%patch202
+%patch203
+%patch204
+%patch205
+%patch206
+%patch207
+%patch208
+%patch209
+%patch210
+%patch211
+%patch212
+%patch300
%if "%{gcc_target_arch}" == "avr"
%patch1000
%patch1001
@@ -207,7 +245,7 @@
mkdir obj-%{GCCDIST}
cd obj-%{GCCDIST}
RPM_OPT_FLAGS="$RPM_OPT_FLAGS -U_FORTIFY_SOURCE"
-RPM_OPT_FLAGS=`echo $RPM_OPT_FLAGS|sed -e 's/-fno-rtti//g' -e 's/-fno-exceptions//g' -e 's/-Wmissing-format-attribute//g'`
+RPM_OPT_FLAGS=`echo $RPM_OPT_FLAGS|sed -e 's/-fno-rtti//g' -e 's/-fno-exceptions//g' -e 's/-Wmissing-format-attribute//g' -e 's/-fstack-protector//g' -e 's/-ffortify=.//g'`
%ifarch %ix86
# -mcpu is superceded by -mtune but -mtune is not supported by
# our bootstrap compiler. -mcpu gives a warning that stops
@@ -278,6 +316,9 @@
# --with-sysroot=/
CFLAGS="$RPM_OPT_FLAGS" CXXFLAGS="$RPM_OPT_FLAGS" XCFLAGS="$RPM_OPT_FLAGS" \
TCFLAGS="$RPM_OPT_FLAGS" GCJFLAGS="$RPM_OPT_FLAGS" \
+%if %{build_ada}
+CC="gcc%{binsuffix}" GNATBIND="gnatbind%{binsuffix}" \
+%endif
../configure \
%if 0%{?gcc_target_arch:1} && 0%{!?gcc_icecream:1}
%else
@@ -393,9 +434,20 @@
/usr/share/icecream-envs
%changelog -n cross-alpha-gcc-icecream-backend
+* Tue Feb 06 2007 - jw@suse.de
+- typo in binutils check fixed.
+* Tue Feb 06 2007 - rguenther@suse.de
+- Add patch for PR20218, visibility fixes.
+- Add patch for wrong parentheses warnings.
+- Filter -ffortify and -fstack-protector from RPM_OPT_FLAGS.
* Mon Jan 29 2007 - jw@suse.de
- cross-avr now checks the binutils changelog rather than an
exact version match.
+* Mon Jan 22 2007 - rguenther@suse.de
+- Fix building ada with gcc42 in beta.
+- Include ada in testing.
+- Do not package libffi.la.
+- Add patches for AMD Family 10 and Power6 support.
* Mon Jan 15 2007 - rguenther@suse.de
- Update to SVN branch head (r120791).
* Mon Jan 08 2007 - schwab@suse.de
cross-arm-gcc-icecream-backend.spec: same change
++++++ cross-avr-gcc.spec ++++++
--- /var/tmp/diff_new_pack.ch3703/_old 2007-02-08 22:57:00.000000000 +0100
+++ /var/tmp/diff_new_pack.ch3703/_new 2007-02-08 22:57:00.000000000 +0100
@@ -27,7 +27,7 @@
URL: http://gcc.gnu.org/
License: BSD License and BSD-like, GNU General Public License (GPL)
Version: 4.1.2_20070115
-Release: 3
+Release: 5
%define gcc_version %(echo %version | sed 's/_.*//')
%define snapshot_date %(echo %version | sed 's/[34]\.[0-4]\.[0-6]//' | sed 's/_/-/')
%define binsuffix -4.1
@@ -69,7 +69,10 @@
Patch97: nov189571-2.diff
Patch98: nvl199137.patch
Patch100: pr28755.patch
+Patch101: pr30113.patch
Patch105: pr29166.diff
+Patch106: fix-ada-build.diff
+Patch107: pr20218.patch
# A set of patches from the RH srpm
Patch51: gcc41-ia64-stack-protector.patch
Patch55: gcc41-java-slow_pthread_self.patch
@@ -91,6 +94,23 @@
Patch69: gcc41-ssse3.patch
Patch70: pr27880.diff
Patch72: Wprofile-mismatch.patch
+Patch73: parentheses-mathematical-5.diff
+# Greyhound (aka AMD Family 10h) support
+Patch200: gcc-amdfam10-suse-1.patch
+Patch201: gcc-amdfam10-suse-2.patch
+Patch202: gcc-amdfam10-suse-3.patch
+Patch203: gcc-amdfam10-suse-4.patch
+Patch204: gcc-amdfam10-suse-5.patch
+Patch205: gcc-amdfam10-suse-6.patch
+Patch206: gcc-amdfam10-suse-7.patch
+Patch207: gcc-amdfam10-suse-8.patch
+Patch208: gcc-amdfam10-suse-9.patch
+Patch209: gcc-amdfam10-suse-10.patch
+Patch210: gcc-amdfam10-suse-11.patch
+Patch211: gcc-amdfam10-suse-12.patch
+Patch212: gcc-amdfam10-suse-20.patch
+# POWER6 support
+Patch300: gcc-4.1.0-power6.diff
%if "%{gcc_target_arch}" == "avr"
# Joerg Wunsch places his cross-avr-gcc-patches there:
# http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/avr-gcc/files
@@ -161,7 +181,10 @@
%patch97
%patch98 -p1
%patch100
+%patch101
%patch105
+%patch106
+%patch107
%patch51
%patch55
%patch57
@@ -183,6 +206,21 @@
%patch69
%patch70
%patch72
+%patch73
+%patch200
+%patch201
+%patch202
+%patch203
+%patch204
+%patch205
+%patch206
+%patch207
+%patch208
+%patch209
+%patch210
+%patch211
+%patch212
+%patch300
%if "%{gcc_target_arch}" == "avr"
%patch1000
%patch1001
@@ -201,7 +239,7 @@
mkdir obj-%{GCCDIST}
cd obj-%{GCCDIST}
RPM_OPT_FLAGS="$RPM_OPT_FLAGS -U_FORTIFY_SOURCE"
-RPM_OPT_FLAGS=`echo $RPM_OPT_FLAGS|sed -e 's/-fno-rtti//g' -e 's/-fno-exceptions//g' -e 's/-Wmissing-format-attribute//g'`
+RPM_OPT_FLAGS=`echo $RPM_OPT_FLAGS|sed -e 's/-fno-rtti//g' -e 's/-fno-exceptions//g' -e 's/-Wmissing-format-attribute//g' -e 's/-fstack-protector//g' -e 's/-ffortify=.//g'`
%ifarch %ix86
# -mcpu is superceded by -mtune but -mtune is not supported by
# our bootstrap compiler. -mcpu gives a warning that stops
@@ -272,6 +310,9 @@
# --with-sysroot=/
CFLAGS="$RPM_OPT_FLAGS" CXXFLAGS="$RPM_OPT_FLAGS" XCFLAGS="$RPM_OPT_FLAGS" \
TCFLAGS="$RPM_OPT_FLAGS" GCJFLAGS="$RPM_OPT_FLAGS" \
+%if %{build_ada}
+CC="gcc%{binsuffix}" GNATBIND="gnatbind%{binsuffix}" \
+%endif
../configure \
%if 0%{?gcc_target_arch:1} && 0%{!?gcc_icecream:1}
%else
@@ -336,10 +377,10 @@
%endif
--host=%{GCCDIST}
# COMMON-END
-if [ -z "`rpm -q --changelog binutils | grep 'Update to binutils 2.17.50.0.8.'`" ]; then
+if [ -z "`rpm -q --changelog binutils | grep 'Update to binutils-2.17.50.0.8.'`" ]; then
set +x
echo "Our gcc-4.1.2-20061129 (or later) needs support for 'ldi r30,lo8(gs(1f))'"
- echo "Please update to binutils 2.17.50.0.8 or later!"
+ echo "Please update to binutils-2.17.50.0.8 or later!"
rpm -q binutils
exit 1;
fi
@@ -363,9 +404,20 @@
%{_prefix}
%changelog -n cross-avr-gcc
+* Tue Feb 06 2007 - jw@suse.de
+- typo in binutils check fixed.
+* Tue Feb 06 2007 - rguenther@suse.de
+- Add patch for PR20218, visibility fixes.
+- Add patch for wrong parentheses warnings.
+- Filter -ffortify and -fstack-protector from RPM_OPT_FLAGS.
* Mon Jan 29 2007 - jw@suse.de
- cross-avr now checks the binutils changelog rather than an
exact version match.
+* Mon Jan 22 2007 - rguenther@suse.de
+- Fix building ada with gcc42 in beta.
+- Include ada in testing.
+- Do not package libffi.la.
+- Add patches for AMD Family 10 and Power6 support.
* Mon Jan 15 2007 - rguenther@suse.de
- Update to SVN branch head (r120791).
* Mon Jan 08 2007 - schwab@suse.de
++++++ cross-hppa-gcc-icecream-backend.spec ++++++
--- /var/tmp/diff_new_pack.ch3703/_old 2007-02-08 22:57:00.000000000 +0100
+++ /var/tmp/diff_new_pack.ch3703/_new 2007-02-08 22:57:00.000000000 +0100
@@ -30,7 +30,7 @@
URL: http://gcc.gnu.org/
License: BSD License and BSD-like, GNU General Public License (GPL)
Version: 4.1.2_20070115
-Release: 2
+Release: 4
%define gcc_version %(echo %version | sed 's/_.*//')
%define snapshot_date %(echo %version | sed 's/[34]\.[0-4]\.[0-6]//' | sed 's/_/-/')
%define binsuffix -4.1
@@ -72,7 +72,10 @@
Patch97: nov189571-2.diff
Patch98: nvl199137.patch
Patch100: pr28755.patch
+Patch101: pr30113.patch
Patch105: pr29166.diff
+Patch106: fix-ada-build.diff
+Patch107: pr20218.patch
# A set of patches from the RH srpm
Patch51: gcc41-ia64-stack-protector.patch
Patch55: gcc41-java-slow_pthread_self.patch
@@ -94,6 +97,23 @@
Patch69: gcc41-ssse3.patch
Patch70: pr27880.diff
Patch72: Wprofile-mismatch.patch
+Patch73: parentheses-mathematical-5.diff
+# Greyhound (aka AMD Family 10h) support
+Patch200: gcc-amdfam10-suse-1.patch
+Patch201: gcc-amdfam10-suse-2.patch
+Patch202: gcc-amdfam10-suse-3.patch
+Patch203: gcc-amdfam10-suse-4.patch
+Patch204: gcc-amdfam10-suse-5.patch
+Patch205: gcc-amdfam10-suse-6.patch
+Patch206: gcc-amdfam10-suse-7.patch
+Patch207: gcc-amdfam10-suse-8.patch
+Patch208: gcc-amdfam10-suse-9.patch
+Patch209: gcc-amdfam10-suse-10.patch
+Patch210: gcc-amdfam10-suse-11.patch
+Patch211: gcc-amdfam10-suse-12.patch
+Patch212: gcc-amdfam10-suse-20.patch
+# POWER6 support
+Patch300: gcc-4.1.0-power6.diff
%if "%{gcc_target_arch}" == "avr"
# Joerg Wunsch places his cross-avr-gcc-patches there:
# http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/avr-gcc/files
@@ -167,7 +187,10 @@
%patch97
%patch98 -p1
%patch100
+%patch101
%patch105
+%patch106
+%patch107
%patch51
%patch55
%patch57
@@ -189,6 +212,21 @@
%patch69
%patch70
%patch72
+%patch73
+%patch200
+%patch201
+%patch202
+%patch203
+%patch204
+%patch205
+%patch206
+%patch207
+%patch208
+%patch209
+%patch210
+%patch211
+%patch212
+%patch300
%if "%{gcc_target_arch}" == "avr"
%patch1000
%patch1001
@@ -207,7 +245,7 @@
mkdir obj-%{GCCDIST}
cd obj-%{GCCDIST}
RPM_OPT_FLAGS="$RPM_OPT_FLAGS -U_FORTIFY_SOURCE"
-RPM_OPT_FLAGS=`echo $RPM_OPT_FLAGS|sed -e 's/-fno-rtti//g' -e 's/-fno-exceptions//g' -e 's/-Wmissing-format-attribute//g'`
+RPM_OPT_FLAGS=`echo $RPM_OPT_FLAGS|sed -e 's/-fno-rtti//g' -e 's/-fno-exceptions//g' -e 's/-Wmissing-format-attribute//g' -e 's/-fstack-protector//g' -e 's/-ffortify=.//g'`
%ifarch %ix86
# -mcpu is superceded by -mtune but -mtune is not supported by
# our bootstrap compiler. -mcpu gives a warning that stops
@@ -278,6 +316,9 @@
# --with-sysroot=/
CFLAGS="$RPM_OPT_FLAGS" CXXFLAGS="$RPM_OPT_FLAGS" XCFLAGS="$RPM_OPT_FLAGS" \
TCFLAGS="$RPM_OPT_FLAGS" GCJFLAGS="$RPM_OPT_FLAGS" \
+%if %{build_ada}
+CC="gcc%{binsuffix}" GNATBIND="gnatbind%{binsuffix}" \
+%endif
../configure \
%if 0%{?gcc_target_arch:1} && 0%{!?gcc_icecream:1}
%else
@@ -393,9 +434,20 @@
/usr/share/icecream-envs
%changelog -n cross-hppa-gcc-icecream-backend
+* Tue Feb 06 2007 - jw@suse.de
+- typo in binutils check fixed.
+* Tue Feb 06 2007 - rguenther@suse.de
+- Add patch for PR20218, visibility fixes.
+- Add patch for wrong parentheses warnings.
+- Filter -ffortify and -fstack-protector from RPM_OPT_FLAGS.
* Mon Jan 29 2007 - jw@suse.de
- cross-avr now checks the binutils changelog rather than an
exact version match.
+* Mon Jan 22 2007 - rguenther@suse.de
+- Fix building ada with gcc42 in beta.
+- Include ada in testing.
+- Do not package libffi.la.
+- Add patches for AMD Family 10 and Power6 support.
* Mon Jan 15 2007 - rguenther@suse.de
- Update to SVN branch head (r120791).
* Mon Jan 08 2007 - schwab@suse.de
cross-i386-gcc-icecream-backend.spec: same change
cross-ia64-gcc-icecream-backend.spec: same change
cross-ppc64-gcc-icecream-backend.spec: same change
cross-ppc-gcc-icecream-backend.spec: same change
cross-s390-gcc-icecream-backend.spec: same change
cross-s390x-gcc-icecream-backend.spec: same change
cross-x86_64-gcc-icecream-backend.spec: same change
++++++ gcc41.spec ++++++
--- /var/tmp/diff_new_pack.ch3703/_old 2007-02-08 22:57:00.000000000 +0100
+++ /var/tmp/diff_new_pack.ch3703/_new 2007-02-08 22:57:00.000000000 +0100
@@ -19,7 +19,7 @@
# on those that work
# Note that AdaCore only supports %ix86, x86_64 and ia64
%ifarch %ix86 x86_64 ia64 hppa ppc s390
-%define build_ada 0%{!?building_libjava:1}
+%define build_ada 1
%else
# alpha ppc64 s390x
%define build_ada 0
@@ -44,7 +44,7 @@
%endif
BuildRequires: bison flex gettext-devel glibc-devel-32bit texinfo
%if %{build_ada}
-BuildRequires: gcc-ada
+BuildRequires: gcc-ada gcc41-ada
%endif
%if %{build_libjava}
BuildRequires: gtk2-devel libart_lgpl-devel update-desktop-files
@@ -111,7 +111,7 @@
URL: http://gcc.gnu.org/
License: GNU General Public License (GPL)
Version: 4.1.2_20070115
-Release: 3
+Release: 5
%define gcc_version %(echo %version | sed 's/_.*//')
%define snapshot_date %(echo %version | sed 's/[34]\.[0-4]\.[0-6]//' | sed 's/_/-/')
%define binsuffix -4.1
@@ -160,7 +160,10 @@
Patch97: nov189571-2.diff
Patch98: nvl199137.patch
Patch100: pr28755.patch
+Patch101: pr30113.patch
Patch105: pr29166.diff
+Patch106: fix-ada-build.diff
+Patch107: pr20218.patch
# A set of patches from the RH srpm
Patch51: gcc41-ia64-stack-protector.patch
Patch55: gcc41-java-slow_pthread_self.patch
@@ -182,6 +185,23 @@
Patch69: gcc41-ssse3.patch
Patch70: pr27880.diff
Patch72: Wprofile-mismatch.patch
+Patch73: parentheses-mathematical-5.diff
+# Greyhound (aka AMD Family 10h) support
+Patch200: gcc-amdfam10-suse-1.patch
+Patch201: gcc-amdfam10-suse-2.patch
+Patch202: gcc-amdfam10-suse-3.patch
+Patch203: gcc-amdfam10-suse-4.patch
+Patch204: gcc-amdfam10-suse-5.patch
+Patch205: gcc-amdfam10-suse-6.patch
+Patch206: gcc-amdfam10-suse-7.patch
+Patch207: gcc-amdfam10-suse-8.patch
+Patch208: gcc-amdfam10-suse-9.patch
+Patch209: gcc-amdfam10-suse-10.patch
+Patch210: gcc-amdfam10-suse-11.patch
+Patch211: gcc-amdfam10-suse-12.patch
+Patch212: gcc-amdfam10-suse-20.patch
+# POWER6 support
+Patch300: gcc-4.1.0-power6.diff
%if "%{gcc_target_arch}" == "avr"
# Joerg Wunsch places his cross-avr-gcc-patches there:
# http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/avr-gcc/files
@@ -975,7 +995,10 @@
%patch97
%patch98 -p1
%patch100
+%patch101
%patch105
+%patch106
+%patch107
%patch51
%patch55
%patch57
@@ -997,6 +1020,21 @@
%patch69
%patch70
%patch72
+%patch73
+%patch200
+%patch201
+%patch202
+%patch203
+%patch204
+%patch205
+%patch206
+%patch207
+%patch208
+%patch209
+%patch210
+%patch211
+%patch212
+%patch300
%if "%{gcc_target_arch}" == "avr"
%patch1000
%patch1001
@@ -1015,7 +1053,7 @@
mkdir obj-%{GCCDIST}
cd obj-%{GCCDIST}
RPM_OPT_FLAGS="$RPM_OPT_FLAGS -U_FORTIFY_SOURCE"
-RPM_OPT_FLAGS=`echo $RPM_OPT_FLAGS|sed -e 's/-fno-rtti//g' -e 's/-fno-exceptions//g' -e 's/-Wmissing-format-attribute//g'`
+RPM_OPT_FLAGS=`echo $RPM_OPT_FLAGS|sed -e 's/-fno-rtti//g' -e 's/-fno-exceptions//g' -e 's/-Wmissing-format-attribute//g' -e 's/-fstack-protector//g' -e 's/-ffortify=.//g'`
%ifarch %ix86
# -mcpu is superceded by -mtune but -mtune is not supported by
# our bootstrap compiler. -mcpu gives a warning that stops
@@ -1086,6 +1124,9 @@
# --with-sysroot=/
CFLAGS="$RPM_OPT_FLAGS" CXXFLAGS="$RPM_OPT_FLAGS" XCFLAGS="$RPM_OPT_FLAGS" \
TCFLAGS="$RPM_OPT_FLAGS" GCJFLAGS="$RPM_OPT_FLAGS" \
+%if %{build_ada}
+CC="gcc%{binsuffix}" GNATBIND="gnatbind%{binsuffix}" \
+%endif
../configure \
%if 0%{?gcc_target_arch:1} && 0%{!?gcc_icecream:1}
%else
@@ -1194,6 +1235,14 @@
%endif
%endif
done
+rm -f $RPM_BUILD_ROOT%{_libdir}/libffi.la
+%if %{biarch}
+%if %{build_primary_64bit}
+ rm -f $RPM_BUILD_ROOT%{_prefix}/lib/libffi.la
+%else
+ rm -f $RPM_BUILD_ROOT%{_prefix}/lib64/libffi.la
+%endif
+%endif
%if %{biarch}
%if %{build_primary_64bit}
mkdir -p $RPM_BUILD_ROOT%{_prefix}/lib
@@ -1302,13 +1351,13 @@
ln -sf /lib64/libgcc_s.so.%{libgcc_s} $RPM_BUILD_ROOT%{versmainlibdirbi64}/libgcc_s_64.so
chmod a+x $RPM_BUILD_ROOT/lib64/libgcc_s.so.%{libgcc_s}
%endif
-# LIBJAVA-DELETE-END
%endif
%if %{build_ada}
mv $RPM_BUILD_ROOT%{libsubdir}/adalib/lib*-*.so $RPM_BUILD_ROOT%{_libdir}
ln -sf %{_libdir}/$(cd $RPM_BUILD_ROOT%{_libdir} && echo libgnarl-*.so) $RPM_BUILD_ROOT%{libsubdir}/adalib/libgnarl.so
ln -sf %{_libdir}/$(cd $RPM_BUILD_ROOT%{_libdir} && echo libgnat-*.so) $RPM_BUILD_ROOT%{libsubdir}/adalib/libgnat.so
chmod a+x $RPM_BUILD_ROOT%{_libdir}/libgna*-*.so
+# LIBJAVA-DELETE-END
%endif
#ln -sf gcc%{binsuffix} $RPM_BUILD_ROOT%{_prefix}/bin/cc%{binsuffix}
#ln -sf g++%{binsuffix} $RPM_BUILD_ROOT%{_prefix}/bin/c++%{binsuffix}
@@ -1554,6 +1603,7 @@
%ifarch %ix86 x86_64
%{libsubdir}/include/mm3dnow.h
%{libsubdir}/include/mmintrin.h
+%{libsubdir}/include/ammintrin.h
%{libsubdir}/include/emmintrin.h
%{libsubdir}/include/pmmintrin.h
%{libsubdir}/include/xmmintrin.h
@@ -1918,6 +1968,8 @@
%files -n libgcj41-devel
%defattr(-,root,root)
%dir %{_prefix}/include/c++
+%dir %{libsubdir}
+%dir %{libsubdir}/include
%dir %{gxxinclude}
%{libsubdir}/include/jawt.h
%{libsubdir}/include/jawt_md.h
@@ -1982,8 +2034,6 @@
%mainlib libffi.so
%mainlib libffi.a
-
-%mainlib libffi.la
%if %{separate_biarch}
%files -n libffi41-devel%{separate_biarch_suffix}
@@ -1992,8 +2042,6 @@
%biarchlib libffi.so
%biarchlib libffi.a
-
-%biarchlib libffi.la
%endif
%endif
%endif
@@ -2006,9 +2054,20 @@
%endif
%changelog -n gcc41
+* Tue Feb 06 2007 - jw@suse.de
+- typo in binutils check fixed.
+* Tue Feb 06 2007 - rguenther@suse.de
+- Add patch for PR20218, visibility fixes.
+- Add patch for wrong parentheses warnings.
+- Filter -ffortify and -fstack-protector from RPM_OPT_FLAGS.
* Mon Jan 29 2007 - jw@suse.de
- cross-avr now checks the binutils changelog rather than an
exact version match.
+* Mon Jan 22 2007 - rguenther@suse.de
+- Fix building ada with gcc42 in beta.
+- Include ada in testing.
+- Do not package libffi.la.
+- Add patches for AMD Family 10 and Power6 support.
* Mon Jan 15 2007 - rguenther@suse.de
- Update to SVN branch head (r120791).
* Mon Jan 08 2007 - schwab@suse.de
++++++ libgcj41.spec ++++++
--- /var/tmp/diff_new_pack.ch3703/_old 2007-02-08 22:57:00.000000000 +0100
+++ /var/tmp/diff_new_pack.ch3703/_new 2007-02-08 22:57:00.000000000 +0100
@@ -33,7 +33,7 @@
# on those that work
# Note that AdaCore only supports %ix86, x86_64 and ia64
%ifarch %ix86 x86_64 ia64 hppa ppc s390
-%define build_ada 0%{!?building_libjava:1}
+%define build_ada 1
%else
# alpha ppc64 s390x
%define build_ada 0
@@ -58,7 +58,7 @@
%endif
BuildRequires: bison flex gettext-devel glibc-devel-32bit texinfo
%if %{build_ada}
-BuildRequires: gcc-ada
+BuildRequires: gcc-ada gcc41-ada
%endif
%if %{build_libjava}
BuildRequires: gtk2-devel libart_lgpl-devel update-desktop-files
@@ -125,7 +125,7 @@
Url: http://gcc.gnu.org/
License: GNU General Public License (GPL), GNU Library General Public License v. 2.0 and 2.1 (LGPL)
Version: 4.1.2_20070115
-Release: 3
+Release: 5
%define gcc_version %(echo %version | sed 's/_.*//')
%define snapshot_date %(echo %version | sed 's/[34]\.[0-4]\.[0-6]//' | sed 's/_/-/')
%define binsuffix -4.1
@@ -174,7 +174,10 @@
Patch97: nov189571-2.diff
Patch98: nvl199137.patch
Patch100: pr28755.patch
+Patch101: pr30113.patch
Patch105: pr29166.diff
+Patch106: fix-ada-build.diff
+Patch107: pr20218.patch
# A set of patches from the RH srpm
Patch51: gcc41-ia64-stack-protector.patch
Patch55: gcc41-java-slow_pthread_self.patch
@@ -196,6 +199,23 @@
Patch69: gcc41-ssse3.patch
Patch70: pr27880.diff
Patch72: Wprofile-mismatch.patch
+Patch73: parentheses-mathematical-5.diff
+# Greyhound (aka AMD Family 10h) support
+Patch200: gcc-amdfam10-suse-1.patch
+Patch201: gcc-amdfam10-suse-2.patch
+Patch202: gcc-amdfam10-suse-3.patch
+Patch203: gcc-amdfam10-suse-4.patch
+Patch204: gcc-amdfam10-suse-5.patch
+Patch205: gcc-amdfam10-suse-6.patch
+Patch206: gcc-amdfam10-suse-7.patch
+Patch207: gcc-amdfam10-suse-8.patch
+Patch208: gcc-amdfam10-suse-9.patch
+Patch209: gcc-amdfam10-suse-10.patch
+Patch210: gcc-amdfam10-suse-11.patch
+Patch211: gcc-amdfam10-suse-12.patch
+Patch212: gcc-amdfam10-suse-20.patch
+# POWER6 support
+Patch300: gcc-4.1.0-power6.diff
%if "%{gcc_target_arch}" == "avr"
# Joerg Wunsch places his cross-avr-gcc-patches there:
# http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/avr-gcc/files
@@ -504,7 +524,10 @@
%patch97
%patch98 -p1
%patch100
+%patch101
%patch105
+%patch106
+%patch107
%patch51
%patch55
%patch57
@@ -526,6 +549,21 @@
%patch69
%patch70
%patch72
+%patch73
+%patch200
+%patch201
+%patch202
+%patch203
+%patch204
+%patch205
+%patch206
+%patch207
+%patch208
+%patch209
+%patch210
+%patch211
+%patch212
+%patch300
%if "%{gcc_target_arch}" == "avr"
%patch1000
%patch1001
@@ -544,7 +582,7 @@
mkdir obj-%{GCCDIST}
cd obj-%{GCCDIST}
RPM_OPT_FLAGS="$RPM_OPT_FLAGS -U_FORTIFY_SOURCE"
-RPM_OPT_FLAGS=`echo $RPM_OPT_FLAGS|sed -e 's/-fno-rtti//g' -e 's/-fno-exceptions//g' -e 's/-Wmissing-format-attribute//g'`
+RPM_OPT_FLAGS=`echo $RPM_OPT_FLAGS|sed -e 's/-fno-rtti//g' -e 's/-fno-exceptions//g' -e 's/-Wmissing-format-attribute//g' -e 's/-fstack-protector//g' -e 's/-ffortify=.//g'`
%ifarch %ix86
# -mcpu is superceded by -mtune but -mtune is not supported by
# our bootstrap compiler. -mcpu gives a warning that stops
@@ -615,6 +653,9 @@
# --with-sysroot=/
CFLAGS="$RPM_OPT_FLAGS" CXXFLAGS="$RPM_OPT_FLAGS" XCFLAGS="$RPM_OPT_FLAGS" \
TCFLAGS="$RPM_OPT_FLAGS" GCJFLAGS="$RPM_OPT_FLAGS" \
+%if %{build_ada}
+CC="gcc%{binsuffix}" GNATBIND="gnatbind%{binsuffix}" \
+%endif
../configure \
%if 0%{?gcc_target_arch:1} && 0%{!?gcc_icecream:1}
%else
@@ -723,6 +764,14 @@
%endif
%endif
done
+rm -f $RPM_BUILD_ROOT%{_libdir}/libffi.la
+%if %{biarch}
+%if %{build_primary_64bit}
+ rm -f $RPM_BUILD_ROOT%{_prefix}/lib/libffi.la
+%else
+ rm -f $RPM_BUILD_ROOT%{_prefix}/lib64/libffi.la
+%endif
+%endif
%if %{biarch}
%if %{build_primary_64bit}
mkdir -p $RPM_BUILD_ROOT%{_prefix}/lib
@@ -749,12 +798,6 @@
%endif
done
%endif
-%if %{build_ada}
-mv $RPM_BUILD_ROOT%{libsubdir}/adalib/lib*-*.so $RPM_BUILD_ROOT%{_libdir}
-ln -sf %{_libdir}/$(cd $RPM_BUILD_ROOT%{_libdir} && echo libgnarl-*.so) $RPM_BUILD_ROOT%{libsubdir}/adalib/libgnarl.so
-ln -sf %{_libdir}/$(cd $RPM_BUILD_ROOT%{_libdir} && echo libgnat-*.so) $RPM_BUILD_ROOT%{libsubdir}/adalib/libgnat.so
-chmod a+x $RPM_BUILD_ROOT%{_libdir}/libgna*-*.so
-%endif
#ln -sf gcc%{binsuffix} $RPM_BUILD_ROOT%{_prefix}/bin/cc%{binsuffix}
#ln -sf g++%{binsuffix} $RPM_BUILD_ROOT%{_prefix}/bin/c++%{binsuffix}
rm -f $RPM_BUILD_ROOT%{_prefix}/bin/c++%{binsuffix}
@@ -961,6 +1004,8 @@
%files -n libgcj41-devel
%defattr(-,root,root)
%dir %{_prefix}/include/c++
+%dir %{libsubdir}
+%dir %{libsubdir}/include
%dir %{gxxinclude}
%{libsubdir}/include/jawt.h
%{libsubdir}/include/jawt_md.h
@@ -1025,8 +1070,6 @@
%mainlib libffi.so
%mainlib libffi.a
-
-%mainlib libffi.la
%if %{separate_biarch}
%files -n libffi41-devel%{separate_biarch_suffix}
@@ -1035,8 +1078,6 @@
%biarchlib libffi.so
%biarchlib libffi.a
-
-%biarchlib libffi.la
%endif
%endif
%endif
@@ -1049,9 +1090,20 @@
%endif
%changelog -n libgcj41
+* Tue Feb 06 2007 - jw@suse.de
+- typo in binutils check fixed.
+* Tue Feb 06 2007 - rguenther@suse.de
+- Add patch for PR20218, visibility fixes.
+- Add patch for wrong parentheses warnings.
+- Filter -ffortify and -fstack-protector from RPM_OPT_FLAGS.
* Mon Jan 29 2007 - jw@suse.de
- cross-avr now checks the binutils changelog rather than an
exact version match.
+* Mon Jan 22 2007 - rguenther@suse.de
+- Fix building ada with gcc42 in beta.
+- Include ada in testing.
+- Do not package libffi.la.
+- Add patches for AMD Family 10 and Power6 support.
* Mon Jan 15 2007 - rguenther@suse.de
- Update to SVN branch head (r120791).
* Mon Jan 08 2007 - schwab@suse.de
++++++ change_spec ++++++
--- gcc41/change_spec 2006-11-17 10:25:23.000000000 +0100
+++ /mounts/work_src_done/STABLE/gcc41/change_spec 2007-02-07 10:23:26.004991000 +0100
@@ -93,6 +93,15 @@
}' >> libgcj$base_ver.spec
fi
+ ## non-icecream cross compilers.
+ export outfile
+ for arch in avr; do
+ cross_arch=$arch sh micro-cross.spec.in > cross-$arch-gcc.spec
+ test -f gcc*.changes && ln -f gcc*.changes cross-$arch-gcc.changes
+ done
+
+# exit for beta gcc to avoid generating icecream cross specfiles there
+echo $PWD | grep -i 'beta' && exit 0
if [ $GCC_FOR_OPT != yes ]; then
# disabled: mips, sparc
@@ -104,13 +113,6 @@
test -f gcc*.changes && ln -f gcc*.changes cross-$arch-gcc-icecream-backend.changes
done
- ## non-icecream cross compilers.
- export outfile
- for arch in avr; do
- cross_arch=$arch sh micro-cross.spec.in > cross-$arch-gcc.spec
- test -f gcc*.changes && ln -f gcc*.changes cross-$arch-gcc.changes
- done
-
test -f gcc$base_ver.changes && ln -f gcc$base_ver.changes libgcj$base_ver.changes
fi
exit 0
++++++ fix-ada-build.diff ++++++
Index: Makefile.in
===================================================================
*** Makefile.in (revision 121051)
--- Makefile.in (working copy)
*************** M4 = @M4@
*** 261,266 ****
--- 261,267 ----
MAKEINFO = @MAKEINFO@
EXPECT = @EXPECT@
RUNTEST = @RUNTEST@
+ GNATBIND = @GNATBIND@
# This just becomes part of the MAKEINFO definition passed down to
# sub-makes. It lets flags be given on the command line while still
*************** EXTRA_GCC_FLAGS = \
*** 525,530 ****
--- 526,532 ----
'BUILD_PREFIX=$(BUILD_PREFIX)' \
'BUILD_PREFIX_1=$(BUILD_PREFIX_1)' \
"GCC_FOR_TARGET=$(GCC_FOR_TARGET)" \
+ "GNATBIND=$(GNATBIND)" \
"`echo 'LANGUAGES=$(LANGUAGES)' | sed -e s'/[^=][^=]*=$$/XFOO=/'`" \
"`echo 'STMP_FIXPROTO=$(STMP_FIXPROTO)' | sed -e s'/[^=][^=]*=$$/XFOO=/'`" \
"`echo 'LIMITS_H_TEST=$(LIMITS_H_TEST)' | sed -e s'/[^=][^=]*=$$/XFOO=/'`" \
Index: Makefile.tpl
===================================================================
*** Makefile.tpl (revision 121051)
--- Makefile.tpl (working copy)
*************** M4 = @M4@
*** 264,269 ****
--- 264,270 ----
MAKEINFO = @MAKEINFO@
EXPECT = @EXPECT@
RUNTEST = @RUNTEST@
+ GNATBIND = @GNATBIND@
# This just becomes part of the MAKEINFO definition passed down to
# sub-makes. It lets flags be given on the command line while still
*************** EXTRA_GCC_FLAGS = \
*** 457,462 ****
--- 458,464 ----
'BUILD_PREFIX=$(BUILD_PREFIX)' \
'BUILD_PREFIX_1=$(BUILD_PREFIX_1)' \
"GCC_FOR_TARGET=$(GCC_FOR_TARGET)" \
+ "GNATBIND=$(GNATBIND)" \
"`echo 'LANGUAGES=$(LANGUAGES)' | sed -e s'/[^=][^=]*=$$/XFOO=/'`" \
"`echo 'STMP_FIXPROTO=$(STMP_FIXPROTO)' | sed -e s'/[^=][^=]*=$$/XFOO=/'`" \
"`echo 'LIMITS_H_TEST=$(LIMITS_H_TEST)' | sed -e s'/[^=][^=]*=$$/XFOO=/'`" \
Index: gcc/Makefile.in
===================================================================
*** gcc/Makefile.in (revision 121051)
--- gcc/Makefile.in (working copy)
*************** stage1_copy: stage1_build
*** 4072,4077 ****
--- 4072,4078 ----
stage2_build: stage1_copy
$(MAKE) CC="$(STAGE_CC_WRAPPER) stage1/xgcc$(exeext) -Bstage1/ -B$(build_tooldir)/bin/" CC_FOR_BUILD="$(STAGE_CC_WRAPPER) stage1/xgcc$(exeext) -Bstage1/ -B$(build_tooldir)/bin/" \
STAGE_PREFIX=stage1/ \
+ GNATBIND=stage1/gnatbind \
$(POSTSTAGE1_FLAGS_TO_PASS) \
$(STAGE2_FLAGS_TO_PASS)
$(STAMP) stage2_build
*************** stage2_copy: stage2_build
*** 4085,4090 ****
--- 4086,4092 ----
stageprofile_build: stage1_copy
$(MAKE) CC="$(STAGE_CC_WRAPPER) stage1/xgcc$(exeext) -Bstage1/ -B$(build_tooldir)/bin/" CC_FOR_BUILD="$(STAGE_CC_WRAPPER) stage1/xgcc$(exeext) -Bstage1/ -B$(build_tooldir)/bin/" \
STAGE_PREFIX=stage1/ \
+ GNATBIND=stage1/gnatbind \
$(POSTSTAGE1_FLAGS_TO_PASS) \
$(STAGEPROFILE_FLAGS_TO_PASS)
$(STAMP) stageprofile_build
*************** stageprofile_copy: stageprofile_build
*** 4098,4103 ****
--- 4100,4106 ----
stage3_build: stage2_copy
$(MAKE) CC="$(STAGE_CC_WRAPPER) stage2/xgcc$(exeext) -Bstage2/ -B$(build_tooldir)/bin/" CC_FOR_BUILD="$(STAGE_CC_WRAPPER) stage2/xgcc$(exeext) -Bstage2/ -B$(build_tooldir)/bin/" \
STAGE_PREFIX=stage2/ \
+ GNATBIND=stage2/gnatbind \
$(POSTSTAGE1_FLAGS_TO_PASS) \
$(STAGE2_FLAGS_TO_PASS)
$(STAMP) stage3_build
*************** stage3_build: stage2_copy
*** 4106,4111 ****
--- 4109,4115 ----
stagefeedback_build: stageprofile_copy stage1_copy
$(MAKE) CC="$(STAGE_CC_WRAPPER) stage1/xgcc$(exeext) -Bstage1/ -B$(build_tooldir)/bin/" CC_FOR_BUILD="$(STAGE_CC_WRAPPER) stage1/xgcc$(exeext) -Bstage1/ -B$(build_tooldir)/bin/" \
STAGE_PREFIX=stage1/ \
+ GNATBIND=stage1/gnatbind \
$(POSTSTAGE1_FLAGS_TO_PASS) \
$(STAGEFEEDBACK_FLAGS_TO_PASS)
$(STAMP) stagefeedback_build
*************** stage3_copy: stage3_build
*** 4125,4130 ****
--- 4129,4135 ----
stage4_build: stage3_copy
$(MAKE) CC="$(STAGE_CC_WRAPPER) stage3/xgcc$(exeext) -Bstage3/ -B$(build_tooldir)/bin/" CC_FOR_BUILD="$(STAGE_CC_WRAPPER) stage3/xgcc$(exeext) -Bstage3/ -B$(build_tooldir)/bin/" \
STAGE_PREFIX=stage3/ \
+ GNATBIND=stage3/gnatbind \
$(POSTSTAGE1_FLAGS_TO_PASS) \
$(STAGE2_FLAGS_TO_PASS)
$(STAMP) stage4_build
Index: gcc/ada/Make-lang.in
===================================================================
*** gcc/ada/Make-lang.in (revision 121051)
--- gcc/ada/Make-lang.in (working copy)
*************** ALL_ADA_CFLAGS = $(X_ADA_CFLAGS) $(T_ADA
*** 54,60 ****
ADA_INCLUDES = -I- -I. -Iada -I$(srcdir)/ada
ADA_INCLUDE_DIR = $(libsubdir)/adainclude
ADA_RTL_OBJ_DIR = $(libsubdir)/adalib
- GNATBIND = $(STAGE_PREFIX)gnatbind
ADA_FLAGS_TO_PASS = \
"ADA_FOR_BUILD=$(ADA_FOR_BUILD)" \
"ADA_INCLUDE_DIR=$(ADA_INCLUDE_DIR)" \
--- 54,59 ----
Index: gcc/ada/Makefile.in
===================================================================
*** gcc/ada/Makefile.in (revision 121051)
--- gcc/ada/Makefile.in (working copy)
*************** PWD_COMMAND = $${PWDCMD-pwd}
*** 114,120 ****
INSTALL_DATA_DATE = cp -p
MAKEINFO = makeinfo
TEXI2DVI = texi2dvi
- GNATBIND = $(STAGE_PREFIX)gnatbind
GNATBIND_FLAGS = -static -x
ADA_CFLAGS =
ADAFLAGS = -W -Wall -gnatpg -gnata
--- 114,119 ----
++++++ gcc-4.1.0-power6.diff ++++++
++++ 3965 lines (skipped)
++++++ gcc-amdfam10-suse-10.patch ++++++
Index: gcc/config/i386/athlon.md
===================================================================
--- gcc/config/i386/athlon.md.orig
+++ gcc/config/i386/athlon.md
@@ -257,34 +257,62 @@
(and (eq_attr "unit" "integer,unknown")
(eq_attr "memory" "none,unknown"))))
"athlon-direct,athlon-ieu")
+(define_insn_reservation "athlon_idirect_amdfam10" 1
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "amdfam10_decode" "direct")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "none,unknown"))))
+ "athlon-direct,athlon-ieu")
(define_insn_reservation "athlon_ivector" 2
(and (eq_attr "cpu" "athlon,k8,generic64")
(and (eq_attr "athlon_decode" "vector")
(and (eq_attr "unit" "integer,unknown")
(eq_attr "memory" "none,unknown"))))
"athlon-vector,athlon-ieu,athlon-ieu")
+(define_insn_reservation "athlon_ivector_amdfam10" 2
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "amdfam10_decode" "vector")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "none,unknown"))))
+ "athlon-vector,athlon-ieu,athlon-ieu")
+
(define_insn_reservation "athlon_idirect_loadmov" 3
- (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
(and (eq_attr "type" "imov")
(eq_attr "memory" "load")))
"athlon-direct,athlon-load")
+
(define_insn_reservation "athlon_idirect_load" 4
(and (eq_attr "cpu" "athlon,k8,generic64")
(and (eq_attr "athlon_decode" "direct")
(and (eq_attr "unit" "integer,unknown")
(eq_attr "memory" "load"))))
"athlon-direct,athlon-load,athlon-ieu")
+(define_insn_reservation "athlon_idirect_load_amdfam10" 4
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "amdfam10_decode" "direct")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "load"))))
+ "athlon-direct,athlon-load,athlon-ieu")
(define_insn_reservation "athlon_ivector_load" 6
(and (eq_attr "cpu" "athlon,k8,generic64")
(and (eq_attr "athlon_decode" "vector")
(and (eq_attr "unit" "integer,unknown")
(eq_attr "memory" "load"))))
"athlon-vector,athlon-load,athlon-ieu,athlon-ieu")
+(define_insn_reservation "athlon_ivector_load_amdfam10" 6
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "amdfam10_decode" "vector")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "load"))))
+ "athlon-vector,athlon-load,athlon-ieu,athlon-ieu")
+
(define_insn_reservation "athlon_idirect_movstore" 1
- (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
(and (eq_attr "type" "imov")
(eq_attr "memory" "store")))
"athlon-direct,athlon-agu,athlon-store")
+
(define_insn_reservation "athlon_idirect_both" 4
(and (eq_attr "cpu" "athlon,k8,generic64")
(and (eq_attr "athlon_decode" "direct")
@@ -293,6 +321,15 @@
"athlon-direct,athlon-load,
athlon-ieu,athlon-store,
athlon-store")
+(define_insn_reservation "athlon_idirect_both_amdfam10" 4
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "amdfam10_decode" "direct")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "both"))))
+ "athlon-direct,athlon-load,
+ athlon-ieu,athlon-store,
+ athlon-store")
+
(define_insn_reservation "athlon_ivector_both" 6
(and (eq_attr "cpu" "athlon,k8,generic64")
(and (eq_attr "athlon_decode" "vector")
@@ -302,6 +339,16 @@
athlon-ieu,
athlon-ieu,
athlon-store")
+(define_insn_reservation "athlon_ivector_both_amdfam10" 6
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "amdfam10_decode" "vector")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "both"))))
+ "athlon-vector,athlon-load,
+ athlon-ieu,
+ athlon-ieu,
+ athlon-store")
+
(define_insn_reservation "athlon_idirect_store" 1
(and (eq_attr "cpu" "athlon,k8,generic64")
(and (eq_attr "athlon_decode" "direct")
@@ -309,6 +356,14 @@
(eq_attr "memory" "store"))))
"athlon-direct,(athlon-ieu+athlon-agu),
athlon-store")
+(define_insn_reservation "athlon_idirect_store_amdfam10" 1
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "amdfam10_decode" "direct")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "store"))))
+ "athlon-direct,(athlon-ieu+athlon-agu),
+ athlon-store")
+
(define_insn_reservation "athlon_ivector_store" 2
(and (eq_attr "cpu" "athlon,k8,generic64")
(and (eq_attr "athlon_decode" "vector")
@@ -316,6 +371,13 @@
(eq_attr "memory" "store"))))
"athlon-vector,(athlon-ieu+athlon-agu),athlon-ieu,
athlon-store")
+(define_insn_reservation "athlon_ivector_store_amdfam10" 2
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "amdfam10_decode" "vector")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "store"))))
+ "athlon-vector,(athlon-ieu+athlon-agu),athlon-ieu,
+ athlon-store")
;; Athlon floatin point unit
(define_insn_reservation "athlon_fldxf" 12
Index: gcc/config/i386/i386.md
===================================================================
--- gcc/config/i386/i386.md.orig
+++ gcc/config/i386/i386.md
@@ -971,6 +971,7 @@
"sahf"
[(set_attr "length" "1")
(set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "direct")
(set_attr "mode" "SI")])
;; Pentium Pro can do steps 1 through 3 in one go.
@@ -1283,7 +1284,8 @@
[(set_attr "type" "imov")
(set_attr "mode" "SI")
(set_attr "pent_pair" "np")
- (set_attr "athlon_decode" "vector")])
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "double")])
(define_expand "movhi"
[(set (match_operand:HI 0 "nonimmediate_operand" "")
@@ -1400,8 +1402,10 @@
[(set_attr "type" "imov")
(set_attr "mode" "SI")
(set_attr "pent_pair" "np")
- (set_attr "athlon_decode" "vector")])
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "double")])
+;; Not added amdfam10_decode since TARGET_PARTIAL_REG_STALL is disabled for AMDFAM10
(define_insn "*swaphi_2"
[(set (match_operand:HI 0 "register_operand" "+r")
(match_operand:HI 1 "register_operand" "+r"))
@@ -1574,8 +1578,10 @@
[(set_attr "type" "imov")
(set_attr "mode" "SI")
(set_attr "pent_pair" "np")
- (set_attr "athlon_decode" "vector")])
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "vector")])
+;; Not added amdfam10_decode since TARGET_PARTIAL_REG_STALL is disabled for AMDFAM10
(define_insn "*swapqi_2"
[(set (match_operand:QI 0 "register_operand" "+q")
(match_operand:QI 1 "register_operand" "+q"))
@@ -2129,7 +2135,8 @@
[(set_attr "type" "imov")
(set_attr "mode" "DI")
(set_attr "pent_pair" "np")
- (set_attr "athlon_decode" "vector")])
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "double")])
(define_expand "movti"
[(set (match_operand:TI 0 "nonimmediate_operand" "")
@@ -4430,7 +4437,8 @@
[(set_attr "length" "2")
(set_attr "mode" "HI")
(set_attr "unit" "i387")
- (set_attr "athlon_decode" "vector")])
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "vector")])
;; Conversion between fixed point and floating point.
@@ -6860,6 +6868,14 @@
"TARGET_64BIT"
"")
+;; On AMDFAM10
+;; IMUL reg64, reg64, imm8 Direct
+;; IMUL reg64, mem64, imm8 VectorPath
+;; IMUL reg64, reg64, imm32 Direct
+;; IMUL reg64, mem64, imm32 VectorPath
+;; IMUL reg64, reg64 Direct
+;; IMUL reg64, mem64 Direct
+
(define_insn "*muldi3_1_rex64"
[(set (match_operand:DI 0 "register_operand" "=r,r,r")
(mult:DI (match_operand:DI 1 "nonimmediate_operand" "%rm,rm,0")
@@ -6882,6 +6898,11 @@
(match_operand 1 "memory_operand" ""))
(const_string "vector")]
(const_string "direct")))
+ (set (attr "amdfam10_decode")
+ (cond [(and (eq_attr "alternative" "0,1")
+ (match_operand 1 "memory_operand" ""))
+ (const_string "vector")]
+ (const_string "direct")))
(set_attr "mode" "DI")])
(define_expand "mulsi3"
@@ -6892,6 +6913,14 @@
""
"")
+;; On AMDFAM10
+;; IMUL reg32, reg32, imm8 Direct
+;; IMUL reg32, mem32, imm8 VectorPath
+;; IMUL reg32, reg32, imm32 Direct
+;; IMUL reg32, mem32, imm32 VectorPath
+;; IMUL reg32, reg32 Direct
+;; IMUL reg32, mem32 Direct
+
(define_insn "*mulsi3_1"
[(set (match_operand:SI 0 "register_operand" "=r,r,r")
(mult:SI (match_operand:SI 1 "nonimmediate_operand" "%rm,rm,0")
@@ -6913,6 +6942,11 @@
(match_operand 1 "memory_operand" ""))
(const_string "vector")]
(const_string "direct")))
+ (set (attr "amdfam10_decode")
+ (cond [(and (eq_attr "alternative" "0,1")
+ (match_operand 1 "memory_operand" ""))
+ (const_string "vector")]
+ (const_string "direct")))
(set_attr "mode" "SI")])
(define_insn "*mulsi3_1_zext"
@@ -6938,6 +6972,11 @@
(match_operand 1 "memory_operand" ""))
(const_string "vector")]
(const_string "direct")))
+ (set (attr "amdfam10_decode")
+ (cond [(and (eq_attr "alternative" "0,1")
+ (match_operand 1 "memory_operand" ""))
+ (const_string "vector")]
+ (const_string "direct")))
(set_attr "mode" "SI")])
(define_expand "mulhi3"
@@ -6948,6 +6987,13 @@
"TARGET_HIMODE_MATH"
"")
+;; On AMDFAM10
+;; IMUL reg16, reg16, imm8 VectorPath
+;; IMUL reg16, mem16, imm8 VectorPath
+;; IMUL reg16, reg16, imm16 VectorPath
+;; IMUL reg16, mem16, imm16 VectorPath
+;; IMUL reg16, reg16 Direct
+;; IMUL reg16, mem16 Direct
(define_insn "*mulhi3_1"
[(set (match_operand:HI 0 "register_operand" "=r,r,r")
(mult:HI (match_operand:HI 1 "nonimmediate_operand" "%rm,rm,0")
@@ -6966,6 +7012,10 @@
(eq_attr "alternative" "1,2")
(const_string "vector")]
(const_string "direct")))
+ (set (attr "amdfam10_decode")
+ (cond [(eq_attr "alternative" "0,1")
+ (const_string "vector")]
+ (const_string "direct")))
(set_attr "mode" "HI")])
(define_expand "mulqi3"
@@ -6976,6 +7026,10 @@
"TARGET_QIMODE_MATH"
"")
+;; On AMDFAM10
+;; MUL reg8 Direct
+;; MUL mem8 Direct
+
(define_insn "*mulqi3_1"
[(set (match_operand:QI 0 "register_operand" "=a")
(mult:QI (match_operand:QI 1 "nonimmediate_operand" "%0")
@@ -6990,6 +7044,7 @@
(if_then_else (eq_attr "cpu" "athlon")
(const_string "vector")
(const_string "direct")))
+ (set_attr "amdfam10_decode" "direct")
(set_attr "mode" "QI")])
(define_expand "umulqihi3"
@@ -7016,6 +7071,7 @@
(if_then_else (eq_attr "cpu" "athlon")
(const_string "vector")
(const_string "direct")))
+ (set_attr "amdfam10_decode" "direct")
(set_attr "mode" "QI")])
(define_expand "mulqihi3"
@@ -7040,6 +7096,7 @@
(if_then_else (eq_attr "cpu" "athlon")
(const_string "vector")
(const_string "direct")))
+ (set_attr "amdfam10_decode" "direct")
(set_attr "mode" "QI")])
(define_expand "umulditi3"
@@ -7066,6 +7123,7 @@
(if_then_else (eq_attr "cpu" "athlon")
(const_string "vector")
(const_string "double")))
+ (set_attr "amdfam10_decode" "double")
(set_attr "mode" "DI")])
;; We can't use this pattern in 64bit mode, since it results in two separate 32bit registers
@@ -7093,6 +7151,7 @@
(if_then_else (eq_attr "cpu" "athlon")
(const_string "vector")
(const_string "double")))
+ (set_attr "amdfam10_decode" "double")
(set_attr "mode" "SI")])
(define_expand "mulditi3"
@@ -7119,6 +7178,7 @@
(if_then_else (eq_attr "cpu" "athlon")
(const_string "vector")
(const_string "double")))
+ (set_attr "amdfam10_decode" "double")
(set_attr "mode" "DI")])
(define_expand "mulsidi3"
@@ -7145,6 +7205,7 @@
(if_then_else (eq_attr "cpu" "athlon")
(const_string "vector")
(const_string "double")))
+ (set_attr "amdfam10_decode" "double")
(set_attr "mode" "SI")])
(define_expand "umuldi3_highpart"
@@ -7181,6 +7242,7 @@
(if_then_else (eq_attr "cpu" "athlon")
(const_string "vector")
(const_string "double")))
+ (set_attr "amdfam10_decode" "double")
(set_attr "mode" "DI")])
(define_expand "umulsi3_highpart"
@@ -7216,6 +7278,7 @@
(if_then_else (eq_attr "cpu" "athlon")
(const_string "vector")
(const_string "double")))
+ (set_attr "amdfam10_decode" "double")
(set_attr "mode" "SI")])
(define_insn "*umulsi3_highpart_zext"
@@ -7238,6 +7301,7 @@
(if_then_else (eq_attr "cpu" "athlon")
(const_string "vector")
(const_string "double")))
+ (set_attr "amdfam10_decode" "double")
(set_attr "mode" "SI")])
(define_expand "smuldi3_highpart"
@@ -7273,6 +7337,7 @@
(if_then_else (eq_attr "cpu" "athlon")
(const_string "vector")
(const_string "double")))
+ (set_attr "amdfam10_decode" "double")
(set_attr "mode" "DI")])
(define_expand "smulsi3_highpart"
@@ -7328,6 +7393,7 @@
(if_then_else (eq_attr "cpu" "athlon")
(const_string "vector")
(const_string "double")))
+ (set_attr "amdfam10_decode" "double")
(set_attr "mode" "SI")])
;; The patterns that match these are at the end of this file.
@@ -10309,7 +10375,8 @@
[(set_attr "type" "ishift")
(set_attr "prefix_0f" "1")
(set_attr "mode" "DI")
- (set_attr "athlon_decode" "vector")])
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "vector")])
(define_expand "x86_64_shift_adj"
[(set (reg:CCZ FLAGS_REG)
@@ -10524,7 +10591,8 @@
(set_attr "prefix_0f" "1")
(set_attr "mode" "SI")
(set_attr "pent_pair" "np")
- (set_attr "athlon_decode" "vector")])
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "vector")])
(define_expand "x86_shift_adj_1"
[(set (reg:CCZ FLAGS_REG)
@@ -11284,7 +11352,8 @@
[(set_attr "type" "ishift")
(set_attr "prefix_0f" "1")
(set_attr "mode" "DI")
- (set_attr "athlon_decode" "vector")])
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "vector")])
(define_expand "ashrdi3"
[(set (match_operand:DI 0 "shiftdi_operand" "")
@@ -15421,7 +15490,8 @@
sqrtss\t{%1, %0|%0, %1}"
[(set_attr "type" "fpspc,sse")
(set_attr "mode" "SF,SF")
- (set_attr "athlon_decode" "direct,*")])
+ (set_attr "athlon_decode" "direct,*")
+ (set_attr "amdfam10_decode" "direct,*")])
(define_insn "*sqrtsf2_sse"
[(set (match_operand:SF 0 "register_operand" "=x")
@@ -15430,7 +15500,8 @@
"sqrtss\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")
(set_attr "mode" "SF")
- (set_attr "athlon_decode" "*")])
+ (set_attr "athlon_decode" "*")
+ (set_attr "amdfam10_decode" "*")])
(define_insn "*sqrtsf2_i387"
[(set (match_operand:SF 0 "register_operand" "=f")
@@ -15439,7 +15510,8 @@
"fsqrt"
[(set_attr "type" "fpspc")
(set_attr "mode" "SF")
- (set_attr "athlon_decode" "direct")])
+ (set_attr "athlon_decode" "direct")
+ (set_attr "amdfam10_decode" "direct")])
(define_expand "sqrtdf2"
[(set (match_operand:DF 0 "register_operand" "")
@@ -15459,7 +15531,8 @@
sqrtsd\t{%1, %0|%0, %1}"
[(set_attr "type" "fpspc,sse")
(set_attr "mode" "DF,DF")
- (set_attr "athlon_decode" "direct,*")])
+ (set_attr "athlon_decode" "direct,*")
+ (set_attr "amdfam10_decode" "direct,*")])
(define_insn "*sqrtdf2_sse"
[(set (match_operand:DF 0 "register_operand" "=Y")
@@ -15468,7 +15541,8 @@
"sqrtsd\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")
(set_attr "mode" "DF")
- (set_attr "athlon_decode" "*")])
+ (set_attr "athlon_decode" "*")
+ (set_attr "amdfam10_decode" "*")])
(define_insn "*sqrtdf2_i387"
[(set (match_operand:DF 0 "register_operand" "=f")
@@ -15477,7 +15551,8 @@
"fsqrt"
[(set_attr "type" "fpspc")
(set_attr "mode" "DF")
- (set_attr "athlon_decode" "direct")])
+ (set_attr "athlon_decode" "direct")
+ (set_attr "amdfam10_decode" "direct")])
(define_insn "*sqrtextendsfdf2_i387"
[(set (match_operand:DF 0 "register_operand" "=f")
@@ -15488,7 +15563,8 @@
"fsqrt"
[(set_attr "type" "fpspc")
(set_attr "mode" "DF")
- (set_attr "athlon_decode" "direct")])
+ (set_attr "athlon_decode" "direct")
+ (set_attr "amdfam10_decode" "direct")])
(define_insn "sqrtxf2"
[(set (match_operand:XF 0 "register_operand" "=f")
@@ -15498,7 +15574,8 @@
"fsqrt"
[(set_attr "type" "fpspc")
(set_attr "mode" "XF")
- (set_attr "athlon_decode" "direct")])
+ (set_attr "athlon_decode" "direct")
+ (set_attr "amdfam10_decode" "direct")])
(define_insn "*sqrtextendsfxf2_i387"
[(set (match_operand:XF 0 "register_operand" "=f")
@@ -15508,7 +15585,8 @@
"fsqrt"
[(set_attr "type" "fpspc")
(set_attr "mode" "XF")
- (set_attr "athlon_decode" "direct")])
+ (set_attr "athlon_decode" "direct")
+ (set_attr "amdfam10_decode" "direct")])
(define_insn "*sqrtextenddfxf2_i387"
[(set (match_operand:XF 0 "register_operand" "=f")
@@ -15518,7 +15596,8 @@
"fsqrt"
[(set_attr "type" "fpspc")
(set_attr "mode" "XF")
- (set_attr "athlon_decode" "direct")])
+ (set_attr "athlon_decode" "direct")
+ (set_attr "amdfam10_decode" "direct")])
(define_insn "fpremxf4"
[(set (match_operand:XF 0 "register_operand" "=f")
++++++ gcc-amdfam10-suse-11.patch ++++++
Index: gcc/config/i386/athlon.md
===================================================================
--- gcc/config/i386/athlon.md.orig
+++ gcc/config/i386/athlon.md
@@ -387,7 +387,7 @@
(eq_attr "mode" "XF"))))
"athlon-vector,athlon-fpload2,athlon-fvector*9")
(define_insn_reservation "athlon_fldxf_k8" 13
- (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
(and (eq_attr "type" "fmov")
(and (eq_attr "memory" "load")
(eq_attr "mode" "XF"))))
@@ -399,7 +399,7 @@
(eq_attr "memory" "load")))
"athlon-direct,athlon-fpload,athlon-fany")
(define_insn_reservation "athlon_fld_k8" 2
- (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
(and (eq_attr "type" "fmov")
(eq_attr "memory" "load")))
"athlon-direct,athlon-fploadk8,athlon-fstore")
@@ -411,7 +411,7 @@
(eq_attr "mode" "XF"))))
"athlon-vector,(athlon-fpsched+athlon-agu),(athlon-store2+(athlon-fvector*7))")
(define_insn_reservation "athlon_fstxf_k8" 8
- (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
(and (eq_attr "type" "fmov")
(and (eq_attr "memory" "store,both")
(eq_attr "mode" "XF"))))
@@ -422,16 +422,16 @@
(eq_attr "memory" "store,both")))
"athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
(define_insn_reservation "athlon_fst_k8" 2
- (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
(and (eq_attr "type" "fmov")
(eq_attr "memory" "store,both")))
"athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
(define_insn_reservation "athlon_fist" 4
- (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
(eq_attr "type" "fistp,fisttp"))
"athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
(define_insn_reservation "athlon_fmov" 2
- (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
(eq_attr "type" "fmov"))
"athlon-direct,athlon-fpsched,athlon-faddmul")
(define_insn_reservation "athlon_fadd_load" 4
@@ -440,12 +440,12 @@
(eq_attr "memory" "load")))
"athlon-direct,athlon-fpload,athlon-fadd")
(define_insn_reservation "athlon_fadd_load_k8" 6
- (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
(and (eq_attr "type" "fop")
(eq_attr "memory" "load")))
"athlon-direct,athlon-fploadk8,athlon-fadd")
(define_insn_reservation "athlon_fadd" 4
- (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
(eq_attr "type" "fop"))
"athlon-direct,athlon-fpsched,athlon-fadd")
(define_insn_reservation "athlon_fmul_load" 4
@@ -454,16 +454,16 @@
(eq_attr "memory" "load")))
"athlon-direct,athlon-fpload,athlon-fmul")
(define_insn_reservation "athlon_fmul_load_k8" 6
- (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
(and (eq_attr "type" "fmul")
(eq_attr "memory" "load")))
"athlon-direct,athlon-fploadk8,athlon-fmul")
(define_insn_reservation "athlon_fmul" 4
- (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
(eq_attr "type" "fmul"))
"athlon-direct,athlon-fpsched,athlon-fmul")
(define_insn_reservation "athlon_fsgn" 2
- (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
(eq_attr "type" "fsgn"))
"athlon-direct,athlon-fpsched,athlon-fmul")
(define_insn_reservation "athlon_fdiv_load" 24
@@ -472,7 +472,7 @@
(eq_attr "memory" "load")))
"athlon-direct,athlon-fpload,athlon-fmul")
(define_insn_reservation "athlon_fdiv_load_k8" 13
- (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
(and (eq_attr "type" "fdiv")
(eq_attr "memory" "load")))
"athlon-direct,athlon-fploadk8,athlon-fmul")
@@ -481,16 +481,16 @@
(eq_attr "type" "fdiv"))
"athlon-direct,athlon-fpsched,athlon-fmul")
(define_insn_reservation "athlon_fdiv_k8" 11
- (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
(eq_attr "type" "fdiv"))
"athlon-direct,athlon-fpsched,athlon-fmul")
(define_insn_reservation "athlon_fpspc_load" 103
- (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
(and (eq_attr "type" "fpspc")
(eq_attr "memory" "load")))
"athlon-vector,athlon-fpload,athlon-fvector")
(define_insn_reservation "athlon_fpspc" 100
- (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
(eq_attr "type" "fpspc"))
"athlon-vector,athlon-fpsched,athlon-fvector")
(define_insn_reservation "athlon_fcmov_load" 7
@@ -503,12 +503,12 @@
(eq_attr "type" "fcmov"))
"athlon-vector,athlon-fpsched,athlon-fvector")
(define_insn_reservation "athlon_fcmov_load_k8" 17
- (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
(and (eq_attr "type" "fcmov")
(eq_attr "memory" "load")))
"athlon-vector,athlon-fploadk8,athlon-fvector")
(define_insn_reservation "athlon_fcmov_k8" 15
- (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
(eq_attr "type" "fcmov"))
"athlon-vector,athlon-fpsched,athlon-fvector")
;; fcomi is vector decoded by uses only one pipe.
@@ -519,13 +519,13 @@
(eq_attr "memory" "load"))))
"athlon-vector,athlon-fpload,athlon-fadd")
(define_insn_reservation "athlon_fcomi_load_k8" 5
- (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
(and (eq_attr "type" "fcmp")
(and (eq_attr "athlon_decode" "vector")
(eq_attr "memory" "load"))))
"athlon-vector,athlon-fploadk8,athlon-fadd")
(define_insn_reservation "athlon_fcomi" 3
- (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
(and (eq_attr "athlon_decode" "vector")
(eq_attr "type" "fcmp")))
"athlon-vector,athlon-fpsched,athlon-fadd")
@@ -535,18 +535,18 @@
(eq_attr "memory" "load")))
"athlon-direct,athlon-fpload,athlon-fadd")
(define_insn_reservation "athlon_fcom_load_k8" 4
- (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
(and (eq_attr "type" "fcmp")
(eq_attr "memory" "load")))
"athlon-direct,athlon-fploadk8,athlon-fadd")
(define_insn_reservation "athlon_fcom" 2
- (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
(eq_attr "type" "fcmp"))
"athlon-direct,athlon-fpsched,athlon-fadd")
;; Never seen by the scheduler because we still don't do post reg-stack
;; scheduling.
;(define_insn_reservation "athlon_fxch" 2
-; (and (eq_attr "cpu" "athlon,k8,generic64")
+; (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
; (eq_attr "type" "fxch"))
; "athlon-direct,athlon-fpsched,athlon-fany")
++++++ gcc-amdfam10-suse-12.patch ++++++
Index: gcc/config/i386/i386.c
===================================================================
--- gcc/config/i386/i386.c.orig
+++ gcc/config/i386/i386.c
@@ -938,6 +938,9 @@ const int x86_cmpxchg = ~m_386;
const int x86_xadd = ~m_386;
const int x86_pad_returns = m_ATHLON_K8 | m_GENERIC | m_AMDFAM10;
+/* Use Vector Converts instead of Scalar Converts. Added for AMDFAM10 */
+const int x86_use_vector_converts = m_AMDFAM10;
+
/* In case the average insn count for single function invocation is
lower than this constant, emit fast (but longer) prologue and
epilogue code. */
Index: gcc/config/i386/i386.h
===================================================================
--- gcc/config/i386/i386.h.orig
+++ gcc/config/i386/i386.h
@@ -168,6 +168,7 @@ extern const int x86_use_incdec;
extern const int x86_pad_returns;
extern const int x86_partial_flag_reg_stall;
extern int x86_prefetch_sse;
+extern const int x86_use_vector_converts;
#define TARGET_USE_LEAVE (x86_use_leave & TUNEMASK)
#define TARGET_PUSH_MEMORY (x86_push_memory & TUNEMASK)
@@ -217,6 +218,7 @@ extern int x86_prefetch_sse;
#define TARGET_PROLOGUE_USING_MOVE (x86_prologue_using_move & TUNEMASK)
#define TARGET_EPILOGUE_USING_MOVE (x86_epilogue_using_move & TUNEMASK)
#define TARGET_PREFETCH_SSE (x86_prefetch_sse)
+#define TARGET_USE_VECTOR_CONVERTS (x86_use_vector_converts & TUNEMASK)
#define TARGET_SHIFT1 (x86_shift1 & TUNEMASK)
#define TARGET_USE_FFREEP (x86_use_ffreep & TUNEMASK)
#define TARGET_REP_MOVL_OPTIMAL (x86_rep_movl_optimal & TUNEMASK)
Index: gcc/config/i386/i386.md
===================================================================
--- gcc/config/i386/i386.md.orig
+++ gcc/config/i386/i386.md
@@ -162,6 +162,11 @@
(UNSPEC_INSERTQI 133)
(UNSPEC_INSERTQ 134)
+ ; Other AMDFAM10 Patterns
+ (UNSPEC_CVTSI2SS_AMDFAM10 140)
+ (UNSPEC_CVTSI2SD_AMDFAM10 141)
+ (UNSPEC_MOVDSI2SF_AMDFAM10 142)
+ (UNSPEC_MOVDSI2DF_AMDFAM10 143)
])
(define_constants
@@ -4474,7 +4479,46 @@
[(set (match_operand:SF 0 "register_operand" "")
(float:SF (match_operand:SI 1 "nonimmediate_operand" "")))]
"TARGET_80387 || TARGET_SSE_MATH"
- "")
+ "
+ {
+ /* For converting SI to SF, the following code is faster in AMDFAM10
+ mov mem32, reg32
+ movd xmm, mem32
+ cvtdq2ps xmm,xmm
+ */
+
+ if (TARGET_USE_VECTOR_CONVERTS && !optimize_size
+ && (GET_CODE (operands[1]) != MEM) && TARGET_SSE_MATH
+ && optimize )
+ {
+ rtx tmp;
+ tmp = assign_386_stack_local (SImode, SLOT_TEMP);
+ emit_move_insn (tmp, operands[1]);
+ emit_insn (gen_sse2_movdsi2sf_amdfam10 (operands[0], tmp));
+ emit_insn (gen_sse2_cvtdq2ps_amdfam10 (operands[0], operands[0]));
+ DONE;
+ }
+ }
+ ")
+
+(define_insn "sse2_cvtdq2ps_amdfam10"
+ [(set (match_operand:SF 0 "register_operand" "=x")
+ (unspec:SF [(match_operand:SF 1 "register_operand" "x")]
+ UNSPEC_CVTSI2SS_AMDFAM10))]
+ "TARGET_SSE2 && TARGET_USE_VECTOR_CONVERTS"
+ "cvtdq2ps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "sse2_movdsi2sf_amdfam10"
+ [(set (match_operand:SF 0 "register_operand" "=x")
+ (unspec:SF [(match_operand:SI 1 "memory_operand" "m")]
+ UNSPEC_MOVDSI2SF_AMDFAM10))]
+ "TARGET_SSE2 && TARGET_USE_VECTOR_CONVERTS"
+ "movd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "SF")])
+
(define_insn "*floatsisf2_mixed"
[(set (match_operand:SF 0 "register_operand" "=f#x,?f#x,x#f,x#f")
@@ -4589,7 +4633,45 @@
[(set (match_operand:DF 0 "register_operand" "")
(float:DF (match_operand:SI 1 "nonimmediate_operand" "")))]
"TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
- "")
+ "
+ {
+ /* For converting SI to DF, the following code is faster in AMDFAM10
+ mov mem32, reg32
+ movd xmm, mem32
+ cvtdq2pd xmm,xmm
+ */
+
+ if (TARGET_USE_VECTOR_CONVERTS && !optimize_size
+ && (GET_CODE (operands[1]) != MEM) && TARGET_SSE_MATH
+ && optimize)
+ {
+ rtx tmp;
+ tmp = assign_386_stack_local (SImode, SLOT_TEMP);
+ emit_move_insn (tmp, operands[1]);
+ emit_insn (gen_sse2_movdsi2df_amdfam10 (operands[0], tmp));
+ emit_insn (gen_sse2_cvtdq2pd_amdfam10 (operands[0], operands[0]));
+ DONE;
+ }
+ }
+ ")
+
+(define_insn "sse2_cvtdq2pd_amdfam10"
+ [(set (match_operand:DF 0 "register_operand" "=Y")
+ (unspec:DF [(match_operand:DF 1 "register_operand" "Y")]
+ UNSPEC_CVTSI2SD_AMDFAM10))]
+ "TARGET_SSE2 && TARGET_USE_VECTOR_CONVERTS"
+ "cvtdq2pd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "DF")])
+
+(define_insn "sse2_movdsi2df_amdfam10"
+ [(set (match_operand:DF 0 "register_operand" "=Y")
+ (unspec:DF [(match_operand:SI 1 "memory_operand" "m")]
+ UNSPEC_MOVDSI2DF_AMDFAM10))]
+ "TARGET_SSE2 && TARGET_USE_VECTOR_CONVERTS"
+ "movd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "SF")])
(define_insn "*floatsidf2_mixed"
[(set (match_operand:DF 0 "register_operand" "=f#Y,?f#Y,Y#f,Y#f")
++++++ gcc-amdfam10-suse-1.patch ++++++
++++ 652 lines (skipped)
++++++ gcc-amdfam10-suse-20.patch ++++++
Index: gcc/Makefile.in
===================================================================
--- gcc/Makefile.in.orig
+++ gcc/Makefile.in
@@ -1974,7 +1974,7 @@ tree-data-ref.o: tree-data-ref.c $(CONFI
$(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) \
$(TREE_DATA_REF_H) $(SCEV_H) tree-pass.h tree-chrec.h
tree-vect-analyze.o: tree-vect-analyze.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
- $(TM_H) $(GGC_H) $(OPTABS_H) $(TREE_H) $(BASIC_BLOCK_H) \
+ $(TM_H) $(GGC_H) $(OPTABS_H) $(TREE_H) $(TARGET_H) $(BASIC_BLOCK_H) \
$(DIAGNOSTIC_H) $(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) \
tree-vectorizer.h $(TREE_DATA_REF_H) $(SCEV_H) $(EXPR_H) tree-chrec.h
tree-vect-transform.o: tree-vect-transform.c $(CONFIG_H) $(SYSTEM_H) \
Index: gcc/target-def.h
===================================================================
--- gcc/target-def.h.orig
+++ gcc/target-def.h
@@ -352,6 +352,10 @@ Foundation, 51 Franklin Street, Fifth Fl
#define TARGET_VECTOR_MODE_SUPPORTED_P hook_bool_mode_false
#endif
+#ifndef TARGET_VECTOR_MISALIGN_SUPPORTED_P
+#define TARGET_VECTOR_MISALIGN_SUPPORTED_P hook_bool_mode_false
+#endif
+
#ifndef TARGET_VECTOR_OPAQUE_P
#define TARGET_VECTOR_OPAQUE_P hook_bool_tree_false
#endif
@@ -587,6 +591,7 @@ Foundation, 51 Franklin Street, Fifth Fl
TARGET_VALID_POINTER_MODE, \
TARGET_SCALAR_MODE_SUPPORTED_P, \
TARGET_VECTOR_MODE_SUPPORTED_P, \
+ TARGET_VECTOR_MISALIGN_SUPPORTED_P, \
TARGET_VECTOR_OPAQUE_P, \
TARGET_RTX_COSTS, \
TARGET_ADDRESS_COST, \
Index: gcc/target.h
===================================================================
--- gcc/target.h.orig
+++ gcc/target.h
@@ -440,6 +440,9 @@ struct gcc_target
for further details. */
bool (* vector_mode_supported_p) (enum machine_mode mode);
+ /* True if misaligned load-execute vector operations are allowed. */
+ bool (* vector_misalign_supported_p) (enum machine_mode mode);
+
/* True if a vector is opaque. */
bool (* vector_opaque_p) (tree);
Index: gcc/tree-vect-analyze.c
===================================================================
--- gcc/tree-vect-analyze.c.orig
+++ gcc/tree-vect-analyze.c
@@ -25,6 +25,7 @@ Software Foundation, 51 Franklin Street,
#include "tm.h"
#include "ggc.h"
#include "tree.h"
+#include "target.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-flow.h"
@@ -709,7 +710,7 @@ vect_compute_data_ref_alignment (struct
tree stmt = DR_STMT (dr);
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
tree ref = DR_REF (dr);
- tree vectype;
+ tree vectype = STMT_VINFO_VECTYPE (stmt_info);
tree base, base_addr;
bool base_aligned;
tree misalign;
@@ -718,6 +719,13 @@ vect_compute_data_ref_alignment (struct
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "vect_compute_data_ref_alignment:");
+ /* Check if target cares about alignment at all. */
+ if (targetm.vector_misalign_supported_p ((int) TYPE_MODE (vectype)))
+ {
+ DR_MISALIGNMENT (dr) = 0;
+ return true;
+ }
+
/* Initialize misalignment to unknown. */
DR_MISALIGNMENT (dr) = -1;
@@ -725,7 +733,6 @@ vect_compute_data_ref_alignment (struct
aligned_to = DR_ALIGNED_TO (dr);
base_addr = DR_BASE_ADDRESS (dr);
base = build_fold_indirect_ref (base_addr);
- vectype = STMT_VINFO_VECTYPE (stmt_info);
alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
Index: gcc/tree-vect-transform.c
===================================================================
--- gcc/tree-vect-transform.c.orig
+++ gcc/tree-vect-transform.c
@@ -1693,7 +1693,12 @@ vectorizable_load (tree stmt, block_stmt
vec_dest = vect_create_destination_var (scalar_dest, vectype);
data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
- if (aligned_access_p (dr))
+ /* Dereference directly if aligned: per the alignment scheme when misaligned
+ load-execute is supported by the target, per aligned_access_p otherwise. */
+ if ((targetm.vector_misalign_supported_p ((int) TYPE_MODE (vectype))
+ && alignment_support_cheme == dr_aligned)
+ || (!targetm.vector_misalign_supported_p ((int) TYPE_MODE (vectype))
+ && aligned_access_p (dr)))
data_ref = build_fold_indirect_ref (data_ref);
else
{
Index: gcc/tree-vectorizer.c
===================================================================
--- gcc/tree-vectorizer.c.orig
+++ gcc/tree-vectorizer.c
@@ -1577,20 +1577,25 @@ vect_supportable_dr_alignment (struct da
tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
enum machine_mode mode = (int) TYPE_MODE (vectype);
+ /* Definitely aligned access. */
if (aligned_access_p (dr))
return dr_aligned;
/* Possibly unaligned access. */
-
if (DR_IS_READ (dr))
{
+ /* Check if target cares about alignment at all. */
+ if (targetm.vector_misalign_supported_p (mode))
+ return dr_unaligned_supported;
+
+ /* Check if target can do software pipeline. */
if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
&& (!targetm.vectorize.builtin_mask_for_load
|| targetm.vectorize.builtin_mask_for_load ()))
return dr_unaligned_software_pipeline;
+ /* If can't software pipeline the loads, check for misaligned loads. */
if (movmisalign_optab->handlers[mode].insn_code != CODE_FOR_nothing)
- /* Can't software pipeline the loads, but can at least do them. */
return dr_unaligned_supported;
}
Index: gcc/config/i386/crtmisalignsse.c
===================================================================
--- /dev/null
+++ gcc/config/i386/crtmisalignsse.c
@@ -0,0 +1,147 @@
+/*
+ * Copyright (C) 2006 Free Software Foundation, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * In addition to the permissions in the GNU General Public License, the
+ * Free Software Foundation gives you unlimited permission to link the
+ * compiled version of this file with other programs, and to distribute
+ * those programs without any restriction coming from the use of this
+ * file. (The General Public License restrictions do apply in other
+ * respects; for example, they cover modification of the file, and
+ * distribution when not linked into another program.)
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING. If not, write to
+ * the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ * As a special exception, if you link this library with files
+ * compiled with GCC to produce an executable, this does not cause
+ * the resulting executable to be covered by the GNU General Public License.
+ * This exception does not however invalidate any other reasons why
+ * the executable file might be covered by the GNU General Public License.
+ */
+
+#define CPUID_LARGEST_FN (0x00000000) /* Vendor ID and largest supported basic function. */
+#define CPUID_ID (0x00000001) /* Family, model and stepping. */
+#define CPUID_LARGEST_EX (0x80000000) /* Largest supported extended function. */
+#define CPUID_EX_FEATURES (0x80000001) /* Extended features. */
+
+#define CPUID_SSEMISALIGN (1 << 7) /* Misaligned SSE support. */
+
+#define CPUID_AMD0 (0x68747541) /* Auth */
+#define CPUID_AMD1 (0x69746E65) /* enti */
+#define CPUID_AMD2 (0x444D4163) /* cAMD */
+
+#define CPUID_FAMILY_EX (20) /* Extended CPU family. */
+#define CPUID_FAMILY_EX_MASK (255)
+#define CPUID_FAMILY (8) /* CPU family. */
+#define CPUID_FAMILY_MASK (15)
+#define CPUID_MODEL_EX (16) /* Extended CPU model. */
+#define CPUID_MODEL_EX_MASK (15)
+#define CPUID_MODEL (4) /* CPU model. */
+#define CPUID_MODEL_MASK (15)
+#define CPUID_STEP (0) /* CPU stepping. */
+#define CPUID_STEP_MASK (15)
+
+#define FAMILY_AMD_GH (0x10) /* GH */
+#define MODEL_AMD_GH_A (0) /* GH rev. A */
+
+#define MXCSR_MM_A (1 << 16) /* Misaligned SSE mask (GH rev. A). */
+#define MXCSR_MM (1 << 17) /* Misaligned SSE mask. */
+
+#define FLAG_ID (1 << 21) /* CPUID support. */
+
+#include <stdint.h>
+#include
+
+#define CPUID(a) /* Execute CPUID. */ \
+ __asm __volatile ("xchgl %%ebx, %1;" /* Save %ebx. */ \
+ "cpuid;" \
+ "xchgl %%ebx, %1;" /* Restore %ebx. */ \
+ : "=a" (eax), "=r" (ebx), "=c" (ecx), "=d" (edx) \
+ : "a" (a));
+
+static void __attribute__ ((constructor)) /* Constructor: turns on misaligned SSE mode in MXCSR if CPUID reports support. */
+check_misalign_sse (void)
+{
+ uint32_t eax, ebx, ecx, edx;
+ uint32_t cpuid [3], family, model, step; /* cpuid[] holds the three vendor string words. */
+
+#ifndef __x86_64__
+ /* All 64-bit targets support CPUID;
+ only check explicitly for 32-bit ones. */
+
+ /* Check if we can use CPUID: try to toggle the ID bit in the flags register. */
+ __asm __volatile ("pushf;" /* Save flags. */
+ "mov (%%esp), %0;" /* Copy flags into register. */
+ "mov %0, %1;" /* Keep a copy of the original flags. */
+ "xor %2, %0;" /* Flip ID bit. */
+ "push %0;" /* Try modified flags. */
+ "popf;"
+ "pushf;" /* Read flags into register again. */
+ "pop %0;"
+ "popf;" /* Restore flags. */
+ : "=&r" (eax), "=&r" (ebx)
+ : "i" (FLAG_ID)
+ : "memory");
+
+ /* Give up if the ID bit did not change (CPUID cannot be used). */
+ if (((eax ^ ebx) & FLAG_ID) == 0)
+ return;
+#endif
+
+ /* Get the highest supported basic function. */
+ CPUID (CPUID_LARGEST_FN);
+
+ /* Check if basic functions used below are supported. */
+ if (eax < CPUID_ID)
+ return;
+
+ cpuid[0] = ebx; /* Save the vendor string; compared against CPUID_AMD0..2 below. */
+ cpuid[1] = edx;
+ cpuid[2] = ecx;
+
+ /* Get the highest supported extended function (vendor string was saved above). */
+ CPUID (CPUID_LARGEST_EX);
+
+ /* Check if extended functions used below are supported. */
+ if (eax < CPUID_EX_FEATURES)
+ return;
+
+ /* Get the extended features. */
+ CPUID (CPUID_EX_FEATURES);
+
+ /* Check if misaligned SSE is supported. */
+ if ((ecx & CPUID_SSEMISALIGN) == 0)
+ return;
+
+ /* Get the CPU ID. */
+ CPUID (CPUID_ID);
+
+ family = ((eax >> CPUID_FAMILY_EX) & CPUID_FAMILY_EX_MASK) /* extended family + base family */
+ + ((eax >> CPUID_FAMILY) & CPUID_FAMILY_MASK);
+ model = (((eax >> CPUID_MODEL_EX) & CPUID_MODEL_EX_MASK) << CPUID_MODEL) /* extended model in the high nibble, base model in the low nibble */
+ + ((eax >> CPUID_MODEL) & CPUID_MODEL_MASK);
+ step = ((eax >> CPUID_STEP) & CPUID_STEP_MASK); /* stepping (computed but not used below) */
+
+ /* Enable misaligned SSE by setting the misaligned SSE mask bit in MXCSR. */
+ uint32_t mxcsr = __builtin_ia32_stmxcsr ();
+
+ if ( cpuid [0] == CPUID_AMD0 && cpuid [1] == CPUID_AMD1 && cpuid [2] == CPUID_AMD2 \
+ && family == FAMILY_AMD_GH && model == MODEL_AMD_GH_A)
+ mxcsr |= MXCSR_MM_A; /* AMD GH rev. A uses a different mask bit. */
+ else
+ mxcsr |= MXCSR_MM; /* Others */
+
+ __builtin_ia32_ldmxcsr (mxcsr);
+}
Index: gcc/config/i386/i386.c
===================================================================
--- gcc/config/i386/i386.c.orig
+++ gcc/config/i386/i386.c
@@ -1219,6 +1219,7 @@ static void ix86_setup_incoming_varargs
tree, int *, int);
static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
static bool ix86_vector_mode_supported_p (enum machine_mode);
+static bool ix86_vector_misalign_supported_p (enum machine_mode);
static int ix86_address_cost (rtx);
static bool ix86_cannot_force_const_mem (rtx);
@@ -1446,6 +1447,9 @@ static void x86_64_elf_select_section (t
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
+#undef TARGET_VECTOR_MISALIGN_SUPPORTED_P
+#define TARGET_VECTOR_MISALIGN_SUPPORTED_P ix86_vector_misalign_supported_p
+
#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
@@ -1500,8 +1504,8 @@ ix86_handle_option (size_t code, const c
case OPT_msse:
if (!value)
{
- target_flags &= ~(MASK_SSE2 | MASK_SSE3 | MASK_SSE4A);
- target_flags_explicit |= MASK_SSE2 | MASK_SSE3 | MASK_SSE4A;
+ target_flags &= ~(MASK_SSE2 | MASK_SSE3 | MASK_SSE4A | MASK_SSE_MISALIGN);
+ target_flags_explicit |= MASK_SSE2 | MASK_SSE3 | MASK_SSE4A | MASK_SSE_MISALIGN;
}
return true;
@@ -1589,7 +1593,8 @@ override_options (void)
PTA_SSSE3 = 256,
PTA_POPCNT= 512,
PTA_ABM = 1024,
- PTA_SSE4A = 2048
+ PTA_SSE4A = 2048,
+ PTA_SSE_MISALIGN = 4096
} flags;
}
const processor_alias_table[] =
@@ -1643,8 +1648,8 @@ override_options (void)
{"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
{"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
{"amdfam10", PROCESSOR_AMDFAM10, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
- | PTA_3DNOW_A | PTA_SSE | PTA_SSE2| PTA_SSE3 | PTA_POPCNT
- | PTA_ABM | PTA_SSE4A},
+ | PTA_3DNOW_A | PTA_SSE | PTA_SSE2| PTA_SSE3 | PTA_SSE4A
+ | PTA_POPCNT | PTA_ABM},
};
int const pta_size = ARRAY_SIZE (processor_alias_table);
@@ -1799,6 +1804,9 @@ override_options (void)
if (processor_alias_table[i].flags & PTA_SSE4A
&& !(target_flags_explicit & MASK_SSE4A))
target_flags |= MASK_SSE4A;
+ if (processor_alias_table[i].flags & PTA_SSE_MISALIGN
+ && !(target_flags_explicit & MASK_SSE_MISALIGN))
+ target_flags |= MASK_SSE_MISALIGN;
if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
error ("CPU you selected does not support x86-64 "
"instruction set");
@@ -18878,6 +18886,17 @@ ix86_vector_mode_supported_p (enum machi
return true;
if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
return true;
+
+ return false;
+}
+
+/* Implements target hook vector_misalign_supported_p. */
+static bool
+ix86_vector_misalign_supported_p (enum machine_mode mode)
+{
+ if (TARGET_SSE_MISALIGN && ix86_vector_mode_supported_p (mode))
+ return true;
+
return false;
}
Index: gcc/config/i386/i386.opt
===================================================================
--- gcc/config/i386/i386.opt.orig
+++ gcc/config/i386/i386.opt
@@ -205,6 +205,10 @@ msse4a
Target Report Mask(SSE4A)
Support new AMDFAM10 SSE4A built-in functions and code generation
+mssemisalign
+Target Report Mask(SSE_MISALIGN)
+Support misaligned memory in vector operations.
+
mpopcnt
Target Report Mask(POPCNT)
Support new AMDFAM10 Advanced Bit Manipulation (ABM) popcount built-in functions and code generation
Index: gcc/config/i386/linux.h
===================================================================
--- gcc/config/i386/linux.h.orig
+++ gcc/config/i386/linux.h
@@ -130,6 +130,7 @@ Boston, MA 02110-1301, USA. */
#undef ENDFILE_SPEC
#define ENDFILE_SPEC \
"%{ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \
+ %{mssemisalign:crtmisalignsse.o%s} \
%{shared|pie:crtendS.o%s;:crtend.o%s} crtn.o%s"
/* A C statement (sans semicolon) to output to the stdio stream
Index: gcc/config/i386/linux64.h
===================================================================
--- gcc/config/i386/linux64.h.orig
+++ gcc/config/i386/linux64.h
@@ -68,6 +68,7 @@ Boston, MA 02110-1301, USA. */
#undef ENDFILE_SPEC
#define ENDFILE_SPEC \
"%{ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \
+ %{mssemisalign:crtmisalignsse.o%s} \
%{shared|pie:crtendS.o%s;:crtend.o%s} crtn.o%s"
#define MULTILIB_DEFAULTS { "m64" }
Index: gcc/config/i386/sse.md
===================================================================
--- gcc/config/i386/sse.md.orig
+++ gcc/config/i386/sse.md
@@ -70,11 +70,23 @@
else
return "pxor\t%0, %0";
case 1:
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return TARGET_SSE_MISALIGN && MEM_P (operands[1])
+ ? "movups\t{%1, %0|%0, %1}"
+ : "movaps\t{%1, %0|%0, %1}";
+ else
+ return TARGET_SSE_MISALIGN && MEM_P (operands[1])
+ ? "movdqu\t{%1, %0|%0, %1}"
+ : "movdqa\t{%1, %0|%0, %1}";
case 2:
if (get_attr_mode (insn) == MODE_V4SF)
- return "movaps\t{%1, %0|%0, %1}";
+ return TARGET_SSE_MISALIGN
+ ? "movups\t{%1, %0|%0, %1}"
+ : "movaps\t{%1, %0|%0, %1}";
else
- return "movdqa\t{%1, %0|%0, %1}";
+ return TARGET_SSE_MISALIGN
+ ? "movdqu\t{%1, %0|%0, %1}"
+ : "movdqa\t{%1, %0|%0, %1}";
default:
gcc_unreachable ();
}
@@ -83,7 +95,6 @@
(set (attr "mode")
(cond [(eq (symbol_ref "TARGET_SSE2") (const_int 0))
(const_string "V4SF")
-
(eq_attr "alternative" "0,1")
(if_then_else
(ne (symbol_ref "optimize_size")
@@ -156,11 +167,23 @@
else
return "xorpd\t%0, %0";
case 1:
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return TARGET_SSE_MISALIGN && MEM_P (operands[1])
+ ? "movups\t{%1, %0|%0, %1}"
+ : "movaps\t{%1, %0|%0, %1}";
+ else
+ return TARGET_SSE_MISALIGN && MEM_P (operands[1])
+ ? "movupd\t{%1, %0|%0, %1}"
+ : "movapd\t{%1, %0|%0, %1}";
case 2:
if (get_attr_mode (insn) == MODE_V4SF)
- return "movaps\t{%1, %0|%0, %1}";
+ return TARGET_SSE_MISALIGN
+ ? "movups\t{%1, %0|%0, %1}"
+ : "movaps\t{%1, %0|%0, %1}";
else
- return "movapd\t{%1, %0|%0, %1}";
+ return TARGET_SSE_MISALIGN
+ ? "movupd\t{%1, %0|%0, %1}"
+ : "movapd\t{%1, %0|%0, %1}";
default:
gcc_unreachable ();
}
Index: gcc/config/i386/t-crtfm
===================================================================
--- gcc/config/i386/t-crtfm.orig
+++ gcc/config/i386/t-crtfm
@@ -1,6 +1,11 @@
-EXTRA_PARTS += crtfastmath.o
+EXTRA_PARTS += crtfastmath.o crtmisalignsse.o
$(T)crtfastmath.o: $(srcdir)/config/i386/crtfastmath.c $(GCC_PASSES)
$(GCC_FOR_TARGET) $(LIBGCC2_CFLAGS) -msse -c \
$(srcdir)/config/i386/crtfastmath.c \
-o $(T)crtfastmath$(objext)
+
+$(T)crtmisalignsse.o: $(srcdir)/config/i386/crtmisalignsse.c $(GCC_PASSES) stmp-int-hdrs
+ $(GCC_FOR_TARGET) $(LIBGCC2_CFLAGS) -msse -c \
+ $(srcdir)/config/i386/crtmisalignsse.c \
+ -o $(T)crtmisalignsse$(objext)
Index: gcc/config/i386/t-linux64
===================================================================
--- gcc/config/i386/t-linux64.orig
+++ gcc/config/i386/t-linux64
@@ -12,7 +12,7 @@ LIBGCC = stmp-multilib
INSTALL_LIBGCC = install-multilib
EXTRA_MULTILIB_PARTS=crtbegin.o crtend.o crtbeginS.o crtendS.o \
- crtbeginT.o crtfastmath.o
+ crtbeginT.o crtfastmath.o crtmisalignsse.o
# The pushl in CTOR initialization interferes with frame pointer elimination.
# crtend*.o cannot be compiled without -fno-asynchronous-unwind-tables,
Index: gcc/doc/invoke.texi
===================================================================
--- gcc/doc/invoke.texi.orig
+++ gcc/doc/invoke.texi
@@ -525,7 +525,7 @@ Objective-C and Objective-C++ Dialects}.
-mmmx -msse -msse2 -msse3 -mssse3 -msse4a -m3dnow -mpopcnt -mabm @gol
-mthreads -mno-align-stringops -minline-all-stringops @gol
-mpush-args -maccumulate-outgoing-args -m128bit-long-double @gol
--m96bit-long-double -mregparm=@var{num} -msseregparm @gol
+-m96bit-long-double -mregparm=@var{num} -msseregparm -mssemisalign @gol
-momit-leaf-frame-pointer -mno-red-zone -mno-tls-direct-seg-refs @gol
-mcmodel=@var{code-model} @gol
-m32 -m64 -mlarge-data-threshold=@var{num}}
@@ -9319,6 +9319,14 @@ function by using the function attribute
modules with the same value, including any libraries. This includes
the system libraries and startup modules.
+@item -mssemisalign
+@opindex mssemisalign
+Allow the vectorizer to use operands in memory even if not
+guaranteed to be aligned.
+
+@strong{Warning:} if you use this switch then the resulting modules
+must be run only on processors which support this feature.
+
@item -mpreferred-stack-boundary=@var{num}
@opindex mpreferred-stack-boundary
Attempt to keep the stack boundary aligned to a 2 raised to @var{num}
++++++ gcc-amdfam10-suse-2.patch ++++++
Index: gcc/config/i386/i386.c
===================================================================
--- gcc/config/i386/i386.c.orig
+++ gcc/config/i386/i386.c
@@ -480,6 +480,64 @@ struct processor_costs k8_cost = {
COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
};
+struct processor_costs amdfam10_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (2), /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (4), /* DI */
+ COSTS_N_INSNS (5)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (35), /* HI */
+ COSTS_N_INSNS (51), /* SI */
+ COSTS_N_INSNS (83), /* DI */
+ COSTS_N_INSNS (83)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 8, /* "large" insn */
+ 9, /* MOVE_RATIO */
+ 4, /* cost for loading QImode using movzbl */
+ {3, 4, 3}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {3, 4, 3}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {4, 4, 12}, /* cost of loading fp registers (On K8 -332)
+ in SFmode, DFmode and XFmode */
+ {6, 6, 8}, /* cost of storing fp registers (On K8 -223)
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {3, 3}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {4, 4}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {4, 4, 3}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {4, 4, 5}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 3, /* MMX or SSE register to integer */
+ /* On K8
+ MOVD reg64, xmmreg Double FSTORE 4
+ MOVD reg32, xmmreg Double FSTORE 4
+ On AMDFAM10
+ MOVD reg64, xmmreg Double FADD 3 1/1 1/1
+ MOVD reg32, xmmreg Double FADD 3 1/1 1/1 */
+ 64, /* size of prefetch block */
+ 6, /* number of parallel prefetches */
+ 5, /* Branch cost */
+ COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (4), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (19), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (2), /* cost of FABS instruction. */
+ COSTS_N_INSNS (2), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
+};
+
static const
struct processor_costs pentium4_cost = {
COSTS_N_INSNS (1), /* cost of an add instruction */
@@ -708,6 +766,7 @@ const struct processor_costs *ix86_cost
#define m_PENT4 (1<= loadcost)
cost -= loadcost;
Index: gcc/config/i386/i386.h
===================================================================
--- gcc/config/i386/i386.h.orig
+++ gcc/config/i386/i386.h
@@ -139,6 +139,7 @@ extern const struct processor_costs *ix8
#define TARGET_GENERIC32 (ix86_tune == PROCESSOR_GENERIC32)
#define TARGET_GENERIC64 (ix86_tune == PROCESSOR_GENERIC64)
#define TARGET_GENERIC (TARGET_GENERIC32 || TARGET_GENERIC64)
+#define TARGET_AMDFAM10 (ix86_tune == PROCESSOR_AMDFAM10)
#define TUNEMASK (1 << ix86_tune)
extern const int x86_use_leave, x86_push_memory, x86_zero_extend_with_and;
@@ -368,6 +369,8 @@ extern int x86_prefetch_sse;
} \
else if (TARGET_K8) \
builtin_define ("__tune_k8__"); \
+ else if (TARGET_AMDFAM10) \
+ builtin_define ("__tune_amdfam10__"); \
else if (TARGET_PENTIUM4) \
builtin_define ("__tune_pentium4__"); \
else if (TARGET_NOCONA) \
@@ -439,6 +442,11 @@ extern int x86_prefetch_sse;
builtin_define ("__k8"); \
builtin_define ("__k8__"); \
} \
+ else if (ix86_arch == PROCESSOR_AMDFAM10) \
+ { \
+ builtin_define ("__amdfam10"); \
+ builtin_define ("__amdfam10__"); \
+ } \
else if (ix86_arch == PROCESSOR_PENTIUM4) \
{ \
builtin_define ("__pentium4"); \
@@ -470,13 +478,14 @@ extern int x86_prefetch_sse;
#define TARGET_CPU_DEFAULT_prescott 15
#define TARGET_CPU_DEFAULT_nocona 16
#define TARGET_CPU_DEFAULT_generic 17
+#define TARGET_CPU_DEFAULT_amdfam10 18
#define TARGET_CPU_DEFAULT_NAMES {"i386", "i486", "pentium", "pentium-mmx",\
"pentiumpro", "pentium2", "pentium3", \
"pentium4", "k6", "k6-2", "k6-3",\
"athlon", "athlon-4", "k8", \
"pentium-m", "prescott", "nocona", \
- "generic"}
+ "generic", "amdfam10"}
#ifndef CC1_SPEC
#define CC1_SPEC "%(cc1_cpu) "
@@ -2130,6 +2139,7 @@ enum processor_type
PROCESSOR_NOCONA,
PROCESSOR_GENERIC32,
PROCESSOR_GENERIC64,
+ PROCESSOR_AMDFAM10,
PROCESSOR_max
};
Index: gcc/config/i386/i386.md
===================================================================
--- gcc/config/i386/i386.md.orig
+++ gcc/config/i386/i386.md
@@ -201,7 +201,7 @@
;; Processor type. This attribute must exactly match the processor_type
;; enumeration in i386.h.
-(define_attr "cpu" "i386,i486,pentium,pentiumpro,k6,athlon,pentium4,k8,nocona,generic32,generic64"
+(define_attr "cpu" "i386,i486,pentium,pentiumpro,k6,athlon,pentium4,k8,nocona,generic32,generic64,amdfam10"
(const (symbol_ref "ix86_tune")))
;; A basic instruction type. Refinements due to arguments to be
@@ -20287,7 +20287,7 @@
(mult:DI (match_operand:DI 1 "memory_operand" "")
(match_operand:DI 2 "immediate_operand" "")))
(clobber (reg:CC FLAGS_REG))])]
- "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size
+ "(TARGET_K8 || TARGET_GENERIC64 || TARGET_AMDFAM10) && !optimize_size
&& (GET_CODE (operands[2]) != CONST_INT
|| !CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'K'))"
[(set (match_dup 3) (match_dup 1))
@@ -20301,7 +20301,7 @@
(mult:SI (match_operand:SI 1 "memory_operand" "")
(match_operand:SI 2 "immediate_operand" "")))
(clobber (reg:CC FLAGS_REG))])]
- "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size
+ "(TARGET_K8 || TARGET_GENERIC64 || TARGET_AMDFAM10) && !optimize_size
&& (GET_CODE (operands[2]) != CONST_INT
|| !CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'K'))"
[(set (match_dup 3) (match_dup 1))
@@ -20316,7 +20316,7 @@
(mult:SI (match_operand:SI 1 "memory_operand" "")
(match_operand:SI 2 "immediate_operand" ""))))
(clobber (reg:CC FLAGS_REG))])]
- "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size
+ "(TARGET_K8 || TARGET_GENERIC64 || TARGET_AMDFAM10) && !optimize_size
&& (GET_CODE (operands[2]) != CONST_INT
|| !CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'K'))"
[(set (match_dup 3) (match_dup 1))
@@ -20334,7 +20334,7 @@
(match_operand:DI 2 "const_int_operand" "")))
(clobber (reg:CC FLAGS_REG))])
(match_scratch:DI 3 "r")]
- "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size
+ "(TARGET_K8 || TARGET_GENERIC64 || TARGET_AMDFAM10) && !optimize_size
&& CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'K')"
[(set (match_dup 3) (match_dup 2))
(parallel [(set (match_dup 0) (mult:DI (match_dup 0) (match_dup 3)))
@@ -20350,7 +20350,7 @@
(match_operand:SI 2 "const_int_operand" "")))
(clobber (reg:CC FLAGS_REG))])
(match_scratch:SI 3 "r")]
- "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size
+ "(TARGET_K8 || TARGET_GENERIC64 || TARGET_AMDFAM10) && !optimize_size
&& CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'K')"
[(set (match_dup 3) (match_dup 2))
(parallel [(set (match_dup 0) (mult:SI (match_dup 0) (match_dup 3)))
@@ -20366,7 +20366,7 @@
(match_operand:HI 2 "immediate_operand" "")))
(clobber (reg:CC FLAGS_REG))])
(match_scratch:HI 3 "r")]
- "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size"
+ "(TARGET_K8 || TARGET_GENERIC64 || TARGET_AMDFAM10) && !optimize_size"
[(set (match_dup 3) (match_dup 2))
(parallel [(set (match_dup 0) (mult:HI (match_dup 0) (match_dup 3)))
(clobber (reg:CC FLAGS_REG))])]
Index: gcc/config/i386/sse.md
===================================================================
--- gcc/config/i386/sse.md.orig
+++ gcc/config/i386/sse.md
@@ -4499,6 +4499,7 @@
[(set_attr "type" "sselog1")
(set_attr "mode" "DI")])
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; AMD SSE4A instructions
Index: gcc/config.gcc
===================================================================
--- gcc/config.gcc.orig
+++ gcc/config.gcc
@@ -2396,6 +2396,9 @@ if test x$with_cpu = x ; then
;;
i686-*-* | i786-*-*)
case ${target_noncanonical} in
+ amdfam10-*)
+ with_cpu=amdfam10
+ ;;
k8-*|opteron-*|athlon_64-*)
with_cpu=k8
;;
@@ -2433,6 +2436,9 @@ if test x$with_cpu = x ; then
;;
x86_64-*-*)
case ${target_noncanonical} in
+ amdfam10-*)
+ with_cpu=amdfam10
+ ;;
k8-*|opteron-*|athlon_64-*)
with_cpu=k8
;;
@@ -2662,7 +2668,7 @@ case "${target}" in
esac
# OK
;;
- "" | k8 | opteron | athlon64 | athlon-fx | nocona | generic)
+ "" | k8 | opteron | athlon64 | athlon-fx | nocona | generic | amdfam10 )
# OK
;;
*)
++++++ gcc-amdfam10-suse-3.patch ++++++
Index: gcc/config/i386/i386.c
===================================================================
--- gcc/config/i386/i386.c.orig
+++ gcc/config/i386/i386.c
@@ -851,12 +851,71 @@ const int x86_arch_always_fancy_math_387
this option on P4 brings over 20% SPECfp regression, while enabling it on
K8 brings roughly 2.4% regression that can be partly masked by careful scheduling
of moves. */
+
+/*
+Code generation for scalar reg-reg moves:
+ if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
+ movaps reg, reg
+ else
+ movss reg, reg
+
+ if (x86_sse_partial_reg_dependency == true)
+ movapd reg, reg
+ else
+ movsd reg, reg
+
+Code generation for scalar loads of double precision data:
+ if (x86_sse_split_regs == true)
+ movlpd mem, reg (gas syntax)
+ else
+ movsd mem, reg
+
+Code generation for unaligned packed loads of single precision data:
+ if (x86_sse_partial_reg_dependency == true)
+ {
+ if (x86_sse_unaligned_move_optimal)
+ {
+ movups mem, reg
+ }
+ else
+ {
+ xorps reg3, reg3
+ movaps reg3, reg2
+ movlps mem, reg2
+ movhps mem+8, reg2
+ }
+ }
+ else
+ {
+ movlps mem, reg
+ movhps mem+8, reg
+ }
+Code generation for unaligned packed loads of double precision data:
+ if (x86_sse_split_regs == true)
+ {
+ movlpd mem, reg
+ movhpd mem+8, reg
+ }
+ else
+ {
+ if (x86_sse_unaligned_move_optimal)
+ {
+ movupd mem, reg
+ }
+ else
+ {
+ movsd mem, reg2
+ movhpd mem+8, reg2
+ }
+ }
+*/
const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC | m_AMDFAM10;
/* Set for machines where the type and dependencies are resolved on SSE
register parts instead of whole registers, so we may maintain just
lower part of scalar values in proper format leaving the upper part
undefined. */
const int x86_sse_split_regs = m_ATHLON_K8;
+const int x86_sse_unaligned_move_optimal = m_AMDFAM10;
const int x86_sse_typeless_stores = m_ATHLON_K8 | m_AMDFAM10;
const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
const int x86_use_ffreep = m_ATHLON_K8 | m_AMDFAM10;
@@ -8923,6 +8982,13 @@ ix86_expand_vector_move_misalign (enum m
}
else
{
+ if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
+ {
+ op0 = gen_lowpart (V2DFmode, op0);
+ op1 = gen_lowpart (V2DFmode, op1);
+ emit_insn (gen_sse2_movupd (op0, op1));
+ return;
+ }
/* ??? Not sure about the best option for the Intel chips.
The following would seem to satisfy; the register is
entirely cleared, breaking the dependency chain. We
@@ -8942,7 +9008,16 @@ ix86_expand_vector_move_misalign (enum m
else
{
if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
+ {
+ if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
+ {
+ op0 = gen_lowpart (V4SFmode, op0);
+ op1 = gen_lowpart (V4SFmode, op1);
+ emit_insn (gen_sse_movups (op0, op1));
+ return;
+ }
emit_move_insn (op0, CONST0_RTX (mode));
+ }
else
emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
Index: gcc/config/i386/i386.h
===================================================================
--- gcc/config/i386/i386.h.orig
+++ gcc/config/i386/i386.h
@@ -158,6 +158,7 @@ extern const int x86_accumulate_outgoing
extern const int x86_epilogue_using_move, x86_decompose_lea;
extern const int x86_arch_always_fancy_math_387, x86_shift1;
extern const int x86_sse_partial_reg_dependency, x86_sse_split_regs;
+extern const int x86_sse_unaligned_move_optimal;
extern const int x86_sse_typeless_stores, x86_sse_load0_by_pxor;
extern const int x86_use_ffreep;
extern const int x86_inter_unit_moves, x86_schedule;
@@ -207,6 +208,8 @@ extern int x86_prefetch_sse;
#define TARGET_PARTIAL_REG_DEPENDENCY (x86_partial_reg_dependency & TUNEMASK)
#define TARGET_SSE_PARTIAL_REG_DEPENDENCY \
(x86_sse_partial_reg_dependency & TUNEMASK)
+#define TARGET_SSE_UNALIGNED_MOVE_OPTIMAL \
+ (x86_sse_unaligned_move_optimal & TUNEMASK)
#define TARGET_SSE_SPLIT_REGS (x86_sse_split_regs & TUNEMASK)
#define TARGET_SSE_TYPELESS_STORES (x86_sse_typeless_stores & TUNEMASK)
#define TARGET_SSE_LOAD0_BY_PXOR (x86_sse_load0_by_pxor & TUNEMASK)
++++++ gcc-amdfam10-suse-4.patch ++++++
Index: gcc/config/i386/athlon.md
===================================================================
--- gcc/config/i386/athlon.md.orig
+++ gcc/config/i386/athlon.md
@@ -808,13 +808,13 @@
(eq_attr "memory" "load"))))
"athlon-direct,athlon-fpload,athlon-fmul")
(define_insn_reservation "athlon_ssemul_load_k8" 6
- (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
(and (eq_attr "type" "ssemul")
(and (eq_attr "mode" "SF,DF")
(eq_attr "memory" "load"))))
"athlon-direct,athlon-fploadk8,athlon-fmul")
(define_insn_reservation "athlon_ssemul" 4
- (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
(and (eq_attr "type" "ssemul")
(eq_attr "mode" "SF,DF")))
"athlon-direct,athlon-fpsched,athlon-fmul")
@@ -828,6 +828,11 @@
(and (eq_attr "type" "ssemul")
(eq_attr "memory" "load")))
"athlon-double,athlon-fpload2k8,(athlon-fmul*2)")
+(define_insn_reservation "athlon_ssemulvector_load_amdfam10" 6
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "ssemul")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fploadk8,athlon-fmul")
(define_insn_reservation "athlon_ssemulvector" 5
(and (eq_attr "cpu" "athlon")
(eq_attr "type" "ssemul"))
@@ -836,6 +841,10 @@
(and (eq_attr "cpu" "k8,generic64")
(eq_attr "type" "ssemul"))
"athlon-double,athlon-fpsched,(athlon-fmul*2)")
+(define_insn_reservation "athlon_ssemulvector_amdfam10" 4
+ (and (eq_attr "cpu" "amdfam10")
+ (eq_attr "type" "ssemul"))
+ "athlon-direct,athlon-fpsched,athlon-fmul")
;; divsd timings. divss is faster
(define_insn_reservation "athlon_ssediv_load" 20
(and (eq_attr "cpu" "athlon")
@@ -844,13 +853,13 @@
(eq_attr "memory" "load"))))
"athlon-direct,athlon-fpload,athlon-fmul*17")
(define_insn_reservation "athlon_ssediv_load_k8" 22
- (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
(and (eq_attr "type" "ssediv")
(and (eq_attr "mode" "SF,DF")
(eq_attr "memory" "load"))))
"athlon-direct,athlon-fploadk8,athlon-fmul*17")
(define_insn_reservation "athlon_ssediv" 20
- (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
(and (eq_attr "type" "ssediv")
(eq_attr "mode" "SF,DF")))
"athlon-direct,athlon-fpsched,athlon-fmul*17")
@@ -864,6 +873,11 @@
(and (eq_attr "type" "ssediv")
(eq_attr "memory" "load")))
"athlon-double,athlon-fpload2k8,athlon-fmul*34")
+(define_insn_reservation "athlon_ssedivvector_load_amdfam10" 22
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "ssediv")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fploadk8,athlon-fmul*17")
(define_insn_reservation "athlon_ssedivvector" 39
(and (eq_attr "cpu" "athlon")
(eq_attr "type" "ssediv"))
@@ -872,3 +886,8 @@
(and (eq_attr "cpu" "k8,generic64")
(eq_attr "type" "ssediv"))
"athlon-double,athlon-fmul*34")
+(define_insn_reservation "athlon_ssedivvector_amdfam10" 20
+ (and (eq_attr "cpu" "amdfam10")
+ (eq_attr "type" "ssediv"))
+ "athlon-direct,athlon-fmul*17")
+
++++++ gcc-amdfam10-suse-5.patch ++++++
Index: gcc/config/i386/athlon.md
===================================================================
--- gcc/config/i386/athlon.md.orig
+++ gcc/config/i386/athlon.md
@@ -29,6 +29,8 @@
(const_string "vector")]
(const_string "direct")))
+(define_attr "amdfam10_decode" "direct,vector,double"
+ (const_string "direct"))
;;
;; decode0 decode1 decode2
;; \ | /
@@ -684,12 +686,25 @@
(and (eq_attr "mode" "DF")
(eq_attr "memory" "load")))))
"athlon-direct,athlon-fploadk8,athlon-fstore")
+(define_insn_reservation "athlon_ssecvt_cvtss2sd_load_amdfam10" 7
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "amdfam10_decode" "double")
+ (and (eq_attr "mode" "DF")
+ (eq_attr "memory" "load")))))
+ "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
(define_insn_reservation "athlon_ssecvt_cvtss2sd" 2
(and (eq_attr "cpu" "athlon,k8,generic64")
(and (eq_attr "type" "ssecvt")
(and (eq_attr "athlon_decode" "direct")
(eq_attr "mode" "DF"))))
"athlon-direct,athlon-fpsched,athlon-fstore")
+(define_insn_reservation "athlon_ssecvt_cvtss2sd_amdfam10" 7
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "amdfam10_decode" "vector")
+ (eq_attr "mode" "DF"))))
+ "athlon-vector,athlon-fpsched,athlon-faddmul,(athlon-fstore*2)")
;; cvtps2pd. Model same way the other double decoded FP conversions.
(define_insn_reservation "athlon_ssecvt_cvtps2pd_load_k8" 5
(and (eq_attr "cpu" "k8,athlon,generic64")
@@ -698,12 +713,25 @@
(and (eq_attr "mode" "V2DF,V4SF,TI")
(eq_attr "memory" "load")))))
"athlon-double,athlon-fpload2k8,(athlon-fstore*2)")
+(define_insn_reservation "athlon_ssecvt_cvtps2pd_load_amdfam10" 4
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "amdfam10_decode" "direct")
+ (and (eq_attr "mode" "V2DF,V4SF,TI")
+ (eq_attr "memory" "load")))))
+ "athlon-direct,athlon-fploadk8,athlon-fstore")
(define_insn_reservation "athlon_ssecvt_cvtps2pd_k8" 3
(and (eq_attr "cpu" "k8,athlon,generic64")
(and (eq_attr "type" "ssecvt")
(and (eq_attr "athlon_decode" "double")
(eq_attr "mode" "V2DF,V4SF,TI"))))
"athlon-double,athlon-fpsched,athlon-fstore,athlon-fstore")
+(define_insn_reservation "athlon_ssecvt_cvtps2pd_amdfam10" 2
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "amdfam10_decode" "direct")
+ (eq_attr "mode" "V2DF,V4SF,TI"))))
+ "athlon-direct,athlon-fpsched,athlon-fstore")
;; cvtsi2sd mem,reg is directpath path (cvtsi2sd reg,reg is doublepath)
;; cvtsi2sd has troughput 1 and is executed in store unit with latency of 6
(define_insn_reservation "athlon_sseicvt_cvtsi2sd_load" 6
@@ -713,6 +741,13 @@
(and (eq_attr "mode" "SF,DF")
(eq_attr "memory" "load")))))
"athlon-direct,athlon-fploadk8,athlon-fstore")
+(define_insn_reservation "athlon_sseicvt_cvtsi2sd_load_amdfam10" 9
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "sseicvt")
+ (and (eq_attr "amdfam10_decode" "double")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "memory" "load")))))
+ "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
;; cvtsi2ss mem, reg is doublepath
(define_insn_reservation "athlon_sseicvt_cvtsi2ss_load" 9
(and (eq_attr "cpu" "athlon")
@@ -728,6 +763,13 @@
(and (eq_attr "mode" "SF,DF")
(eq_attr "memory" "load")))))
"athlon-double,athlon-fploadk8,(athlon-fstore*2)")
+(define_insn_reservation "athlon_sseicvt_cvtsi2ss_load_amdfam10" 9
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "sseicvt")
+ (and (eq_attr "amdfam10_decode" "double")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "memory" "load")))))
+ "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
;; cvtsi2sd reg,reg is double decoded (vector on Athlon)
(define_insn_reservation "athlon_sseicvt_cvtsi2sd_k8" 11
(and (eq_attr "cpu" "k8,athlon,generic64")
@@ -736,6 +778,13 @@
(and (eq_attr "mode" "SF,DF")
(eq_attr "memory" "none")))))
"athlon-double,athlon-fploadk8,athlon-fstore")
+(define_insn_reservation "athlon_sseicvt_cvtsi2sd_amdfam10" 14
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "sseicvt")
+ (and (eq_attr "amdfam10_decode" "vector")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "memory" "none")))))
+ "athlon-vector,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
;; cvtsi2ss reg, reg is doublepath
(define_insn_reservation "athlon_sseicvt_cvtsi2ss" 14
(and (eq_attr "cpu" "athlon,k8,generic64")
@@ -744,6 +793,13 @@
(and (eq_attr "mode" "SF,DF")
(eq_attr "memory" "none")))))
"athlon-vector,athlon-fploadk8,(athlon-fvector*2)")
+(define_insn_reservation "athlon_sseicvt_cvtsi2ss_amdfam10" 14
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "sseicvt")
+ (and (eq_attr "amdfam10_decode" "vector")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "memory" "none")))))
+ "athlon-vector,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
;; cvtsd2ss mem,reg is doublepath, troughput unknown, latency 9
(define_insn_reservation "athlon_ssecvt_cvtsd2ss_load_k8" 9
(and (eq_attr "cpu" "k8,athlon,generic64")
@@ -752,6 +808,13 @@
(and (eq_attr "mode" "SF")
(eq_attr "memory" "load")))))
"athlon-double,athlon-fploadk8,(athlon-fstore*3)")
+(define_insn_reservation "athlon_ssecvt_cvtsd2ss_load_amdfam10" 9
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "amdfam10_decode" "double")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "memory" "load")))))
+ "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
;; cvtsd2ss reg,reg is vectorpath, troughput unknown, latency 12
(define_insn_reservation "athlon_ssecvt_cvtsd2ss" 12
(and (eq_attr "cpu" "athlon,k8,generic64")
@@ -760,6 +823,13 @@
(and (eq_attr "mode" "SF")
(eq_attr "memory" "none")))))
"athlon-vector,athlon-fpsched,(athlon-fvector*3)")
+(define_insn_reservation "athlon_ssecvt_cvtsd2ss_amdfam10" 8
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "amdfam10_decode" "vector")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "memory" "none")))))
+ "athlon-vector,athlon-fpsched,athlon-faddmul,(athlon-fstore*2)")
(define_insn_reservation "athlon_ssecvt_cvtpd2ps_load_k8" 8
(and (eq_attr "cpu" "athlon,k8,generic64")
(and (eq_attr "type" "ssecvt")
@@ -767,6 +837,13 @@
(and (eq_attr "mode" "V4SF,V2DF,TI")
(eq_attr "memory" "load")))))
"athlon-double,athlon-fpload2k8,(athlon-fstore*3)")
+(define_insn_reservation "athlon_ssecvt_cvtpd2ps_load_amdfam10" 9
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "amdfam10_decode" "double")
+ (and (eq_attr "mode" "V4SF,V2DF,TI")
+ (eq_attr "memory" "load")))))
+ "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
;; cvtpd2ps mem,reg is vectorpath, troughput unknown, latency 10
;; ??? Why it is fater than cvtsd2ss?
(define_insn_reservation "athlon_ssecvt_cvtpd2ps" 8
@@ -776,6 +853,13 @@
(and (eq_attr "mode" "V4SF,V2DF,TI")
(eq_attr "memory" "none")))))
"athlon-vector,athlon-fpsched,athlon-fvector*2")
+(define_insn_reservation "athlon_ssecvt_cvtpd2ps_amdfam10" 7
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "amdfam10_decode" "double")
+ (and (eq_attr "mode" "V4SF,V2DF,TI")
+ (eq_attr "memory" "none")))))
+ "athlon-double,athlon-fpsched,(athlon-faddmul+athlon-fstore)")
;; cvtsd2si mem,reg is doublepath, troughput 1, latency 9
(define_insn_reservation "athlon_secvt_cvtsX2si_load" 9
(and (eq_attr "cpu" "athlon,k8,generic64")
@@ -784,6 +868,13 @@
(and (eq_attr "mode" "SI,DI")
(eq_attr "memory" "load")))))
"athlon-vector,athlon-fploadk8,athlon-fvector")
+(define_insn_reservation "athlon_secvt_cvtsX2si_load_amdfam10" 10
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "sseicvt")
+ (and (eq_attr "amdfam10_decode" "double")
+ (and (eq_attr "mode" "SI,DI")
+ (eq_attr "memory" "load")))))
+ "athlon-double,athlon-fploadk8,(athlon-fadd+athlon-fstore)")
;; cvtsd2si reg,reg is doublepath, troughput 1, latency 9
(define_insn_reservation "athlon_ssecvt_cvtsX2si" 9
(and (eq_attr "cpu" "athlon")
@@ -799,6 +890,29 @@
(and (eq_attr "mode" "SI,DI")
(eq_attr "memory" "none")))))
"athlon-double,athlon-fpsched,athlon-fstore")
+(define_insn_reservation "athlon_ssecvt_cvtsX2si_amdfam10" 8
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "sseicvt")
+ (and (eq_attr "amdfam10_decode" "double")
+ (and (eq_attr "mode" "SI,DI")
+ (eq_attr "memory" "none")))))
+ "athlon-double,athlon-fpsched,(athlon-fadd+athlon-fstore)")
+;; cvtpd2dq reg,mem is doublepath, throughput 1, latency 9 on amdfam10
+(define_insn_reservation "athlon_sseicvt_cvtpd2dq_load_amdfam10" 9
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "sseicvt")
+ (and (eq_attr "amdfam10_decode" "double")
+ (and (eq_attr "mode" "TI")
+ (eq_attr "memory" "load")))))
+ "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
+;; cvtpd2dq reg,reg is doublepath, throughput 1, latency 7 on amdfam10
+(define_insn_reservation "athlon_sseicvt_cvtpd2dq_amdfam10" 7
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "sseicvt")
+ (and (eq_attr "amdfam10_decode" "double")
+ (and (eq_attr "mode" "TI")
+ (eq_attr "memory" "none")))))
+ "athlon-double,athlon-fpsched,(athlon-faddmul+athlon-fstore)")
(define_insn_reservation "athlon_ssemul_load" 4
@@ -890,4 +1004,3 @@
(and (eq_attr "cpu" "amdfam10")
(eq_attr "type" "ssediv"))
"athlon-direct,athlon-fmul*17")
-
Index: gcc/config/i386/i386.md
===================================================================
--- gcc/config/i386/i386.md.orig
+++ gcc/config/i386/i386.md
@@ -4132,7 +4132,8 @@
"cvttss2si{q}\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "mode" "SF")
- (set_attr "athlon_decode" "double,vector")])
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")])
(define_insn "fix_truncdfdi_sse"
[(set (match_operand:DI 0 "register_operand" "=r,r")
@@ -4141,7 +4142,8 @@
"cvttsd2si{q}\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "mode" "DF")
- (set_attr "athlon_decode" "double,vector")])
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")])
(define_insn "fix_truncsfsi_sse"
[(set (match_operand:SI 0 "register_operand" "=r,r")
@@ -4150,7 +4152,8 @@
"cvttss2si\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "mode" "DF")
- (set_attr "athlon_decode" "double,vector")])
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")])
(define_insn "fix_truncdfsi_sse"
[(set (match_operand:SI 0 "register_operand" "=r,r")
@@ -4159,7 +4162,8 @@
"cvttsd2si\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "mode" "DF")
- (set_attr "athlon_decode" "double,vector")])
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")])
;; Avoid vector decoded forms of the instruction.
(define_peephole2
@@ -4471,6 +4475,7 @@
(set_attr "mode" "SF")
(set_attr "unit" "*,i387,*,*")
(set_attr "athlon_decode" "*,*,vector,double")
+ (set_attr "amdfam10_decode" "*,*,vector,double")
(set_attr "fp_int_src" "true")])
(define_insn "*floatsisf2_sse"
@@ -4481,6 +4486,7 @@
[(set_attr "type" "sseicvt")
(set_attr "mode" "SF")
(set_attr "athlon_decode" "vector,double")
+ (set_attr "amdfam10_decode" "vector,double")
(set_attr "fp_int_src" "true")])
(define_insn "*floatsisf2_i387"
@@ -4514,6 +4520,7 @@
(set_attr "mode" "SF")
(set_attr "unit" "*,i387,*,*")
(set_attr "athlon_decode" "*,*,vector,double")
+ (set_attr "amdfam10_decode" "*,*,vector,double")
(set_attr "fp_int_src" "true")])
(define_insn "*floatdisf2_sse"
@@ -4524,6 +4531,7 @@
[(set_attr "type" "sseicvt")
(set_attr "mode" "SF")
(set_attr "athlon_decode" "vector,double")
+ (set_attr "amdfam10_decode" "vector,double")
(set_attr "fp_int_src" "true")])
(define_insn "*floatdisf2_i387"
@@ -4582,6 +4590,7 @@
(set_attr "mode" "DF")
(set_attr "unit" "*,i387,*,*")
(set_attr "athlon_decode" "*,*,double,direct")
+ (set_attr "amdfam10_decode" "*,*,vector,double")
(set_attr "fp_int_src" "true")])
(define_insn "*floatsidf2_sse"
@@ -4592,6 +4601,7 @@
[(set_attr "type" "sseicvt")
(set_attr "mode" "DF")
(set_attr "athlon_decode" "double,direct")
+ (set_attr "amdfam10_decode" "vector,double")
(set_attr "fp_int_src" "true")])
(define_insn "*floatsidf2_i387"
@@ -4625,6 +4635,7 @@
(set_attr "mode" "DF")
(set_attr "unit" "*,i387,*,*")
(set_attr "athlon_decode" "*,*,double,direct")
+ (set_attr "amdfam10_decode" "*,*,vector,double")
(set_attr "fp_int_src" "true")])
(define_insn "*floatdidf2_sse"
@@ -4635,6 +4646,7 @@
[(set_attr "type" "sseicvt")
(set_attr "mode" "DF")
(set_attr "athlon_decode" "double,direct")
+ (set_attr "amdfam10_decode" "vector,double")
(set_attr "fp_int_src" "true")])
(define_insn "*floatdidf2_i387"
Index: gcc/config/i386/sse.md
===================================================================
--- gcc/config/i386/sse.md.orig
+++ gcc/config/i386/sse.md
@@ -963,6 +963,7 @@
"cvtsi2ss\t{%2, %0|%0, %2}"
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "vector,double")
+ (set_attr "amdfam10_decode" "vector,double")
(set_attr "mode" "SF")])
(define_insn "sse_cvtsi2ssq"
@@ -976,6 +977,7 @@
"cvtsi2ssq\t{%2, %0|%0, %2}"
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "vector,double")
+ (set_attr "amdfam10_decode" "vector,double")
(set_attr "mode" "SF")])
(define_insn "sse_cvtss2si"
@@ -989,6 +991,7 @@
"cvtss2si\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")
(set_attr "mode" "SI")])
(define_insn "sse_cvtss2siq"
@@ -1002,6 +1005,7 @@
"cvtss2siq\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")
(set_attr "mode" "DI")])
(define_insn "sse_cvttss2si"
@@ -1014,6 +1018,7 @@
"cvttss2si\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")
(set_attr "mode" "SI")])
(define_insn "sse_cvttss2siq"
@@ -1026,6 +1031,7 @@
"cvttss2siq\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")
(set_attr "mode" "DI")])
(define_insn "sse2_cvtdq2ps"
@@ -1921,7 +1927,8 @@
"cvtsi2sd\t{%2, %0|%0, %2}"
[(set_attr "type" "sseicvt")
(set_attr "mode" "DF")
- (set_attr "athlon_decode" "double,direct")])
+ (set_attr "athlon_decode" "double,direct")
+ (set_attr "amdfam10_decode" "vector,double")])
(define_insn "sse2_cvtsi2sdq"
[(set (match_operand:V2DF 0 "register_operand" "=x,x")
@@ -1934,7 +1941,8 @@
"cvtsi2sdq\t{%2, %0|%0, %2}"
[(set_attr "type" "sseicvt")
(set_attr "mode" "DF")
- (set_attr "athlon_decode" "double,direct")])
+ (set_attr "athlon_decode" "double,direct")
+ (set_attr "amdfam10_decode" "vector,double")])
(define_insn "sse2_cvtsd2si"
[(set (match_operand:SI 0 "register_operand" "=r,r")
@@ -1947,6 +1955,7 @@
"cvtsd2si\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")
(set_attr "mode" "SI")])
(define_insn "sse2_cvtsd2siq"
@@ -1960,6 +1969,7 @@
"cvtsd2siq\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")
(set_attr "mode" "DI")])
(define_insn "sse2_cvttsd2si"
@@ -1972,7 +1982,8 @@
"cvttsd2si\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "mode" "SI")
- (set_attr "athlon_decode" "double,vector")])
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")])
(define_insn "sse2_cvttsd2siq"
[(set (match_operand:DI 0 "register_operand" "=r,r")
@@ -1984,7 +1995,8 @@
"cvttsd2siq\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "mode" "DI")
- (set_attr "athlon_decode" "double,vector")])
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")])
(define_insn "sse2_cvtdq2pd"
[(set (match_operand:V2DF 0 "register_operand" "=x")
@@ -2015,7 +2027,8 @@
"TARGET_SSE2"
"cvtpd2dq\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
- (set_attr "mode" "TI")])
+ (set_attr "mode" "TI")
+ (set_attr "amdfam10_decode" "double")])
(define_expand "sse2_cvttpd2dq"
[(set (match_operand:V4SI 0 "register_operand" "")
@@ -2033,7 +2046,8 @@
"TARGET_SSE2"
"cvttpd2dq\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
- (set_attr "mode" "TI")])
+ (set_attr "mode" "TI")
+ (set_attr "amdfam10_decode" "double")])
(define_insn "sse2_cvtsd2ss"
[(set (match_operand:V4SF 0 "register_operand" "=x,x")
@@ -2047,20 +2061,22 @@
"cvtsd2ss\t{%2, %0|%0, %2}"
[(set_attr "type" "ssecvt")
(set_attr "athlon_decode" "vector,double")
+ (set_attr "amdfam10_decode" "vector,double")
(set_attr "mode" "SF")])
(define_insn "sse2_cvtss2sd"
- [(set (match_operand:V2DF 0 "register_operand" "=x")
+ [(set (match_operand:V2DF 0 "register_operand" "=x,x")
(vec_merge:V2DF
(float_extend:V2DF
(vec_select:V2SF
- (match_operand:V4SF 2 "nonimmediate_operand" "xm")
+ (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
(parallel [(const_int 0) (const_int 1)])))
- (match_operand:V2DF 1 "register_operand" "0")
+ (match_operand:V2DF 1 "register_operand" "0,0")
(const_int 1)))]
"TARGET_SSE2"
"cvtss2sd\t{%2, %0|%0, %2}"
[(set_attr "type" "ssecvt")
+ (set_attr "amdfam10_decode" "vector,double")
(set_attr "mode" "DF")])
(define_expand "sse2_cvtpd2ps"
@@ -2081,7 +2097,8 @@
"TARGET_SSE2"
"cvtpd2ps\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
- (set_attr "mode" "V4SF")])
+ (set_attr "mode" "V4SF")
+ (set_attr "amdfam10_decode" "double")])
(define_insn "sse2_cvtps2pd"
[(set (match_operand:V2DF 0 "register_operand" "=x")
@@ -2092,7 +2109,8 @@
"TARGET_SSE2"
"cvtps2pd\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
- (set_attr "mode" "V2DF")])
+ (set_attr "mode" "V2DF")
+ (set_attr "amdfam10_decode" "direct")])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
++++++ gcc-amdfam10-suse-6.patch ++++++
Index: gcc/config/i386/athlon.md
===================================================================
--- gcc/config/i386/athlon.md.orig
+++ gcc/config/i386/athlon.md
@@ -580,6 +580,11 @@
(and (eq_attr "type" "sselog,sselog1")
(eq_attr "memory" "load")))
"athlon-double,athlon-fpload2k8,(athlon-fmul*2)")
+(define_insn_reservation "athlon_sselog_load_amdfam10" 4
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "sselog,sselog1")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fploadk8,(athlon-fadd|athlon-fmul)")
(define_insn_reservation "athlon_sselog" 3
(and (eq_attr "cpu" "athlon")
(eq_attr "type" "sselog,sselog1"))
@@ -588,6 +593,11 @@
(and (eq_attr "cpu" "k8,generic64")
(eq_attr "type" "sselog,sselog1"))
"athlon-double,athlon-fpsched,athlon-fmul")
+(define_insn_reservation "athlon_sselog_amdfam10" 2
+ (and (eq_attr "cpu" "amdfam10")
+ (eq_attr "type" "sselog,sselog1"))
+ "athlon-direct,athlon-fpsched,(athlon-fadd|athlon-fmul)")
+
;; ??? pcmp executes in addmul, probably not worthwhile to bother about that.
(define_insn_reservation "athlon_ssecmp_load" 2
(and (eq_attr "cpu" "athlon")
@@ -596,13 +606,13 @@
(eq_attr "memory" "load"))))
"athlon-direct,athlon-fpload,athlon-fadd")
(define_insn_reservation "athlon_ssecmp_load_k8" 4
- (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
(and (eq_attr "type" "ssecmp")
(and (eq_attr "mode" "SF,DF,DI,TI")
(eq_attr "memory" "load"))))
"athlon-direct,athlon-fploadk8,athlon-fadd")
(define_insn_reservation "athlon_ssecmp" 2
- (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
(and (eq_attr "type" "ssecmp")
(eq_attr "mode" "SF,DF,DI,TI")))
"athlon-direct,athlon-fpsched,athlon-fadd")
@@ -616,6 +626,11 @@
(and (eq_attr "type" "ssecmp")
(eq_attr "memory" "load")))
"athlon-double,athlon-fpload2k8,(athlon-fadd*2)")
+(define_insn_reservation "athlon_ssecmpvector_load_amdfam10" 4
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "ssecmp")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fploadk8,athlon-fadd")
(define_insn_reservation "athlon_ssecmpvector" 3
(and (eq_attr "cpu" "athlon")
(eq_attr "type" "ssecmp"))
@@ -624,6 +639,10 @@
(and (eq_attr "cpu" "k8,generic64")
(eq_attr "type" "ssecmp"))
"athlon-double,athlon-fpsched,(athlon-fadd*2)")
+(define_insn_reservation "athlon_ssecmpvector_amdfam10" 2
+ (and (eq_attr "cpu" "amdfam10")
+ (eq_attr "type" "ssecmp"))
+ "athlon-direct,athlon-fpsched,athlon-fadd")
(define_insn_reservation "athlon_ssecomi_load" 4
(and (eq_attr "cpu" "athlon")
(and (eq_attr "type" "ssecomi")
@@ -634,10 +653,20 @@
(and (eq_attr "type" "ssecomi")
(eq_attr "memory" "load")))
"athlon-vector,athlon-fploadk8,athlon-fadd")
+(define_insn_reservation "athlon_ssecomi_load_amdfam10" 5
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "ssecomi")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fploadk8,athlon-fadd")
(define_insn_reservation "athlon_ssecomi" 4
(and (eq_attr "cpu" "athlon,k8,generic64")
(eq_attr "type" "ssecmp"))
"athlon-vector,athlon-fpsched,athlon-fadd")
+(define_insn_reservation "athlon_ssecomi_amdfam10" 3
+ (and (eq_attr "cpu" "amdfam10")
+;; athlon_ssecomi tests type "ssecmp", which looks like a bug; amdfam10 uses "ssecomi"
+ (eq_attr "type" "ssecomi"))
+ "athlon-direct,athlon-fpsched,athlon-fadd")
(define_insn_reservation "athlon_sseadd_load" 4
(and (eq_attr "cpu" "athlon")
(and (eq_attr "type" "sseadd")
@@ -645,13 +674,13 @@
(eq_attr "memory" "load"))))
"athlon-direct,athlon-fpload,athlon-fadd")
(define_insn_reservation "athlon_sseadd_load_k8" 6
- (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
(and (eq_attr "type" "sseadd")
(and (eq_attr "mode" "SF,DF,DI")
(eq_attr "memory" "load"))))
"athlon-direct,athlon-fploadk8,athlon-fadd")
(define_insn_reservation "athlon_sseadd" 4
- (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
(and (eq_attr "type" "sseadd")
(eq_attr "mode" "SF,DF,DI")))
"athlon-direct,athlon-fpsched,athlon-fadd")
@@ -665,6 +694,11 @@
(and (eq_attr "type" "sseadd")
(eq_attr "memory" "load")))
"athlon-double,athlon-fpload2k8,(athlon-fadd*2)")
+(define_insn_reservation "athlon_sseaddvector_load_amdfam10" 6
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "sseadd")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fploadk8,athlon-fadd")
(define_insn_reservation "athlon_sseaddvector" 5
(and (eq_attr "cpu" "athlon")
(eq_attr "type" "sseadd"))
@@ -673,6 +707,10 @@
(and (eq_attr "cpu" "k8,generic64")
(eq_attr "type" "sseadd"))
"athlon-double,athlon-fpsched,(athlon-fadd*2)")
+(define_insn_reservation "athlon_sseaddvector_amdfam10" 4
+ (and (eq_attr "cpu" "amdfam10")
+ (eq_attr "type" "sseadd"))
+ "athlon-direct,athlon-fpsched,athlon-fadd")
;; Conversions behaves very irregularly and the scheduling is critical here.
;; Take each instruction separately. Assume that the mode is always set to the
Index: gcc/config/i386/i386.md
===================================================================
--- gcc/config/i386/i386.md.orig
+++ gcc/config/i386/i386.md
@@ -974,7 +974,7 @@
(set_attr "mode" "SI")])
;; Pentium Pro can do steps 1 through 3 in one go.
-
+;; comi*, ucomi*, fcomi*, ficomi*,fucomi* (i387 instructions set condition codes)
(define_insn "*cmpfp_i_mixed"
[(set (reg:CCFP FLAGS_REG)
(compare:CCFP (match_operand 0 "register_operand" "f#x,x#f")
@@ -988,7 +988,8 @@
(if_then_else (match_operand:SF 1 "" "")
(const_string "SF")
(const_string "DF")))
- (set_attr "athlon_decode" "vector")])
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "direct")])
(define_insn "*cmpfp_i_sse"
[(set (reg:CCFP FLAGS_REG)
@@ -1003,7 +1004,8 @@
(if_then_else (match_operand:SF 1 "" "")
(const_string "SF")
(const_string "DF")))
- (set_attr "athlon_decode" "vector")])
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "direct")])
(define_insn "*cmpfp_i_i387"
[(set (reg:CCFP FLAGS_REG)
@@ -1022,7 +1024,8 @@
(const_string "DF")
]
(const_string "XF")))
- (set_attr "athlon_decode" "vector")])
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "direct")])
(define_insn "*cmpfp_iu_mixed"
[(set (reg:CCFPU FLAGS_REG)
@@ -1037,7 +1040,8 @@
(if_then_else (match_operand:SF 1 "" "")
(const_string "SF")
(const_string "DF")))
- (set_attr "athlon_decode" "vector")])
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "direct")])
(define_insn "*cmpfp_iu_sse"
[(set (reg:CCFPU FLAGS_REG)
@@ -1052,7 +1056,8 @@
(if_then_else (match_operand:SF 1 "" "")
(const_string "SF")
(const_string "DF")))
- (set_attr "athlon_decode" "vector")])
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "direct")])
(define_insn "*cmpfp_iu_387"
[(set (reg:CCFPU FLAGS_REG)
@@ -1071,7 +1076,8 @@
(const_string "DF")
]
(const_string "XF")))
- (set_attr "athlon_decode" "vector")])
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "direct")])
;; Move instructions.
++++++ gcc-amdfam10-suse-7.patch ++++++
Index: gcc/config/i386/athlon.md
===================================================================
--- gcc/config/i386/athlon.md.orig
+++ gcc/config/i386/athlon.md
@@ -1042,3 +1042,8 @@
(and (eq_attr "cpu" "amdfam10")
(eq_attr "type" "ssediv"))
"athlon-direct,athlon-fmul*17")
+(define_insn_reservation "athlon_sseins_amdfam10" 5
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "sseins")
+ (eq_attr "mode" "TI")))
+ "athlon-vector,athlon-fpsched,athlon-faddmul")
Index: gcc/config/i386/i386.md
===================================================================
--- gcc/config/i386/i386.md.orig
+++ gcc/config/i386/i386.md
@@ -215,7 +215,7 @@
str,cld,
fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint,
sselog,sselog1,sseiadd,sseishft,sseimul,
- sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,sseicvt,ssediv,
+ sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,sseicvt,ssediv,sseins,
mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft"
(const_string "other"))
@@ -229,7 +229,7 @@
(cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint")
(const_string "i387")
(eq_attr "type" "sselog,sselog1,sseiadd,sseishft,sseimul,
- sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,sseicvt,ssediv")
+ sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,sseicvt,ssediv,sseins")
(const_string "sse")
(eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft")
(const_string "mmx")
Index: gcc/config/i386/sse.md
===================================================================
--- gcc/config/i386/sse.md.orig
+++ gcc/config/i386/sse.md
@@ -4583,7 +4583,7 @@
{
return "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}";
}
- [(set_attr "type" "sse")
+ [(set_attr "type" "sseins")
(set_attr "mode" "TI")])
(define_insn "sse4a_insertq"
@@ -4595,5 +4595,5 @@
{
return "insertq\t{%2, %0|%0, %2}";
}
- [(set_attr "type" "sse")
+ [(set_attr "type" "sseins")
(set_attr "mode" "TI")])
++++++ gcc-amdfam10-suse-8.patch ++++++
Index: gcc/config/i386/athlon.md
===================================================================
--- gcc/config/i386/athlon.md.orig
+++ gcc/config/i386/athlon.md
@@ -518,6 +518,23 @@
(and (eq_attr "type" "mmxmov,ssemov")
(eq_attr "memory" "load")))
"athlon-direct,athlon-fploadk8,athlon-fstore")
+;; On AMDFAM10 all double, single and integer packed and scalar SSEx data
+;; loads generated are direct path, latency of 2 and do not use any FP
+;; execution units. No separate entries for movlpx/movhpx loads, which
+;; are direct path, latency of 4 and use the FADD/FMUL FP execution units,
+;; as they will not be generated.
+(define_insn_reservation "athlon_sseld_amdfam10" 2
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "ssemov")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fploadk8")
+;; On AMDFAM10 MMX data loads generated are direct path, latency of 4
+;; and can use any FP execution units
+(define_insn_reservation "athlon_mmxld_amdfam10" 4
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "mmxmov")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fploadk8, athlon-fany")
(define_insn_reservation "athlon_mmxssest" 3
(and (eq_attr "cpu" "k8,generic64")
(and (eq_attr "type" "mmxmov,ssemov")
@@ -535,6 +552,25 @@
(and (eq_attr "type" "mmxmov,ssemov")
(eq_attr "memory" "store,both")))
"athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
+;; On AMDFAM10 all double, single and integer packed SSEx data stores
+;; generated are all double path, latency of 2 and use the FSTORE FP
+;; execution unit. No separate entries for movupx/movdqu, which are
+;; vector path, latency of 3 and use the FSTORE*2 FP execution unit,
+;; as they will not be generated.
+(define_insn_reservation "athlon_ssest_amdfam10" 2
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "ssemov")
+ (and (eq_attr "mode" "V4SF,V2DF,TI")
+ (eq_attr "memory" "store,both"))))
+ "athlon-double,(athlon-fpsched+athlon-agu),((athlon-fstore+athlon-store)*2)")
+;; On AMDFAM10 all double, single and integer scalar SSEx and MMX
+;; data stores generated are all direct path, latency of 2 and use
+;; the FSTORE FP execution unit
+(define_insn_reservation "athlon_mmxssest_short_amdfam10" 2
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "mmxmov,ssemov")
+ (eq_attr "memory" "store,both")))
+ "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
(define_insn_reservation "athlon_movaps_k8" 2
(and (eq_attr "cpu" "k8,generic64")
(and (eq_attr "type" "ssemov")
++++++ gcc-amdfam10-suse-9.patch ++++++
Index: gcc/config/i386/athlon.md
===================================================================
--- gcc/config/i386/athlon.md.orig
+++ gcc/config/i386/athlon.md
@@ -133,18 +133,22 @@
;; Jump instructions are executed in the branch unit completely transparent to us
(define_insn_reservation "athlon_branch" 0
- (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
(eq_attr "type" "ibr"))
"athlon-direct,athlon-ieu")
(define_insn_reservation "athlon_call" 0
(and (eq_attr "cpu" "athlon,k8,generic64")
(eq_attr "type" "call,callv"))
"athlon-vector,athlon-ieu")
+(define_insn_reservation "athlon_call_amdfam10" 0
+ (and (eq_attr "cpu" "amdfam10")
+ (eq_attr "type" "call,callv"))
+ "athlon-double,athlon-ieu")
;; Latency of push operation is 3 cycles, but ESP value is available
;; earlier
(define_insn_reservation "athlon_push" 2
- (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
(eq_attr "type" "push"))
"athlon-direct,athlon-agu,athlon-store")
(define_insn_reservation "athlon_pop" 4
@@ -155,12 +159,16 @@
(and (eq_attr "cpu" "k8,generic64")
(eq_attr "type" "pop"))
"athlon-double,(athlon-ieu+athlon-load)")
+(define_insn_reservation "athlon_pop_amdfam10" 3
+ (and (eq_attr "cpu" "amdfam10")
+ (eq_attr "type" "pop"))
+ "athlon-direct,(athlon-ieu+athlon-load)")
(define_insn_reservation "athlon_leave" 3
(and (eq_attr "cpu" "athlon")
(eq_attr "type" "leave"))
"athlon-vector,(athlon-ieu+athlon-load)")
(define_insn_reservation "athlon_leave_k8" 3
- (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
(eq_attr "type" "leave"))
"athlon-double,(athlon-ieu+athlon-load)")
@@ -169,6 +177,11 @@
(and (eq_attr "cpu" "athlon,k8,generic64")
(eq_attr "type" "lea"))
"athlon-direct,athlon-agu,nothing")
+;; Lea executes in AGU unit with 1 cycle latency on AMDFAM10
+(define_insn_reservation "athlon_lea_amdfam10" 1
+ (and (eq_attr "cpu" "amdfam10")
+ (eq_attr "type" "lea"))
+ "athlon-direct,athlon-agu,nothing")
;; Mul executes in special multiplier unit attached to IEU0
(define_insn_reservation "athlon_imul" 5
@@ -178,29 +191,35 @@
"athlon-vector,athlon-ieu0,athlon-mult,nothing,nothing,athlon-ieu0")
;; ??? Widening multiply is vector or double.
(define_insn_reservation "athlon_imul_k8_DI" 4
- (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
(and (eq_attr "type" "imul")
(and (eq_attr "mode" "DI")
(eq_attr "memory" "none,unknown"))))
"athlon-direct0,athlon-ieu0,athlon-mult,nothing,athlon-ieu0")
(define_insn_reservation "athlon_imul_k8" 3
- (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
(and (eq_attr "type" "imul")
(eq_attr "memory" "none,unknown")))
"athlon-direct0,athlon-ieu0,athlon-mult,athlon-ieu0")
+(define_insn_reservation "athlon_imul_amdfam10_HI" 4
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "imul")
+ (and (eq_attr "mode" "HI")
+ (eq_attr "memory" "none,unknown"))))
+ "athlon-vector,athlon-ieu0,athlon-mult,nothing,athlon-ieu0")
(define_insn_reservation "athlon_imul_mem" 8
(and (eq_attr "cpu" "athlon")
(and (eq_attr "type" "imul")
(eq_attr "memory" "load,both")))
"athlon-vector,athlon-load,athlon-ieu,athlon-mult,nothing,nothing,athlon-ieu")
(define_insn_reservation "athlon_imul_mem_k8_DI" 7
- (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
(and (eq_attr "type" "imul")
(and (eq_attr "mode" "DI")
(eq_attr "memory" "load,both"))))
"athlon-vector,athlon-load,athlon-ieu,athlon-mult,nothing,athlon-ieu")
(define_insn_reservation "athlon_imul_mem_k8" 6
- (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
(and (eq_attr "type" "imul")
(eq_attr "memory" "load,both")))
"athlon-vector,athlon-load,athlon-ieu,athlon-mult,athlon-ieu")
@@ -211,21 +230,23 @@
;; other instructions.
;; ??? Experiments show that the idiv can overlap with roughly 6 cycles
;; of the other code
+;; Using the same heuristics for amdfam10 as K8 with idiv
(define_insn_reservation "athlon_idiv" 6
- (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
(and (eq_attr "type" "idiv")
(eq_attr "memory" "none,unknown")))
"athlon-vector,(athlon-ieu0*6+(athlon-fpsched,athlon-fvector))")
(define_insn_reservation "athlon_idiv_mem" 9
- (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
(and (eq_attr "type" "idiv")
(eq_attr "memory" "load,both")))
"athlon-vector,((athlon-load,athlon-ieu0*6)+(athlon-fpsched,athlon-fvector))")
;; The parallelism of string instructions is not documented. Model it same way
;; as idiv to create smaller automata. This probably does not matter much.
+;; Using the same heuristics for amdfam10 as K8 with idiv
(define_insn_reservation "athlon_str" 6
- (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
(and (eq_attr "type" "str")
(eq_attr "memory" "load,both,store")))
"athlon-vector,athlon-load,athlon-ieu0*6")
++++++ gcc.spec.in ++++++
--- gcc41/gcc.spec.in 2007-01-15 13:29:13.000000000 +0100
+++ /mounts/work_src_done/STABLE/gcc41/gcc.spec.in 2007-02-07 10:23:25.670221000 +0100
@@ -27,7 +27,7 @@
# on those that work
# Note that AdaCore only supports %ix86, x86_64 and ia64
%ifarch %ix86 x86_64 ia64 hppa ppc s390
-%define build_ada 0%{!?building_libjava:1}
+%define build_ada 1
%else
# alpha ppc64 s390x
%define build_ada 0
@@ -57,7 +57,7 @@
Name: gcc@base_ver@@gcc_suffix@
BuildRequires: glibc-devel-32bit flex bison gettext-devel texinfo
%if %{build_ada}
-BuildRequires: gcc-ada
+BuildRequires: gcc-ada gcc@base_ver@-ada
%endif
%if %{build_libjava}
BuildRequires: gtk2-devel libart_lgpl-devel update-desktop-files
@@ -185,7 +185,10 @@
Patch97: nov189571-2.diff
Patch98: nvl199137.patch
Patch100: pr28755.patch
+Patch101: pr30113.patch
Patch105: pr29166.diff
+Patch106: fix-ada-build.diff
+Patch107: pr20218.patch
# A set of patches from the RH srpm
Patch51: gcc41-ia64-stack-protector.patch
Patch55: gcc41-java-slow_pthread_self.patch
@@ -207,6 +210,24 @@
Patch69: gcc41-ssse3.patch
Patch70: pr27880.diff
Patch72: Wprofile-mismatch.patch
+Patch73: parentheses-mathematical-5.diff
+# Greyhound (aka AMD Family 10h) support
+Patch200: gcc-amdfam10-suse-1.patch
+Patch201: gcc-amdfam10-suse-2.patch
+Patch202: gcc-amdfam10-suse-3.patch
+Patch203: gcc-amdfam10-suse-4.patch
+Patch204: gcc-amdfam10-suse-5.patch
+Patch205: gcc-amdfam10-suse-6.patch
+Patch206: gcc-amdfam10-suse-7.patch
+Patch207: gcc-amdfam10-suse-8.patch
+Patch208: gcc-amdfam10-suse-9.patch
+Patch209: gcc-amdfam10-suse-10.patch
+Patch210: gcc-amdfam10-suse-11.patch
+Patch211: gcc-amdfam10-suse-12.patch
+Patch212: gcc-amdfam10-suse-20.patch
+# POWER6 support
+Patch300: gcc-4.1.0-power6.diff
+
%if "%{gcc_target_arch}" == "avr"
# Joerg Wunsch places his cross-avr-gcc-patches there:
@@ -753,7 +774,10 @@
%patch97
%patch98 -p1
%patch100
+%patch101
%patch105
+%patch106
+%patch107
%patch51
%patch55
%patch57
@@ -775,6 +799,22 @@
%patch69
%patch70
%patch72
+%patch73
+%patch200
+%patch201
+%patch202
+%patch203
+%patch204
+%patch205
+%patch206
+%patch207
+%patch208
+%patch209
+%patch210
+%patch211
+%patch212
+%patch300
+
%if "%{gcc_target_arch}" == "avr"
%patch1000
@@ -796,7 +836,7 @@
mkdir obj-%{GCCDIST}
cd obj-%{GCCDIST}
RPM_OPT_FLAGS="$RPM_OPT_FLAGS -U_FORTIFY_SOURCE"
-RPM_OPT_FLAGS=`echo $RPM_OPT_FLAGS|sed -e 's/-fno-rtti//g' -e 's/-fno-exceptions//g' -e 's/-Wmissing-format-attribute//g'`
+RPM_OPT_FLAGS=`echo $RPM_OPT_FLAGS|sed -e 's/-fno-rtti//g' -e 's/-fno-exceptions//g' -e 's/-Wmissing-format-attribute//g' -e 's/-fstack-protector//g' -e 's/-ffortify=.//g'`
%ifarch %ix86
# -mcpu is superceded by -mtune but -mtune is not supported by
# our bootstrap compiler. -mcpu gives a warning that stops
@@ -875,6 +915,9 @@
CFLAGS="$RPM_OPT_FLAGS" CXXFLAGS="$RPM_OPT_FLAGS" XCFLAGS="$RPM_OPT_FLAGS" \
TCFLAGS="$RPM_OPT_FLAGS" GCJFLAGS="$RPM_OPT_FLAGS" \
+%if %{build_ada}
+CC="gcc%{binsuffix}" GNATBIND="gnatbind%{binsuffix}" \
+%endif
../configure \
%if 0%{?gcc_target_arch:1} && 0%{!?gcc_icecream:1}
%else
@@ -987,6 +1030,14 @@
%endif
%endif
done
+rm -f $RPM_BUILD_ROOT%{_libdir}/libffi.la
+%if %{biarch}
+%if %{build_primary_64bit}
+ rm -f $RPM_BUILD_ROOT%{_prefix}/lib/libffi.la
+%else
+ rm -f $RPM_BUILD_ROOT%{_prefix}/lib64/libffi.la
+%endif
+%endif
%if %{biarch}
%if %{build_primary_64bit}
@@ -1099,7 +1150,6 @@
ln -sf /lib64/libgcc_s.so.%{libgcc_s} $RPM_BUILD_ROOT%{versmainlibdirbi64}/libgcc_s_64.so
chmod a+x $RPM_BUILD_ROOT/lib64/libgcc_s.so.%{libgcc_s}
%endif
-# LIBJAVA-DELETE-END
%endif
%if %{build_ada}
@@ -1107,6 +1157,7 @@
ln -sf %{_libdir}/$(cd $RPM_BUILD_ROOT%{_libdir} && echo libgnarl-*.so) $RPM_BUILD_ROOT%{libsubdir}/adalib/libgnarl.so
ln -sf %{_libdir}/$(cd $RPM_BUILD_ROOT%{_libdir} && echo libgnat-*.so) $RPM_BUILD_ROOT%{libsubdir}/adalib/libgnat.so
chmod a+x $RPM_BUILD_ROOT%{_libdir}/libgna*-*.so
+# LIBJAVA-DELETE-END
%endif
#ln -sf gcc%{binsuffix} $RPM_BUILD_ROOT%{_prefix}/bin/cc%{binsuffix}
@@ -1362,6 +1413,7 @@
%ifarch %ix86 x86_64
%{libsubdir}/include/mm3dnow.h
%{libsubdir}/include/mmintrin.h
+%{libsubdir}/include/ammintrin.h
%{libsubdir}/include/emmintrin.h
%{libsubdir}/include/pmmintrin.h
%{libsubdir}/include/xmmintrin.h
@@ -1677,6 +1729,8 @@
%files -n libgcj@base_ver@-devel@gcc_suffix@
%defattr(-,root,root)
%dir %{_prefix}/include/c++
+%dir %{libsubdir}
+%dir %{libsubdir}/include
%dir %{gxxinclude}
%{libsubdir}/include/jawt.h
%{libsubdir}/include/jawt_md.h
@@ -1732,14 +1786,12 @@
%{_prefix}/include/ffitarget.h
%mainlib libffi.so
%mainlib libffi.a
-%mainlib libffi.la
%if %{separate_biarch}
%files -n libffi@base_ver@@gcc_suffix@-devel%{separate_biarch_suffix}
%defattr(-,root,root)
%biarchlib libffi.so
%biarchlib libffi.a
-%biarchlib libffi.la
%endif
%endif
%endif
++++++ micro-cross.spec.in ++++++
--- gcc41/micro-cross.spec.in 2007-01-29 20:05:01.000000000 +0100
+++ /mounts/work_src_done/STABLE/gcc41/micro-cross.spec.in 2007-02-07 10:23:26.066980000 +0100
@@ -51,10 +51,10 @@
cat << EOF
-if [ -z "\`rpm -q --changelog binutils | grep 'Update to binutils 2.17.50.0.8.'\`" ]; then
+if [ -z "\`rpm -q --changelog binutils | grep 'Update to binutils-2.17.50.0.8.'\`" ]; then
set +x
echo "Our gcc-4.1.2-20061129 (or later) needs support for 'ldi r30,lo8(gs(1f))'"
- echo "Please update to binutils 2.17.50.0.8 or later!"
+ echo "Please update to binutils-2.17.50.0.8 or later!"
rpm -q binutils
exit 1;
fi
++++++ parentheses-mathematical-5.diff ++++++
--- gcc/c-typeck.c
+++ gcc/c-typeck.c
@@ -2553,13 +2553,23 @@
warning (OPT_Wparentheses,
"suggest parentheses around comparison in operand of &");
}
- /* Similarly, check for cases like 1<=i<=10 that are probably errors. */
- if (TREE_CODE_CLASS (code) == tcc_comparison
- && (TREE_CODE_CLASS (code1) == tcc_comparison
- || TREE_CODE_CLASS (code2) == tcc_comparison))
- warning (OPT_Wparentheses, "comparisons like X<=Y<=Z do not "
- "have their mathematical meaning");
-
+ if (code == EQ_EXPR || code == NE_EXPR)
+ {
+ if (TREE_CODE_CLASS (code1) == tcc_comparison
+ || TREE_CODE_CLASS (code2) == tcc_comparison)
+ warning (OPT_Wparentheses,
+ "suggest parentheses around comparison in operand of %s",
+ code == EQ_EXPR ? "==" : "!=");
+ }
+ else if (TREE_CODE_CLASS (code) == tcc_comparison)
+ {
+ if ((TREE_CODE_CLASS (code1) == tcc_comparison
+ && code1 != NE_EXPR && code1 != EQ_EXPR)
+ || (TREE_CODE_CLASS (code2) == tcc_comparison
+ && code2 != NE_EXPR && code2 != EQ_EXPR))
+ warning (OPT_Wparentheses, "comparisons like X<=Y<=Z do not "
+ "have their mathematical meaning");
+ }
}
unsigned_conversion_warning (result.value, arg1.value);
--- gcc/testsuite/gcc.dg/Wparentheses-2.c
+++ gcc/testsuite/gcc.dg/Wparentheses-2.c
@@ -10,58 +10,112 @@
int
bar (int a, int b, int c)
{
- foo (a <= b <= c); /* { dg-warning "comparison" "correct warning" } */
+ foo (a <= b <= c); /* { dg-warning "mathematical meaning" "correct warning" } */
foo ((a <= b) <= c);
foo (a <= (b <= c));
- foo (1 <= 2 <= c); /* { dg-warning "comparison" "correct warning" } */
+ foo (1 <= 2 <= c); /* { dg-warning "mathematical meaning" "correct warning" } */
foo ((1 <= 2) <= c);
foo (1 <= (2 <= c));
- foo (1 <= 2 <= 3); /* { dg-warning "comparison" "correct warning" } */
+ foo (1 <= 2 <= 3); /* { dg-warning "mathematical meaning" "correct warning" } */
foo ((1 <= 2) <= 3);
foo (1 <= (2 <= 3));
- foo (a > b > c); /* { dg-warning "comparison" "correct warning" } */
+ foo (a > b > c); /* { dg-warning "mathematical meaning" "correct warning" } */
foo ((a > b) > c);
foo (a > (b > c));
- foo (1 > 2 > c); /* { dg-warning "comparison" "correct warning" } */
+ foo (1 > 2 > c); /* { dg-warning "mathematical meaning" "correct warning" } */
foo ((1 > 2) > c);
foo (1 > (2 > c));
- foo (1 > 2 > 3); /* { dg-warning "comparison" "correct warning" } */
+ foo (1 > 2 > 3); /* { dg-warning "mathematical meaning" "correct warning" } */
foo ((1 > 2) > 3);
foo (1 > (2 > 3));
- foo (a < b <= c); /* { dg-warning "comparison" "correct warning" } */
+ foo (a < b <= c); /* { dg-warning "mathematical meaning" "correct warning" } */
foo ((a < b) <= c);
foo (a < (b <= c));
- foo (1 < 2 <= c); /* { dg-warning "comparison" "correct warning" } */
+ foo (1 < 2 <= c); /* { dg-warning "mathematical meaning" "correct warning" } */
foo ((1 < 2) <= c);
foo (1 < (2 <= c));
- foo (1 < 2 <= 3); /* { dg-warning "comparison" "correct warning" } */
+ foo (1 < 2 <= 3); /* { dg-warning "mathematical meaning" "correct warning" } */
foo ((1 < 2) <= 3);
foo (1 < (2 <= 3));
- foo (a <= b > c); /* { dg-warning "comparison" "correct warning" } */
+ foo (a <= b > c); /* { dg-warning "mathematical meaning" "correct warning" } */
foo ((a <= b) > c);
foo (a <= (b > c));
- foo (1 <= 2 > c); /* { dg-warning "comparison" "correct warning" } */
+ foo (1 <= 2 > c); /* { dg-warning "mathematical meaning" "correct warning" } */
foo ((1 <= 2) > c);
foo (1 <= (2 > c));
- foo (1 <= 2 > 3); /* { dg-warning "comparison" "correct warning" } */
+ foo (1 <= 2 > 3); /* { dg-warning "mathematical meaning" "correct warning" } */
foo ((1 <= 2) > 3);
foo (1 <= (2 > 3));
- foo (a <= b == c); /* { dg-warning "comparison" "correct warning" } */
+ foo (a <= b == c); /* { dg-warning "suggest parentheses around comparison" "correct warning" } */
foo ((a <= b) == c);
foo (a <= (b == c));
- foo (1 <= 2 == c); /* { dg-warning "comparison" "correct warning" } */
+ foo (1 <= 2 == c); /* { dg-warning "suggest parentheses around comparison" "correct warning" } */
foo ((1 <= 2) == c);
foo (1 <= (2 == c));
- foo (1 <= 2 == 3); /* { dg-warning "comparison" "correct warning" } */
+ foo (1 <= 2 == 3); /* { dg-warning "suggest parentheses around comparison" "correct warning" } */
foo ((1 <= 2) == 3);
foo (1 <= (2 == 3));
- foo (a != b != c); /* { dg-warning "comparison" "correct warning" } */
+ foo (a != b != c); /* { dg-warning "suggest parentheses around comparison" "correct warning" } */
foo ((a != b) != c);
foo (a != (b != c));
- foo (1 != 2 != c); /* { dg-warning "comparison" "correct warning" } */
+ foo (1 != 2 != c); /* { dg-warning "suggest parentheses around comparison" "correct warning" } */
foo ((1 != 2) != c);
foo (1 != (2 != c));
- foo (1 != 2 != 3); /* { dg-warning "comparison" "correct warning" } */
+ foo (1 != 2 != 3); /* { dg-warning "suggest parentheses around comparison" "correct warning" } */
foo ((1 != 2) != 3);
foo (1 != (2 != 3));
+ foo (a < b == c); /* { dg-warning "suggest parentheses around comparison" "correct warning" } */
+ foo ((a < b) == c);
+ foo (a < (b == c));
+ foo (a > b == c); /* { dg-warning "suggest parentheses around comparison" "correct warning" } */
+ foo ((a > b) == c);
+ foo (a > (b == c));
+ foo (a == b < c); /* { dg-warning "suggest parentheses around comparison" "correct warning" } */
+ foo ((a == b) < c);
+ foo (a == (b < c));
+ foo (a == b > c); /* { dg-warning "suggest parentheses around comparison" "correct warning" } */
+ foo ((a == b) > c);
+ foo (a == (b > c));
+ foo (a == b == c); /* { dg-warning "suggest parentheses around comparison" "correct warning" } */
+ foo ((a == b) == c);
+ foo (a == (b == c));
+ foo (1 == 2 == 3); /* { dg-warning "suggest parentheses around comparison" "correct warning" } */
+ foo ((1 == 2) == 3);
+ foo (1 == (2 == 3));
+ foo (1 < 2 == 3); /* { dg-warning "suggest parentheses around comparison" "correct warning" } */
+ foo ((1 < 2) == 3);
+ foo (1 < (2 == 3));
+ foo (1 > 2 == 3); /* { dg-warning "suggest parentheses around comparison" "correct warning" } */
+ foo ((1 > 2) == 3);
+ foo (1 > (2 == 3));
+ foo (1 == 2 < 3); /* { dg-warning "suggest parentheses around comparison" "correct warning" } */
+ foo ((1 == 2) < 3);
+ foo (1 == (2 < 3));
+ foo (1 == 2 > 3); /* { dg-warning "suggest parentheses around comparison" "correct warning" } */
+ foo ((1 == 2) > 3);
+ foo (1 == (2 > 3));
+ foo (a < b != c); /* { dg-warning "suggest parentheses around comparison" "correct warning" } */
+ foo ((a < b) != c);
+ foo (a < (b != c));
+ foo (a > b != c); /* { dg-warning "suggest parentheses around comparison" "correct warning" } */
+ foo ((a > b) != c);
+ foo (a > (b != c));
+ foo (a != b < c); /* { dg-warning "suggest parentheses around comparison" "correct warning" } */
+ foo ((a != b) < c);
+ foo (a != (b < c));
+ foo (a != b > c); /* { dg-warning "suggest parentheses around comparison" "correct warning" } */
+ foo ((a != b) > c);
+ foo (a != (b > c));
+ foo (1 < 2 != 3); /* { dg-warning "suggest parentheses around comparison" "correct warning" } */
+ foo ((1 < 2) != 3);
+ foo (1 < (2 != 3));
+ foo (1 > 2 != 3); /* { dg-warning "suggest parentheses around comparison" "correct warning" } */
+ foo ((1 > 2) != 3);
+ foo (1 > (2 != 3));
+ foo (1 != 2 < 3); /* { dg-warning "suggest parentheses around comparison" "correct warning" } */
+ foo ((1 != 2) < 3);
+ foo (1 != (2 < 3));
+ foo (1 != 2 > 3); /* { dg-warning "suggest parentheses around comparison" "correct warning" } */
+ foo ((1 != 2) > 3);
+ foo (1 != (2 > 3));
}
++++++ pr20218.patch ++++++
2006-12-07 H.J. Lu
Backport from mainline:
2006-12-11 H.J. Lu
PR middle-end/17982
PR middle-end/20218
* cgraphunit.c (cgraph_optimize): Remove call to
process_pending_assemble_externals.
* config/elfos.h (ASM_OUTPUT_EXTERNAL): New.
* config/ia64/hpux.h (TARGET_ASM_FILE_END): Removed.
* config/ia64/ia64.c (ia64_asm_output_external): Rewritten.
(ia64_hpux_add_extern_decl): Removed.
(ia64_hpux_file_end): Likewise.
(extern_func_list): Likewise.
(extern_func_head): Likewise.
* output.h (assemble_external): Update comments.
(default_elf_asm_output_external): New.
(maybe_assemble_visibility): New.
* toplev.c (compile_file): Update comment.
* varasm.c (assemble_external): Always put it on
pending_assemble_externals.
(maybe_assemble_visibility): Make it extern and return int.
(default_elf_asm_output_external): New.
--- gcc/cgraphunit.c.global 2007-01-09 05:24:35.000000000 -0800
+++ gcc/cgraphunit.c 2007-01-09 05:30:01.000000000 -0800
@@ -1330,8 +1330,6 @@ cgraph_optimize (void)
return;
}
- process_pending_assemble_externals ();
-
/* Frontend may output common variables after the unit has been finalized.
It is safe to deal with them here as they are always zero initialized. */
cgraph_varpool_analyze_pending_decls ();
--- gcc/config/elfos.h.global 2006-11-18 06:33:14.000000000 -0800
+++ gcc/config/elfos.h 2007-01-09 05:29:06.000000000 -0800
@@ -494,3 +494,13 @@ Boston, MA 02110-1301, USA. */
fprintf ((FILE), "\"\n"); \
} \
while (0)
+
+/* A C statement (sans semicolon) to output to the stdio stream STREAM
+ any text necessary for declaring the name of an external symbol
+ named NAME which is referenced in this compilation but not defined.
+ It is needed to properly support non-default visibility. */
+
+#ifndef ASM_OUTPUT_EXTERNAL
+#define ASM_OUTPUT_EXTERNAL(FILE, DECL, NAME) \
+ default_elf_asm_output_external (FILE, DECL, NAME)
+#endif
--- gcc/config/ia64/hpux.h.global 2006-11-18 06:32:57.000000000 -0800
+++ gcc/config/ia64/hpux.h 2007-01-09 05:29:06.000000000 -0800
@@ -144,10 +144,6 @@ do { \
definitions, so do not use them in gthr-posix.h. */
#define GTHREAD_USE_WEAK 0
-/* Put out the needed function declarations at the end. */
-
-#define TARGET_ASM_FILE_END ia64_hpux_file_end
-
#undef CTORS_SECTION_ASM_OP
#define CTORS_SECTION_ASM_OP "\t.section\t.init_array,\t\"aw\",\"init_array\""
--- gcc/config/ia64/ia64.c.global 2006-12-29 18:34:00.000000000 -0800
+++ gcc/config/ia64/ia64.c 2007-01-09 05:29:06.000000000 -0800
@@ -242,10 +242,6 @@ static void ia64_rwreloc_select_rtx_sect
unsigned HOST_WIDE_INT)
ATTRIBUTE_UNUSED;
static unsigned int ia64_section_type_flags (tree, const char *, int);
-static void ia64_hpux_add_extern_decl (tree decl)
- ATTRIBUTE_UNUSED;
-static void ia64_hpux_file_end (void)
- ATTRIBUTE_UNUSED;
static void ia64_init_libfuncs (void)
ATTRIBUTE_UNUSED;
static void ia64_hpux_init_libfuncs (void)
@@ -4983,49 +4979,6 @@ ia64_secondary_reload_class (enum reg_cl
}
-/* Emit text to declare externally defined variables and functions, because
- the Intel assembler does not support undefined externals. */
-
-void
-ia64_asm_output_external (FILE *file, tree decl, const char *name)
-{
- int save_referenced;
-
- /* GNU as does not need anything here, but the HP linker does need
- something for external functions. */
-
- if (TARGET_GNU_AS
- && (!TARGET_HPUX_LD
- || TREE_CODE (decl) != FUNCTION_DECL
- || strstr (name, "__builtin_") == name))
- return;
-
- /* ??? The Intel assembler creates a reference that needs to be satisfied by
- the linker when we do this, so we need to be careful not to do this for
- builtin functions which have no library equivalent. Unfortunately, we
- can't tell here whether or not a function will actually be called by
- expand_expr, so we pull in library functions even if we may not need
- them later. */
- if (! strcmp (name, "__builtin_next_arg")
- || ! strcmp (name, "alloca")
- || ! strcmp (name, "__builtin_constant_p")
- || ! strcmp (name, "__builtin_args_info"))
- return;
-
- if (TARGET_HPUX_LD)
- ia64_hpux_add_extern_decl (decl);
- else
- {
- /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
- restore it. */
- save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
- if (TREE_CODE (decl) == FUNCTION_DECL)
- ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
- (*targetm.asm_out.globalize_label) (file, name);
- TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
- }
-}
-
/* Parse the -mfixed-range= option string. */
static void
@@ -8514,55 +8467,33 @@ ia64_hpux_function_arg_padding (enum mac
return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
}
-/* Linked list of all external functions that are to be emitted by GCC.
- We output the name if and only if TREE_SYMBOL_REFERENCED is set in
- order to avoid putting out names that are never really used. */
-
-struct extern_func_list GTY(())
-{
- struct extern_func_list *next;
- tree decl;
-};
-
-static GTY(()) struct extern_func_list *extern_func_head;
-
-static void
-ia64_hpux_add_extern_decl (tree decl)
-{
- struct extern_func_list *p = ggc_alloc (sizeof (struct extern_func_list));
-
- p->decl = decl;
- p->next = extern_func_head;
- extern_func_head = p;
-}
-
-/* Print out the list of used global functions. */
+/* Emit text to declare externally defined variables and functions, because
+ the Intel assembler does not support undefined externals. */
-static void
-ia64_hpux_file_end (void)
+void
+ia64_asm_output_external (FILE *file, tree decl, const char *name)
{
- struct extern_func_list *p;
-
- for (p = extern_func_head; p; p = p->next)
- {
- tree decl = p->decl;
- tree id = DECL_ASSEMBLER_NAME (decl);
-
- gcc_assert (id);
-
- if (!TREE_ASM_WRITTEN (decl) && TREE_SYMBOL_REFERENCED (id))
- {
- const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
-
- TREE_ASM_WRITTEN (decl) = 1;
- (*targetm.asm_out.globalize_label) (asm_out_file, name);
- fputs (TYPE_ASM_OP, asm_out_file);
- assemble_name (asm_out_file, name);
- fprintf (asm_out_file, "," TYPE_OPERAND_FMT "\n", "function");
- }
+ /* We output the name if and only if TREE_SYMBOL_REFERENCED is
+ set in order to avoid putting out names that are never really
+ used. */
+ if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)))
+ {
+ /* maybe_assemble_visibility will return 1 if the assembler
+ visibility directive is output. */
+ int need_visibility = ((*targetm.binds_local_p) (decl)
+ && maybe_assemble_visibility (decl));
+
+ /* GNU as does not need anything here, but the HP linker does
+ need something for external functions. */
+ if ((TARGET_HPUX_LD || !TARGET_GNU_AS)
+ && TREE_CODE (decl) == FUNCTION_DECL)
+ {
+ ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
+ (*targetm.asm_out.globalize_label) (file, name);
+ }
+ else if (need_visibility && !TARGET_GNU_AS)
+ (*targetm.asm_out.globalize_label) (file, name);
}
-
- extern_func_head = 0;
}
/* Set SImode div/mod functions, init_integral_libfuncs only initializes
--- gcc/output.h.global 2006-11-18 06:33:17.000000000 -0800
+++ gcc/output.h 2007-01-09 05:29:06.000000000 -0800
@@ -274,9 +274,9 @@ extern void assemble_end_function (tree,
initial value (that will be done by the caller). */
extern void assemble_variable (tree, int, int, int);
-/* Output something to declare an external symbol to the assembler.
- (Most assemblers don't need this, so we normally output nothing.)
- Do nothing if DECL is not external. */
+/* Queue for outputting something to declare an external symbol to the
+ assembler. (Most assemblers don't need this, so we normally output
+ nothing.) Do nothing if DECL is not external. */
extern void assemble_external (tree);
/* Assemble code to leave SIZE bytes of zeros. */
@@ -586,6 +586,10 @@ extern void default_file_start (void);
extern void file_end_indicate_exec_stack (void);
extern bool default_valid_pointer_mode (enum machine_mode);
+extern void default_elf_asm_output_external (FILE *file, tree,
+ const char *);
+extern int maybe_assemble_visibility (tree);
+
extern int default_address_cost (rtx);
/* dbxout helper functions */
--- gcc/toplev.c.global 2006-11-18 06:33:17.000000000 -0800
+++ gcc/toplev.c 2007-01-09 05:29:06.000000000 -0800
@@ -1033,9 +1033,7 @@ compile_file (void)
dw2_output_indirect_constants ();
- /* Flush any pending external directives. cgraph did this for
- assemble_external calls from the front end, but the RTL
- expander can also generate them. */
+ /* Flush any pending external directives. */
process_pending_assemble_externals ();
/* Attach a special .ident directive to the end of the file to identify
--- gcc/varasm.c.global 2006-11-18 06:33:17.000000000 -0800
+++ gcc/varasm.c 2007-01-09 05:30:40.000000000 -0800
@@ -126,7 +126,6 @@ static unsigned HOST_WIDE_INT array_size
static unsigned min_align (unsigned, unsigned);
static void output_constructor (tree, unsigned HOST_WIDE_INT, unsigned int);
static void globalize_decl (tree);
-static void maybe_assemble_visibility (tree);
static int in_named_entry_eq (const void *, const void *);
static hashval_t in_named_entry_hash (const void *);
static void initialize_cold_section_name (void);
@@ -1935,11 +1934,10 @@ assemble_external (tree decl ATTRIBUTE_U
if (!DECL_P (decl) || !DECL_EXTERNAL (decl) || !TREE_PUBLIC (decl))
return;
- if (flag_unit_at_a_time)
- pending_assemble_externals = tree_cons (0, decl,
- pending_assemble_externals);
- else
- assemble_external_real (decl);
+ /* We want to output external symbols at the very end to check if they
+ are referenced or not. */
+ pending_assemble_externals = tree_cons (0, decl,
+ pending_assemble_externals);
#endif
}
@@ -4970,13 +4968,18 @@ default_assemble_visibility (tree decl,
/* A helper function to call assemble_visibility when needed for a decl. */
-static void
+int
maybe_assemble_visibility (tree decl)
{
enum symbol_visibility vis = DECL_VISIBILITY (decl);
if (vis != VISIBILITY_DEFAULT)
- targetm.asm_out.visibility (decl, vis);
+ {
+ targetm.asm_out.visibility (decl, vis);
+ return 1;
+ }
+ else
+ return 0;
}
/* Returns 1 if the target configuration supports defining public symbols
@@ -5752,4 +5755,19 @@ file_end_indicate_exec_stack (void)
named_section_flags (".note.GNU-stack", flags);
}
+/* Emit text to declare externally defined symbols. It is needed to
+ properly support non-default visibility. */
+void
+default_elf_asm_output_external (FILE *file ATTRIBUTE_UNUSED,
+ tree decl,
+ const char *name ATTRIBUTE_UNUSED)
+{
+ /* We output the name if and only if TREE_SYMBOL_REFERENCED is
+ set in order to avoid putting out names that are never really
+ used. */
+ if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl))
+ && targetm.binds_local_p (decl))
+ maybe_assemble_visibility (decl);
+}
+
#include "gt-varasm.h"
++++++ pr30113.patch ++++++
Subject: Bug 30113
Author: rakdver
Date: Mon Dec 11 21:29:44 2006
New Revision: 119748
URL: http://gcc.gnu.org/viewcvs?root=gcc&view=rev&rev=119748
Log:
PR rtl-optimization/30113
* loop-iv.c (implies_p): Require the mode of the operands to be
scalar.
Index: gcc/loop-iv.c
===================================================================
*** gcc/loop-iv.c (revision 120791)
--- gcc/loop-iv.c (working copy)
*************** implies_p (rtx a, rtx b)
*** 1497,1503 ****
mode = VOIDmode;
}
! if (mode != VOIDmode
&& rtx_equal_p (op1, opb1)
&& simplify_gen_binary (MINUS, mode, opb0, op0) == const1_rtx)
return true;
--- 1497,1503 ----
mode = VOIDmode;
}
! if (SCALAR_INT_MODE_P (mode)
&& rtx_equal_p (op1, opb1)
&& simplify_gen_binary (MINUS, mode, opb0, op0) == const1_rtx)
return true;
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Remember to have fun...
---------------------------------------------------------------------
To unsubscribe, e-mail: opensuse-commit+unsubscribe@opensuse.org
For additional commands, e-mail: opensuse-commit+help@opensuse.org