Hello community,
here is the log from the commit of package mcelog for openSUSE:Factory
checked in at Mon Sep 5 16:38:45 CEST 2011.
--------
--- mcelog/mcelog.changes 2011-06-07 11:53:11.000000000 +0200
+++ mcelog/mcelog.changes 2011-08-18 00:20:41.000000000 +0200
@@ -1,0 +2,28 @@
+Thu Aug 18 00:09:50 CEST 2011 - ro@suse.de
+
+- update to GIT of today (6e4e2a000124f08f1a4e3791c2b02ec9ae6af393)
+- many bugfixes
+- Implement re-parsing of mcelog output in ASCII
+- Add support for non-page aligned EFI Configuration Tables
+- Add --debug-numerrors
+- Add decoder for corrected XEN events to --ascii
+- Correctly log kernel supplied time
+- record the trigger info in the log
+- mcelog: Implement dmi decoding for UEFI
+- mcelog: Add usage information to mcelog for --ignorenodev
+- Fix length calculation of SMBIOS mapping
+- change disclaimer
+- explicitly spell out corrected errors
+
+-------------------------------------------------------------------
+Sat Jul 2 21:50:53 UTC 2011 - trenn@suse.de
+
+- Update to latest git version (fate#311830)
+ Unfortunately versions have not been increased, latest tag
+ still is 1.0-pre3 (same as 1 year ago), therefore the date
+ is included in the version. I try to push maintainers to
+ increase the version number.
+- Invert logic of db prefill messages -> info if it works, silent
+ if not
+
+-------------------------------------------------------------------
@@ -11,0 +40,9 @@
+
+-------------------------------------------------------------------
+Tue Apr 6 15:15:45 CEST 2010 - trenn@suse.de
+
+- Update to latest git version having quite some fixes (no features):
+ - Fixed some memleaks and made app valgrind conform
+ - Fixed theoretical DoS attack (bnc#586241)
+ - Added support of additional cpus
+ - Fixed a lot of messages (in manpage, in triggers, in README, ...)
calling whatdependson for head-i586
Old:
----
mcelog-1.0pre3.6363f5b719e9.tar.bz2
New:
----
mcelog-1.0pre3.6e4e2a000124.tar.bz2
mcelog_invert_prefill_db_warning.patch
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Other differences:
------------------
++++++ mcelog.spec ++++++
--- /var/tmp/diff_new_pack.d44muK/_old 2011-09-05 16:38:26.000000000 +0200
+++ /var/tmp/diff_new_pack.d44muK/_new 2011-09-05 16:38:26.000000000 +0200
@@ -21,8 +21,8 @@
Name: mcelog
License: GPLv2+
Summary: Log Machine Check Events
-Version: 1.0pre3.6363f5b719e9
-Release: 6
+Version: 1.0pre3.6e4e2a000124
+Release: 1
AutoReqProv: on
ExclusiveArch: x86_64
BuildRequires: libesmtp-devel
@@ -33,6 +33,7 @@
Source2: mcelog.sysconfig
Source6: README.email_setup
Patch1: email.patch
+Patch2: mcelog_invert_prefill_db_warning.patch
Group: System/Monitoring
BuildRoot: %{_tmppath}/%{name}-%{version}-build
@@ -55,6 +56,7 @@
%prep
%setup
%patch1 -p1
+%patch2 -p1
%build
export SUSE_ASNEEDED=0
++++++ email.patch ++++++
--- /var/tmp/diff_new_pack.d44muK/_old 2011-09-05 16:38:26.000000000 +0200
+++ /var/tmp/diff_new_pack.d44muK/_new 2011-09-05 16:38:26.000000000 +0200
@@ -1,22 +1,11 @@
----
- Makefile | 10 +++
- email.c | 184 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
- email.h | 32 ++++++++++
- mcelog.c | 93 +++++++++++++++++++++++++++++++
- mcelog.h | 1
- msg.c | 8 ++
- 6 files changed, 325 insertions(+), 3 deletions(-)
-
-Index: mcelog-1.0pre3.6363f5b719e9/Makefile
-===================================================================
---- mcelog-1.0pre3.6363f5b719e9.orig/Makefile
-+++ mcelog-1.0pre3.6363f5b719e9/Makefile
+--- mcelog-1.0pre3.6e4e2a000124/Makefile
++++ mcelog-1.0pre3.6e4e2a000124/Makefile
@@ -1,3 +1,4 @@
+CONFIG_EMAIL := 1
CFLAGS := -g -Os
prefix := /usr
etcprefix :=
-@@ -34,7 +35,8 @@ OBJ := p4.o k8.o mcelog.o dmi.o tsc.o co
+@@ -34,7 +35,8 @@
client.o cache.o sysfs.o yellow.o page.o rbtree.o \
xeon75xx.o sandy-bridge.o
DISKDB_OBJ := diskdb.o dimm.o db.o
@@ -26,7 +15,7 @@
DOC := mce.pdf
ADD_DEFINES :=
-@@ -46,6 +48,12 @@ OBJ += ${DISKDB_OBJ}
+@@ -46,6 +48,12 @@
all: dbquery
endif
@@ -39,10 +28,8 @@
SRC := $(OBJ:.o=.c)
mcelog: ${OBJ}
-Index: mcelog-1.0pre3.6363f5b719e9/email.c
-===================================================================
---- /dev/null
-+++ mcelog-1.0pre3.6363f5b719e9/email.c
+--- mcelog-1.0pre3.6e4e2a000124/email.c
++++ mcelog-1.0pre3.6e4e2a000124/email.c
@@ -0,0 +1,184 @@
+#include
+#include
@@ -228,10 +215,8 @@
+ smtp_destroy_session (session);
+ return 0;
+}
-Index: mcelog-1.0pre3.6363f5b719e9/email.h
-===================================================================
---- /dev/null
-+++ mcelog-1.0pre3.6363f5b719e9/email.h
+--- mcelog-1.0pre3.6e4e2a000124/email.h
++++ mcelog-1.0pre3.6e4e2a000124/email.h
@@ -0,0 +1,32 @@
+#ifndef _MCELOG_EMAIL_H_
+#define _MCELOG_EMAIL_H_
@@ -265,19 +250,17 @@
+#endif
+
+#endif
-Index: mcelog-1.0pre3.6363f5b719e9/mcelog.c
-===================================================================
---- mcelog-1.0pre3.6363f5b719e9.orig/mcelog.c
-+++ mcelog-1.0pre3.6363f5b719e9/mcelog.c
+--- mcelog-1.0pre3.6e4e2a000124/mcelog.c
++++ mcelog-1.0pre3.6e4e2a000124/mcelog.c
@@ -37,6 +37,7 @@
#include
#include
#include
+#include
+ #include
#include "mcelog.h"
#include "paths.h"
- #include "k8.h"
-@@ -58,6 +59,9 @@
+@@ -59,6 +60,9 @@
#include "yellow.h"
#include "page.h"
@@ -287,7 +270,7 @@
enum cputype cputype = CPU_GENERIC;
char *logfn = LOG_DEV_FILENAME;
-@@ -69,7 +73,7 @@ static double cpumhz;
+@@ -70,7 +74,7 @@
static int cpumhz_forced;
int ascii_mode;
int dump_raw_ascii;
@@ -296,7 +279,7 @@
static char *inputfile;
char *processor_flags;
static int foreground;
-@@ -792,6 +796,7 @@ void usage(void)
+@@ -914,6 +918,7 @@
"--num-errors N Only process N errors (for testing)\n"
"--pidfile file Write pid of daemon into file\n"
);
@@ -304,15 +287,15 @@
diskdb_usage();
print_cputypes();
exit(1);
-@@ -855,6 +860,7 @@ static struct option options[] = {
- { "num-errors", 1, NULL, O_NUMERRORS },
+@@ -979,6 +984,7 @@
{ "pidfile", 1, NULL, O_PIDFILE },
+ { "debug-numerrors", 0, NULL, O_DEBUG_NUMERRORS }, /* undocumented: for testing */
DISKDB_OPTIONS
+ EMAIL_OPTIONS
{}
};
-@@ -1026,11 +1032,86 @@ static void drop_cred(void)
+@@ -1153,11 +1159,86 @@
}
}
@@ -399,15 +382,7 @@
if (recordlen == 0) {
Wprintf("no data in mce record\n");
-@@ -1041,19 +1122,23 @@ static void process(int fd, unsigned rec
- if (len < 0)
- err("read");
-
-- for (i = 0; (i < len / (int)recordlen) && !finish; i++) {
-+ for (i = 0; (i < len / (int)recordlen) && !finish; i++) {
- struct mce *mce = (struct mce *)(buf + i*recordlen);
- mce_prepare(mce);
- if (numerrors > 0 && --numerrors == 0)
+@@ -1177,12 +1258,16 @@
finish = 1;
if (!mce_filter(mce, recordlen))
continue;
@@ -424,7 +399,7 @@
flushlog();
}
-@@ -1161,6 +1246,8 @@ int main(int ac, char **av)
+@@ -1293,6 +1378,8 @@
exit(0);
} else if (diskdb_cmd(opt, ac, av)) {
exit(0);
@@ -433,7 +408,7 @@
} else if (opt == 0)
break;
}
-@@ -1169,6 +1256,8 @@ int main(int ac, char **av)
+@@ -1301,6 +1388,8 @@
logfn = av[optind++];
if (av[optind])
usage();
@@ -442,10 +417,18 @@
checkdmi();
general_setup();
-Index: mcelog-1.0pre3.6363f5b719e9/msg.c
-===================================================================
---- mcelog-1.0pre3.6363f5b719e9.orig/msg.c
-+++ mcelog-1.0pre3.6363f5b719e9/msg.c
+--- mcelog-1.0pre3.6e4e2a000124/mcelog.h
++++ mcelog-1.0pre3.6e4e2a000124/mcelog.h
+@@ -120,6 +120,7 @@
+ enum option_ranges {
+ O_COMMON = 500,
+ O_DISKDB = 1000,
++ O_EMAIL = 1500,
+ };
+
+ enum syslog_opt {
+--- mcelog-1.0pre3.6e4e2a000124/msg.c
++++ mcelog-1.0pre3.6e4e2a000124/msg.c
@@ -8,10 +8,13 @@
#include "mcelog.h"
#include "msg.h"
@@ -460,7 +443,7 @@
static char *output_fn;
int need_stdout(void)
-@@ -135,6 +138,11 @@ int Wprintf(char *fmt, ...)
+@@ -135,6 +138,11 @@
n = vfprintf(output_fh ? output_fh : stdout, fmt, ap);
va_end(ap);
}
@@ -472,15 +455,3 @@
return n;
}
-Index: mcelog-1.0pre3.6363f5b719e9/mcelog.h
-===================================================================
---- mcelog-1.0pre3.6363f5b719e9.orig/mcelog.h
-+++ mcelog-1.0pre3.6363f5b719e9/mcelog.h
-@@ -122,6 +122,7 @@ enum cputype {
- enum option_ranges {
- O_COMMON = 500,
- O_DISKDB = 1000,
-+ O_EMAIL = 1500,
- };
-
- enum syslog_opt {
++++++ mcelog-1.0pre3.6363f5b719e9.tar.bz2 -> mcelog-1.0pre3.6e4e2a000124.tar.bz2 ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0pre3.6363f5b719e9/CHANGES new/mcelog-1.0pre3.6e4e2a000124/CHANGES
--- old/mcelog-1.0pre3.6363f5b719e9/CHANGES 2010-10-17 20:33:36.000000000 +0200
+++ new/mcelog-1.0pre3.6e4e2a000124/CHANGES 2011-08-18 00:01:26.000000000 +0200
@@ -1,5 +1,7 @@
<newer changes first>
+Add Linux Kongress 2010 paper
+Add Sandy Bridge Support
Write pid file by default in daemon mode
Reopen log files on SIGUSR1 in daemon mode
Default --daemon mode to logging to /var/log/mcelog
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0pre3.6363f5b719e9/README new/mcelog-1.0pre3.6e4e2a000124/README
--- old/mcelog-1.0pre3.6363f5b719e9/README 2010-10-17 20:33:36.000000000 +0200
+++ new/mcelog-1.0pre3.6e4e2a000124/README 2011-08-18 00:01:26.000000000 +0200
@@ -30,6 +30,14 @@
The recommended mode is daemon, because several new functions (like page error
predictive failure analysis) require a continuously running daemon.
+Documentation:
+
+The primary reference documentation is the man pages.
+lk10-mcelog.pdf has an overview of the errors mcelog handles
+(originally from Linux Kongress 2010)
+mce.pdf is a very old paper describing the first releases of mcelog
+(some parts are obsolete)
+
For distributors:
Please install a init script by default that runs mcelog in daemon mode.
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0pre3.6363f5b719e9/dmi.c new/mcelog-1.0pre3.6e4e2a000124/dmi.c
--- old/mcelog-1.0pre3.6363f5b719e9/dmi.c 2010-10-17 20:33:36.000000000 +0200
+++ new/mcelog-1.0pre3.6e4e2a000124/dmi.c 2011-08-18 00:01:26.000000000 +0200
@@ -137,6 +137,52 @@
}
}
+static int get_efi_base_addr(size_t *address)
+{
+ FILE *efi_systab;
+ const char *filename;
+ char linebuf[64];
+ int ret = 0;
+
+ *address = 0; /* Prevent compiler warning */
+
+ /* Linux 2.6.7 and up: /sys/firmware/efi/systab */
+ filename = "/sys/firmware/efi/systab";
+ if ((efi_systab = fopen(filename, "r")) != NULL)
+ goto check_symbol;
+
+ /* Linux up to 2.6.6: /proc/efi/systab */
+ filename = "/proc/efi/systab";
+ if ((efi_systab = fopen(filename, "r")) != NULL)
+ goto check_symbol;
+
+ /* Failed to open EFI interfaces */
+ return ret;
+
+check_symbol:
+ while ((fgets(linebuf, sizeof(linebuf) - 1, efi_systab)) != NULL) {
+ char *addrp = strchr(linebuf, '=');
+ *(addrp++) = '\0';
+
+ if (strcmp(linebuf, "SMBIOS") == 0) {
+ *address = strtoul(addrp, NULL, 0);
+ ret = 1;
+ break;
+ }
+ }
+
+ if (fclose(efi_systab) != 0)
+ perror(filename);
+
+ if (!ret)
+ Eprintf("%s: SMBIOS entry point missing", filename);
+
+ if (verbose)
+ printf("%s: SMBIOS entry point at 0x%08lx\n", filename,
+ (unsigned long)*address);
+ return ret;
+}
+
int opendmi(void)
{
struct anchor *a, *abase;
@@ -146,6 +192,8 @@
unsigned corr;
int err = -1;
const int segsize = 0x10000;
+ size_t entry_point_addr = 0;
+ size_t length = 0;
if (entries)
return 0;
@@ -156,9 +204,40 @@
return -1;
}
- abase = mmap(NULL, segsize-1, PROT_READ, MAP_SHARED, memfd, 0xf0000);
+ /*
+ * On EFI-based systems, the SMBIOS Entry Point structure can be
+ * located by looking in the EFI Configuration Table.
+ */
+ if (get_efi_base_addr(&entry_point_addr)) {
+ size_t addr_start = round_down(entry_point_addr, pagesize);
+ size_t addr_end = round_up(entry_point_addr + 0x20, pagesize);
+ length = addr_end - addr_start;
+
+ /* mmap() the address of SMBIOS structure table entry point. */
+ abase = mmap(NULL, length, PROT_READ, MAP_SHARED, memfd,
+ addr_start);
+ if (abase == (struct anchor *)-1) {
+ Eprintf("Cannot mmap 0x%lx for efi mode: %s",
+ (unsigned long)entry_point_addr,
+ strerror(errno));
+ goto legacy;
+ }
+ a = (struct anchor*)((char*)abase + (entry_point_addr - addr_start));
+ goto fill_entries;
+ }
+
+legacy:
+ /*
+ * On non-EFI systems, the SMBIOS Entry Point structure can be located
+ * by searching for the anchor-string on paragraph (16-byte) boundaries
+ * within the physical memory address range 000F0000h to 000FFFFFh
+ */
+ length = segsize - 1;
+ abase = mmap(NULL, length, PROT_READ, MAP_SHARED, memfd, 0xf0000);
+
if (abase == (struct anchor *)-1) {
- Eprintf("Cannot mmap 0xf0000: %s", strerror(errno));
+ Eprintf("Cannot mmap 0xf0000 for legacy mode: %s",
+ strerror(errno));
goto out;
}
@@ -175,11 +254,13 @@
a = p;
+fill_entries:
if (verbose)
printf("DMI tables at %x, %u bytes, %u entries\n",
a->table, a->length, a->numentries);
corr = a->table - round_down(a->table, pagesize);
- entrieslen = round_up(a->length + pagesize, pagesize);
+ entrieslen = round_up(a->table + a->length, pagesize) -
+ round_down(a->table, pagesize);
entries = mmap(NULL, entrieslen,
PROT_READ, MAP_SHARED, memfd,
round_down(a->table, pagesize));
@@ -195,7 +276,7 @@
err = 0;
out_mmap:
- munmap(abase, 0xffff);
+ munmap(abase, length);
out:
close(memfd);
return err;
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0pre3.6363f5b719e9/input/xen new/mcelog-1.0pre3.6e4e2a000124/input/xen
--- old/mcelog-1.0pre3.6363f5b719e9/input/xen 1970-01-01 01:00:00.000000000 +0100
+++ new/mcelog-1.0pre3.6e4e2a000124/input/xen 2011-08-18 00:01:26.000000000 +0200
@@ -0,0 +1,3 @@
+(XEN) MCE: The hardware reports a non fatal, correctable incident occurred on CPU 1.
+(XEN) Bank 2: d400008000040150 at 182c480179cf0
+
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0pre3.6363f5b719e9/mcelog.c new/mcelog-1.0pre3.6e4e2a000124/mcelog.c
--- old/mcelog-1.0pre3.6363f5b719e9/mcelog.c 2010-10-17 20:33:36.000000000 +0200
+++ new/mcelog-1.0pre3.6e4e2a000124/mcelog.c 2011-08-18 00:01:26.000000000 +0200
@@ -37,6 +37,7 @@
#include
#include
#include
+#include
#include "mcelog.h"
#include "paths.h"
#include "k8.h"
@@ -80,14 +81,14 @@
static char logfile_default[] = LOG_FILE;
static char *pidfile = pidfile_default;
static char *logfile;
+static int debug_numerrors;
static void check_cpu(void);
static void disclaimer(void)
{
- Wprintf("HARDWARE ERROR. This is *NOT* a software problem!\n");
- Wprintf("Please contact your hardware vendor\n");
+ Wprintf("Hardware event. This is not a software error.\n");
}
static char *extended_bankname(unsigned bank)
@@ -192,6 +193,24 @@
*model += c.c.ext_model << 4;
}
+static u32 unparse_cpuid(unsigned family, unsigned model)
+{
+ union {
+ struct cpuid1 c;
+ u32 v;
+ } c;
+
+ c.c.family = family;
+ if (family >= 0xf) {
+ c.c.family = 0xf;
+ c.c.ext_family = family - 0xf;
+ }
+ c.c.model = model & 0xf;
+ if (family == 6 || family == 0xf)
+ c.c.ext_model = model >> 4;
+ return c.v;
+}
+
static char *cputype_name[] = {
[CPU_GENERIC] = "generic CPU",
[CPU_P6OLD] = "Intel PPro/P2/P3/old Xeon",
@@ -261,19 +280,35 @@
exit(1);
}
+static char *vendor[] = {
+ [0] = "Intel",
+ [1] = "Cyrix",
+ [2] = "AMD",
+ [3] = "UMC",
+ [4] = "vendor 4",
+ [5] = "Centaur",
+ [6] = "vendor 6",
+ [7] = "Transmeta",
+ [8] = "NSC"
+};
+
+static unsigned cpuvendor_to_num(char *name)
+{
+ unsigned i;
+ unsigned v;
+ char *end;
+
+ v = strtoul(name, &end, 0);
+ if (end > name)
+ return v;
+ for (i = 0; i < NELE(vendor); i++)
+ if (!strcmp(name, vendor[i]))
+ return i;
+ return 0;
+}
+
static char *cpuvendor_name(u32 cpuvendor)
{
- static char *vendor[] = {
- [0] = "Intel",
- [1] = "Cyrix",
- [2] = "AMD",
- [3] = "UMC",
- [4] = "vendor 4",
- [5] = "Centaur",
- [6] = "vendor 6",
- [7] = "Transmeta",
- [8] = "NSC"
- };
return (cpuvendor < NELE(vendor)) ? vendor[cpuvendor] : "Unknown vendor";
}
@@ -318,7 +353,7 @@
static void mce_prepare(struct mce *m)
{
mce_cpuid(m);
- if (m->time)
+ if (!m->time)
m->time = time(NULL);
}
@@ -409,10 +444,6 @@
CPRINT("SOCKETID %u", socketid);
CPRINT("APICID %u", apicid);
CPRINT("MCGCAP %#llx", mcgcap);
- if (m->aux0)
- CPRINT("AUX0 %#llx", aux0);
- if (m->aux1)
- CPRINT("AUX1 %#llx", aux1);
#undef CPRINT
Wprintf("\n");
}
@@ -491,8 +522,21 @@
return s;
}
+static char *skip_syslog(char *s)
+{
+ char *p;
+
+ /* Handle syslog output */
+ p = strstr(s, "mcelog: ");
+ if (p)
+ return p + sizeof("mcelog: ") - 1;
+ return s;
+}
+
static char *skipgunk(char *s)
{
+ s = skip_syslog(s);
+
s = skipspace(s);
if (*s == '<') {
s += strcspn(s, ">");
@@ -508,10 +552,42 @@
return skipspace(s);
}
+static inline int urange(unsigned val, unsigned lo, unsigned hi)
+{
+ return val >= lo && val <= hi;
+}
+
+static int is_short(char *name)
+{
+ return strlen(name) == 3 &&
+ isupper(name[0]) &&
+ islower(name[1]) &&
+ islower(name[2]);
+}
+
+static unsigned skip_date(char *s)
+{
+ unsigned day, hour, min, year, sec;
+ char dayname[11];
+ char month[11];
+ unsigned next;
+
+ if (sscanf(s, "%10s %10s %u %u:%u:%u %u%n",
+ dayname, month, &day, &hour, &min, &sec, &year, &next) != 7)
+ return 0;
+ if (!is_short(dayname) || !is_short(month) || !urange(day, 1, 31) ||
+ !urange(hour, 0, 24) || !urange(min, 0, 59) || !urange(sec, 0, 59) ||
+ year < 1900)
+ return 0;
+ return next;
+}
+
static void dump_mce_final(struct mce *m, char *symbol, int missing, int recordlen,
int dseen)
{
m->finished = 1;
+ if (m->cpuid)
+ mce_cpuid(m);
if (!dump_raw_ascii) {
if (!dseen)
disclaimer();
@@ -525,6 +601,23 @@
flushlog();
}
+static char *skip_patterns[] = {
+ "MCA:*",
+ "MCi_MISC register valid*",
+ "MC? status*",
+ "Unsupported new Family*",
+ "Kernel does not support page offline interface",
+ NULL
+};
+
+static int match_patterns(char *s, char **pat)
+{
+ for (; *pat; pat++)
+ if (!fnmatch(*pat, s, 0))
+ return 0;
+ return 1;
+}
+
#define FIELD(f) \
if (recordlen < endof_field(struct mce, f)) \
recordlen = endof_field(struct mce, f)
@@ -560,12 +653,14 @@
symbol[0] = '\0';
while (next > 0 || getdelim(&line, &linelen, '\n', inf) > 0) {
int n = 0;
+ char *start;
s = next > 0 ? s + next : line;
s = skipgunk(s);
+ start = s;
next = 0;
- if (!strncmp(s, "CPU", 3)) {
+ if (!strncmp(s, "CPU ", 4)) {
unsigned cpu = 0, bank = 0;
n = sscanf(s,
"CPU %u: Machine Check Exception: %16Lx Bank %d: %016Lx%n",
@@ -663,6 +758,8 @@
missing++;
else
FIELD(time);
+
+ next += skip_date(s + next);
}
else if (!strncmp(s, "MCGCAP", 6)) {
if ((n = sscanf(s, "MCGCAP %llx%n", &m.mcgcap, &next)) != 1)
@@ -682,26 +779,50 @@
else
FIELD(socketid);
}
- else if (!strncmp(s, "AUX0", 4)) {
- if ((n = sscanf(s, "AUX0 %llx%n", &m.aux0, &next)) != 1)
- missing++;
- else
- FIELD(aux0);
- }
- else if (!strncmp(s, "AUX1", 4)) {
- if ((n = sscanf(s, "AUX1 %llx%n", &m.aux1, &next)) != 1)
+ else if (!strncmp(s, "CPUID", 5)) {
+ unsigned fam, mod;
+ char vendor[31];
+
+ if ((n = sscanf(s, "CPUID Vendor %30s Family %u Model %u\n",
+ vendor, &fam, &mod)) < 3)
missing++;
- else
- FIELD(aux1);
- }
+ else {
+ m.cpuvendor = cpuvendor_to_num(vendor);
+ m.cpuid = unparse_cpuid(fam, mod);
+ FIELD(cpuid);
+ FIELD(cpuvendor);
+ }
+ }
else if (strstr(s, "HARDWARE ERROR"))
disclaimer_seen = 1;
+ else if (!strncmp(s, "(XEN)", 5)) {
+ char *w;
+ unsigned bank, cpu;
+
+ if (strstr(s, "The hardware reports a non fatal, correctable incident occurred")) {
+ w = strstr(s, "CPU");
+ if (w && sscanf(w, "CPU %d", &cpu)) {
+ m.cpu = cpu;
+ FIELD(cpu);
+ }
+ } else if ((n = sscanf(s, "(XEN) Bank %d: %llx at %llx",
+ &bank, &m.status, &m.addr) >= 1)) {
+ m.bank = bank;
+ FIELD(bank);
+ if (n >= 2)
+ FIELD(status);
+ if (n >= 3)
+ FIELD(addr);
+ }
+ }
+ else if (!match_patterns(s, skip_patterns))
+ n = 0;
else {
s = skipspace(s);
if (*s && data)
dump_mce_final(&m, symbol, missing, recordlen, disclaimer_seen);
if (!dump_raw_ascii)
- Wprintf("%s", line);
+ Wprintf("%s", start);
if (*s && data)
goto restart;
}
@@ -777,6 +898,7 @@
"--cpumhz MHZ Set CPU Mhz to decode time (output unreliable, not needed on new kernels)\n"
"--raw (with --ascii) Dump in raw ASCII format for machine processing\n"
"--daemon Run in background waiting for events (needs newer kernel)\n"
+"--ignorenodev Exit silently when the device cannot be opened\n"
"--file filename With --ascii read machine check log from filename instead of stdin\n"
"--syslog Log decoded machine checks in syslog (default stdout or syslog for daemon)\n"
"--syslog-error Log decoded machine checks in syslog with error level\n"
@@ -823,6 +945,7 @@
O_FOREGROUND,
O_NUMERRORS,
O_PIDFILE,
+ O_DEBUG_NUMERRORS,
};
static struct option options[] = {
@@ -854,6 +977,7 @@
{ "client", 0, NULL, O_CLIENT },
{ "num-errors", 1, NULL, O_NUMERRORS },
{ "pidfile", 1, NULL, O_PIDFILE },
+ { "debug-numerrors", 0, NULL, O_DEBUG_NUMERRORS }, /* undocumented: for testing */
DISKDB_OPTIONS
{}
};
@@ -955,6 +1079,9 @@
case O_CONFIG_FILE:
/* parsed in config.c */
break;
+ case O_DEBUG_NUMERRORS:
+ debug_numerrors = 1;
+ break;
case 0:
break;
default:
@@ -1038,8 +1165,10 @@
}
len = read(fd, buf, recordlen * loglen);
- if (len < 0)
- err("read");
+ if (len < 0) {
+ SYSERRprintf("mcelog read");
+ return;
+ }
for (i = 0; (i < len / (int)recordlen) && !finish; i++) {
struct mce *mce = (struct mce *)(buf + i*recordlen);
@@ -1057,6 +1186,9 @@
flushlog();
}
+ if (debug_numerrors && numerrors <= 0)
+ finish = 1;
+
if (recordlen > sizeof(struct mce)) {
Eprintf("warning: %lu bytes ignored in each record\n",
(unsigned long)recordlen - sizeof(struct mce));
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0pre3.6363f5b719e9/mcelog.conf new/mcelog-1.0pre3.6e4e2a000124/mcelog.conf
--- old/mcelog-1.0pre3.6363f5b719e9/mcelog.conf 2010-10-17 20:33:36.000000000 +0200
+++ new/mcelog-1.0pre3.6e4e2a000124/mcelog.conf 2011-08-18 00:01:26.000000000 +0200
@@ -117,7 +117,7 @@
[socket]
# Memory error accounting per socket
-socket-tracing-enabled = yes
+socket-tracking-enabled = yes
# Threshold and trigger for uncorrected memory errors on a socket
# mem-uc-error-trigger = socket-memory-error-trigger
mem-uc-error-threshold = 100 / 24h
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0pre3.6363f5b719e9/mcelog.h new/mcelog-1.0pre3.6e4e2a000124/mcelog.h
--- old/mcelog-1.0pre3.6363f5b719e9/mcelog.h 2010-10-17 20:33:36.000000000 +0200
+++ new/mcelog-1.0pre3.6e4e2a000124/mcelog.h 2011-08-18 00:01:26.000000000 +0200
@@ -31,8 +31,6 @@
__u32 socketid; /* CPU socket ID */
__u32 apicid; /* CPU initial apic ID */
__u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */
- __u64 aux0;
- __u64 aux1;
};
#define X86_VENDOR_INTEL 0
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0pre3.6363f5b719e9/p4.c new/mcelog-1.0pre3.6e4e2a000124/p4.c
--- old/mcelog-1.0pre3.6363f5b719e9/p4.c 2010-10-17 20:33:36.000000000 +0200
+++ new/mcelog-1.0pre3.6e4e2a000124/p4.c 2011-08-18 00:01:26.000000000 +0200
@@ -257,6 +257,8 @@
if (status & MCI_STATUS_UC)
Wprintf("Uncorrected error\n");
+ else
+ Wprintf("Corrected error\n");
if (status & MCI_STATUS_EN)
Wprintf("Error enabled\n");
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0pre3.6363f5b719e9/sysfs.c new/mcelog-1.0pre3.6e4e2a000124/sysfs.c
--- old/mcelog-1.0pre3.6363f5b719e9/sysfs.c 2010-10-17 20:33:36.000000000 +0200
+++ new/mcelog-1.0pre3.6e4e2a000124/sysfs.c 2011-08-18 00:01:26.000000000 +0200
@@ -20,6 +20,7 @@
#include
#include
#include
+#include
#include
#include
#include "mcelog.h"
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0pre3.6363f5b719e9/tests/Makefile new/mcelog-1.0pre3.6e4e2a000124/tests/Makefile
--- old/mcelog-1.0pre3.6363f5b719e9/tests/Makefile 2010-10-17 20:33:36.000000000 +0200
+++ new/mcelog-1.0pre3.6e4e2a000124/tests/Makefile 2011-08-18 00:01:26.000000000 +0200
@@ -7,6 +7,7 @@
./test page "${DEBUG}"
./test memdb "${DEBUG}"
./test socket "${DEBUG}"
+ ./test pfa "${DEBUG}"
clean:
rm -f */*log
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0pre3.6363f5b719e9/tests/pfa/PFA_test_howto new/mcelog-1.0pre3.6e4e2a000124/tests/pfa/PFA_test_howto
--- old/mcelog-1.0pre3.6363f5b719e9/tests/pfa/PFA_test_howto 1970-01-01 01:00:00.000000000 +0100
+++ new/mcelog-1.0pre3.6e4e2a000124/tests/pfa/PFA_test_howto 2011-08-18 00:01:26.000000000 +0200
@@ -0,0 +1,213 @@
+This README file describes the steps of testing PFA (Predictive Failure
+Analysis) functionality of mcelog under Linux which is facilitated by using
+mce-inject.
+
+PFA is a RAS feature. A PFA-capable system can monitor corrected hardware
+errors and take corrective action before uncorrected errors happen. For
+example, PFA should offline a memory page if more than 10 errors per hour on a
+memory page are found. It mostly focuses on memory errors.
+
+0. Preparation work
+*******************************
+- Install the Linux kernel with full MCE injection support
+
+ Make sure the following configuration options are enabled:
+
+ CONFIG_X86_MCE=y
+ CONFIG_X86_MCE_INTEL=y
+ CONFIG_X86_MCE_INJECT=y or CONFIG_X86_MCE_INJECT=m
+
+- Build mcelog and install in /usr/bin (or rather first in your $PATH)
+
+ # cd $HOME/mcelog
+ # make
+ # make install
+
+- Get mce-inject git version from
+ git://git.kernel.org/pub/scm/utils/cpu/mce/mce-inject.git
+ and install in /usr/bin (or rather first in your $PATH)
+
+ # cd $HOME
+ # git clone git://git.kernel.org/pub/scm/utils/cpu/mce/mce-inject.git
+ # cd mce-inject
+ # make
+ # make install
+
+- Install the page-types tool, which ships with the Linux kernel source
+ (2.6.32 or newer).
+
+ # cd $KERNEL_SRC/Documentation/vm/
+ # gcc -o page-types page-types.c
+ # cp page-types /usr/bin/
+
+
+
+1. Start PFA test
+*******************************
+
+The PFA test cases in mcelog are in the following directories:
+
+- mcelog/tests/pfa #page level pfa test cases
+
+You can run all PFA test cases simply by typing:
+
+ # cd mcelog/tests
+ # ./test pfa
+
+All the test cases in the specified subdirectory will be run and the test
+results will be saved in files:
+
+ mcelog/tests/pfa/results
+
+When you examine the content of the file, you will find such results:
+
+- if one case passed:
+
+ "*.conf: triggers trigger as expected"
+
+- if one case failed
+
+ "*.conf: triggers did not trigger as expected: $expected_num !=
+ $actual_got_num"
+
+you can refer to the "*.log" file in the specific subdirectory for the log saved
+by mcelog.
+
+
+2. Modify or add new test cases
+*******************************
+
+If you want to modify the existing test cases or add your own case, the
+following description gives a more detailed look that might help:
+
+- To add or run a page level PFA test, you first need a configuration file in
+
+ mcelog/tests/pfa/
+
+ directory defining mainly the threshold and trigger actions you want, then the
+ number of trigger events you expect to happen.
+
+
+- A typical configuration file looks as follows:
+
+ mcelog/tests/pfa/page-account.conf
+ ----------------------------------------------------------
+ # trigger: 5
+ # num-errors = 3
+
+ [page]
+ memory-ce-threshold = 2 / 1h
+ memory-ce-trigger = ../trigger
+ #memory-ce-action = off|account|soft|hard|soft-then-hard
+ memory-ce-action = account
+
+ [trigger]
+ directory = .
+ ------------------------------------------------------------
+
+ - “# trigger: 5”
+
+ Specify the count number of triggers you expect to get based
+ on the threshold defined in "memory-ce-threshold" described below.
+
+ mcelog/tests/test harness in the end will compare this count number with
+ the number of actual trigger events got from the log to verify the test
+ results.
+
+ Please note that the "#" is needed for the mcelog/tests/test harness to read this line.
+
+
+ - "# num-errors = 3"
+
+ "num-errors" is a mcelog configuration option. If uncommented, it makes
+ mcelog stop processing the stored machine check records in the mcelog
+ buffer read from /dev/mcelog and return (for debugging purposes) when the
+ number is reached. Note that:
+ - any unprocessed records still left in the buffer at that point
+ will be ignored
+ - if there are not enough records in /dev/mcelog, the program
+ will not return.
+
+ If not set, as in this example, mcelog returns only after it has finished
+ processing all the records.
+
+ When you are not sure what should be the correct num-errors number, it is
+ not recommended to set this option.
+
+
+ - "memory-ce-threshold = 2 / 1h"
+
+ Define the threshold for memory corrected errors per page. Here it means
+ that if 2 corrected errors are detected in one page within 1 hour,
+ the trigger defined in “memory-ce-trigger” described below will be called.
+
+
+ - "memory-ce-trigger = ../trigger"
+
+ Specify the trigger to run when the threshold is exceeded.
+ Here mcelog/tests/trigger will be called, which simply prints some text for
+ testing.
+
+
+ - "memory-ce-action = account"
+
+ Specify the internal action mcelog takes when a memory corrected error
+ threshold is exceeded.
+
+ This is done in addition to executing the trigger script if available.
+ - off: No action
+ - account: only account errors
+ - soft: try to soft-offline page without killing any processes
+ This requires an up-to-date kernel. Might not be successful
+ - hard: try to hard-offline page without killing any processes
+ This requires an up-to-date kernel. Might not be successful
+ - soft-then-hard: First try to soft offline, then try hard offlining
+
+ The offline action is based on the sysfs_write action of:
+ /sys/devices/system/memory/soft_offline_page
+ or
+ /sys/devices/system/memory/hard_offline_page
+
+ Please note that offlining does not work for all pages, but only for pages
+ in the Linux page cache or free pages. And if an offline action (soft, hard,
+ or soft-then-hard) is chosen in "memory-ce-action", the trigger will fire
+ only once for each page, no matter whether the offline action succeeded or
+ failed.
+
+
+3. Influencing factors of the trigger results
+*******************************
+
+The correct expectation of triggers depends on 4 factors:
+
+- The count number of trigger expectation defined in "pfa/*.conf" file
+
+ As described above, in our example the trigger expectation is defined to be 5
+ times, which means the "mcelog/tests/pfa/inject" script will randomly choose 5
+ free pages to inject in turn and do the MCE injection on each page
+ $memory-ce-threshold times.
+
+- The threshold defined in “memory-ce-threshold” of "pfa/*.conf" file
+
+ As described above, for “memory-ce-threshold = 2 / 1h” in our example,
+ "mcelog/tests/pfa/inject" script will do the MCE injection
+ 2 times continuously for each chosen page to make the trigger happen.
+
+- The “memory-ce-action” defined in "pfa/*.conf" file
+
+ As described above, if the “memory-ce-action” is soft/hard/soft-then-hard,
+ then no matter whether the offlining action succeeds, the triggers_per_page
+ calculation changes to:
+
+ triggers_per_page = INT(injections_per_page / memory-ce-threshold) >= 1? 1:0
+
+- The actual number of records read out if "num-errors" defined in "pfa/*.conf"
+ file
+
+ As described above, mcelog will just read out $num-errors records, that means:
+
+ readout_total_injections = MIN(num-errors, injection_per_page *
+ actual-inject_pages)
+
+ this might affect the trigger counts for some last injected pages since not
+ all the machine check records from /dev/mcelog are processed and counted.
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0pre3.6363f5b719e9/tests/pfa/inject new/mcelog-1.0pre3.6e4e2a000124/tests/pfa/inject
--- old/mcelog-1.0pre3.6363f5b719e9/tests/pfa/inject 1970-01-01 01:00:00.000000000 +0100
+++ new/mcelog-1.0pre3.6e4e2a000124/tests/pfa/inject 2011-08-18 00:01:26.000000000 +0200
@@ -0,0 +1,79 @@
+#!/bin/sh
+PATH=$PATH:$(pwd)/../../../mce-inject
+
+page_type="slab buddy mmap anonymous nopage huge"
+
+function get_free_page()
+{
+ local rand=0
+ cnt=`page-types -Nl -b $1 | tee page_$1 | wc -l`
+ if [ $cnt -gt 1 ]; then
+ rand=$(expr $RANDOM % $cnt + 1)
+ if [ ${rand} -eq 1 ]; then
+ # skip the title line of output
+ ((rand++))
+ fi
+ page=`awk -v line=${rand} 'NR == line {print $1}' page_$1`
+ echo 0x${page}
+ else
+ echo 0
+ fi
+ rm -f page_$1
+}
+
+if [ "$1" = "" ]; then
+ echo "usage $0 conf_file"
+ exit 1
+fi
+
+if [ ! -f $1 ]; then
+ echo "configure file not exists: $1"
+ exit 1
+fi
+
+which page-types > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "please install page-types tool first"
+ exit 1
+fi
+
+echo "+++ start the injection for $1 +++"
+
+NUMT="$(awk '/# trigger: / { print $3}' $1)"
+THRESHOLD="$(awk '/memory-ce-threshold = / { print $3}' $1)"
+
+if [ "$NUMT" -eq 0 ]; then
+ echo "No injection will be done!"
+ exit 0
+fi
+
+if [ "$THRESHOLD" -eq 0 ]; then
+ echo "Threshold should not be 0!"
+ exit 1
+fi
+
+trigger_cnt=0
+while [ "$trigger_cnt" -lt "$NUMT" ]; do
+ for i in ${page_type}; do
+ P=$(get_free_page $i)
+ if [ "$P" = "0" ]; then
+ continue
+ fi
+ if [ "$trigger_cnt" -ge "$NUMT" ]; then
+ exit 0;
+ fi
+ inject_cnt=0
+ while [ "$inject_cnt" -lt "$THRESHOLD" ]; do
+ echo "inject for page type $i at physical address ${P}000 [ NO. $inject_cnt ]"
+ ../../input/GENPAGE $P | mce-inject
+ inject_cnt=$(($inject_cnt+1))
+ done
+ if [ "$inject_cnt" -eq "$THRESHOLD" ]; then
+ trigger_cnt=$(($trigger_cnt+1))
+ fi
+ done
+ if [ "$trigger_cnt" -eq 0 ]; then
+ echo "None available free pages found!"
+ exit 1
+ fi
+done
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0pre3.6363f5b719e9/tests/pfa/page-account.conf new/mcelog-1.0pre3.6e4e2a000124/tests/pfa/page-account.conf
--- old/mcelog-1.0pre3.6363f5b719e9/tests/pfa/page-account.conf 1970-01-01 01:00:00.000000000 +0100
+++ new/mcelog-1.0pre3.6e4e2a000124/tests/pfa/page-account.conf 2011-08-18 00:01:26.000000000 +0200
@@ -0,0 +1,12 @@
+# trigger: 1
+#num-errors = 3
+
+[page]
+memory-ce-threshold = 2 / 1h
+memory-ce-trigger = ../trigger
+#memory-ce-action = off|account|soft|hard|soft-then-hard
+memory-ce-action = account
+
+[trigger]
+directory = .
+
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0pre3.6363f5b719e9/tests/pfa/page-hard.conf new/mcelog-1.0pre3.6e4e2a000124/tests/pfa/page-hard.conf
--- old/mcelog-1.0pre3.6363f5b719e9/tests/pfa/page-hard.conf 1970-01-01 01:00:00.000000000 +0100
+++ new/mcelog-1.0pre3.6e4e2a000124/tests/pfa/page-hard.conf 2011-08-18 00:01:26.000000000 +0200
@@ -0,0 +1,12 @@
+# trigger: 1
+#num-errors = 3
+
+[page]
+memory-ce-threshold = 2 / 1h
+memory-ce-trigger = ../trigger
+#memory-ce-action = off|account|soft|hard|soft-then-hard
+memory-ce-action = hard
+
+[trigger]
+directory = .
+
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0pre3.6363f5b719e9/tests/pfa/page-soft-then-hard.conf new/mcelog-1.0pre3.6e4e2a000124/tests/pfa/page-soft-then-hard.conf
--- old/mcelog-1.0pre3.6363f5b719e9/tests/pfa/page-soft-then-hard.conf 1970-01-01 01:00:00.000000000 +0100
+++ new/mcelog-1.0pre3.6e4e2a000124/tests/pfa/page-soft-then-hard.conf 2011-08-18 00:01:26.000000000 +0200
@@ -0,0 +1,12 @@
+# trigger: 1
+#num-errors = 3
+
+[page]
+memory-ce-threshold = 1 / 1h
+memory-ce-trigger = ../trigger
+#memory-ce-action = off|account|soft|hard|soft-then-hard
+memory-ce-action = soft-then-hard
+
+[trigger]
+directory = .
+
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0pre3.6363f5b719e9/tests/pfa/page-soft.conf new/mcelog-1.0pre3.6e4e2a000124/tests/pfa/page-soft.conf
--- old/mcelog-1.0pre3.6363f5b719e9/tests/pfa/page-soft.conf 1970-01-01 01:00:00.000000000 +0100
+++ new/mcelog-1.0pre3.6e4e2a000124/tests/pfa/page-soft.conf 2011-08-18 00:01:26.000000000 +0200
@@ -0,0 +1,12 @@
+# trigger: 1
+#num-errors = 3
+
+[page]
+memory-ce-threshold = 2 / 1h
+memory-ce-trigger = ../trigger
+#memory-ce-action = off|account|soft|hard|soft-then-hard
+memory-ce-action = soft
+
+[trigger]
+directory = .
+
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0pre3.6363f5b719e9/tests/test new/mcelog-1.0pre3.6e4e2a000124/tests/test
--- old/mcelog-1.0pre3.6363f5b719e9/tests/test 2010-10-17 20:33:36.000000000 +0200
+++ new/mcelog-1.0pre3.6e4e2a000124/tests/test 2011-08-18 00:01:26.000000000 +0200
@@ -42,8 +42,8 @@
for conf in `ls *.conf`
do
log=`echo $conf | sed "s/conf/log/g"`
- ./inject
- $D ../../mcelog --foreground --daemon --config $conf --logfile $log
+ ./inject $conf
+ $D ../../mcelog --foreground --daemon --debug-numerrors --config $conf --logfile $log >> result
# let triggers finish
sleep 1
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0pre3.6363f5b719e9/triggers/cache-error-trigger new/mcelog-1.0pre3.6e4e2a000124/triggers/cache-error-trigger
--- old/mcelog-1.0pre3.6363f5b719e9/triggers/cache-error-trigger 2010-10-17 20:33:36.000000000 +0200
+++ new/mcelog-1.0pre3.6e4e2a000124/triggers/cache-error-trigger 2011-08-18 00:01:26.000000000 +0200
@@ -19,7 +19,7 @@
#
EXIT=0
-for i in $CPUS_AFFECTED ; do
+for i in $AFFECTED_CPUS ; do
if [ $i = 0 ] ; then
logger -s -p daemon.warn -t mcelog "Not offlining CPU 0"
EXIT=1
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0pre3.6363f5b719e9/xeon75xx.c new/mcelog-1.0pre3.6e4e2a000124/xeon75xx.c
--- old/mcelog-1.0pre3.6363f5b719e9/xeon75xx.c 2010-10-17 20:33:36.000000000 +0200
+++ new/mcelog-1.0pre3.6e4e2a000124/xeon75xx.c 2011-08-18 00:01:26.000000000 +0200
@@ -25,142 +25,15 @@
#include "mcelog.h"
#include "xeon75xx.h"
-/* DIMM description */
-struct aux_pfa_dimm {
- u8 fbd_channel_id;
- u8 ddr_channel_id;
- u8 ddr_dimm_id;
- u8 ddr_rank_id;
- u8 ddr_dimm_bank_id;
- u8 ddr_dimm_row_id;
- u8 ddr_dimm_column_id;
- u8 valid;
-} __attribute__((packed));
-
-enum {
- MCE_BANK_MBOX0 = 8,
- MCE_BANK_MBOX1 = 9,
-
- DIMM_VALID_FBD_CHAN = (1 << 0),
- DIMM_VALID_DDR_CHAN = (1 << 1),
- DIMM_VALID_DDR_DIMM = (1 << 2),
- DIMM_VALID_DDR_RANK = (1 << 3),
- DIMM_VALID_DIMM_BANK = (1 << 4),
- DIMM_VALID_DIMM_ROW = (1 << 5),
- DIMM_VALID_DIMM_COLUMN = (1 << 6),
- DIMM_VALID_ALL = 0x7f,
-};
-
-static struct id {
- char *name;
- unsigned offset;
- unsigned valid;
- enum {
- NL = 1<<0,
- IND = 1<<1,
- } flags;
-} ids[] = {
-#define V(n,f,b) n, offsetof(struct aux_pfa_dimm, f), b
- { V("FBD-Channel", fbd_channel_id, DIMM_VALID_FBD_CHAN) },
- { V("DDR-Channel", ddr_channel_id, DIMM_VALID_DDR_CHAN) },
- { V("DDR-DIMM", ddr_dimm_id, DIMM_VALID_DDR_DIMM) },
- { V("DDR-Rank", ddr_rank_id, DIMM_VALID_DDR_RANK) },
- { V("DIMM-Bank", ddr_dimm_bank_id, DIMM_VALID_DIMM_BANK), NL|IND },
- { V("DIMM-Row", ddr_dimm_row_id, DIMM_VALID_DIMM_ROW) },
- { V("DIMM-Column", ddr_dimm_column_id, DIMM_VALID_DIMM_COLUMN), NL },
-#undef V
- {}
-};
-
-#if 0
-/* Use for memdb channel output */
-
-static int opt_number(char *buf, int val)
-{
- if (val == (u8)-1) {
- *buf++ = '?';
- return 1;
- }
- return sprintf(buf, "%u", val);
-}
-
-static void print_channel(int channel, char *buf)
-{
- int n;
- n = opt_number(buf, ((channel) >> 8) & 0xff);
- buf[n++] = ':';
- opt_number(buf + n, channel & 0xff);
-}
-#endif
-
-static void decode_dimm(struct aux_pfa_dimm *d, int *channel, int *dimm)
-{
- if (d->valid == 0)
- return;
- if (d->valid & DIMM_VALID_DDR_DIMM)
- *dimm = d->ddr_dimm_id;
- if (d->valid & (DIMM_VALID_DDR_CHAN|DIMM_VALID_FBD_CHAN)) {
- int fbd_chan = (d->valid & DIMM_VALID_FBD_CHAN) ?
- d->fbd_channel_id : (u8)-1;
- int ddr_chan = (d->valid & DIMM_VALID_DDR_CHAN) ?
- d->ddr_channel_id : (u8)-1;
- *channel = (fbd_chan << 8) | ddr_chan;
- }
-}
-
-static void print_dimm(int num, struct aux_pfa_dimm *d)
-{
- struct id *id;
- int indent;
- int k;
-
- if (d->valid == 0)
- return;
-
- k = indent = Wprintf("DIMM %d: ", num);
- for (id = ids; id->name; id++) {
- if (d->valid & id->valid)
- k += Wprintf("%s %u ", id->name, *((u8*)d + id->offset));
- if (k > 0) {
- if (id->flags & NL) {
- Wprintf("\n");
- k = 0;
- }
- if (id->flags & IND)
- Wprintf("%.*s", indent, "");
- }
- }
-}
-
-static int is_mem_err(struct mce *m, unsigned msize)
-{
- if (msize < offsetof(struct mce, aux1) + sizeof(u64))
- return 0;
- if (m->bank != MCE_BANK_MBOX0 && m->bank != MCE_BANK_MBOX1)
- return 0;
- return 1;
-}
-
-union d {
- struct aux_pfa_dimm d;
- u64 val;
-};
+/* This used to decode the old xeon 75xx memory error aux format. But that has never
+ been merged into mainline kernels, so removed it again. */
void
xeon75xx_memory_error(struct mce *m, unsigned msize, int *channel, int *dimm)
{
- if (!is_mem_err(m, msize))
- return;
-
- decode_dimm(&((union d *)&m->aux0)->d, &channel[0], &dimm[0]);
- decode_dimm(&((union d *)&m->aux1)->d, &channel[1], &dimm[1]);
}
void xeon75xx_decode_dimm(struct mce *m, unsigned msize)
{
- if (!is_mem_err(m, msize))
- return;
- print_dimm(0, &((union d *)&m->aux0)->d);
- print_dimm(1, &((union d *)&m->aux1)->d);
}
++++++ mcelog_invert_prefill_db_warning.patch ++++++
---
memdb.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
Index: mcelog-1.0.2011.06.08/memdb.c
===================================================================
--- mcelog-1.0.2011.06.08.orig/memdb.c
+++ mcelog-1.0.2011.06.08/memdb.c
@@ -417,11 +417,11 @@ void prefill_memdb(void)
md->location = xstrdup(bl);
md->name = xstrdup(dmi_getstring(&d->header, d->device_locator));
}
- if (missed) {
- static int warned;
- if (!warned) {
- Eprintf("failed to prefill DIMM database from DMI data");
- warned = 1;
+ if (!missed) {
+ static int db_rill_msg;
+ if (!db_rill_msg) {
+ Gprintf("Prefilled DIMM database from DMI data");
+ db_rill_msg = 1;
}
}
}
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Remember to have fun...
--
To unsubscribe, e-mail: opensuse-commit+unsubscribe@opensuse.org
For additional commands, e-mail: opensuse-commit+help@opensuse.org