https://bugzilla.novell.com/show_bug.cgi?id=206550
Summary: Gfortran library uses memory after free for selected IO
operations
Product: SUSE LINUX 10.0
Version: Final
Platform: 64bit
OS/Version: SLES 10
Status: NEW
Severity: Major
Priority: P5 - None
Component: Development
AssignedTo: pth@novell.com
ReportedBy: TRimmer@silverstorm.com
QAContact: qa@suse.de
As part of testing our MPI product over SLES10, I discovered this bug. I have
root caused it to a problem in the libgfortran IO subsystem. The sample
program included below works fine when built using SLES9 and g77. However on
gfortran in SLES10, the bug is exhibited.
I am using: gcc-fortran-4.1.0-28.4
I have tarballs of a simplified failing program which demonstrates the problem
in a much simpler environment. I am enclosing the information from those
tarballs below. Let me know if there is a better place to provide the
tarballs.
Here is the failing Fortran program (ep.f):
! Minimal reproducer: formats a double precision value into a character
! variable using an internal WRITE.
program test
implicit none
! 14-character buffer that receives the formatted text
character*14 size
double precision m
m=21.0
! Internal write of 2**(m+1) into `size` using an F13.0 edit descriptor
write(size, '(f13.0)' ) 2.d0**(m+1)
end
To discover the bug, this must be linked with the following C code which hooks
malloc and alters memory when freed (dreg.c):
/*
* Copyright (C) 1999-2001 The Regents of the University of California
* (through E.O. Lawrence Berkeley National Laboratory), subject to
* approval by the U.S. Department of Energy.
*
* Use of this software is under license. The license agreement is included
* in the file MVICH_LICENSE.TXT.
*
* Developed at Berkeley Lab as part of MVICH.
*
* Authors: Bill Saphir
* Michael Welcome
*/
/* Copyright (c) 2002-2005, The Ohio State University. All rights
* reserved.
*
* This file is part of the MVAPICH software package developed by the
* team members of The Ohio State University's Network-Based Computing
* Laboratory (NBCL), headed by Professor Dhabaleswar K. (DK) Panda.
*
* For detailed copyright and licencing information, please refer to the
* copyright file COPYRIGHT_MVAPICH in the top level MPICH directory.
*
*/
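/* NOTE: the header names were stripped from this copy of dreg.c; the list
 * below is reconstructed from the identifiers the code actually uses. */
#include <errno.h>
#include <malloc.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>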
/*
 * Re-entrancy flag for the hooks: new_malloc_hook and new_free_hook set it to
 * 1 around their non-recursive allocator calls and test it on entry so that a
 * nested malloc/free takes the path that does not print.
 */
static int in_malloc = 0; /* detects recursive malloc/free */
/*
 * Debug trace: writes "[file:line] <formatted message>" plus a newline to
 * stdout and flushes immediately.
 *
 * `message` must be a string literal, because it is concatenated with the
 * "[%s:%d] " prefix and the trailing "\n" at compile time.  Any further
 * arguments are forwarded to fprintf.  The do { } while (0) wrapper makes the
 * macro behave as a single statement.
 */
#define D_PRINT(message, args...) do { \
    fprintf(stdout, "[%s:%d] " message "\n", __FILE__, __LINE__, ##args); \
    fflush(stdout); \
} while (0)
/*
 * Saved copies of the hook pointers that were in place before ours; filled in
 * by new_malloc_init and restored temporarily by MALLOC_OVERRIDE_OFF.
 */
void (*old_free_hook)(void *ptr, const void *caller) = NULL;
void *(*old_malloc_hook)(size_t size, const void *caller) = NULL;
/* Forward declarations of the installer and the two replacement hooks. */
void new_malloc_init(void);
static void new_free_hook(void *ptr, const void *caller);
static void *new_malloc_hook(size_t size, const void *caller);
/* Points the allocator's initialization hook at new_malloc_init. */
void (*__malloc_initialize_hook) (void) = new_malloc_init;
/*
 * Install the replacement hooks.  Wrapped in do { } while (0) so that
 * "MALLOC_OVERRIDE_ON;" is exactly one statement and is safe inside an
 * unbraced if/else.
 */
#define MALLOC_OVERRIDE_ON \
    do { \
        __free_hook = new_free_hook; \
        __malloc_hook = new_malloc_hook; \
    } while (0)

/*
 * Put back the hook pointers saved in old_free_hook / old_malloc_hook so that
 * the next malloc/free call bypasses the replacement hooks.
 */
#define MALLOC_OVERRIDE_OFF \
    do { \
        __free_hook = old_free_hook; \
        __malloc_hook = old_malloc_hook; \
    } while (0)
/*
 * Allocate `s` bytes with the replacement hooks switched off for the duration
 * of the call, then switch them back on.  Returns whatever malloc returned.
 */
static inline void *
real_malloc(size_t s)
{
    void *block;

    MALLOC_OVERRIDE_OFF;
    block = malloc(s);
    MALLOC_OVERRIDE_ON;

    return block;
}
/*
 * Release `p` with the replacement hooks switched off for the duration of
 * the call, then switch them back on.
 */
static inline void
real_free(void *p)
{
    MALLOC_OVERRIDE_OFF;

    free(p);

    MALLOC_OVERRIDE_ON;
}
/* Marker written into every header created by the replacement malloc hook. */
#define MALLOC_SIGNATURE 0xa110c123U

/*
 * Bookkeeping record stored immediately in front of each block handed out by
 * the hooks (and in front of free-list blocks).
 */
struct _malloc_header {
    /* Size of the whole block in bytes, this header included. */
    unsigned long total_size;
    /* Validation marker; also pads the header for better alignment. */
    unsigned long signature;
};
typedef struct _malloc_header malloc_header_t;
/*
 * Overrides the malloc_hook functions for the default memory allocator.
 *
 * Saves the hook pointers currently in place so that MALLOC_OVERRIDE_OFF can
 * restore them, installs the replacement hooks, and prints a trace line.
 * The former unreachable "fail:" label and its message were dead code (no
 * goto targeted it) and have been removed.
 */
void
new_malloc_init(void)
{
    old_free_hook = __free_hook;
    old_malloc_hook = __malloc_hook;
    MALLOC_OVERRIDE_ON;
    D_PRINT("Standard malloc has been overridden.");
}
/*
 * Get memory from the libc-maintained heap.
 *
 * Allocates `total_size` bytes through real_malloc (hooks suspended) and
 * prints a trace line with the size and the resulting address.  Returns the
 * block, or NULL if the allocation failed; the header fields are NOT filled
 * in here.
 */
static inline malloc_header_t *
get_mem_from_libc_free_list(unsigned long total_size)
{
    malloc_header_t *m = (malloc_header_t *)real_malloc(total_size);

    /* %p requires a void * argument. */
    D_PRINT("Used malloc for %lu: %p", total_size, (void *)m);
    return m;
}
/*
 * Free memory to the libc-maintained heap.
 *
 * `m` points at the header of a block whose total_size field is valid.
 * Prints a trace line, overwrites the payload (everything after the header)
 * with 0xff bytes so that any use of the data after free becomes detectable,
 * then releases the block through real_free (hooks suspended).
 */
static void
put_mem_on_libc_free_list(malloc_header_t *m)
{
    /* %p requires a void * argument. */
    D_PRINT("Freeing %lu: %p", m->total_size, (void *)m);

    /* Fill it with ff's so we can detect use of data after free. */
    memset(m + 1, 0xff, m->total_size - sizeof(malloc_header_t));

    real_free(m);
}
/*
 * Callback in replacement of the default malloc() function.
 *
 * Allocates `size` bytes preceded by a malloc_header_t, records the total
 * block size and MALLOC_SIGNATURE in that header, and returns a pointer to
 * the first byte after the header.  `caller` is unused.
 *
 * Returns NULL with errno set to ENOMEM when the underlying allocation fails
 * or when `size` plus the header size does not fit in total_size.
 */
static void *
new_malloc_hook(size_t size, const void *caller)
{
    unsigned long total_size = size + sizeof(malloc_header_t);
    malloc_header_t *m;

    (void)caller;

    /* Unsigned wrap-around (or truncation) means the request is too large;
     * allocating the wrapped size would hand back an undersized block. */
    if (total_size < size) {
        errno = ENOMEM;
        return NULL;
    }

    if (in_malloc) {
        /* Recursive malloc/free call: we can't call our allocators lest we
         * get stuck in infinite recursion, nor can we call printf. */
        m = (malloc_header_t *)real_malloc(total_size);
    } else {
        in_malloc = 1;
        m = get_mem_from_libc_free_list(total_size);
        in_malloc = 0;
    }

    if (!m) {
        errno = ENOMEM;
        return NULL;
    }

    m->total_size = total_size;
    m->signature = MALLOC_SIGNATURE;

    return (unsigned char *)m + sizeof(malloc_header_t);
}
/*
 * Callback in replacement of the default free() function.
 *
 * `ptr` is the user pointer being freed; `caller` is unused.
 *  - NULL is ignored.
 *  - If the header in front of `ptr` does not carry MALLOC_SIGNATURE (bad
 *    pointer, block not allocated by our hook, or the application walked off
 *    the front of the buffer), `ptr` itself is passed to a vanilla free.
 *  - On a recursive call the block is freed directly, without printing.
 *  - Otherwise the block goes through put_mem_on_libc_free_list.
 */
static void
new_free_hook(void *ptr, const void *caller)
{
    malloc_header_t *m;

    (void)caller;

    if (ptr == NULL) {
        return;
    }

    /* Only step back to the header once ptr is known to be non-NULL;
     * pointer arithmetic on a null pointer is undefined. */
    m = (malloc_header_t *)((unsigned char *)ptr - sizeof(malloc_header_t));

    if (m->signature != MALLOC_SIGNATURE) {
        real_free(ptr);
    } else if (in_malloc) {
        /* Recursive malloc/free call: we can't call our allocators lest we
         * get stuck in infinite recursion, nor can we call printf. */
        real_free(m);
    } else {
        in_malloc = 1;
        put_mem_on_libc_free_list(m);
        in_malloc = 0;
    }
}
Here is the Makefile used:
# Builds the reproducer `ep` from the Fortran test program (ep.f) and the C
# malloc-hook file (dreg.c).  Every recipe line below starts with a TAB.
SHELL=/bin/sh
OBJS = ep.o dreg.o

.PHONY: all clean

all: ep

# Link with gfortran so the Fortran runtime library is pulled in.
ep: ${OBJS}
	gfortran -o ep ${OBJS}

ep.o: ep.f
	gfortran -c ep.f

dreg.o: dreg.c
	gcc -c dreg.c

# The leading '-' lets make continue even if rm reports an error.
clean:
	- rm -f *.o ep *~
Further information:
This program is demonstrating a bug in the gfortran compiler/libraries included
in SLES10. The IO library appears to be using memory after it has been freed.
This test program was created based on some code in the NASA EP benchmark
and a malloc/free hook from our MPI implementation.
I have simplified this program to its absolute minimum to reproduce
the problem.
Problem was reproduced on SLES10 for x86_64. The rpms file shows the list
of rpms installed on the system.
On gfortran with SLES10, I observe failures when the "memset" in dreg.c
is enabled. The write(size, ....) statement in ep.f is causing data to
be used after it has been freed. If that data area is altered by free,
the application core dumps attempting to call an invalid pointer in
_gfortran_st_write_done.
Here is the backtrace:
Core was generated by `./ep'.
Program terminated with signal 11, Segmentation fault.
Reading symbols from /usr/lib64/libgfortran.so.1...done.
Loaded symbols for /usr/lib64/libgfortran.so.1
Reading symbols from /lib64/libm.so.6...done.
Loaded symbols for /lib64/libm.so.6
Reading symbols from /lib64/libgcc_s.so.1...done.
Loaded symbols for /lib64/libgcc_s.so.1
Reading symbols from /lib64/libc.so.6...done.
Loaded symbols for /lib64/libc.so.6
Reading symbols from /lib64/ld-linux-x86-64.so.2...done.
Loaded symbols for /lib64/ld-linux-x86-64.so.2
#0 0xffffffffffffffff in ?? ()
(gdb) bt
#0 0xffffffffffffffff in ?? ()
#1 0x00002b5e98231b36 in _gfortran_st_write_done ()
from /usr/lib64/libgfortran.so.1
#2 0x0000000000400c1c in MAIN__ ()
#3 0x0000000000400f4e in main ()
(gdb)
Here are the registers:
rax 0x5084d0 5276880
rbx 0x7fff129fd7b0 140733505853360
rcx 0xffffffffffffffc0 -64
rdx 0x9 9
rsi 0xffffffffffffffff -1
rdi 0x5084d0 5276880
rbp 0x7fff129fd960 0x7fff129fd960
rsp 0x7fff129fd788 0x7fff129fd788
r8 0x2098 8344
r9 0x2b5e981c4c00 47685278911488
r10 0x0 0
r11 0x2b5e985d7480 47685283181696
r12 0x0 0
r13 0x7fff129fda50 140733505854032
r14 0x0 0
r15 0x0 0
rip 0xffffffffffffffff 0xffffffffffffffff
eflags 0x10202 66050
cs 0x33 51
ss 0x2b 43
ds 0x0 0
es 0x0 0
fs 0x0 0
Here is the output:
cocobolo:/usr/local/src/mpi_apps/testep # ./ep
[dreg.c:98] Standard malloc has been overridden.
[dreg.c:113] Used malloc for 216: 0x502010
[dreg.c:113] Used malloc for 8360: 0x5020f0
[dreg.c:113] Used malloc for 216: 0x5041a0
[dreg.c:113] Used malloc for 8360: 0x504280
[dreg.c:113] Used malloc for 216: 0x506330
[dreg.c:113] Used malloc for 8360: 0x506410
[dreg.c:113] Used malloc for 3672: 0x5084c0
NAS Parallel Benchmarks 3.2 -- EP Benchmark
[dreg.c:121] Freeing 3672: 0x5084c0
[dreg.c:113] Used malloc for 8360: 0x5084c0
[dreg.c:113] Used malloc for 3672: 0x50a570
[dreg.c:121] Freeing 8360: 0x5084c0
Segmentation fault (core dumped)
Note the last free of address 0x5084c0 and rax:
rax 0x5084d0 5276880 # this is the start of the buffer; dreg adds 0x10 bytes
at the start of each malloc/free area
Disassembly of faulting area:
0x00002b5e98231b2c <_gfortran_st_write_done+124>: mov 0x8(%rax),%rax
0x00002b5e98231b30 <_gfortran_st_write_done+128>: mov %rax,%rdi
0x00002b5e98231b33 <_gfortran_st_write_done+131>: callq *0x28(%rax)
0x00002b5e98231b36 <_gfortran_st_write_done+134>: cmp $0x2,%eax
Note indirect function call via 0x28(%rax), which is in the middle of the freed
block of memory.
When this application is built with g77 on SLES9: gcc version 3.3.3 (SuSE
Linux)
and libraries:
libg2c.so.0 => /usr/lib64/libg2c.so.0 (0x0000002a9566d000)
libm.so.6 => /lib64/tls/libm.so.6 (0x0000002a9578e000)
libgcc_s.so.1 => /lib64/libgcc_s.so.1 (0x0000002a958e7000)
libc.so.6 => /lib64/tls/libc.so.6 (0x0000002a959f2000)
/lib64/ld-linux-x86-64.so.2 (0x0000002a95556000)
It works fine.
--
Configure bugmail: https://bugzilla.novell.com/userprefs.cgi?tab=email
------- You are receiving this mail because: -------
You are on the CC list for the bug, or are watching someone who is.