[scyld-users] Problem running Charmm on Scyld cluster

Andre Kerstens akerstens at utep.edu
Tue Mar 14 16:29:40 PST 2006


Hello all,

We have recently bought a Penguin cluster with Scyld release 29cz 
(29cz-5u0001 200506091805) on it and are trying to get a statically 
compiled version of Charmm to run on the nodes. The problem is that 
Charmm runs fine on the master node, but segfaults as soon as it is 
migrated to a compute node. From the strace below you can see that the 
segfault happens after the library /lib64/ld-linux-x86-64.so.2 cannot be 
found, even though it exists on the master node and is listed for export 
to the nodes in /etc/beowulf/config.

[akerstens at cluster 3ptb_1000110]$ bpsh 1 ./strace ./charmm64
execve("./charmm64", ["./charmm64"], [/* 22 vars */]) = 0
uname({sys="Linux", node=".1", ...})    = 0
brk(0)                                  = 0x17a6dcc0
brk(0x17a8ecc0)                         = 0x17a8ecc0
brk(0x17a8f000)                         = 0x17a8f000
times({tms_utime=0, tms_stime=0, tms_cutime=0, tms_cstime=0}) = 180579094
times({tms_utime=0, tms_stime=0, tms_cutime=0, tms_cstime=0}) = 180579094
times({tms_utime=0, tms_stime=0, tms_cutime=0, tms_cstime=0}) = 180579094
access("charmm.inp", F_OK)              = 0
open("charmm.inp", O_RDWR)              = 3
fstat(3, {st_mode=S_IFREG|0775, st_size=23616, ...}) = 0
access("charmm.out", F_OK)              = -1 ENOENT (No such file or 
directory)
open("charmm.out", O_RDWR|O_CREAT|O_TRUNC, 0666) = 4
fstat(4, {st_mode=S_IFREG|0664, st_size=0, ...}) = 0
open("/etc/localtime", O_RDONLY)        = 5
fstat(5, {st_mode=S_IFREG|0644, st_size=877, ...}) = 0
fstat(5, {st_mode=S_IFREG|0644, st_size=877, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) 
= 0x2a95556000
read(5, "TZif\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\4\0\0\0\4\0"..., 
4096) = 877
close(5)                                = 0
munmap(0x2a95556000, 4096)              = 0
readlink("/proc/self/fd/0", "socket:[5816]", 511) = 13
ioctl(0, SNDCTL_TMR_TIMEBASE or TCGETS, 0x7fbfffee90) = -1 EINVAL 
(Invalid argument)
getuid()                                = 500
socket(PF_UNIX, SOCK_STREAM, 0)         = 5
fcntl(5, F_GETFL)                       = 0x2 (flags O_RDWR|O_LARGEFILE)
fcntl(5, F_SETFL, O_RDWR|O_NONBLOCK)    = 0
connect(5, {sa_family=AF_UNIX, path="/var/run/nscd/socket"}, 110) = -1 
ENOENT (No such file or directory)
close(5)                                = 0
socket(PF_UNIX, SOCK_STREAM, 0)         = 5
fcntl(5, F_GETFL)                       = 0x2 (flags O_RDWR|O_LARGEFILE)
fcntl(5, F_SETFL, O_RDWR|O_NONBLOCK)    = 0
connect(5, {sa_family=AF_UNIX, path="/var/run/nscd/socket"}, 110) = -1 
ENOENT (No such file or directory)
close(5)                                = 0
open("/etc/nsswitch.conf", O_RDONLY)    = 5
fstat(5, {st_mode=S_IFREG|0644, st_size=175, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) 
= 0x2a95556000
read(5, "# Generated by node_up for Scyld"..., 4096) = 175
read(5, "", 4096)                       = 0
close(5)                                = 0
munmap(0x2a95556000, 4096)              = 0
open("/etc/ld.so.cache", O_RDONLY)      = 5
fstat(5, {st_mode=S_IFREG|0644, st_size=144816, ...}) = 0
mmap(NULL, 144816, PROT_READ, MAP_PRIVATE, 5, 0) = 0x2a95556000
close(5)                                = 0
open("/lib64/libnss_beo.so.2", O_RDONLY) = 5
read(5, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\20(\0\0"..., 
640) = 640
fstat(5, {st_mode=S_IFREG|0755, st_size=40535, ...}) = 0
mmap(NULL, 1079320, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 5, 
0) = 0x2a9557a000
madvise(0x2a9557a000, 1079320, MADV_SEQUENTIAL|0x1) = 0
mprotect(0x2a95581000, 1050648, PROT_NONE) = 0
mmap(0x2a95681000, 4096, PROT_READ|PROT_WRITE, 
MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 5, 0x7000) = 0x2a95681000
close(5)                                = 0
open("/lib64/libc.so.6", O_RDONLY)      = 5
read(5, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\340\327"..., 
640) = 640
fstat(5, {st_mode=S_IFREG|0755, st_size=1567579, ...}) = 0
mmap(NULL, 2377064, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 5, 
0) = 0x2a95682000
madvise(0x2a95682000, 2377064, MADV_SEQUENTIAL|0x1) = 0
mprotect(0x2a957bd000, 1086824, PROT_NONE) = 0
mmap(0x2a958bd000, 20480, PROT_READ|PROT_WRITE, 
MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 5, 0x13b000) = 0x2a958bd000
mmap(0x2a958c2000, 17768, PROT_READ|PROT_WRITE, 
MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x2a958c2000
close(5)                                = 0
open("/lib64/ld-linux-x86-64.so.2", O_RDONLY) = -1 ENOENT (No such file 
or directory)
open("/lib64/ld-linux-x86-64.so.2", O_RDONLY) = -1 ENOENT (No such file 
or directory)
stat("/lib64", {st_mode=S_IFDIR|0755, st_size=440, ...}) = 0
open("/usr/lib64/ld-linux-x86-64.so.2", O_RDONLY) = -1 ENOENT (No such 
file or directory)
stat("/usr/lib64", {st_mode=S_IFDIR|0755, st_size=460, ...}) = 0
munmap(0x2a95556000, 144816)            = 0
munmap(0x2a9557a000, 1079320)           = 0
munmap(0x2a95682000, 2377064)           = 0
open("/etc/ld.so.cache", O_RDONLY)      = 5
fstat(5, {st_mode=S_IFREG|0644, st_size=144816, ...}) = 0
mmap(NULL, 144816, PROT_READ, MAP_PRIVATE, 5, 0) = 0x2a95556000
close(5)                                = 0
open("/lib64/libnss_bproc.so.2", O_RDONLY) = 5
read(5, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\0%\0\0\0"..., 
640) = 640
fstat(5, {st_mode=S_IFREG|0755, st_size=30705, ...}) = 0
mmap(NULL, 1070784, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 5, 
0) = 0x2a9557a000
madvise(0x2a9557a000, 1070784, MADV_SEQUENTIAL|0x1) = 0
mprotect(0x2a95580000, 1046208, PROT_NONE) = 0
mmap(0x2a9567f000, 4096, PROT_READ|PROT_WRITE, 
MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 5, 0x5000) = 0x2a9567f000
close(5)                                = 0
open("/lib64/libc.so.6", O_RDONLY)      = 5
read(5, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\340\327"..., 
640) = 640
fstat(5, {st_mode=S_IFREG|0755, st_size=1567579, ...}) = 0
mmap(NULL, 2377064, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 5, 
0) = 0x2a95680000
madvise(0x2a95680000, 2377064, MADV_SEQUENTIAL|0x1) = 0
mprotect(0x2a957bb000, 1086824, PROT_NONE) = 0
mmap(0x2a958bb000, 20480, PROT_READ|PROT_WRITE, 
MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 5, 0x13b000) = 0x2a958bb000
mmap(0x2a958c0000, 17768, PROT_READ|PROT_WRITE, 
MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x2a958c0000
close(5)                                = 0
open("/lib64/ld-linux-x86-64.so.2", O_RDONLY) = -1 ENOENT (No such file 
or directory)
open("/lib64/ld-linux-x86-64.so.2", O_RDONLY) = -1 ENOENT (No such file 
or directory)
open("/usr/lib64/ld-linux-x86-64.so.2", O_RDONLY) = -1 ENOENT (No such 
file or directory)
munmap(0x2a95556000, 144816)            = 0
munmap(0x2a9557a000, 1070784)           = 0
munmap(0x2a95680000, 2377064)           = 0
uname({sys="Linux", node=".1", ...})    = 0
getpid()                                = 3646
open("/etc/resolv.conf", O_RDONLY)      = -1 ENOENT (No such file or 
directory)
uname({sys="Linux", node=".1", ...})    = 0
stat("/etc/resolv.conf", 0x7fbfffedc0)  = -1 ENOENT (No such file or 
directory)
socket(PF_UNIX, SOCK_STREAM, 0)         = 5
fcntl(5, F_GETFL)                       = 0x2 (flags O_RDWR|O_LARGEFILE)
fcntl(5, F_SETFL, O_RDWR|O_NONBLOCK)    = 0
connect(5, {sa_family=AF_UNIX, path="/var/run/nscd/socket"}, 110) = -1 
ENOENT (No such file or directory)
close(5)                                = 0
socket(PF_UNIX, SOCK_STREAM, 0)         = 5
fcntl(5, F_GETFL)                       = 0x2 (flags O_RDWR|O_LARGEFILE)
fcntl(5, F_SETFL, O_RDWR|O_NONBLOCK)    = 0
connect(5, {sa_family=AF_UNIX, path="/var/run/nscd/socket"}, 110) = -1 
ENOENT (No such file or directory)
close(5)                                = 0
open("/etc/ld.so.cache", O_RDONLY)      = 5
fstat(5, {st_mode=S_IFREG|0644, st_size=144816, ...}) = 0
mmap(NULL, 144816, PROT_READ, MAP_PRIVATE, 5, 0) = 0x2a95556000
close(5)                                = 0
open("/lib64/libnss_files.so.2", O_RDONLY) = 5
read(5, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\200%\0\0"..., 
640) = 640
fstat(5, {st_mode=S_IFREG|0755, st_size=57649, ...}) = 0
mmap(NULL, 1096200, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 5, 
0) = 0x2a9557a000
madvise(0x2a9557a000, 1096200, MADV_SEQUENTIAL|0x1) = 0
mprotect(0x2a95586000, 1047048, PROT_NONE) = 0
mmap(0x2a95685000, 4096, PROT_READ|PROT_WRITE, 
MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 5, 0xb000) = 0x2a95685000
close(5)                                = 0
open("/lib64/libc.so.6", O_RDONLY)      = 5
read(5, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\340\327"..., 
640) = 640
fstat(5, {st_mode=S_IFREG|0755, st_size=1567579, ...}) = 0
mmap(NULL, 2377064, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 5, 
0) = 0x2a95686000
madvise(0x2a95686000, 2377064, MADV_SEQUENTIAL|0x1) = 0
mprotect(0x2a957c1000, 1086824, PROT_NONE) = 0
mmap(0x2a958c1000, 20480, PROT_READ|PROT_WRITE, 
MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 5, 0x13b000) = 0x2a958c1000
mmap(0x2a958c6000, 17768, PROT_READ|PROT_WRITE, 
MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x2a958c6000
close(5)                                = 0
open("/lib64/ld-linux-x86-64.so.2", O_RDONLY) = -1 ENOENT (No such file 
or directory)
open("/lib64/ld-linux-x86-64.so.2", O_RDONLY) = -1 ENOENT (No such file 
or directory)
open("/usr/lib64/ld-linux-x86-64.so.2", O_RDONLY) = -1 ENOENT (No such 
file or directory)
munmap(0x2a95556000, 144816)            = 0
munmap(0x2a9557a000, 1096200)           = 0
munmap(0x2a95686000, 2377064)           = 0
--- SIGSEGV (Segmentation fault) @ 0 (0) ---
+++ killed by SIGSEGV +++

Master node:
[akerstens at sacagawea 3ptb_1000110]$ ll /lib64/ld-linux-x86-64.so.2
lrwxrwxrwx    1 root     root           11 Sep 29 21:18 
/lib64/ld-linux-x86-64.so.2 -> ld-2.3.2.so
[akerstens at sacagawea 3ptb_1000110]$ ll /lib64/ld-2.3.2.so
-rwxr-xr-x    1 root     root       100772 May 13  2005 /lib64/ld-2.3.2.so

Since the Charmm binary is static and should not need the dynamic loader 
itself, it seems that bpsh is looking for this library and somehow cannot 
find it on the compute nodes.

Has anybody had this problem before, and does anyone know what is going 
on? Any help is appreciated, as I haven't received any pointers from 
Penguin support for 2 weeks now.

Thanks

Andre Kerstens



-- 
--------------------------------------------------------------
Andre Kerstens

The University of Texas at El Paso
College of Engineering

The best way to predict the future is to invent it.
     --- Alan Kay
---------------------------------------------------------------



More information about the Scyld-users mailing list