Hello Dave,
Here are some "notes" I have made while using crash this summer.
Thanks.
================================================================================
PROBLEM TO START CRASH
================================================================================
Problem starting crash with this version:
[root@fedora4 crash-4.0-2.33]# cat /proc/version
Linux version 2.6.16.1 (root@fedora4.domain.com) (version gcc 4.0.0 20050519 (Red Hat 4.0.0-8)) #2 SMP PREEMPT Tue Apr 11 12:38:29 CEST 2006
crash 4.0-2.33
Copyright (C) 2002, 2003, 2004, 2005, 2006 Red Hat, Inc.
Copyright (C) 2004, 2005, 2006 IBM Corporation
Copyright (C) 1999-2006 Hewlett-Packard Co
Copyright (C) 2005 Fujitsu Limited
Copyright (C) 2005 NEC Corporation
Copyright (C) 1999, 2002 Silicon Graphics, Inc.
Copyright (C) 1999, 2000, 2001, 2002 Mission Critical Linux, Inc.
This program is free software, covered by the GNU General Public License,
and you are welcome to change it and/or distribute copies of it under
certain conditions. Enter "help copying" to see the conditions.
This program has absolutely no warranty. Enter "help warranty" for details.
GNU gdb 6.1
Copyright 2004 Free Software Foundation, Inc.
GDB is free software, covered by the GNU General Public License, and you are
welcome to change it and/or distribute copies of it under certain conditions.
Type "show copying" to see the conditions.
There is absolutely no warranty for GDB. Type "show warranty" for details.
This GDB was configured as "i686-pc-linux-gnu"...
crash: invalid size request: 0 type: "hardirq_ctx"
Looks like it has something to do with kernels not CONFIG_4KSTACKS?:
#ifdef CONFIG_4KSTACKS
/*
* per-CPU IRQ handling contexts (thread information and stack)
*/
union irq_ctx {
struct thread_info tinfo;
u32 stack[THREAD_SIZE/sizeof(u32)];
};static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly;
static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly;
#endif
=================================================================
MAY BE A PROBLEM WITH STRUCT AND ARRAYS
=================================================================
There may be a problem with struct:
crash> struct kmem_cache_s f7ffdc80
struct kmem_cache_s {
array = {0xf7ffb980, 0xc1ddf500, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
batchcount = 12,
limit = 24,
lists = {
slabs_partial = {
next = 0xf7bdd000,
prev = 0xf701a000
},
slabs_full = {
next = 0xc1f3a000,
prev = 0xf7e8c000
},
[...]
name = 0xc0322190 "task_struct",
next = {
next = 0xf7ffde78,
prev = 0xf7ffdc78
}
}
crash> struct kmem_cache_s.array f7ffdc80 <--- OTHER FIELDS ARE COMING WITH ARRAY
array = {0xf7ffb980, 0xc1ddf500, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
batchcount = 12,
limit = 24,
lists = {
slabs_partial = {
next = 0xf7828000,
prev = 0xf7bdd000
},
slabs_full = {
next = 0xc1eb3000,
prev = 0xf7e8c000
},
slabs_free = {
next = 0xf7ffdd18,
prev = 0xf7ffdd18
},
free_objects = 13,
free_touched = 0,
next_reap = 11377353,
shared = 0xf7ff5000
},
crash>
It's in symbols.c: parse_for_member() where the closing "}" is not
at the same indent as the start. I don't know how to fix this off-hand
without breaking other output where there are arrays within enclosed
data structures.
=================================================================
I THINK THERE IS A POINTER PROBLEM WITH SIG
=================================================================
It seems there is a pointer problem with sig.
(Also, currently, sig -l and sig -s do not support RT signals).
I use a very simple program (procsig.c) which blocks a signal and sends it using tkill():
#include <stdio.h>
#include <stdlib.h>
#include <signal.h>
#include <linux/unistd.h>
#ifndef GETTID
pid_t gettid(void) {return syscall(__NR_gettid);}
#endif
#ifndef TKILL
int tkill(pid_t tid, int sig) {return syscall (__NR_tkill,tid,sig); }
#endif
void sigusr2(int sig, siginfo_t *sp, void *uc)
{
}
void sigcatch()
{
struct sigaction sa;
sigemptyset(&sa.sa_mask);
sa.sa_flags = SA_SIGINFO;
sa.sa_sigaction = sigusr2;
if (sigaction(SIGUSR2, &sa, NULL)==-1) {
perror("sigaction() - SIGUSR2"); exit(1);
}
}
void sig_block()
{
sigset_t set;
sigemptyset(&set);
sigaddset(&set, SIGUSR2);
if (sigprocmask(SIG_SETMASK, &set,NULL) == -1) {
perror("sigprocmask()"); exit(1);
}
if(tkill(gettid(), SIGUSR2)==-1){
perror("tkill()"); exit(1);
}
}
main(int argc)
{
sig_block();
sigcatch();
pause();
}
[root@fedora4 ~]# ./procsig &
[1] 4985
[root@fedora4 ~]# crash -s
crash> set 4985
PID: 4985
COMMAND: "procsig"
TASK: f7e19020 [THREAD_INFO: f4a97000]
CPU: 0
STATE: TASK_INTERRUPTIBLE
crash> sig >/tmp/res <----------- NEVER FINISHES, I MUST KILL THE TASK
This is what I get in the file /tmp/res:
PID: 4985 TASK: f7e19020 CPU: 0 COMMAND: "procsig"
SIGPENDING: no
SIGNAL: 0000000000000800
BLOCKED: 0000000000000800
SIGNAL_STRUCT: c1e57980 COUNT: 1
SIG SIGACTION HANDLER MASK FLAGS
[1] c1f3e604 SIG_DFL 0000000000000000 0
[2] c1f3e618 SIG_DFL 0000000000000000 0
[3] c1f3e62c SIG_DFL 0000000000000000 0
[...]
[60] c1f3eaa0 SIG_DFL 0000000000000000 0
[61] c1f3eab4 SIG_DFL 0000000000000000 0
[62] c1f3eac8 SIG_DFL 0000000000000000 0
[63] c1f3eadc SIG_DFL 0000000000000000 0
SIGQUEUE: SIG SIGINFO
12 f7ac1344
0 f7e194a4
12 f7ac1344
0 f7e194a4
12 f7ac1344
0 f7e194a4
12 f7ac1344
0 f7e194a4
12 f7ac1344
0 f7e194a4
12 f7ac1344
0 f7e194a4
12 f7ac1344
0 f7e194a4
12 f7ac1344
.....
.....
I don't use that command much -- a fix would be appreciated...
=================================================================
IS CRASH ABLE TO READ per_cpu__xxxxxx STRUCTURES ?
================================================================
Strange results with per_cpu__xxxxxxx
Maybe I am doing something which is not correct. The "problem" exists only with SMP.
crash> sys
KERNEL: /boot/vmlinux-2.6.11-prep
DUMPFILE: /dev/crash
CPUS: 2
DATE: Fri Jul 21 16:30:51 2006
UPTIME: 00:09:11
LOAD AVERAGE: 0.01, 0.11, 0.08
TASKS: 105
NODENAME: fedora4.domain.com
RELEASE: 2.6.11-prep
VERSION: #6 SMP Fri Jul 21 10:13:20 CEST 2006
MACHINE: i686 (2399 Mhz)
MEMORY: 1.5 GB
crash> per_cpu__loopback_stats
per_cpu__loopback_stats = $6 = {
rx_packets = 3435973836,
tx_packets = 3435973836,
rx_bytes = 3435973836,
tx_bytes = 3435973836,
rx_errors = 3435973836,
tx_errors = 3435973836,
rx_dropped = 3435973836,
tx_dropped = 3435973836,
multicast = 3435973836,
collisions = 3435973836,
rx_length_errors = 3435973836,
rx_over_errors = 3435973836,
rx_crc_errors = 3435973836,
rx_frame_errors = 3435973836,
rx_fifo_errors = 3435973836,
rx_missed_errors = 3435973836,
tx_aborted_errors = 3435973836,
tx_carrier_errors = 3435973836,
tx_fifo_errors = 3435973836,
tx_heartbeat_errors = 3435973836,
tx_window_errors = 3435973836,
rx_compressed = 3435973836,
tx_compressed = 3435973836
}
crash> rd per_cpu__loopback_stats 10
c040c320: cccccccc cccccccc cccccccc cccccccc ................
c040c330: cccccccc cccccccc cccccccc cccccccc ................
c040c340: cccccccc cccccccc ........
Another one:
crash> per_cpu__irq_stat
per_cpu__irq_stat = $1 = {
__softirq_pending = 1954047342, /* ???? */
idle_timestamp = 1668312320,
__nmi_count = 1835364191,
apic_timer_irqs = 1702521203
}
crash> rd per_cpu__irq_stat 4
c0407380: 7478656e 63706d00 6d656f5f 657a6973 next.mpc_oemsize
per_cpu data structures cannot be read appropriately, except in the
cases where I need to have them, like the runqueues, where I
do the offset calculations.
.
=================================================================
A SMALL DETAIL with ps
=================================================================
Using procthread: the very first time I run ps procthread, I see VSZ and RSS as 0
FIRST TIME
crash> ps procthread
PID PPID CPU TASK ST %MEM VSZ RSS COMM
4844 1 1 f6646020 IN 0.0 0 0 procthread
4845 1 0 f6646560 IN 0.0 22108 552 procthread
> 4846 1 1 f78f5560 RU 0.0 22108 552 procthread
SECOND TIME is OK
crash> ps procthread
PID PPID CPU TASK ST %MEM VSZ RSS COMM
4844 1 1 f6646020 IN 0.0 22108 552 procthread
4845 1 0 f6646560 IN 0.0 22108 552 procthread
> 4846 1 1 f78f5560 RU 0.0 22108 552 procthread
Here is procthread.c
#include <stdio.h>
#include <stdlib.h>
#include <signal.h>
#include <assert.h>
#include <pthread.h>
#ifndef GETTID
#include <linux/unistd.h>
_syscall0(pid_t,gettid);
#endif
#define NBR_THREADS 2
pthread_t tid[NBR_THREADS];
void *lwp_pr(void *lwp_num)
{
int ret;
pid_t tid = gettid();
printf("PPID %d TGID %d TID %d\n",
getppid(), getpid(), tid);
while(1)
if (tid % 2)
sleep(1);
}
main(int argc)
{
int i, ret;
printf("main PPID %d TGID %d TID %d\n",
getppid(), getpid(), gettid());
for (i=0 ; i <NBR_THREADS ; i++) {
ret = pthread_create( &tid[i],NULL, lwp_pr, NULL);
assert(ret == 0);
}
pause();
}
I don't know what the issue is there...
=================================================================
A SMALL DETAIL WITH SYS -c
=================================================================
A small detail with sys -c
Many obsolete or not implemented system calls map (#define) on sys_ni_syscall.
[root@fedora4 boot]# grep sys_ni_syscall System.map-2.6.11-prep
c0136278 T sys_ni_syscall
[root@fedora4 boot]# grep c0136278 System.map-2.6.11-prep
c0136278 W compat_sys_futex
c0136278 W compat_sys_get_mempolicy
c0136278 W compat_sys_keyctl
c0136278 W compat_sys_mbind
c0136278 W compat_sys_mq_getsetattr
c0136278 W compat_sys_mq_notify
c0136278 W compat_sys_mq_open
c0136278 W compat_sys_mq_timedreceive
c0136278 W compat_sys_mq_timedsend
c0136278 W compat_sys_set_mempolicy
c0136278 W compat_sys_socketcall
c0136278 W ppc_rtas
c0136278 W sys32_ipc
c0136278 W sys32_sysctl
c0136278 W sys_get_mempolicy
c0136278 W sys_mbind
c0136278 T sys_ni_syscall
c0136278 W sys_pciconfig_iobase
c0136278 W sys_pciconfig_read
c0136278 W sys_pciconfig_write
c0136278 W sys_set_mempolicy
crash> sys -c
NUM SYSTEM CALL FILE AND LINE NUMBER
0 sys_restart_syscall ../kernel/signal.c: 2037
1 sys_exit ../kernel/exit.c: 870
2 sys_fork ../arch/i386/kernel/process.c: 650
3 sys_read ../fs/read_write.c: 313
4 sys_write ../fs/read_write.c: 331
5 sys_open ../fs/open.c: 938
6 sys_close ../fs/open.c: 1018
7 sys_waitpid ../kernel/exit.c: 1533
8 sys_creat ../fs/open.c: 974
9 sys_link ../fs/namei.c: 2015
10 sys_unlink ../fs/namei.c: 1863
11 sys_execve ../arch/i386/kernel/process.c: 688
12 sys_chdir ../fs/open.c: 519
13 sys_time ../kernel/time.c: 59
14 sys_mknod ../fs/namei.c: 1620
15 sys_chmod ../fs/open.c: 635
16 sys_lchown16 ../kernel/uid16.c: 26
17 sys_set_mempolicy ../kernel/sys_ni.c: 13 <--- in fact, "it is" sys_ni_syscall
crash> sys -c mempolicy
NUM SYSTEM CALL FILE AND LINE NUMBER
17 sys_set_mempolicy ../kernel/sys_ni.c: 13
31 sys_set_mempolicy ../kernel/sys_ni.c: 13
32 sys_set_mempolicy ../kernel/sys_ni.c: 13
35 sys_set_mempolicy ../kernel/sys_ni.c: 13
44 sys_set_mempolicy ../kernel/sys_ni.c: 13
53 sys_set_mempolicy ../kernel/sys_ni.c: 13
56 sys_set_mempolicy ../kernel/sys_ni.c: 13
58 sys_set_mempolicy ../kernel/sys_ni.c: 13
98 sys_set_mempolicy ../kernel/sys_ni.c: 13
112 sys_set_mempolicy ../kernel/sys_ni.c: 13
127 sys_set_mempolicy ../kernel/sys_ni.c: 13
130 sys_set_mempolicy ../kernel/sys_ni.c: 13
137 sys_set_mempolicy ../kernel/sys_ni.c: 13
167 sys_set_mempolicy ../kernel/sys_ni.c: 13
188 sys_set_mempolicy ../kernel/sys_ni.c: 13
189 sys_set_mempolicy ../kernel/sys_ni.c: 13
223 sys_set_mempolicy ../kernel/sys_ni.c: 13
251 sys_set_mempolicy ../kernel/sys_ni.c: 13
273 sys_set_mempolicy ../kernel/sys_ni.c: 13
274 sys_set_mempolicy ../kernel/sys_ni.c: 13
275 sys_set_mempolicy ../kernel/sys_ni.c: 13
276 sys_set_mempolicy ../kernel/sys_ni.c: 13
283 sys_set_mempolicy ../kernel/sys_ni.c: 13
285 sys_set_mempolicy ../kernel/sys_ni.c: 13
It would be clearer if we could "force" sys_ni_syscall (of course we have the reference to sys_ni.c).
Haven't used that command in a long time. Send in a fix...
==============================================================================
Command irq x should be extended for recent systems
==============================================================================
irq only allowed between 0 and 15 (old PICs)
Again, I don't use it -- send in a fix if you need it...
==============================================================================
repeat is not abortable in case of a mistake ?
==============================================================================
If I do repeat 2 xxxxxxx (instead of repeat -2 xxxxxx), I must kill crash
Yeah, the ctrl-C entries don't seem to be making it through if scrolling
is turned off.
============================================================================================
MOUNT DOES NOT SHOW NAMESPACE
=============================================================================================
About mount: a -n option would be very useful to show namespaces (when supported)
Again, send in a patch...
It would be probably more helpful if you packaged up an ASCII file
in the crash TODO list format, and I can append your bugs/requests
to it for anybody to sign up for doing a fix. It's simple enough
-- just put your description under DESCRIPTION, and leave RESOLUTION
STATUS to "TBD" unless you want to undertake it, in which case,
put your own email address there:
See http://people.redhat.com/anderson/crash.TODO.html
Your mailer sends stuff in an HTML format that makes it hard
to cut-and-paste.
Thanks,
Dave