Hi,
Sorry, I forgot to attach my source code.
Here is the source code.
Satoru MORIYA wrote:
Hi,
Here is an extension (shared object) for the crash utility that retrieves
the trace data of SystemTap scripts.
I'd like to analyze what caused a kernel panic by using SystemTap.
However, SystemTap's trace data currently cannot be retrieved easily from a
dumped image. So I developed a crash extension which retrieves
the data recorded by SystemTap from the dumped image.
Here is a brief document of this extension. This extension supports the new
utt-based buffer as well as the bulk-mode buffer of old systemtap module.
I have tested this extension on the following systems.
* FC6, i386, kernel-2.6.21, systemtap-0.5.14, crash-4.0-1.1
* FC6, i386, kernel-2.6.20, systemtap-0.5.13/14, crash-4.0-1.1
* RHEL5, i386, kernel-2.6.18-8.el5, systemtap-0.5.12, crash-4.0-3.14
Preparation
==============
(A) Build the shared-object(stplog.so).
1. Put Makefile and stplog.c into a directory ($DIR)
$ cd $DIR
2. Make the symbolic link to the crash source code directory
$ ln -s $WHERE_CRASH_PLACED crash
3. Build
$ make
(B) Make the crash dump which includes SystemTap trace data.
(*) If you analyze live system memory, skip this section.
1. Install kdump
If you use FC6, see following URL.
http://fedoraproject.org/wiki/FC6KdumpKexecHowTo?highlight=%28kdump%29
2. Use SystemTap
$ stap foo.stp
3. Panic
$ echo c > /proc/sysrq-trigger
How to use
==============
1. start crash
$ crash vmlinux vmcore
(*) If you analyze the live system memory, you don't need "vmcore".
$ crash vmlinux
2. load the shared-object
crash> extend $(WHERE_OBJ_PLACED)/stplog.so
3. retrieve the data
crash> stplog -m <mod_name>
(*) <mod_name> is the name of trace module from which you retrieve data.
4. You can get output files under the directory whose name is <mod_name>.
Output
==============
The stplog command creates one file per relayfs channel buffer (equivalent to one per CPU).
It also removes the padding bytes.
I believe this command is very useful for system administrators
if they monitor their systems with SystemTap.
Best Regards,
---
Satoru MORIYA
Linux Technology Center
Hitachi, Ltd., Systems Development Laboratory
E-mail: satoru.moriya.br(a)hitachi.com
--
---
Satoru MORIYA
Linux Technology Center
Hitachi, Ltd., Systems Development Laboratory
E-mail: satoru.moriya.br(a)hitachi.com
# Makefile for the stplog.so crash extension.
# Expects ./crash to be a symbolic link to the crash source directory
# (it is used as the include path for defs.h).
TARGET=stplog.so
CFILE=stplog.c
CFLAGS= -shared -rdynamic -DX86
CFLAGS+= -I./crash -Wall
PRJNAME=libcrash_for_systemtap
# Left as a backquoted command: the shell expands it in each recipe line.
VERSION=`date +%Y%m%d`

# These targets do not produce files of the same name.
.PHONY: clean dist distclean

$(TARGET):$(CFILE)
	gcc $(CFLAGS) -o $@ $(CFILE)

clean:
	rm -f -r $(TARGET) *~

# Build a source tarball named $(PRJNAME)-<date>.tar.bz2.
dist:distclean
	mkdir $(PRJNAME)-$(VERSION)
	cp $(CFILE) Makefile README $(PRJNAME)-$(VERSION)
	tar cvjf $(PRJNAME)-$(VERSION).tar.bz2 $(PRJNAME)-$(VERSION)
	rm -f -r $(PRJNAME)-$(VERSION)

# Like clean, but also removes the ./crash entry.
distclean:
	rm -f -r $(TARGET) *~ crash
/*
crash shared object for retrieving systemtap buffer
Copyright (c) 2007 Hitachi,Ltd.,
Created by Satoru Moriya <satoru.moriya.br(a)hitachi.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include "defs.h"
/*
 * Sentinel results of get_symbol_addr(). That function returns a ulong, so
 * these negative values reach the callers after conversion to ulong.
 */
/* The requested module is not loaded. */
#define STPLOG_NO_MOD -1
/* The module is loaded but has no symbol with the requested name. */
#define STPLOG_NO_SYM -2
/*
 * Member offsets of the structures this extension reads from the target.
 * Filled in by get_rchan_offsets() through MEMBER_OFFSET(): subbuf_size,
 * n_subbufs and buf are looked up in "rchan", the buf_* fields in
 * "rchan_buf".
 */
struct rchan_offsets {
long subbuf_size;
long n_subbufs;
long buf;
long buf_start;
long buf_offset;
long buf_subbufs_produced;
long buf_padding;
};
/*
 * Local copy of the "rchan_buf" members read by get_rchan_buf().
 * 'start' and 'padding' hold addresses read from the target with KVADDR;
 * this file only passes them back to readmem() and never dereferences them.
 */
struct fake_rchan_buf {
void *start;
size_t offset;
size_t subbufs_produced;
size_t *padding;
};
/* Local copy of the "rchan" size members read by get_rchan(). */
struct fake_rchan {
size_t subbuf_size;
size_t n_subbufs;
};
/* State kept for one CPU; see the per_cpu[] array. */
struct per_cpu_data {
struct fake_rchan_buf buf;
};
/* Member offsets resolved by get_rchan_offsets(). */
static struct rchan_offsets rchan_offsets;
/* Channel sizes read by get_rchan(). */
static struct fake_rchan chan;
/* Per-CPU buffer descriptors filled by get_rchan_buf(). */
static struct per_cpu_data per_cpu[NR_CPUS];
/* Log file currently being written; closed by cleanup(). */
static FILE *outfp;
/* Scratch buffer for one sub-buffer; allocated in output_cpu_logs(), freed by cleanup(). */
static char *subbuf;
/* Copy of pc->main_loop_env[0] kept while cmd_systemtaplog() runs. */
static jmp_buf saved_env;
/* Result of check_global_buffer(): 1 when the first two per-CPU slots hold the same buffer. */
static int is_global;
/* Set to 1 when the module has no "_stp_utt" symbol and "_stp_chan" is used instead. */
static int old_format;
/* Forward declarations needed by command_table below. */
void cmd_systemtaplog(void);
char *help_systemtaplog[];
/* Commands passed to register_extension(); the list ends with a NULL entry. */
static struct command_table_entry command_table[] = {
{"stplog", cmd_systemtaplog, help_systemtaplog, 0},
{NULL},
};
/*
 * Release what a run may have left behind: close the open log file, free
 * the sub-buffer scratch memory, and reset both handles to NULL.
 */
static void cleanup(void)
{
	if (outfp != NULL)
		fclose(outfp);
	outfp = NULL;

	/* free(NULL) is a no-op, so no guard is needed. */
	free(subbuf);
	subbuf = NULL;
}
static int get_rchan_offsets(void)
{
rchan_offsets.subbuf_size = MEMBER_OFFSET("rchan", "subbuf_size");
if (rchan_offsets.subbuf_size < 0)
goto ERR;
rchan_offsets.n_subbufs = MEMBER_OFFSET("rchan", "n_subbufs");
if (rchan_offsets.n_subbufs < 0)
goto ERR;
rchan_offsets.buf = MEMBER_OFFSET("rchan", "buf");
if (rchan_offsets.buf < 0)
goto ERR;
rchan_offsets.buf_start = MEMBER_OFFSET("rchan_buf", "start");
if (rchan_offsets.buf_start < 0)
goto ERR;
rchan_offsets.buf_offset = MEMBER_OFFSET("rchan_buf", "offset");
if (rchan_offsets.buf_offset < 0)
goto ERR;
rchan_offsets.buf_subbufs_produced
= MEMBER_OFFSET("rchan_buf", "subbufs_produced");
if (rchan_offsets.buf_subbufs_produced < 0)
goto ERR;
rchan_offsets.buf_padding = MEMBER_OFFSET("rchan_buf", "padding");
if (rchan_offsets.buf_padding < 0)
goto ERR;
return 0;
ERR:
error(WARNING, "cannot get rchan offset\n");
return -1;
}
/*
 * Read the pointer stored at chan_addr, then copy the subbuf_size and
 * n_subbufs members of the structure it points to into the global 'chan'.
 *
 * Returns the pointer value that was read from chan_addr.
 */
static ulong get_rchan(ulong chan_addr)
{
	ulong rchan_kva;
	ulong size_kva, count_kva;

	readmem(chan_addr, KVADDR, &rchan_kva, sizeof(void*),
		"stp_channel", FAULT_ON_ERROR);

	size_kva = rchan_kva + rchan_offsets.subbuf_size;
	readmem(size_kva, KVADDR, &chan.subbuf_size, sizeof(size_t),
		"stp_channel.subbuf_size", FAULT_ON_ERROR);

	count_kva = rchan_kva + rchan_offsets.n_subbufs;
	readmem(count_kva, KVADDR, &chan.n_subbufs, sizeof(size_t),
		"stp_channel.n_subbufs", FAULT_ON_ERROR);

	return rchan_kva;
}
/*
 * Fill per_cpu[cpu].buf from the target.
 *
 * The cpu-th pointer of the channel's buf array is read first; the start,
 * offset, subbufs_produced and padding members are then read from the
 * structure that pointer refers to.
 */
static void get_rchan_buf(int cpu, ulong rchan)
{
	struct fake_rchan_buf *dst = &per_cpu[cpu].buf;
	ulong slot_kva = rchan + rchan_offsets.buf + sizeof(void*) * cpu;
	ulong buf_kva;

	readmem(slot_kva, KVADDR, &buf_kva, sizeof(void*),
		"stp_channel.buf", FAULT_ON_ERROR);

	readmem(buf_kva + rchan_offsets.buf_start, KVADDR,
		&dst->start, sizeof(void*),
		"stp_channel.buf.start", FAULT_ON_ERROR);
	readmem(buf_kva + rchan_offsets.buf_offset, KVADDR,
		&dst->offset, sizeof(size_t),
		"stp_channel.buf.offset", FAULT_ON_ERROR);
	readmem(buf_kva + rchan_offsets.buf_subbufs_produced, KVADDR,
		&dst->subbufs_produced, sizeof(size_t),
		"stp_channel.buf.subbufs_produced", FAULT_ON_ERROR);
	readmem(buf_kva + rchan_offsets.buf_padding, KVADDR,
		&dst->padding, sizeof(size_t*),
		"stp_channel.buf.padding", FAULT_ON_ERROR);
}
static ulong get_symbol_addr(char *module, char *symbol)
{
int i;
struct syment *sym, *sym_end;
struct load_module *lm;
for (i = 0; i < kt->mods_installed; i++) {
lm = &st->load_modules[i];
if (!STREQ(module, lm->mod_name))
continue;
sym = lm->mod_symtable;
sym_end = lm->mod_symend;
for ( ; sym <= sym_end; sym++) {
if(STREQ(sym->name, symbol))
return ((ulong)sym->value);
}
error(WARNING, "'%s' doesn't have the symbol named
'%s'.\n",
module, symbol);
return STPLOG_NO_SYM;
}
error(WARNING, "'%s' is not loaded.\n", module);
return STPLOG_NO_MOD;
}
/*
 * Read the pointer stored at stp_utt_addr and return the address
 * sizeof(int) bytes past the value read.
 *
 * NOTE(review): this presumably relies on the channel pointer following a
 * leading int member in the pointed-to structure - confirm against the
 * SystemTap runtime this is used with.
 */
static ulong get_rchan_addr(ulong stp_utt_addr)
{
	ulong utt_kva;

	readmem(stp_utt_addr, KVADDR, &utt_kva, sizeof(void*),
		"stp_utt", FAULT_ON_ERROR);

	utt_kva += sizeof(int);
	return utt_kva;
}
/*
 * Compare the first two pointers of the channel's buf array.
 *
 * Returns 1 when both slots hold the same value, 0 otherwise.
 */
static int check_global_buffer(ulong rchan)
{
	const ulong slot0_kva = rchan + rchan_offsets.buf;
	ulong first, second;

	readmem(slot0_kva, KVADDR, &first, sizeof(void*),
		"stp_channel.buf", FAULT_ON_ERROR);
	readmem(slot0_kva + sizeof(void*), KVADDR, &second, sizeof(void*),
		"stp_channel.buf", FAULT_ON_ERROR);

	return (first == second) ? 1 : 0;
}
static int setup_global_data(char *module)
{
int i;
ulong stp_utt_addr = 0;
ulong stp_rchan_addr = 0;
ulong rchan;
stp_utt_addr = get_symbol_addr(module, "_stp_utt");
if (stp_utt_addr == STPLOG_NO_MOD) {
return -1;
} else if (stp_utt_addr == STPLOG_NO_SYM) {
stp_rchan_addr = get_symbol_addr(module, "_stp_chan");
if (stp_rchan_addr == STPLOG_NO_SYM)
return -1;
old_format = 1;
} else {
stp_rchan_addr = get_rchan_addr(stp_utt_addr);
}
rchan = get_rchan(stp_rchan_addr);
for (i = 0; i < kt->cpus; i++)
get_rchan_buf(i, rchan);
if (kt->cpus > 1) {
is_global = check_global_buffer(rchan);
}
return 0;
}
static int output_cpu_logs(char *module)
{
int i, max = 256;
struct per_cpu_data *pcd;
size_t n, idx, start, end, ready, len;
unsigned padding;
char fname[max + 1], *source;
DIR *dir;
/* check and create log directory */
dir = opendir(module);
if (dir) {
closedir(dir);
} else {
if (mkdir(module, S_IRWXU) < 0) {
error(WARNING, "cannot create log directory '%s\n'", module);
return -1;
}
}
/* allocate subbuf memory */
subbuf = malloc(chan.subbuf_size);
if (!subbuf) {
error(WARNING, "cannot allocate memory\n");
return -1;
}
fname[max] = '\0';
for (i = 0; i < kt->cpus; i++) {
pcd = &per_cpu[i];
ready = pcd->buf.subbufs_produced + (pcd->buf.offset ? 1 : 0);
if (ready > chan.n_subbufs) {
start = ready % chan.n_subbufs;
end = start + chan.n_subbufs;
} else {
start = 0;
end = ready;
}
/* print information */
fprintf(fp, "--- generating 'cpu%d' ---\n", i);
fprintf(fp, " subbufs ready on relayfs:%d\n", ready);
fprintf(fp, " n_subbufs:%d, read from:%d to:%d (offset:%d)\n\n",
chan.n_subbufs, start, end, pcd->buf.offset);
/* create log file */
snprintf(fname, max, "%s/cpu%d", module, i);
outfp = fopen(fname, "w");
if (!outfp) {
error(WARNING, "cannot create log file '%s'\n", fname);
return -1;
}
for (n = start; n < end; n++) {
/* read relayfs subbufs and write to log file */
idx = n % chan.n_subbufs;
source = pcd->buf.start + idx * chan.subbuf_size;
readmem((ulong)pcd->buf.padding + sizeof(padding) * idx,
KVADDR, &padding, sizeof(padding),
"padding", FAULT_ON_ERROR);
if (n == end - 1 && pcd->buf.offset) {
len = pcd->buf.offset;
} else {
len = chan.subbuf_size;
}
if (old_format == 1) {
source += sizeof(padding);
len -= sizeof(padding) + padding;
} else {
len -= padding;
}
if (len) {
readmem((ulong)source, KVADDR, subbuf, len,
"subbuf", FAULT_ON_ERROR);
if (fwrite(subbuf, len, 1, outfp) != 1) {
error(WARNING, "cannot write log data\n");
return -1;
}
}
}
fclose(outfp);
outfp = NULL;
if (is_global == 1)
break;
}
return 0;
}
/*
 * Run the two stages of the command for 'module': load the channel
 * description, then write the per-CPU log files. The second stage is
 * skipped when the first one fails; neither result is reported further.
 */
static void do_systemtaplog(char *module)
{
	if (setup_global_data(module) >= 0)
		(void)output_cpu_logs(module);
}
void cmd_systemtaplog(void)
{
int c;
char *module = NULL;
while ((c = getopt(argcnt, args, "m:")) != EOF) {
switch (c) {
case 'm':
module = optarg;
break;
default:
argerrs++;
break;
}
}
if (!module || argerrs)
cmd_usage(pc->curcmd, SYNOPSIS);
saved_env[0] = pc->main_loop_env[0];
if (setjmp(pc->main_loop_env))
goto EXIT;
do_systemtaplog(module);
EXIT:
cleanup();
pc->main_loop_env[0] = saved_env[0];
}
/*
 * Help text for the command, referenced from command_table.
 * The first entry is the command name and must match the name the command
 * is registered under ("stplog"); the list is terminated by NULL.
 * NOTE(review): presumably entries 1 and 2 are the one-line summary and the
 * synopsis expected by crash's help facility - confirm against crash docs.
 */
char *help_systemtaplog[] = {
"stplog",
"Retrieve SystemTap log data",
"-m module_name",
" Retrieve SystemTap's log data and write them to files.\n",
" -m module_name All valid SystemTap log data made by the trace",
" module which name is 'module_name' are written",
" into log files in `module_name` directory. The",
" name of each log file is cpu0, cpu1...cpuN. ",
" They have same format data as channel buffer",
" except padding(This command removes padding). ",
NULL,
};
/*
 * Constructor run when the shared object is loaded: resolve the member
 * offsets and, only if that succeeds, register the command table.
 */
void __attribute__ ((constructor)) systemtaplog_init(void)
{
	if (get_rchan_offsets() >= 0)
		register_extension(command_table);
}
/* Destructor run when the shared object is unloaded; nothing to do. */
void __attribute__ ((destructor)) systemtaplog_fini(void)
{
}