Re: [Crash-utility] [PATCH] xen: Add support for dom0 with Linux kernel 3.19 and newer
by Dave Anderson
----- Original Message -----
>
> Linux kernel commit 054954eb051f35e74b75a566a96fe756015352c8
> (xen: switch to linear virtual mapped sparse p2m list), which
> appeared in 3.19, introduced linear virtual mapped sparse p2m
> list. If readmem() reads p2m then it access this list using
> physical addresses. Sadly, VMA to physical address translation
> in crash requires access to p2m list. This means that we have
> a chicken and egg problem. In general this issue must be solved
> by introducing some changes in libxl, Linux kernel and crash
> (I have added this task to my long TODO list). However, in dom0
> case we can use crash_xen_info_t.dom0_pfn_to_mfn_frame_list_list
> which is available out of the box. So, let's use it and make
> at least some users happy.
>
> Signed-off-by: Daniel Kiper <daniel.kiper(a)oracle.com>
Hi Daniel,
Can you help me out with a concise changelog entry? As I understand
it, the crash utility has not supported Xen dom0 and domU dumpfiles
since the referenced 3.19 commit, and this patch resurrects support
for dom0 dumpfiles only. Are there issues with live system analysis
as well? And without the patch, what is the final, fatal error message?
Thanks,
Dave
8 years, 10 months
[PATCH] xen: Add support for dom0 with Linux kernel 3.19 and newer
by Daniel Kiper
Linux kernel commit 054954eb051f35e74b75a566a96fe756015352c8
(xen: switch to linear virtual mapped sparse p2m list), which
appeared in 3.19, introduced linear virtual mapped sparse p2m
list. If readmem() reads p2m then it accesses this list using
physical addresses. Sadly, VMA to physical address translation
in crash requires access to p2m list. This means that we have
a chicken and egg problem. In general this issue must be solved
by introducing some changes in libxl, Linux kernel and crash
(I have added this task to my long TODO list). However, in dom0
case we can use crash_xen_info_t.dom0_pfn_to_mfn_frame_list_list
which is available out of the box. So, let's use it and make
at least some users happy.
Signed-off-by: Daniel Kiper <daniel.kiper(a)oracle.com>
---
kernel.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++++++------
xen_dom0.c | 3 ++-
xen_dom0.h | 2 ++
3 files changed, 77 insertions(+), 9 deletions(-)
diff --git a/kernel.c b/kernel.c
index 5ce2fb9..b07149e 100644
--- a/kernel.c
+++ b/kernel.c
@@ -17,6 +17,7 @@
#include "defs.h"
#include "xen_hyper_defs.h"
+#include "xen_dom0.h"
#include <elf.h>
#include <libgen.h>
#include <ctype.h>
@@ -61,6 +62,7 @@ static int restore_stack(struct bt_info *);
static ulong __xen_m2p(ulonglong, ulong);
static ulong __xen_pvops_m2p_l2(ulonglong, ulong);
static ulong __xen_pvops_m2p_l3(ulonglong, ulong);
+static ulong __xen_pvops_m2p_hyper(ulonglong, ulong);
static int search_mapping_page(ulong, ulong *, ulong *, ulong *);
static void read_in_kernel_config_err(int, char *);
static void BUG_bytes_init(void);
@@ -175,6 +177,9 @@ kernel_init()
&kt->pvops_xen.p2m_mid_missing);
get_symbol_data("p2m_missing", sizeof(ulong),
&kt->pvops_xen.p2m_missing);
+ } else if (symbol_exists("xen_p2m_addr")) {
+ if (!XEN_CORE_DUMPFILE())
+ error(FATAL, "p2m array in new format is unreadable.");
} else {
kt->pvops_xen.p2m_top_entries = get_array_length("p2m_top", NULL, 0);
kt->pvops_xen.p2m_top = symbol_value("p2m_top");
@@ -5850,12 +5855,14 @@ no_cpu_flags:
else
fprintf(fp, "\n");
- fprintf(fp, " pvops_xen:\n");
- fprintf(fp, " p2m_top: %lx\n", kt->pvops_xen.p2m_top);
- fprintf(fp, " p2m_top_entries: %d\n", kt->pvops_xen.p2m_top_entries);
- if (symbol_exists("p2m_mid_missing"))
- fprintf(fp, " p2m_mid_missing: %lx\n", kt->pvops_xen.p2m_mid_missing);
- fprintf(fp, " p2m_missing: %lx\n", kt->pvops_xen.p2m_missing);
+ if (!symbol_exists("xen_p2m_addr")) {
+ fprintf(fp, " pvops_xen:\n");
+ fprintf(fp, " p2m_top: %lx\n", kt->pvops_xen.p2m_top);
+ fprintf(fp, " p2m_top_entries: %d\n", kt->pvops_xen.p2m_top_entries);
+ if (symbol_exists("p2m_mid_missing"))
+ fprintf(fp, " p2m_mid_missing: %lx\n", kt->pvops_xen.p2m_mid_missing);
+ fprintf(fp, " p2m_missing: %lx\n", kt->pvops_xen.p2m_missing);
+ }
}
/*
@@ -8873,6 +8880,12 @@ __xen_m2p(ulonglong machine, ulong mfn)
ulong c, i, kmfn, mapping, p, pfn;
ulong start, end;
ulong *mp = (ulong *)kt->m2p_page;
+ int memtype;
+
+ if (XEN_CORE_DUMPFILE() && symbol_exists("xen_p2m_addr"))
+ memtype = PHYSADDR;
+ else
+ memtype = KVADDR;
/*
* Check the FIFO cache first.
@@ -8883,13 +8896,19 @@ __xen_m2p(ulonglong machine, ulong mfn)
(mfn <= kt->p2m_mapping_cache[c].end))) {
if (kt->p2m_mapping_cache[c].mapping != kt->last_mapping_read) {
- if (!readmem(kt->p2m_mapping_cache[c].mapping, KVADDR,
+ if (memtype == PHYSADDR)
+ pc->curcmd_flags |= XEN_MACHINE_ADDR;
+
+ if (!readmem(kt->p2m_mapping_cache[c].mapping, memtype,
mp, PAGESIZE(), "phys_to_machine_mapping page (cached)",
RETURN_ON_ERROR))
error(FATAL, "cannot access "
"phys_to_machine_mapping page\n");
else
kt->last_mapping_read = kt->p2m_mapping_cache[c].mapping;
+
+ if (memtype == PHYSADDR)
+ pc->curcmd_flags &= ~XEN_MACHINE_ADDR;
} else
kt->p2m_page_cache_hits++;
@@ -8919,11 +8938,13 @@ __xen_m2p(ulonglong machine, ulong mfn)
if (PVOPS_XEN()) {
/*
* The machine address was not cached, so search from the
- * beginning of the p2m_top array, caching the contiguous
+ * beginning of the p2m tree/array, caching the contiguous
* range containing the found machine address.
*/
if (symbol_exists("p2m_mid_missing"))
pfn = __xen_pvops_m2p_l3(machine, mfn);
+ else if (symbol_exists("xen_p2m_addr"))
+ pfn = __xen_pvops_m2p_hyper(machine, mfn);
else
pfn = __xen_pvops_m2p_l2(machine, mfn);
@@ -9088,6 +9109,50 @@ __xen_pvops_m2p_l3(ulonglong machine, ulong mfn)
return XEN_MFN_NOT_FOUND;
}
+static ulong
+__xen_pvops_m2p_hyper(ulonglong machine, ulong mfn)
+{
+ ulong c, end, i, mapping, p, pfn, start;
+
+ for (p = 0;
+ p < xkd->p2m_frames;
+ ++p) {
+
+ mapping = PTOB(xkd->p2m_mfn_frame_list[p]);
+
+ if (mapping != kt->last_mapping_read) {
+ pc->curcmd_flags |= XEN_MACHINE_ADDR;
+ if (!readmem(mapping, PHYSADDR, (void *)kt->m2p_page,
+ PAGESIZE(), "p2m_mfn_frame_list page", RETURN_ON_ERROR))
+ error(FATAL, "cannot access p2m_mfn_frame_list[] page\n");
+
+ pc->curcmd_flags &= ~XEN_MACHINE_ADDR;
+ kt->last_mapping_read = mapping;
+ }
+
+ kt->p2m_pages_searched++;
+
+ if (search_mapping_page(mfn, &i, &start, &end)) {
+ pfn = p * XEN_PFNS_PER_PAGE + i;
+ if (CRASHDEBUG(1))
+ console("pages: %d mfn: %lx (%llx) p: %ld"
+ " i: %ld pfn: %lx (%llx)\n", p + 1, mfn, machine,
+ p, i, pfn, XEN_PFN_TO_PSEUDO(pfn));
+
+ c = kt->p2m_cache_index;
+ kt->p2m_mapping_cache[c].start = start;
+ kt->p2m_mapping_cache[c].end = end;
+ kt->p2m_mapping_cache[c].mapping = mapping;
+ kt->p2m_mapping_cache[c].pfn = p * XEN_PFNS_PER_PAGE;
+ kt->p2m_cache_index = (c+1) % P2M_MAPPING_CACHE;
+
+ return pfn;
+ }
+ }
+
+ return XEN_MFN_NOT_FOUND;
+}
+
/*
* Search for an mfn in the current mapping page, and if found,
* determine the range of contiguous mfns that it's contained
diff --git a/xen_dom0.c b/xen_dom0.c
index 6abb443..6770fd4 100644
--- a/xen_dom0.c
+++ b/xen_dom0.c
@@ -20,7 +20,8 @@
#include "xen_dom0.h"
static struct xen_kdump_data xen_kdump_data = { 0 };
-static struct xen_kdump_data *xkd = &xen_kdump_data;
+
+struct xen_kdump_data *xkd = &xen_kdump_data;
void
dump_xen_kdump_data(FILE *fp)
diff --git a/xen_dom0.h b/xen_dom0.h
index 4f0ff53..963c75c 100644
--- a/xen_dom0.h
+++ b/xen_dom0.h
@@ -68,6 +68,8 @@ struct xen_kdump_data {
#define P2M_FAILURE ((physaddr_t)(0xffffffffffffffffLL))
+extern struct xen_kdump_data *xkd;
+
void dump_xen_kdump_data(FILE *);
struct xen_kdump_data *get_xen_kdump_data(void);
--
1.7.10.4
8 years, 10 months
crash: invalid structure member offset with current kernels
by Sebastian Ott
Hi,
Crash fails to start with current (4.4+) kernels. The following patch
fixes this.
Regards,
Sebastian
----->8
>From ddd809812705ba36796c6750d12a12838b4106ec Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott(a)linux.vnet.ibm.com>
Date: Tue, 19 Jan 2016 14:14:13 +0100
Subject: [PATCH] Fix invalid structure member offset.
Struct module was changed by kernel commit
7523e4dc50 "module: use a structure to encapsulate layout."
Fix the offsets to handle the following crash error:
crash: invalid structure member offset: module_init_text_size
FILE: symbols.c LINE: 1668 FUNCTION: store_module_symbols_v2()
[../crash/crash] error trace: 10062e92 => 10109812 => 1014f16e => 101813ac
101813ac: OFFSET_verify+124
1014f16e: store_module_symbols_v2+2182
10109812: module_init+4386
10062e92: main_loop+410
Signed-off-by: Sebastian Ott <sebott(a)linux.vnet.ibm.com>
---
kernel.c | 49 +++++++++++++++++++++++++++++++++++++++----------
1 file changed, 39 insertions(+), 10 deletions(-)
diff --git a/kernel.c b/kernel.c
index 2d4188a..5ce2fb9 100644
--- a/kernel.c
+++ b/kernel.c
@@ -3291,16 +3291,45 @@ module_init(void)
MEMBER_OFFSET_INIT(module_gpl_syms, "module", "gpl_syms");
MEMBER_OFFSET_INIT(module_num_gpl_syms, "module",
"num_gpl_syms");
- MEMBER_OFFSET_INIT(module_module_core, "module",
- "module_core");
- MEMBER_OFFSET_INIT(module_core_size, "module",
- "core_size");
- MEMBER_OFFSET_INIT(module_core_text_size, "module",
- "core_text_size");
- MEMBER_OFFSET_INIT(module_module_init, "module", "module_init");
- MEMBER_OFFSET_INIT(module_init_size, "module", "init_size");
- MEMBER_OFFSET_INIT(module_init_text_size, "module",
- "init_text_size");
+
+ if (MEMBER_EXISTS("module", "module_core")) {
+ MEMBER_OFFSET_INIT(module_core_size, "module",
+ "core_size");
+ MEMBER_OFFSET_INIT(module_init_size, "module",
+ "init_size");
+
+ MEMBER_OFFSET_INIT(module_core_text_size, "module",
+ "core_text_size");
+ MEMBER_OFFSET_INIT(module_init_text_size, "module",
+ "init_text_size");
+
+ MEMBER_OFFSET_INIT(module_module_core, "module",
+ "module_core");
+ MEMBER_OFFSET_INIT(module_module_init, "module",
+ "module_init");
+ } else {
+ ASSIGN_OFFSET(module_core_size) =
+ MEMBER_OFFSET("module", "core_layout") +
+ MEMBER_OFFSET("module_layout", "size");
+ ASSIGN_OFFSET(module_init_size) =
+ MEMBER_OFFSET("module", "init_layout") +
+ MEMBER_OFFSET("module_layout", "size");
+
+ ASSIGN_OFFSET(module_core_text_size) =
+ MEMBER_OFFSET("module", "core_layout") +
+ MEMBER_OFFSET("module_layout", "text_size");
+ ASSIGN_OFFSET(module_init_text_size) =
+ MEMBER_OFFSET("module", "init_layout") +
+ MEMBER_OFFSET("module_layout", "text_size");
+
+ ASSIGN_OFFSET(module_module_core) =
+ MEMBER_OFFSET("module", "core_layout") +
+ MEMBER_OFFSET("module_layout", "base");
+ ASSIGN_OFFSET(module_module_init) =
+ MEMBER_OFFSET("module", "init_layout") +
+ MEMBER_OFFSET("module_layout", "base");
+ }
+
MEMBER_OFFSET_INIT(module_percpu, "module", "percpu");
/*
--
2.3.0
8 years, 10 months
failed to do crash analysis after insert custom module in the first kernel.
by Liu, Jianbo (James)
Hi Experts:
Here is an issue where the crash tool fails to work:
0, it occurs on a powerpc p2041 (e500mc) board; the p2020 (e500v2) does not have this issue.
kernel version: 2.6.34
crash tool version: 6.1.4
1, if a custom kernel module is inserted before triggering kdump, the created vmcore cannot be analyzed.
2, if no custom kernel module is inserted before triggering kdump, crash works well on the created vmcore file.
Do you have any comments on this scenario?
The error log from running the crash tool is listed here for reference:
/*********************************************************************/
/coredump> ./crash.p4 vmlinux.host.20160108nokgdbserial vmcore-1970-01-01
crash.p4 6.1.4
Copyright (C) 2002-2013 Red Hat, Inc.
Copyright (C) 2004, 2005, 2006, 2010 IBM Corporation
Copyright (C) 1999-2006 Hewlett-Packard Co
Copyright (C) 2005, 2006, 2011, 2012 Fujitsu Limited
Copyright (C) 2006, 2007 VA Linux Systems Japan K.K.
Copyright (C) 2005, 2011 NEC Corporation
Copyright (C) 1999, 2002, 2007 Silicon Graphics, Inc.
Copyright (C) 1999, 2000, 2001, 2002 Mission Critical Linux, Inc.
This program is free software, covered by the GNU General Public License,
and you are welcome to change it and/or distribute copies of it under
certain conditions. Enter "help copying" to see the conditions.
This program has absolutely no warranty. Enter "help warranty" for details.
GNU gdb (GDB) 7.3.1
Copyright (C) 2011 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law. Type "show copying"
and "show warranty" for details.
This GDB was configured as "powerpc-wrs-linux"...
please wait... (gathering module symbol data)
crash.p4: invalid structure member offset: module_core_size
FILE: kernel.c LINE: 2976 FUNCTION: module_init()
[./crash.p4] error trace: 1006b48c => 100deff8 => 1011ac7c => 10067740
10067740: OFFSET_verify.part.27+76
1011ac7c: OFFSET_verify+76
100deff8: module_init+1576
1006b48c: main_loop+236
root@RCU-1:/mnt/userdir/coredump>
/*********************************************************************/
Best Regards,
James
8 years, 10 months
[BUG] ps -t pid
by John Blackwood
Hi Dave,
I just wanted to report a minor issue.
The -t option on the 'ps' command will return invalid values for newer
kernels.
I believe that this has to do with newer kernels around the 3.17
timeframe having the 'start_time' field in the task_struct structure
changed from being a 'struct timespec' to being a 'u64' value.
I'm using a 4.1 based kernel. (The 3.16 based kernel that I also tried
it on does not exhibit this issue.)
For example:
crash> ps -t 1
PID: 1 TASK: ffff88046d688000 CPU: 8 COMMAND: "systemd"
RUN TIME: 213503982286 days, 08:28:04
START TIME: 126000000
UTIME: 486536482
STIME: 8484707060
I'm afraid that I do not possess the skill to suggest a fix,
but I thought that I would mention it.
Thanks for reading this.
8 years, 10 months
答复: if it's normal to get into kdb shell when run crash
by Liu, Jianbo (James)
Hi Kumar:
Thanks for your detailed updates.
Very sorry for not stating the problem clearly and causing confusion.
After looking into it further following your suggestion, this issue turns out to be unrelated to the crash tool.
Thanks very much for your time.
Best Regards,
James
Liu Jianbo | WIND RIVER | Senior Engineer - Technical Support
Tel 86 28 65318098 | Cell 86 13558641588 | Fax 86 28 65319983
Manage your support account:
https://support.windriver.com
Ask a Technical Question
https://ask.windriver.com
Submit a Service Request
https://windriver.force.com/support
________________________________
发件人: crash-utility-bounces(a)redhat.com [crash-utility-bounces(a)redhat.com] 代表 Buland Kumar Singh [6b65726e656c(a)gmail.com]
发送时间: 2015年12月30日 22:13
收件人: Discussion list for crash utility usage, maintenance and development
主题: Re: [Crash-utility] if it's normal to get into kdb shell when run crash
On 30 December 2015 at 16:34, Liu, Jianbo (James) <James.Liu(a)windriver.com<mailto:James.Liu@windriver.com>> wrote:
Hi Experts:
Sorry for disturbing you again.
When I run crash to debug a vmcore and the kernel has kgdb enabled, it goes directly into the kdb shell rather than the crash shell, although I can get into crash by executing two "go" commands in the kdb shell. I am not sure if this is normal; do you have any comments on it?
When I disable kgdb in the kernel, this issue does not occur. Is there some compatibility problem between kgdb and the crash tool?
Thanks for your time and happy new year!!
/****************************************/
root@localhost:~/coredump>sh testcoredump.sh; sh startcoredump.sh
segment[0].mem:0x2000000 memsz:9183232
segment[1].mem:0x28c2000 memsz:65536
segment[2].mem:0x28d2000 memsz:4096
segment[3].mem:0x28d3000 memsz:28672
segment[4].mem:0x2ff5000 memsz:45056
SysRq : Trigger a crash
Entering kdb (current=0xdb17f900, pid 822) on processor 1 Oops: (null)
due to oops @ 0xc0348f64
NIP: c0348f64 LR: c034974c CTR: c0348f50
REGS: db18ddc0 TRAP: 0300 Not tainted (2.6.34.15-grsec-WR4.3.0.0_cgl)
MSR: 00021202 <ME,CE,DE> CR: 20242444 XER: 00000000
DEAR: 00000000, ESR: 00800000
TASK = db17f900[822] 'sh' THREAD: db18c000 CPU: 1
GPR00: 00000001 db18de70 db17f900 00000063 00000000 ffffffff c035e930 00000000
GPR08: 00008000 00000000 00000054 118c6000 20242444 100b84b8 100b0828 100b0904
GPR16: 00000000 00000000 00000000 1006d2d0 00000000 100cc220 1006b530 00000000
GPR24: 00000001 c0780d24 00029202 c0780e08 c0770000 00000000 c08578a4 00000063
NIP [c0348f64] sysrq_handle_crash+0x14/0x20
LR [c034974c] __handle_sysrq+0xcc/0x1d0
Call Trace:
[db18de70] [c0349734] __handle_sysrq+0xb4/0x1d0 (unreliable)
[db18dea0] [c03498ac] write_sysrq_trigger+0x5c/0x70
[db18deb0] [c01662f0] proc_reg_write+0x80/0xc0
[db18dee0] [c010ec50] vfs_write+0xc0/0x170
[db18df00] [c010ee80] sys_write+0x50/0x110
[db18df40] [c0011d58] ret_from_syscall+0x0/0x4
--- Exception: c01 at 0xff16ba4
LR = 0xfebad5c
Instruction dump:
[1]more>
39600003 7d605f9e 556b103a 7c005b78 98090003 4e800020 60000000 38000001
3d20c07a 90092170 7c0004ac 39200000 <98090000> 4e800020 60000000 3803ffd0
[1]kdb>
[1]kdb> go
Catastrophic error detected
kdb_continue_catastrophic=0, type go a second time if you really want to continue
[1]kdb> go
Catastrophic error detected
kdb_continue_catastrophic=0, attempting to continue
Oops: Kernel access of bad area, sig: 11 [#1]
PREEMPT SMP NR_CPUS=4 LTT NESTING LEVEL : 0
P2041 RDB
last sysfs file: /sys/devices/system/cpu/cpu3/crash_notes
Modules linked in:
NIP: c0348f64 LR: c034974c CTR: c0348f50
REGS: db18ddc0 TRAP: 0300 Not tainted (2.6.34.15-grsec-WR4.3.0.0_cgl)
MSR: 00021202 <ME,CE,DE> CR: 20242444 XER: 00000000
DEAR: 00000000, ESR: 00800000
TASK = db17f900[822] 'sh' THREAD: db18c000 CPU: 1
GPR00: 00000001 db18de70 db17f900 00000063 00000000 ffffffff c035e930 00000000
GPR08: 00008000 00000000 00000054 118c6000 20242444 100b84b8 100b0828 100b0904
GPR16: 00000000 00000000 00000000 1006d2d0 00000000 100cc220 1006b530 00000000
GPR24: 00000001 c0780d24 00029202 c0780e08 c0770000 00000000 c08578a4 00000063
NIP [c0348f64] sysrq_handle_crash+0x14/0x20
LR [c034974c] __handle_sysrq+0xcc/0x1d0
Call Trace:
[db18de70] [c0349734] __handle_sysrq+0xb4/0x1d0 (unreliable)
[db18dea0] [c03498ac] write_sysrq_trigger+0x5c/0x70
[db18deb0] [c01662f0] proc_reg_write+0x80/0xc0
[db18dee0] [c010ec50] vfs_write+0xc0/0x170
[db18df00] [c010ee80] sys_write+0x50/0x110
[db18df40] [c0011d58] ret_from_syscall+0x0/0x4
--- Exception: c01 at 0xff16ba4
LR = 0xfebad5c
Instruction dump:
39600003 7d605f9e 556b103a 7c005b78 98090003 4e800020 60000000 38000001
3d20c07a 90092170 7c0004ac 39200000 <98090000> 4e800020 60000000 3803ffd0
Sending IPI to other cpus...
Bye!
Using P2041 RDB machine description
......
/**********************/
Best Regards,
James
Liu Jianbo | WIND RIVER | Senior Engineer - Technical Support
Tel 86 28 65318098 | Cell 86 13558641588 | Fax 86 28 65319983
Manage your support account:
https://support.windriver.com
Ask a Technical Question
https://ask.windriver.com
Submit a Service Request
https://windriver.force.com/support
--
Crash-utility mailing list
Crash-utility(a)redhat.com<mailto:Crash-utility@redhat.com>
https://www.redhat.com/mailman/listinfo/crash-utility
Hello James,
crash and kgdb are two completely different debuggers to diagnose the kernel
panic issues. You can not access or start the crash from the context of kgdb.
[**crash utility:**]
There are two ways to invoke crash utility[[1]];
1) Typical postmortem debugging: (after panic)
# crash /path/to/vmcore /path/to/vmlinux
o Kernel object file and memory image are supplied, respectively.
2) Live memory debugging:
# crash
o Pre-defined directories are searched for proper vmlinux
o Version string matched to the running kernel (/proc/version)
**OR**
# crash /path/to/vmlinux
[**kdb/kgdb:**][[2]]
1) You can put the target system in debug mode using SysRq event (g).
# echo g > /proc/sysrq-trigger
Eg:
# echo g > /proc/sysrq-trigger
[ 181.300854] SysRq : DEBUG
Entering kdb (current=0xffff8800766ebc60, pid 2347) on processor 1 due to Keyboard Entry
[1]kdb> summary
sysname Linux
release 3.10.84
version #1 SMP Tue Jul 28 19:53:37 IST 2015
machine x86_64
nodename localhost.localdomain
domainname (none)
ccversion CCVERSION
date 2015-12-30 13:51:06 tz_minuteswest 0
uptime 00:03
load avg 0.11 0.11 0.04
MemTotal: 2050340 kB
MemFree: 1707332 kB
Buffers: 764 kB
[1]kdb> dmesg | grep DMI:
<7>[ 0.000000] DMI: Red Hat KVM, BIOS 0.5.1 01/01/2007
[1]kdb> bt
Stack traceback for pid 2347
0xffff8800766ebc60 2347 624 1 1 R 0xffff8800766ec240 *bash
ffff88007bd61e70 0000000000000018
Call Trace:
<#DB> <<EOE>> [<ffffffff810dd84c>] ? sysrq_handle_dbg+0x2c/0x50
[<ffffffff8134b312>] ? __handle_sysrq+0xa2/0x170
[<ffffffff8134b80a>] ? write_sysrq_trigger+0x4a/0x50
[<ffffffff811fb61d>] ? proc_reg_write+0x3d/0x80
[<ffffffff811993bd>] ? vfs_write+0xbd/0x1e0
[<ffffffff81199d89>] ? SyS_write+0x49/0xa0
[<ffffffff815a27c7>] ? tracesys+0xdd/0xe2
[1]kdb> rd
ax: 0000000000000001 bx: ffffffff818a4560 cx: ffff88007fd0eec0
dx: 0000000000000000 si: 0000000000000000 di: 0000000000000067
bp: ffff88007bd61e70 sp: ffff88007bd61e70 r8: ffffffff81b9069c
r9: 0000000000000248 r10: 0000000000000247 r11: 0000000000000003
r12: 0000000000000067 r13: 0000000000000246 r14: 0000000000000007
r15: 0000000000000000 ip: ffffffff810dd7f4 flags: 00000002 cs: 00000010
ss: 00000018 ds: 00000018 es: 00000018 fs: 00000018 gs: 00000018
[1]kdb> go
[ 251.553055] systemd-journald[2372]: File /run/log/journal/71b46828837d4b1cb7f04385c86e7626/system.journal corrupted or uncleanly shut down, renaming and replacing.
# <<<---{ root user }
[Note:] kdb command "go" is used to continue kernel execution. In above case
it will take you back to bash shell.
2) You can crash the target system using SysRq event (c):
# echo c > /proc/sysrq-trigger
Eg:
# echo c > /proc/sysrq-trigger
[ 28.963227] SysRq : Trigger a crash
[ 28.964025] BUG: unable to handle kernel NULL pointer dereference at (null)
[ 28.964025] IP: [<ffffffff8134abb6>] sysrq_handle_crash+0x16/0x20
[ 28.964025] Oops: 0002 [#1] SMP
Entering kdb (current=0xffff880077275a90, pid 2352) on processor 1 Oops: (null)
due to oops @ 0xffffffff8134abb6
?dCPU: 1 PID: 2352 Comm: bash Not tainted 3.10.84 #1
?dHardware name: Red Hat KVM, BIOS 0.5.1 01/01/2007
?dtask: ffff880077275a90 ti: ffff880076494000 task.ti: ffff880076494000
?dRIP: 0010:[<ffffffff8134abb6>] [<ffffffff8134abb6>] sysrq_handle_crash+0x16/0x20
?dRSP: 0018:ffff880076495e80 EFLAGS: 00010096
?dRAX: 000000000000000f RBX: ffffffff8190dbc0 RCX: ffff88007fd0eec0
?dRDX: 0000000000000000 RSI: ffff88007fd0d368 RDI: 0000000000000063
?dRBP: ffff880076495e80 R08: ffffffff81b9069c R09: 0000000000000245
?dR10: 0000000000000244 R11: 0000000000000003 R12: 0000000000000063
?dR13: 0000000000000246 R14: 0000000000000007 R15: 0000000000000000
?dFS: 00007f53aec0c740(0000) GS:ffff88007fd00000(0000) knlGS:0000000000000000
?dCS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
?dCR2: 0000000000000000 CR3: 00000000764bd000 CR4: 00000000000006e0
?dDR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
?dDR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
?dStack:
ffff880076495eb8 ffffffff8134b312 0000000000000002 00007f53aec0a000
ffff880076495f50 0000000000000002 0000000000000000 ffff880076495ed8
ffffffff8134b80a 00007f53aec0a000 ffff88007661c000 ffff880076495ef8
?dCall Trace:
more>
Only 'q' or 'Q' are processed at more prompt, input ignored
?d [<ffffffff8134b312>] __handle_sysrq+0xa2/0x170
?d [<ffffffff8134b80a>] write_sysrq_trigger+0x4a/0x50
?d [<ffffffff811fb61d>] proc_reg_write+0x3d/0x80
?d [<ffffffff811993bd>] vfs_write+0xbd/0x1e0
?d [<ffffffff81199d89>] SyS_write+0x49/0xa0
?d [<ffffffff815a27c7>] tracesys+0xdd/0xe2
?dCode: eb 9b 45 01 f4 45 39 65 34 75 e5 4c 89 ef e8 02 f8 ff ff eb db 0f 1f 44 00 00 55 c7 05 20 17 54 00 01 00 00 00 48 89 e5 0f ae f8 <c6> 04 25 00 00 00 00 01 5d c3 0f 1f 44 00 00 55 31 c0 c7 05 7e
[1]kdb> summary
sysname Linux
release 3.10.84
version #1 SMP Tue Jul 28 19:53:37 IST 2015
machine x86_64
nodename localhost.localdomain
domainname (none)
ccversion CCVERSION
date 2015-12-30 14:07:49 tz_minuteswest 0
uptime 00:01
load avg 0.95 0.21 0.06
MemTotal: 2050340 kB
MemFree: 1775224 kB
Buffers: 764 kB
[1]kdb> bt
Stack traceback for pid 2352
0xffff880077275a90 2352 629 1 1 R 0xffff880077276070 *bash
ffff880076495e80 0000000000000018 ffff880076495eb8 ffffffff8134b312
0000000000000002 00007f53aec0a000 ffff880076495f50 0000000000000002
0000000000000000 ffff880076495ed8 ffffffff8134b80a 00007f53aec0a000
Call Trace:
[<ffffffff8134b312>] ? __handle_sysrq+0xa2/0x170
[<ffffffff8134b80a>] ? write_sysrq_trigger+0x4a/0x50
[<ffffffff811fb61d>] ? proc_reg_write+0x3d/0x80
[<ffffffff811993bd>] ? vfs_write+0xbd/0x1e0
[<ffffffff81199d89>] ? SyS_write+0x49/0xa0
[<ffffffff815a27c7>] ? tracesys+0xdd/0xe2
[1]kdb> rd
ax: 000000000000000f bx: ffffffff8190dbc0 cx: ffff88007fd0eec0
dx: 0000000000000000 si: 0000000000000000 di: 0000000000000063
bp: ffff880076495e80 sp: ffff880076495e80 r8: ffffffff81b9069c
r9: 0000000000000245 r10: 0000000000000244 r11: 0000000000000003
r12: 0000000000000063 r13: 0000000000000246 r14: 0000000000000007
r15: 0000000000000000 ip: ffffffff8134abb6 flags: 00010096 cs: 00000010
ss: 00000018 ds: 00000018 es: 00000018 fs: 00000018 gs: 00000018
[1]kdb> go
Catastrophic error detected
kdb_continue_catastrophic=0, type go a second time if you really want to continue
[1]kdb> go
Catastrophic error detected
kdb_continue_catastrophic=0, attempting to continue
[ 28.964025] Modules linked in: ip6t_rpfilter ip6t_REJECT ipt_REJECT xt_conntrack ebtable_nat ebtable_broute bridge stp llc ebtable_filter ebtables ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_security ip6table_raw ip6table_filter ip6_tables iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle iptable_security iptable_raw iptable_filter snd_hda_intel snd_hda_codec snd_hwdep snd_seq snd_seq_device snd_pcm snd_page_alloc snd_timer snd serio_raw pcspkr i2c_piix4 virtio_balloon soundcore nfsd auth_rpcgss nfs_acl lockd sunrpc ip_tables xfs libcrc32c ata_generic pata_acpi ata_piix cirrus syscopyarea sysfillrect sysimgblt drm_kms_helper ttm virtio_net virtio_blk drm libata virtio_pci virtio_ring virtio i2c_core floppy dm_mirror dm_region_hash dm_log dm_mod
[ 112.090263] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2007
[ 112.090263] task: ffff880077275a90 ti: ffff880076494000 task.ti: ffff880076494000
[ 112.090263] RIP: 0010:[<ffffffff8134abb6>] [<ffffffff8134abb6>] sysrq_handle_crash+0x16/0x20
[ 112.090263] RSP: 0018:ffff880076495e80 EFLAGS: 00010096
[ 112.090263] RAX: 000000000000000f RBX: ffffffff8190dbc0 RCX: ffff88007fd0eec0
[ 112.090263] RDX: 0000000000000000 RSI: ffff88007fd0d368 RDI: 0000000000000063
[ 112.090263] RBP: ffff880076495e80 R08: ffffffff81b9069c R09: 0000000000000245
[ 112.090263] R10: 0000000000000244 R11: 0000000000000003 R12: 0000000000000063
[ 112.090263] R13: 0000000000000246 R14: 0000000000000007 R15: 0000000000000000
[ 112.090263] FS: 00007f53aec0c740(0000) GS:ffff88007fd00000(0000) knlGS:0000000000000000
[ 112.090263] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 112.090263] CR2: 0000000000000000 CR3: 00000000764bd000 CR4: 00000000000006e0
[ 112.090263] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 112.104034] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
[ 112.104034] Stack:
[ 112.104034] ffff880076495eb8 ffffffff8134b312 0000000000000002 00007f53aec0a000
[ 112.104034] ffff880076495f50 0000000000000002 0000000000000000 ffff880076495ed8
[ 112.104034] ffffffff8134b80a 00007f53aec0a000 ffff88007661c000 ffff880076495ef8
[ 112.104034] Call Trace:
[ 112.104034] [<ffffffff8134b312>] __handle_sysrq+0xa2/0x170
[ 112.104034] [<ffffffff8134b80a>] write_sysrq_trigger+0x4a/0x50
[ 112.104034] [<ffffffff811fb61d>] proc_reg_write+0x3d/0x80
[ 112.104034] [<ffffffff811993bd>] vfs_write+0xbd/0x1e0
[ 112.104034] [<ffffffff81199d89>] SyS_write+0x49/0xa0
[ 112.104034] [<ffffffff815a27c7>] tracesys+0xdd/0xe2
[ 112.104034] Code: eb 9b 45 01 f4 45 39 65 34 75 e5 4c 89 ef e8 02 f8 ff ff eb db 0f 1f 44 00 00 55 c7 05 20 17 54 00 01 00 00 00 48 89 e5 0f ae f8 <c6> 04 25 00 00 00 00 01 5d c3 0f 1f 44 00 00 55 31 c0 c7 05 7e
[ 112.104034] RIP [<ffffffff8134abb6>] sysrq_handle_crash+0x16/0x20
[ 112.104034] RSP <ffff880076495e80>
[ 112.104034] CR2: 0000000000000000
[ 112.104034] ---[ end trace 4484e44d0167e21c ]---
[ 112.104034] Kernel panic - not syncing: Fatal exception
PANIC: Fatal exception
Entering kdb (current=0xffff880077275a90, pid 2352) on processor 1 due to Keyboard Entry
[Note:] kdb command "go" is used to continue kernel execution. In above case
it will not take you back to bash shell. :)
A few questions for you:
[Q:1] What exactly are you trying to achieve?
[Q:2] What is the exact issue?
Regards,
BKS
[[1]] http://people.redhat.com/anderson/crash_whitepaper/#INVOCATION
[[2]] https://www.kernel.org/pub/linux/kernel/people/jwessel/kdb/
8 years, 10 months
Extensions: Dump log buffer of Intel Processor Trace
by Takao Indoh
Hi Dave,
The attached files are extension module to dump log buffer of Intel
Processor Trace from vmcore. Please consider placing this in the
extensions page.
[Overview of PT]
PT (Processor Trace) is a new feature of the Intel "Broadwell" CPU; it
captures information about program execution flow.[1]
Once Intel PT is enabled, the events which change program flow, like
branch instructions, exceptions, interrupts, traps and so on, are
logged in memory. This is very useful for debugging because we can
see the detailed behavior of software.
[About extension]
This extension retrieves the PT log buffer from a vmcore and saves it as a
file. The 'ptdump' command can be used once this extension is loaded.
crash> extend extensions/ptdump.so
./extensions/ptdump.so: shared object loaded
crash> ptdump output_dir
[0] buffer dump: dump.0
[0] packet decode: decode.0
[1] buffer dump: dump.1
[1] packet decode: decode.1
(snipped)
In this case, the output_dir directory is created, and then dump.N and
decode.N files are created in the directory for each cpu (N is the cpu
number).
dump.N: raw data of PT
decode.N: result of PT packet decoder
dump.N is binary data and it is not human readable. decode.N is
generated by fastdecode[2], which is PT packet dumper created by Andi
Kleen. It's useful for checking what kinds of packets are included in
dump.N. I'll update extension using PT library(libipt[3]) to generate
more useful file for investigation.
[Build extension]
To build the module from the top-level crash-<version> directory, enter:
$ tar xvf ptdump-1.0.0.tar.gz
$ mv ptdump-1.0.0/* extensions
$ make extensions
[1] https://software.intel.com/en-us/blogs/2013/09/18/processor-tracing
[2] https://github.com/andikleen/simple-pt
[3] https://github.com/01org/processor-trace
Thanks,
Takao Indoh
8 years, 10 months
Crash tool support for 3level-64k and 4level-4K page tables on ARM64
by Azriel Samson
Hi David,
I recently tested makedumpfile on ARM64 with 3level-4K, 3level-64K and
4level-4K page tables.
I tried to analyze the dumpfile with the crash tool.
I noticed that with 3level-64K and 4level-4K I got the following error
when I tried to run "kmem -f" :
kmem: invalid kernel virtual address: ffff************ type: "first
list entry"
I did not see the above error with 3level-4K. Also, on all three
combinations, other commands (bt,dmesg etc) ran fine.
Are there any plans to add support to the crash tool for these
additional kernel configurations?
--
Thanks,
Azriel Samson
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project
8 years, 10 months
[PATCH] arm64: support compat user mode prstatus
by Andrew Jones
compat user mode prstatus already just works, almost. The missing
pieces are that pt_regs->sp and pt_regs->fp are not in their usual
locations. We need to pull them out of their architecturally mapped
general purpose registers.
---
arm64.c | 42 ++++++++++++++++++++++++++++++++++--------
1 file changed, 34 insertions(+), 8 deletions(-)
diff --git a/arm64.c b/arm64.c
index 183e768498fe8..3a82d20cdd465 100644
--- a/arm64.c
+++ b/arm64.c
@@ -993,6 +993,25 @@ arm64_stackframe_init(void)
#define PSR_MODE_EL3h 0x0000000d
#define PSR_MODE_MASK 0x0000000f
+/* Architecturally defined mapping between AArch32 and AArch64 registers */
+#define compat_usr(x) regs[(x)]
+#define compat_fp regs[11]
+#define compat_sp regs[13]
+#define compat_lr regs[14]
+
+#define user_mode(ptregs) \
+ (((ptregs)->pstate & PSR_MODE_MASK) == PSR_MODE_EL0t)
+
+#define compat_user_mode(ptregs) \
+ (((ptregs)->pstate & (PSR_MODE32_BIT | PSR_MODE_MASK)) == \
+ (PSR_MODE32_BIT | PSR_MODE_EL0t))
+
+#define user_stack_pointer(ptregs) \
+ (!compat_user_mode(ptregs) ? (ptregs)->sp : (ptregs)->compat_sp)
+
+#define user_frame_pointer(ptregs) \
+ (!compat_user_mode(ptregs) ? (ptregs)->regs[29] : (ptregs)->compat_fp)
+
static int
arm64_is_kernel_exception_frame(struct bt_info *bt, ulong stkptr)
{
@@ -1340,21 +1359,28 @@ arm64_get_dumpfile_stackframe(struct bt_info *bt, struct arm64_stackframe *frame
struct machine_specific *ms = machdep->machspec;
struct arm64_pt_regs *ptregs;
- if (!ms->panic_task_regs ||
- (!ms->panic_task_regs[bt->tc->processor].sp &&
- !ms->panic_task_regs[bt->tc->processor].pc)) {
+ if (!ms->panic_task_regs || !ms->panic_task_regs[bt->tc->processor].pc) {
bt->flags |= BT_REGS_NOT_FOUND;
return FALSE;
}
ptregs = &ms->panic_task_regs[bt->tc->processor];
- frame->sp = ptregs->sp;
frame->pc = ptregs->pc;
- frame->fp = ptregs->regs[29];
-
- if (!is_kernel_text(frame->pc) &&
- in_user_stack(bt->tc->task, frame->sp))
+ if (user_mode(ptregs)) {
+ frame->sp = user_stack_pointer(ptregs);
+ frame->fp = user_frame_pointer(ptregs);
+ if (is_kernel_text(frame->pc) ||
+ !in_user_stack(bt->tc->task, frame->sp)) {
+ error(WARNING, "Corrupt prstatus? pstate=0x%lx, but no user frame found\n",
+ ptregs->pstate);
+ bt->flags |= BT_REGS_NOT_FOUND;
+ return FALSE;
+ }
bt->flags |= BT_USER_SPACE;
+ } else {
+ frame->sp = ptregs->sp;
+ frame->fp = ptregs->regs[29];
+ }
if (arm64_in_kdump_text(bt, frame))
bt->flags |= BT_KDUMP_ADJUST;
--
1.8.3.1
8 years, 10 months
[PATCH] x86_64: Fix that Particular kvaddr is converted to wrong paddr (RHEL6 x86_64)
by Nakajima Akira
I didn't check XEN HYPER MODE because I don't have XEN.
If we need a similar statement "if (kvaddr < MODULES_END)" there,
please add it inside "if (XEN_HYPER_MODE())" (1859@x86_64_kvtop).
From ed300b74998e0923313e4fd14b9a41e305942b44 Mon Sep 17 00:00:00 2001
From: Nakajima Akira <nakajima.akira(a)nttcom.co.jp>
Date: Tue, 22 Dec 2015 15:46:42 +0900
Subject: [PATCH] Fix that particular kvaddr is converted to wrong paddr
BUG INFO
Particular kvaddr is converted to wrong paddr.
You can see this bug on RHEL6_x86_64. (at present only RHEL6)
(I checked RHEL5, RHEL7, Fedora21, Fedora23)
from /proc/kallsyms
ffffffffff6008c0 D __jiffies
/////////// wrong ///////////
crash> vtop ffffffffff6008c0
VIRTUAL PHYSICAL
ffffffffff6008c0 7f6008c0
PAGE PHYSICAL MAPPING INDEX CNT FLAGS
ffffea00000623b8 1c11000 0 0 1 20000000000400 reserved
crash> rd ffffffffff6008c0
ffffffffff6008c0: 0000000000000000 ........
/////////// correct ///////////
crash> vtop ffffffffff6008c0
VIRTUAL PHYSICAL
ffffffffff6008c0 1c118c0
PAGE PHYSICAL MAPPING INDEX CNT FLAGS
ffffea00000623b8 1c11000 0 0 1 20000000000400 reserved
crash> rd ffffffffff6008c0
ffffffffff6008c0: 00000000ffffe43a :.......
Reported-by: Nakajima Akira <nakajima.akira(a)nttcom.co.jp>
Signed-off-by: Nakajima Akira <nakajima.akira(a)nttcom.co.jp>
---
x86_64.c | 28 +++++++++++++++-------------
1 files changed, 15 insertions(+), 13 deletions(-)
diff --git a/x86_64.c b/x86_64.c
index ff6fdd5..dab4d43 100644
--- a/x86_64.c
+++ b/x86_64.c
@@ -1872,19 +1872,21 @@ x86_64_kvtop(struct task_context *tc, ulong kvaddr, physaddr_t *paddr, int verbo
fprintf(fp, "PAGE DIRECTORY: %lx\n", *pml4);
}
} else {
- if (!vt->vmalloc_start) {
- *paddr = x86_64_VTOP(kvaddr);
- return TRUE;
- }
-
- if (!IS_VMALLOC_ADDR(kvaddr)) {
- *paddr = x86_64_VTOP(kvaddr);
- if (!verbose)
- return TRUE;
- }
-
- if (XEN() && (kt->xen_flags & WRITABLE_PAGE_TABLES))
- return (x86_64_kvtop_xen_wpt(tc, kvaddr, paddr, verbose));
+ if (kvaddr < MODULES_END) {
+ if (!vt->vmalloc_start) {
+ *paddr = x86_64_VTOP(kvaddr);
+ return TRUE;
+ }
+
+ if (!IS_VMALLOC_ADDR(kvaddr)) {
+ *paddr = x86_64_VTOP(kvaddr);
+ if (!verbose)
+ return TRUE;
+ }
+
+ if (XEN() && (kt->xen_flags & WRITABLE_PAGE_TABLES))
+ return (x86_64_kvtop_xen_wpt(tc, kvaddr, paddr, verbose));
+ }
/*
* pgd = pgd_offset_k(addr);
--
1.7.1
8 years, 10 months