An incorrect stack trace is observed with the bt command:
crash> bt 1
PID: 1 TASK: c000000003714b80 CPU: 2 COMMAND: "systemd"
#0 [c0000000037735c0] _end at c0000000037154b0 (unreliable)
#1 [c000000003773770] __switch_to at c00000000001fa9c
#2 [c0000000037737d0] __schedule at c00000000112e4ec
#3 [c0000000037738b0] schedule at c00000000112ea80
...
Frame #0 of the stack trace is incorrect: a function address must not exceed _end.
The reason is that, since kernel commit cd52414d5a6c ("powerpc/64: ELFv2 use
minimal stack frames in int and switch frame sizes"), the offset of pt_regs
from sp has changed from STACK_FRAME_OVERHEAD, i.e. 112, to
STACK_SWITCH_FRAME_REGS, which is 112 for CONFIG_PPC64_ELF_ABI_V1 and 48 for
ABI_V2. As a result, nip is read from the wrong stack location when ABI_V2 is
enabled.
After the patch:
crash> bt 1
PID: 1 TASK: c000000003714b80 CPU: 2 COMMAND: "systemd"
#0 [c0000000037737d0] __schedule at c00000000112e4ec
#1 [c0000000037738b0] schedule at c00000000112ea80
...
Signed-off-by: Tao Liu <ltao(a)redhat.com>
Suggested-by: Aditya Gupta <adityag(a)linux.ibm.com>
---
defs.h | 3 ++-
netdump.c | 14 ++++++++++----
ppc64.c | 34 +++++++++++++++++++++++++++++++---
symbols.c | 5 +++--
4 files changed, 46 insertions(+), 10 deletions(-)
diff --git a/defs.h b/defs.h
index e2a9278..abde47c 100644
--- a/defs.h
+++ b/defs.h
@@ -4650,6 +4650,7 @@ struct efi_memory_desc_t {
#define MSR_PR_LG 14 /* Problem State / Privilege Level */
/* Used to find the user or kernel-mode frame*/
+#define STACK_SWITCH_FRAME_REGS 48
#define STACK_FRAME_OVERHEAD 112
#define EXCP_FRAME_MARKER 0x7265677368657265
@@ -5770,7 +5771,7 @@ void dump_offset_table(char *, ulong);
int is_elf_file(char *);
int is_kernel(char *);
int is_shared_object(char *);
-int file_elf_version(char *);
+int file_elf_header(char *, char *);
int is_system_map(char *);
int is_compressed_kernel(char *, char **);
int select_namelist(char *);
diff --git a/netdump.c b/netdump.c
index b4e2a5c..8fdc530 100644
--- a/netdump.c
+++ b/netdump.c
@@ -646,11 +646,11 @@ resize_elf_header(int fd, char *file, char **eheader_ptr, char **sect0_ptr,
}
/*
- * Return the e_version number of an ELF file
+ * Return the e_version or e_flags field (selected by member) of an ELF file
* (or -1 if its not readable ELF file)
*/
int
-file_elf_version(char *file)
+file_elf_header(char *file, char *member)
{
int fd, size;
Elf32_Ehdr *elf32;
@@ -680,11 +680,17 @@ file_elf_version(char *file)
(elf32->e_ident[EI_CLASS] == ELFCLASS32) &&
(elf32->e_ident[EI_DATA] == ELFDATA2LSB) &&
(elf32->e_ident[EI_VERSION] == EV_CURRENT)) {
- return (elf32->e_version);
+ if (STRNEQ(member, "e_version"))
+ return (elf32->e_version);
+ else if (STRNEQ(member, "e_flags"))
+ return (elf32->e_flags);
} else if (STRNEQ(elf64->e_ident, ELFMAG) &&
(elf64->e_ident[EI_CLASS] == ELFCLASS64) &&
(elf64->e_ident[EI_VERSION] == EV_CURRENT)) {
- return (elf64->e_version);
+ if (STRNEQ(member, "e_version"))
+ return (elf64->e_version);
+ else if (STRNEQ(member, "e_flags"))
+ return (elf64->e_flags);
}
return -1;
diff --git a/ppc64.c b/ppc64.c
index e8930a1..87f9c4c 100644
--- a/ppc64.c
+++ b/ppc64.c
@@ -72,6 +72,7 @@ static ulong pud_page_vaddr_l4(ulong pud);
static ulong pmd_page_vaddr_l4(ulong pmd);
static int is_opal_context(ulong sp, ulong nip);
void opalmsg(void);
+static bool is_ppc64_elf_abi_v2(void);
static int is_opal_context(ulong sp, ulong nip)
{
@@ -2813,6 +2814,25 @@ ppc64_get_sp(ulong task)
return sp;
}
+static bool
+is_ppc64_elf_abi_v2(void)
+{ /* Report whether the namelist's ELF e_flags value is 2; computed once, then cached. */
+ static bool ret = false; /* cached result; stays false unless e_flags is 2 */
+ static bool checked = false; /* set after the first lookup so the file is read once */
+
+ if (checked)
+ return ret;
+ switch (file_elf_header(pc->namelist, "e_flags")) { /* NOTE(review): compared unmasked - confirm no other e_flags bits can be set */
+ case 2: /* presumably ELF ABI v2 - TODO confirm against the ABI spec */
+ ret = true; /* no break here: control falls through to the break under case 1 */
+ case 1: /* presumably ELF ABI v1; ret keeps its initial false */
+ break;
+ default: /* any other value, including the -1 failure return of file_elf_header() */
+ error(WARNING, "Unstable elf_abi v1/v2 detection.\n");
+ }
+ checked = true;
+ return ret;
+}
/*
* get the SP and PC values for idle tasks.
@@ -2834,9 +2854,17 @@ get_ppc64_frame(struct bt_info *bt, ulong *getpc, ulong *getsp)
sp = ppc64_get_sp(task);
if (!INSTACK(sp, bt))
goto out;
- readmem(sp+STACK_FRAME_OVERHEAD, KVADDR, &regs,
- sizeof(struct ppc64_pt_regs),
- "PPC64 pt_regs", FAULT_ON_ERROR);
+
+ if (THIS_KERNEL_VERSION >= LINUX(6,2,0) && is_ppc64_elf_abi_v2()) {
+ readmem(sp+STACK_SWITCH_FRAME_REGS, KVADDR, &regs,
+ sizeof(struct ppc64_pt_regs),
+ "PPC64 pt_regs", FAULT_ON_ERROR);
+ } else {
+ readmem(sp+STACK_FRAME_OVERHEAD, KVADDR, &regs,
+ sizeof(struct ppc64_pt_regs),
+ "PPC64 pt_regs", FAULT_ON_ERROR);
+ }
+
ip = regs.nip;
closest = closest_symbol(ip);
if (STREQ(closest, ".__switch_to") || STREQ(closest, "__switch_to"))
{
diff --git a/symbols.c b/symbols.c
index d00fbd7..840e455 100644
--- a/symbols.c
+++ b/symbols.c
@@ -216,7 +216,7 @@ symtab_init(void)
* Check whether the namelist is a kerntypes file built by
* dwarfextract, which places a magic number in e_version.
*/
- if (file_elf_version(pc->namelist) == EV_DWARFEXTRACT)
+ if (file_elf_header(pc->namelist, "e_version") == EV_DWARFEXTRACT)
pc->flags |= KERNTYPES;
if (pc->flags & SYSMAP) {
@@ -13005,7 +13005,8 @@ load_module_symbols(char *modref, char *namelist, ulong base_addr)
error(FATAL, "cannot determine object file format: %s\n",
namelist);
- if (LKCD_KERNTYPES() && (file_elf_version(namelist) == EV_DWARFEXTRACT))
+ if (LKCD_KERNTYPES() &&
+ (file_elf_header(namelist, "e_version") == EV_DWARFEXTRACT))
goto add_symbols; /* no symbols, add the debuginfo */
if (!(bfd_get_file_flags(mbfd) & HAS_SYMS))
--
2.46.2