An incorrect stack trace was observed from the bt command:
crash> bt 1
PID: 1 TASK: c000000003714b80 CPU: 2 COMMAND: "systemd"
#0 [c0000000037735c0] _end at c0000000037154b0 (unreliable)
#1 [c000000003773770] __switch_to at c00000000001fa9c
#2 [c0000000037737d0] __schedule at c00000000112e4ec
#3 [c0000000037738b0] schedule at c00000000112ea80
...
The #0 frame is incorrect: a function address should never exceed _end.
The reason is that for kernels >= v6.2, the offset of pt_regs from sp changed
from STACK_FRAME_OVERHEAD, i.e. 112, to STACK_SWITCH_FRAME_REGS. For
CONFIG_PPC64_ELF_ABI_V1 it is 112; for ABI_V2 it is 48. So nip is read from
the wrong stack location when ABI_V2 is enabled.
Determining whether ABI_V2 is enabled is tricky. This patch does it by
checking the following:
In arch/powerpc/include/asm/ppc_asm.h:
#ifdef CONFIG_PPC64_ELF_ABI_V2
#define STK_GOT 24
#else
#define STK_GOT 40
In arch/powerpc/kernel/tm.S:
_GLOBAL(tm_reclaim)
mfcr r5
mflr r0
stw r5, 8(r1)
std r0, 16(r1)
std r2, STK_GOT(r1)
...
So the approach is to disassemble tm_reclaim and extract the STK_GOT value
from the "std r2, STK_GOT(r1)" instruction.
After the patch:
crash> bt 1
PID: 1 TASK: c000000003714b80 CPU: 2 COMMAND: "systemd"
#0 [c0000000037737d0] __schedule at c00000000112e4ec
#1 [c0000000037738b0] schedule at c00000000112ea80
...
Signed-off-by: Tao Liu <ltao(a)redhat.com>
---
defs.h | 1 +
ppc64.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++---
2 files changed, 58 insertions(+), 3 deletions(-)
diff --git a/defs.h b/defs.h
index 2231cb6..d5cb8cc 100644
--- a/defs.h
+++ b/defs.h
@@ -4643,6 +4643,7 @@ struct efi_memory_desc_t {
#define MSR_PR_LG 14 /* Problem State / Privilege Level */
/* Used to find the user or kernel-mode frame*/
+#define STACK_SWITCH_FRAME_REGS 48
#define STACK_FRAME_OVERHEAD 112
#define EXCP_FRAME_MARKER 0x7265677368657265
diff --git a/ppc64.c b/ppc64.c
index e8930a1..6e5f155 100644
--- a/ppc64.c
+++ b/ppc64.c
@@ -72,6 +72,7 @@ static ulong pud_page_vaddr_l4(ulong pud);
static ulong pmd_page_vaddr_l4(ulong pmd);
static int is_opal_context(ulong sp, ulong nip);
void opalmsg(void);
+static bool is_ppc64_elf_abi_v2(void);
static int is_opal_context(ulong sp, ulong nip)
{
@@ -2813,6 +2814,51 @@ ppc64_get_sp(ulong task)
return sp;
}
+/*
+ * Detect whether the kernel was built with CONFIG_PPC64_ELF_ABI_V2.
+ *
+ * The first 16 instructions of tm_reclaim are disassembled through gdb and
+ * searched for the "std r2,STK_GOT(r1)" store.  STK_GOT is 24 when
+ * CONFIG_PPC64_ELF_ABI_V2 is defined and 40 otherwise, so the displacement
+ * of that store identifies the ABI.
+ *
+ * Returns true only if the displacement is 24.  If the tm_reclaim symbol
+ * does not exist, or nothing conclusive was found, false is returned.
+ * Once the disassembly has succeeded, the result is cached in function
+ * statics and returned directly on later calls.
+ */
+static bool
+is_ppc64_elf_abi_v2(void)
+{
+	char buf1[BUFSIZE];
+	char *pos1, *pos2;
+	int errflag = 0;
+	ulong stk_got = 0;
+	static bool ret = false;
+	static bool checked = false;
+
+	if (checked == true || !symbol_exists("tm_reclaim"))
+		return ret;
+
+	sprintf(buf1, "x/16i tm_reclaim");
+	open_tmpfile();
+	if (!gdb_pass_through(buf1, pc->tmpfile, GNU_RETURN_ON_ERROR))
+		goto out;
+	/* Only a successful disassembly is cached; a gdb failure is retried. */
+	checked = true;
+	rewind(pc->tmpfile);
+	while (fgets(buf1, BUFSIZE, pc->tmpfile)) {
+		/* "std r2, STK_GOT(r1)" is expected */
+		if (!strstr(buf1, "std") || !strstr(buf1, "(r1)"))
+			continue;
+		if (!(pos1 = strstr(buf1, "r2,")))
+			continue;
+		/* "r12," and "r22," also contain "r2,": not the TOC register. */
+		if (pos1 > buf1 && pos1[-1] >= '0' && pos1[-1] <= '9')
+			continue;
+		pos1 += strlen("r2,");
+		/* Cut the string at '(' so only the displacement remains. */
+		pos2 = pos1;
+		while (*pos2 != '\0' && *pos2 != '(')
+			pos2++;
+		*pos2 = '\0';
+		/* NOTE(review): stol() presumably sets errflag on a parse error. */
+		stk_got = stol(pos1, RETURN_ON_ERROR|QUIET, &errflag);
+		break;
+	}
+
+	if (!errflag) {
+		switch (stk_got) {
+		case 24:	/* STK_GOT for CONFIG_PPC64_ELF_ABI_V2 */
+			ret = true;
+			/* fallthrough */
+		case 40:	/* STK_GOT otherwise: ret stays false */
+			goto out;
+		default:
+			break;
+		}
+	}
+	error(WARNING, "Unstable elf_abi v1/v2 detection.\n");
+out:
+	close_tmpfile();
+	return ret;
+}
/*
* get the SP and PC values for idle tasks.
@@ -2834,9 +2880,17 @@ get_ppc64_frame(struct bt_info *bt, ulong *getpc, ulong *getsp)
sp = ppc64_get_sp(task);
if (!INSTACK(sp, bt))
goto out;
- readmem(sp+STACK_FRAME_OVERHEAD, KVADDR, &regs,
- sizeof(struct ppc64_pt_regs),
- "PPC64 pt_regs", FAULT_ON_ERROR);
+
+ if (THIS_KERNEL_VERSION >= LINUX(6,2,0) && is_ppc64_elf_abi_v2()) {
+ readmem(sp+STACK_SWITCH_FRAME_REGS, KVADDR, &regs,
+ sizeof(struct ppc64_pt_regs),
+ "PPC64 pt_regs", FAULT_ON_ERROR);
+ } else {
+ readmem(sp+STACK_FRAME_OVERHEAD, KVADDR, &regs,
+ sizeof(struct ppc64_pt_regs),
+ "PPC64 pt_regs", FAULT_ON_ERROR);
+ }
+
ip = regs.nip;
closest = closest_symbol(ip);
if (STREQ(closest, ".__switch_to") || STREQ(closest, "__switch_to"))
{
--
2.40.1