In addition, there are a few places in ppc64.c which also use
STACK_FRAME_OVERHEAD, mainly when dealing with hwintrstack and exception
frames, so they need to be updated as well. Also, the
EXCP_FRAME_MARKER used for identifying the exception frame is a little
outdated; it is better to fix that separately from this patch.
Please ignore this patch, and review the patchset [1] instead. The
mentioned issues above are all addressed in [1].
[1]:
On Fri, Sep 13, 2024 at 6:51 PM Tao Liu <ltao(a)redhat.com>
wrote:
>
> An erroneous stack trace from the bt command was observed:
>
> crash> bt 1
> PID: 1 TASK: c000000003714b80 CPU: 2 COMMAND: "systemd"
> #0 [c0000000037735c0] _end at c0000000037154b0 (unreliable)
> #1 [c000000003773770] __switch_to at c00000000001fa9c
> #2 [c0000000037737d0] __schedule at c00000000112e4ec
> #3 [c0000000037738b0] schedule at c00000000112ea80
> ...
>
> The #0 stack trace is incorrect, the function address shouldn't exceed _end.
> The reason is for kernel>=v6.2, the offset of pt_regs to sp changed from
> STACK_FRAME_OVERHEAD, i.e 112, to STACK_SWITCH_FRAME_REGS. For
> CONFIG_PPC64_ELF_ABI_V1, it's 112, for ABI_V2, it's 48. So the nip will read a
> wrong value from stack when ABI_V2 enabled.
>
> Determining whether ABI_V2 is enabled is tricky. This patch does it by checking
> the following:
>
> In arch/powerpc/include/asm/ppc_asm.h:
> #ifdef CONFIG_PPC64_ELF_ABI_V2
> #define STK_GOT 24
> #else
> #define STK_GOT 40
>
> In arch/powerpc/kernel/tm.S:
> _GLOBAL(tm_reclaim)
> mfcr r5
> mflr r0
> stw r5, 8(r1)
> std r0, 16(r1)
> std r2, STK_GOT(r1)
> ...
>
> So the approach used is to disassemble tm_reclaim and extract the STK_GOT
> value from the std instruction.
>
> After the patch:
> crash> bt 1
> PID: 1 TASK: c000000003714b80 CPU: 2 COMMAND: "systemd"
> #0 [c0000000037737d0] __schedule at c00000000112e4ec
> #1 [c0000000037738b0] schedule at c00000000112ea80
> ...
>
> Signed-off-by: Tao Liu <ltao(a)redhat.com>
> ---
>
> This patch is the follow-up of gdb stack unwinding support v7 discussion[1],
> where a "gdb bt" failure is observed. After applying the patch, "gdb bt" also
> works normally with gdb stack unwinding support v7.
>
> [1]: https://www.mail-archive.com/devel@lists.crash-utility.osci.io/msg01120.html
>
> ---
> defs.h | 1 +
> ppc64.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++++---
> 2 files changed, 57 insertions(+), 3 deletions(-)
>
> diff --git a/defs.h b/defs.h
> index 2231cb6..d5cb8cc 100644
> --- a/defs.h
> +++ b/defs.h
> @@ -4643,6 +4643,7 @@ struct efi_memory_desc_t {
> #define MSR_PR_LG 14 /* Problem State / Privilege Level */
> /* Used to find the user or kernel-mode frame*/
>
> +#define STACK_SWITCH_FRAME_REGS 48
> #define STACK_FRAME_OVERHEAD 112
> #define EXCP_FRAME_MARKER 0x7265677368657265
>
> diff --git a/ppc64.c b/ppc64.c
> index e8930a1..5e2595e 100644
> --- a/ppc64.c
> +++ b/ppc64.c
> @@ -2813,6 +2813,51 @@ ppc64_get_sp(ulong task)
> return sp;
> }
>
> +static bool
> +is_ppc64_elf_abi_v2(void)
> +{
> + char buf1[BUFSIZE];
> + char *pos1, *pos2;
> + int errflag = 0;
> + ulong stk_got = 0;
> + static bool ret = false;
> + static bool checked = false;
> +
> + if (checked == true || !symbol_exists("tm_reclaim"))
> + return ret;
> +
> + sprintf(buf1, "x/16i tm_reclaim");
> + open_tmpfile();
> + if (!gdb_pass_through(buf1, pc->tmpfile, GNU_RETURN_ON_ERROR))
> + goto out;
> + checked = true;
> + rewind(pc->tmpfile);
> + while (fgets(buf1, BUFSIZE, pc->tmpfile)) {
> + // "std r2, STK_GOT(r1)" is expected
> + if (strstr(buf1, "std") &&
> + strstr(buf1, "(r1)") &&
> + (pos1 = strstr(buf1, "r2,"))) {
> + pos1 += strlen("r2,");
> + for (pos2 = pos1; *pos2 != '\0' && *pos2 != '('; pos2++);
> + *pos2 = '\0';
> + stk_got = stol(pos1, RETURN_ON_ERROR|QUIET, &errflag);
> + break;
> + }
> + }
> +
> + if (!errflag) {
> + switch (stk_got) {
> + case 24:
> + ret = true;
> + case 40:
> + goto out;
> + }
> + }
> + error(WARNING, "Unstable elf_abi v1/v2 detection.\n");
> +out:
> + close_tmpfile();
> + return ret;
> +}
>
> /*
> * get the SP and PC values for idle tasks.
> @@ -2834,9 +2879,17 @@ get_ppc64_frame(struct bt_info *bt, ulong *getpc, ulong *getsp)
> sp = ppc64_get_sp(task);
> if (!INSTACK(sp, bt))
> goto out;
> - readmem(sp+STACK_FRAME_OVERHEAD, KVADDR, &regs,
> - sizeof(struct ppc64_pt_regs),
> - "PPC64 pt_regs", FAULT_ON_ERROR);
> +
> + if (THIS_KERNEL_VERSION >= LINUX(6,2,0) && is_ppc64_elf_abi_v2()) {
> + readmem(sp+STACK_SWITCH_FRAME_REGS, KVADDR, &regs,
> + sizeof(struct ppc64_pt_regs),
> + "PPC64 pt_regs", FAULT_ON_ERROR);
> + } else {
> + readmem(sp+STACK_FRAME_OVERHEAD, KVADDR, &regs,
> + sizeof(struct ppc64_pt_regs),
> + "PPC64 pt_regs", FAULT_ON_ERROR);
> + }
> +
> ip = regs.nip;
> closest = closest_symbol(ip);
> if (STREQ(closest, ".__switch_to") || STREQ(closest, "__switch_to")) {
> --
> 2.40.1
>