On Fri, May 31, 2024 at 5:33 PM <devel-request@lists.crash-utility.osci.io> wrote:
Date: Fri, 31 May 2024 17:19:31 +0800
From: Tao Liu <ltao@redhat.com>
Subject: [Crash-utility] [PATCH v4 08/16] ppc64: correct gdb
        passthroughs by implementing machdep->get_cpu_reg
To: devel@lists.crash-utility.osci.io
Cc: Mahesh J Salgaonkar <mahesh@linux.ibm.com>, "Naveen N . Rao"
        <naveen.n.rao@linux.vnet.ibm.com>, Lianbo Jiang <lijiang@redhat.com>,
        Alexey Makhalov <alexey.makhalov@broadcom.com>
Message-ID: <20240531091939.97828-9-ltao@redhat.com>
Content-Type: text/plain; charset=UTF-8

From: Aditya Gupta <adityag@linux.ibm.com>

Currently, gdb passthroughs of 'bt', 'frame', 'up', 'down', 'info
locals' don't work. This is due to gdb not knowing the register values to
unwind the stack frames

Every gdb passthrough goes through `gdb_interface`. And then, gdb expects
`crash_target::fetch_registers` to give it the register values, which is
dependent on `machdep->get_cpu_reg` to read the register values for
specific architecture.

                                      ----------------------------
           gdb passthrough (eg. "bt") |                          |
   crash   -------------------------> |                          |
                                      |      gdb_interface       |
                                      |                          |
                                      |                          |
                                      |  ----------------------  |
                 fetch_registers      |  |                    |  |
crash_target<-------------------------+--|        gdb         |  |
            --------------------------+->|                    |  |
              Registers (SP,NIP, etc.)|  |                    |  |
                                      |  |                    |  |
                                      |  ----------------------  |
                                      ----------------------------

Implement `machdep->get_cpu_reg` on PPC64, so that crash provides the
register values to gdb to unwind stack frames properly

With these changes, on powerpc, 'bt' command output in gdb mode, will look
like this:

    gdb> bt
    #0  0xc0000000002a53e8 in crash_setup_regs (oldregs=<optimized out>, newregs=0xc00000000486f8d8) at ./arch/powerpc/include/asm/kexec.h:69
    #1  __crash_kexec (regs=<optimized out>) at kernel/kexec_core.c:974
    #2  0xc000000000168918 in panic (fmt=<optimized out>) at kernel/panic.c:358
    #3  0xc000000000b735f8 in sysrq_handle_crash (key=<optimized out>) at drivers/tty/sysrq.c:155
    #4  0xc000000000b742cc in __handle_sysrq (key=key@entry=99, check_mask=check_mask@entry=false) at drivers/tty/sysrq.c:602
    #5  0xc000000000b7506c in write_sysrq_trigger (file=<optimized out>, buf=<optimized out>, count=2, ppos=<optimized out>) at drivers/tty/sysrq.c:1163
    #6  0xc00000000069a7bc in pde_write (ppos=<optimized out>, count=<optimized out>, buf=<optimized out>, file=<optimized out>, pde=0xc000000009ed3a80) at fs/proc/inode.c:340
    #7  proc_reg_write (file=<optimized out>, buf=<optimized out>, count=<optimized out>, ppos=<optimized out>) at fs/proc/inode.c:352
    #8  0xc0000000005b3bbc in vfs_write (file=file@entry=0xc00000009dda7d00, buf=buf@entry=0xebcfc7c6040 <error: Cannot access memory at address 0xebcfc7c6040>, count=count@entry=2, pos=pos@entry=0xc00000000486fda0) at fs/read_write.c:582

instead of earlier output without this patch:

    gdb> bt
    #0  <unavailable> in ?? ()
    Backtrace stopped: previous frame identical to this frame (corrupt stack?)

Also, 'get_dumpfile_regs' has been introduced to get registers from
multiple supported vmcore formats. Correspondingly a flag 'BT_NO_PRINT_REGS'
has been introduced to tell helper functions to get registers, to not
print registers with every call to backtrace in gdb.

 Note: This feature to support GDB unwinding doesn't support live debugging

Cc: Sourabh Jain <sourabhjain@linux.ibm.com>
Cc: Hari Bathini <hbathini@linux.ibm.com>
Cc: Mahesh J Salgaonkar <mahesh@linux.ibm.com>
Cc: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Cc: Lianbo Jiang <lijiang@redhat.com>
Cc: HAGIO KAZUHITO(萩尾 一仁) <k-hagio-ab@nec.com>
Cc: Tao Liu <ltao@redhat.com>
Cc: Alexey Makhalov <alexey.makhalov@broadcom.com>
Improved-by: Tao Liu <ltao@redhat.com>
Signed-off-by: Aditya Gupta <adityag@linux.ibm.com>
---
 defs.h   | 123 +++++++++++++++++++++++++++++++++++++++++
 kernel.c |  33 +++++++++++
 ppc64.c  | 163 ++++++++++++++++++++++++++++++++++++++++++++++++++++---
 3 files changed, 312 insertions(+), 7 deletions(-)

diff --git a/defs.h b/defs.h
index c0e6a29..8450aea 100644
--- a/defs.h
+++ b/defs.h
@@ -982,6 +982,7 @@ struct bt_info {
        ulong eframe_ip;
        ulong radix;
        ulong *cpumask;
+       bool need_free;
 
Comment on it later.
 
 };

 #define STACK_OFFSET_TYPE(OFF) \
@@ -6096,6 +6097,7 @@ int load_module_symbols_helper(char *);
 void unlink_module(struct load_module *);
 int check_specified_module_tree(char *, char *);
 int is_system_call(char *, ulong);
+void get_dumpfile_regs(struct bt_info*, ulong*, ulong*);
 void generic_dump_irq(int);
 void generic_get_irq_affinity(int);
 void generic_show_interrupts(int, ulong *);
@@ -6195,6 +6197,7 @@ ulong cpu_map_addr(const char *type);
 #define BT_REGS_NOT_FOUND (0x4000000000000ULL)
 #define BT_OVERFLOW_STACK (0x8000000000000ULL)
 #define BT_SKIP_IDLE     (0x10000000000000ULL)
+#define BT_NO_PRINT_REGS (0x20000000000000ULL)
 #define BT_SYMBOL_OFFSET   (BT_SYMBOLIC_ARGS)

 #define BT_REF_HEXVAL         (0x1)
@@ -8052,6 +8055,126 @@ enum x86_64_regnum {
         LAST_REGNUM
 };

+/*
+ * Register numbers to make crash_target->fetch_registers()
+ * ---> machdep->get_current_task_reg() work properly.
+ *
+ *  These register numbers and names are given according to output of
+ *  `rs6000_register_name`, because that is what was being used by
+ *  crash_target::fetch_registers in case of PPC64
+ */
+enum ppc64_regnum {
+       PPC64_R0_REGNUM = 0,
+       PPC64_R1_REGNUM,
+       PPC64_R2_REGNUM,
+       PPC64_R3_REGNUM,
+       PPC64_R4_REGNUM,
+       PPC64_R5_REGNUM,
+       PPC64_R6_REGNUM,
+       PPC64_R7_REGNUM,
+       PPC64_R8_REGNUM,
+       PPC64_R9_REGNUM,
+       PPC64_R10_REGNUM,
+       PPC64_R11_REGNUM,
+       PPC64_R12_REGNUM,
+       PPC64_R13_REGNUM,
+       PPC64_R14_REGNUM,
+       PPC64_R15_REGNUM,
+       PPC64_R16_REGNUM,
+       PPC64_R17_REGNUM,
+       PPC64_R18_REGNUM,
+       PPC64_R19_REGNUM,
+       PPC64_R20_REGNUM,
+       PPC64_R21_REGNUM,
+       PPC64_R22_REGNUM,
+       PPC64_R23_REGNUM,
+       PPC64_R24_REGNUM,
+       PPC64_R25_REGNUM,
+       PPC64_R26_REGNUM,
+       PPC64_R27_REGNUM,
+       PPC64_R28_REGNUM,
+       PPC64_R29_REGNUM,
+       PPC64_R30_REGNUM,
+       PPC64_R31_REGNUM,
+
+       PPC64_F0_REGNUM = 32,
+       PPC64_F1_REGNUM,
+       PPC64_F2_REGNUM,
+       PPC64_F3_REGNUM,
+       PPC64_F4_REGNUM,
+       PPC64_F5_REGNUM,
+       PPC64_F6_REGNUM,
+       PPC64_F7_REGNUM,
+       PPC64_F8_REGNUM,
+       PPC64_F9_REGNUM,
+       PPC64_F10_REGNUM,
+       PPC64_F11_REGNUM,
+       PPC64_F12_REGNUM,
+       PPC64_F13_REGNUM,
+       PPC64_F14_REGNUM,
+       PPC64_F15_REGNUM,
+       PPC64_F16_REGNUM,
+       PPC64_F17_REGNUM,
+       PPC64_F18_REGNUM,
+       PPC64_F19_REGNUM,
+       PPC64_F20_REGNUM,
+       PPC64_F21_REGNUM,
+       PPC64_F22_REGNUM,
+       PPC64_F23_REGNUM,
+       PPC64_F24_REGNUM,
+       PPC64_F25_REGNUM,
+       PPC64_F26_REGNUM,
+       PPC64_F27_REGNUM,
+       PPC64_F28_REGNUM,
+       PPC64_F29_REGNUM,
+       PPC64_F30_REGNUM,
+       PPC64_F31_REGNUM,
+
+       PPC64_PC_REGNUM = 64,
+       PPC64_MSR_REGNUM = 65,
+       PPC64_CR_REGNUM = 66,
+       PPC64_LR_REGNUM = 67,
+       PPC64_CTR_REGNUM = 68,
+       PPC64_XER_REGNUM = 69,
+       PPC64_FPSCR_REGNUM = 70,
+
+       PPC64_VR0_REGNUM = 106,
+       PPC64_VR1_REGNUM,
+       PPC64_VR2_REGNUM,
+       PPC64_VR3_REGNUM,
+       PPC64_VR4_REGNUM,
+       PPC64_VR5_REGNUM,
+       PPC64_VR6_REGNUM,
+       PPC64_VR7_REGNUM,
+       PPC64_VR8_REGNUM,
+       PPC64_VR9_REGNUM,
+       PPC64_VR10_REGNUM,
+       PPC64_VR11_REGNUM,
+       PPC64_VR12_REGNUM,
+       PPC64_VR13_REGNUM,
+       PPC64_VR14_REGNUM,
+       PPC64_VR15_REGNUM,
+       PPC64_VR16_REGNUM,
+       PPC64_VR17_REGNUM,
+       PPC64_VR18_REGNUM,
+       PPC64_VR19_REGNUM,
+       PPC64_VR20_REGNUM,
+       PPC64_VR21_REGNUM,
+       PPC64_VR22_REGNUM,
+       PPC64_VR23_REGNUM,
+       PPC64_VR24_REGNUM,
+       PPC64_VR25_REGNUM,
+       PPC64_VR26_REGNUM,
+       PPC64_VR27_REGNUM,
+       PPC64_VR28_REGNUM,
+       PPC64_VR29_REGNUM,
+       PPC64_VR30_REGNUM,
+       PPC64_VR31_REGNUM,
+
+       PPC64_VSCR_REGNUM = 138,
+       PPC64_VRSAVE_REGNU = 139
+};
+
 
Is it possible to move the regnum definitions to ppc64.c? I did not see them used in other modules. But, not sure if there are any compilation issues, I haven't tried it.

 /* crash_target.c */
 extern int gdb_change_thread_context ();

diff --git a/kernel.c b/kernel.c
index 78b7b1e..3730c55 100644
--- a/kernel.c
+++ b/kernel.c
@@ -3533,6 +3533,39 @@ get_lkcd_regs(struct bt_info *bt, ulong *eip, ulong *esp)
        machdep->get_stack_frame(bt, eip, esp);
 }

+void
+get_dumpfile_regs(struct bt_info *bt, ulong *eip, ulong *esp)
+{
+       bt->flags |= BT_NO_PRINT_REGS;
+
+       if (NETDUMP_DUMPFILE())
+               get_netdump_regs(bt, eip, esp);
+       else if (KDUMP_DUMPFILE())
+               get_kdump_regs(bt, eip, esp);
+       else if (DISKDUMP_DUMPFILE())
+               get_diskdump_regs(bt, eip, esp);
+       else if (KVMDUMP_DUMPFILE())
+               get_kvmdump_regs(bt, eip, esp);
+       else if (LKCD_DUMPFILE())
+               get_lkcd_regs(bt, eip, esp);
+       else if (XENDUMP_DUMPFILE())
+               get_xendump_regs(bt, eip, esp);
+       else if (SADUMP_DUMPFILE())
+               get_sadump_regs(bt, eip, esp);
+       else if (VMSS_DUMPFILE())
+               get_vmware_vmss_regs(bt, eip, esp);
+       else if (REMOTE_PAUSED()) {
+               if (!is_task_active(bt->task) || !get_remote_regs(bt, eip, esp))
+                       machdep->get_stack_frame(bt, eip, esp);
+       } else
+               machdep->get_stack_frame(bt, eip, esp);
+
+       bt->flags &= ~BT_NO_PRINT_REGS;
+
+       bt->instptr = *eip;
+       bt->stkptr = *esp;
+}
+

 /*
  *  Store the head of the kernel module list for future use.
diff --git a/ppc64.c b/ppc64.c
index e8930a1..ee1995a 100644
--- a/ppc64.c
+++ b/ppc64.c
@@ -55,6 +55,8 @@ static void ppc64_set_bt_emergency_stack(enum emergency_stack_type type,
 static char * ppc64_check_eframe(struct ppc64_pt_regs *);
 static void ppc64_print_eframe(char *, struct ppc64_pt_regs *,
                struct bt_info *);
+static int ppc64_get_current_task_reg(int regno, const char *name, int size,
+                 void *value);
 static void parse_cmdline_args(void);
 static int ppc64_paca_percpu_offset_init(int);
 static void ppc64_init_cpu_info(void);
@@ -73,6 +75,26 @@ static ulong pmd_page_vaddr_l4(ulong pmd);
 static int is_opal_context(ulong sp, ulong nip);
 void opalmsg(void);

+struct user_regs_bitmap_struct {
+       struct {
+               long gpr[32];
+               long nip;
+               long msr;
+               long orig_gpr3;      /* Used for restarting system calls */
+               long ctr;
+               long link;
+               long xer;
+               long ccr;
+               long mq;             /* 601 only (not used at present) */
+                                       /* Used on APUS to hold IPL value. */
+               long trap;           /* Reason for being here */
+               long dar;            /* Fault registers */
+               long dsisr;
+               long result;         /* Result of a system call */
+       };
+       ulong bitmap;
+};
+
 static int is_opal_context(ulong sp, ulong nip)
 {
        uint64_t opal_start, opal_end;
@@ -704,6 +726,8 @@ ppc64_init(int when)
                                error(FATAL, "cannot malloc hwirqstack buffer space.");
                }

+               machdep->get_current_task_reg = ppc64_get_current_task_reg;
+
                ppc64_init_paca_info();

                if (!machdep->hz) {
@@ -2501,6 +2525,120 @@ ppc64_print_eframe(char *efrm_str, struct ppc64_pt_regs *regs,
        ppc64_print_nip_lr(regs, 1);
 }

+static int
+ppc64_get_current_task_reg(int regno, const char *name, int size,
+                 void *value)
+{
+       struct bt_info bt_info, bt_setup;
+       struct task_context *tc;
+       ulong task;
+       struct user_regs_bitmap_struct *ur_bitmap;
+       ulong ip, sp;
+       bool ret = FALSE;
+
+       /* Currently only handling registers available in ppc64_pt_regs:
+        *
+        * 0-31:   r0-r31
+        * 64:     pc/nip
+        * 65:     msr
+        *
+        * 67:     lr
+        * 68:     ctr
+        */
+       switch (regno) {
+       case PPC64_R0_REGNUM ... PPC64_R31_REGNUM:
+
+       case PPC64_PC_REGNUM:
+       case PPC64_MSR_REGNUM:
+       case PPC64_LR_REGNUM:
+       case PPC64_CTR_REGNUM:
+               break;
+
+       default:
+               // return false if we can't get that register
+               if (CRASHDEBUG(1))
+                       error(WARNING, "unsupported register, regno=%d\n", regno);
+               return FALSE;
+       }
+

The above switch-case code block can be defined as a new static function to check the validity of regnum, for example:
static int sanity_check_regs(int regno)

And then it can be invoked in the current function. Just an idea, maybe this looks more concise and readable. Any thoughts?

+       tc = CURRENT_CONTEXT();
+       if (!tc)
+               return FALSE;
+       BZERO(&bt_setup, sizeof(struct bt_info));
+       clone_bt_info(&bt_setup, &bt_info, tc);
+       fill_stackbuf(&bt_info);
+
+       // reusing the get_dumpfile_regs function to get pt regs structure
+       get_dumpfile_regs(&bt_info, &sp, &ip);
+       if (bt_info.stackbuf)
+               FREEBUF(bt_info.stackbuf);
+       ur_bitmap = (struct user_regs_bitmap_struct *)bt_info.machdep;
+
+       if (!ur_bitmap) {
+               error(WARNING, "pt_regs not available for cpu %d\n", tc->processor);
+               return FALSE;
+       }
+       if (!bt_info.need_free) {
+               goto get_all;
+       }

Could you please explain why the need_free is used to determine if it should jump into the get_all code block?  The flag looks very strange, or is there any another solution?

+
+       switch (ur_bitmap->bitmap) {
+       case 0x100000002:
+               switch (regno) {
+               case PPC64_R1_REGNUM:
+               case PPC64_PC_REGNUM:
+                       break;
+               default:
+                       return FALSE;
+               }
+               break;
+       }

The switch-case code block should be equivalent to the implementation below(if I understood correctly), and looks more readable?

+    /* Only for R1 and PC values */
+    if ((ur_bitmap->bitmap == 0x100000002) &&
+        (regno != PPC64_R1_REGNUM && regno != PPC64_PC_REGNUM))
+        return FALSE;

 
+
+get_all:
+       switch (regno) {
+       case PPC64_R0_REGNUM ... PPC64_R31_REGNUM:
+               if (size != sizeof(ur_bitmap->gpr[regno])) {
+                       ret = FALSE; break;  // size mismatch
+               }

In fact , the variable ret can be initialized to FALSE one time at start of this function, and then it can be simplified as below:
+       switch (regno) {
+       case PPC64_R0_REGNUM ... PPC64_R31_REGNUM:
+               if (size != sizeof(ur_bitmap->gpr[regno])) 
+                     break;
+               memcpy(value, &ur_bitmap->gpr[regno], size);
+               ret = TRUE;
+               break;

+               memcpy(value, &ur_bitmap->gpr[regno], size);
+               ret = TRUE; break;
+
+       case PPC64_PC_REGNUM:
+               if (size != sizeof(ur_bitmap->nip)) {
+                       ret = FALSE; break;  // size mismatch
+               }
+               memcpy(value, &ur_bitmap->nip, size);
+               ret = TRUE; break;
+
+       case PPC64_MSR_REGNUM:
+               if (size != sizeof(ur_bitmap->msr)) {
+                       ret = FALSE; break;  // size mismatch
+               }
+               memcpy(value, &ur_bitmap->msr, size);
+               ret = TRUE; break;
+
+       case PPC64_LR_REGNUM:
+               if (size != sizeof(ur_bitmap->link)) {
+                       ret = FALSE; break;  // size mismatch
+               }
+               memcpy(value, &ur_bitmap->link, size);
+               ret = TRUE; break;
+
+       case PPC64_CTR_REGNUM:
+               if (size != sizeof(ur_bitmap->ctr)) {
+                       ret = FALSE; break;  // size mismatch
+               }
+               memcpy(value, &ur_bitmap->ctr, size);
+               ret = TRUE; break;
+       }
+       if (bt_info.need_free) {
+               FREEBUF(ur_bitmap);
+               bt_info.need_free = FALSE;
+       }
+
+       return ret;
+}
+
 /*
  * For vmcore typically saved with KDump or FADump, get SP and IP values
  * from the saved ptregs.
@@ -2613,9 +2751,11 @@ ppc64_get_dumpfile_stack_frame(struct bt_info *bt_in, ulong *nip, ulong *ksp)
                pt_regs = (struct ppc64_pt_regs *)bt->machdep;
                ur_nip = pt_regs->nip;
                ur_ksp = pt_regs->gpr[1];
-               /* Print the collected regs for panic task. */
-               ppc64_print_regs(pt_regs);
-               ppc64_print_nip_lr(pt_regs, 1);
+               if (!(bt->flags & BT_NO_PRINT_REGS)) {
+                       /* Print the collected regs for panic task. */
+                       ppc64_print_regs(pt_regs);
+                       ppc64_print_nip_lr(pt_regs, 1);
+               }
        } else if ((pc->flags & KDUMP) ||
                   ((pc->flags & DISKDUMP) &&
                    (*diskdump_flags & KDUMP_CMPRS_LOCAL))) {
@@ -2779,19 +2919,28 @@ static void
 ppc64_get_stack_frame(struct bt_info *bt, ulong *pcp, ulong *spp)
 {
        ulong ksp, nip;
-       
+       struct user_regs_bitmap_struct *ur_bitmap;
+
        nip = ksp = 0;

-       if (DUMPFILE() && is_task_active(bt->task))
+       if (DUMPFILE() && is_task_active(bt->task)) {
                ppc64_get_dumpfile_stack_frame(bt, &nip, &ksp);
-       else
+               bt->need_free = FALSE;
+       } else {
                get_ppc64_frame(bt, &nip, &ksp);
+               ur_bitmap = (struct user_regs_bitmap_struct *)GETBUF(sizeof(*ur_bitmap));
+               memset(ur_bitmap, 0, sizeof(*ur_bitmap));
+               ur_bitmap->nip = nip;
+               ur_bitmap->gpr[1] = ksp;
+               ur_bitmap->bitmap += 0x100000002;

This confused me a little bit. Maybe it's good enough like this?

+               ur_bitmap->bitmap = 0x100000002;

Thanks
Lianbo

+               bt->machdep = ur_bitmap;
+               bt->need_free = TRUE;
+       }

        if (pcp)
                *pcp = nip;
        if (spp)
                *spp = ksp;
-
 }

 static ulong
--
2.40.1