On Thu, Feb 22, 2024 at 10:52:56AM +0530, Aditya Gupta wrote:
Currently, gdb passthroughs of 'bt', 'frame', 'up', 'down' and 'info locals'
don't work. This is because gdb does not know the register values it needs
to unwind the stack frames.
Every gdb passthrough goes through `gdb_interface`. gdb then expects
`crash_target::fetch_registers` to give it the register values, which in
turn depends on `machdep->get_cpu_reg` to read the register values for the
specific architecture.
                                      ----------------------------
         gdb passthrough (eg. "bt")   |                          |
crash      -------------------------> |                          |
                                      |      gdb_interface       |
                                      |                          |
                                      |                          |
                                      |  ----------------------  |
                    fetch_registers   |  |                    |  |
crash_target<-------------------------+--|        gdb         |  |
            --------------------------+->|                    |  |
              Registers (SP,NIP, etc.)|  |                    |  |
                                      |  |                    |  |
                                      |  ----------------------  |
                                      ----------------------------
Implement `machdep->get_cpu_reg` on PPC64, so that crash provides the
register values to gdb to unwind stack frames properly.
With these changes, the 'bt' command output in gdb mode on powerpc will look
like this:
gdb> bt
#0 0xc0000000002a53e8 in crash_setup_regs (oldregs=<optimized out>,
newregs=0xc00000000486f8d8) at ./arch/powerpc/include/asm/kexec.h:69
#1 __crash_kexec (regs=<optimized out>) at kernel/kexec_core.c:974
#2 0xc000000000168918 in panic (fmt=<optimized out>) at kernel/panic.c:358
#3 0xc000000000b735f8 in sysrq_handle_crash (key=<optimized out>) at
drivers/tty/sysrq.c:155
#4 0xc000000000b742cc in __handle_sysrq (key=key@entry=99,
check_mask=check_mask@entry=false) at drivers/tty/sysrq.c:602
#5 0xc000000000b7506c in write_sysrq_trigger (file=<optimized out>,
buf=<optimized out>, count=2, ppos=<optimized out>) at
drivers/tty/sysrq.c:1163
#6 0xc00000000069a7bc in pde_write (ppos=<optimized out>, count=<optimized
out>, buf=<optimized out>, file=<optimized out>, pde=0xc000000009ed3a80) at
fs/proc/inode.c:340
#7 proc_reg_write (file=<optimized out>, buf=<optimized out>,
count=<optimized out>, ppos=<optimized out>) at fs/proc/inode.c:352
#8 0xc0000000005b3bbc in vfs_write (file=file@entry=0xc00000009dda7d00,
buf=buf@entry=0xebcfc7c6040 <error: Cannot access memory at address 0xebcfc7c6040>,
count=count@entry=2, pos=pos@entry=0xc00000000486fda0) at fs/read_write.c:582
instead of the earlier output without this patch:
gdb> bt
#0 <unavailable> in ?? ()
Backtrace stopped: previous frame identical to this frame (corrupt stack?)
Also, 'get_dumpfile_regs' has been introduced to get registers from the
multiple supported vmcore formats. Correspondingly, a flag 'BT_NO_PRINT_REGS'
has been introduced to tell the helper functions that fetch the registers
not to print them on every call to backtrace in gdb.
Note: This feature to support GDB unwinding doesn't support live debugging.
Signed-off-by: Aditya Gupta <adityag(a)linux.ibm.com>
---
defs.h | 122 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
kernel.c | 33 +++++++++++++++
ppc64.c | 111 ++++++++++++++++++++++++++++++++++++++++++++++++--
3 files changed, 263 insertions(+), 3 deletions(-)
diff --git a/defs.h b/defs.h
index 98650e8780bf..414f039b35f1 100644
--- a/defs.h
+++ b/defs.h
@@ -6095,6 +6095,7 @@ int load_module_symbols_helper(char *);
void unlink_module(struct load_module *);
int check_specified_module_tree(char *, char *);
int is_system_call(char *, ulong);
+void get_dumpfile_regs(struct bt_info*, ulong*, ulong*);
void generic_dump_irq(int);
void generic_get_irq_affinity(int);
void generic_show_interrupts(int, ulong *);
@@ -6194,6 +6195,7 @@ ulong cpu_map_addr(const char *type);
#define BT_REGS_NOT_FOUND (0x4000000000000ULL)
#define BT_OVERFLOW_STACK (0x8000000000000ULL)
#define BT_SKIP_IDLE (0x10000000000000ULL)
+#define BT_NO_PRINT_REGS (0x20000000000000ULL)
#define BT_SYMBOL_OFFSET (BT_SYMBOLIC_ARGS)
#define BT_REF_HEXVAL (0x1)
@@ -8050,4 +8052,124 @@ enum x86_64_regnum {
LAST_REGNUM
};
+/*
+ * Register numbers to make crash_target->fetch_registers()
+ * ---> machdep->get_cpu_reg() work properly.
+ *
+ * These register numbers and names are given according to output of
+ * `rs6000_register_name`, because that is what was being used by
+ * crash_target::fetch_registers in case of PPC64
+ */
+enum ppc64_regnum {
+ PPC64_R0_REGNUM = 0,
+ PPC64_R1_REGNUM,
+ PPC64_R2_REGNUM,
+ PPC64_R3_REGNUM,
+ PPC64_R4_REGNUM,
+ PPC64_R5_REGNUM,
+ PPC64_R6_REGNUM,
+ PPC64_R7_REGNUM,
+ PPC64_R8_REGNUM,
+ PPC64_R9_REGNUM,
+ PPC64_R10_REGNUM,
+ PPC64_R11_REGNUM,
+ PPC64_R12_REGNUM,
+ PPC64_R13_REGNUM,
+ PPC64_R14_REGNUM,
+ PPC64_R15_REGNUM,
+ PPC64_R16_REGNUM,
+ PPC64_R17_REGNUM,
+ PPC64_R18_REGNUM,
+ PPC64_R19_REGNUM,
+ PPC64_R20_REGNUM,
+ PPC64_R21_REGNUM,
+ PPC64_R22_REGNUM,
+ PPC64_R23_REGNUM,
+ PPC64_R24_REGNUM,
+ PPC64_R25_REGNUM,
+ PPC64_R26_REGNUM,
+ PPC64_R27_REGNUM,
+ PPC64_R28_REGNUM,
+ PPC64_R29_REGNUM,
+ PPC64_R30_REGNUM,
+ PPC64_R31_REGNUM,
+
+ PPC64_F0_REGNUM = 32,
+ PPC64_F1_REGNUM,
+ PPC64_F2_REGNUM,
+ PPC64_F3_REGNUM,
+ PPC64_F4_REGNUM,
+ PPC64_F5_REGNUM,
+ PPC64_F6_REGNUM,
+ PPC64_F7_REGNUM,
+ PPC64_F8_REGNUM,
+ PPC64_F9_REGNUM,
+ PPC64_F10_REGNUM,
+ PPC64_F11_REGNUM,
+ PPC64_F12_REGNUM,
+ PPC64_F13_REGNUM,
+ PPC64_F14_REGNUM,
+ PPC64_F15_REGNUM,
+ PPC64_F16_REGNUM,
+ PPC64_F17_REGNUM,
+ PPC64_F18_REGNUM,
+ PPC64_F19_REGNUM,
+ PPC64_F20_REGNUM,
+ PPC64_F21_REGNUM,
+ PPC64_F22_REGNUM,
+ PPC64_F23_REGNUM,
+ PPC64_F24_REGNUM,
+ PPC64_F25_REGNUM,
+ PPC64_F26_REGNUM,
+ PPC64_F27_REGNUM,
+ PPC64_F28_REGNUM,
+ PPC64_F29_REGNUM,
+ PPC64_F30_REGNUM,
+ PPC64_F31_REGNUM,
+
+ PPC64_PC_REGNUM = 64,
+ PPC64_MSR_REGNUM = 65,
+ PPC64_CR_REGNUM = 66,
+ PPC64_LR_REGNUM = 67,
+ PPC64_CTR_REGNUM = 68,
+ PPC64_XER_REGNUM = 69,
+ PPC64_FPSCR_REGNUM = 70,
+
+ PPC64_VR0_REGNUM = 106,
+ PPC64_VR1_REGNUM,
+ PPC64_VR2_REGNUM,
+ PPC64_VR3_REGNUM,
+ PPC64_VR4_REGNUM,
+ PPC64_VR5_REGNUM,
+ PPC64_VR6_REGNUM,
+ PPC64_VR7_REGNUM,
+ PPC64_VR8_REGNUM,
+ PPC64_VR9_REGNUM,
+ PPC64_VR10_REGNUM,
+ PPC64_VR11_REGNUM,
+ PPC64_VR12_REGNUM,
+ PPC64_VR13_REGNUM,
+ PPC64_VR14_REGNUM,
+ PPC64_VR15_REGNUM,
+ PPC64_VR16_REGNUM,
+ PPC64_VR17_REGNUM,
+ PPC64_VR18_REGNUM,
+ PPC64_VR19_REGNUM,
+ PPC64_VR20_REGNUM,
+ PPC64_VR21_REGNUM,
+ PPC64_VR22_REGNUM,
+ PPC64_VR23_REGNUM,
+ PPC64_VR24_REGNUM,
+ PPC64_VR25_REGNUM,
+ PPC64_VR26_REGNUM,
+ PPC64_VR27_REGNUM,
+ PPC64_VR28_REGNUM,
+ PPC64_VR29_REGNUM,
+ PPC64_VR30_REGNUM,
+ PPC64_VR31_REGNUM,
+
+ PPC64_VSCR_REGNUM = 138,
+ PPC64_VRSAVE_REGNU = 139
+};
+
#endif /* !GDB_COMMON */
diff --git a/kernel.c b/kernel.c
index 1728b70c1b5c..661ef0e237eb 100644
--- a/kernel.c
+++ b/kernel.c
@@ -3533,6 +3533,39 @@ get_lkcd_regs(struct bt_info *bt, ulong *eip, ulong *esp)
machdep->get_stack_frame(bt, eip, esp);
}
+void
+get_dumpfile_regs(struct bt_info *bt, ulong *eip, ulong *esp)
+{
+ bt->flags |= BT_NO_PRINT_REGS;
+
+ if (NETDUMP_DUMPFILE())
+ get_netdump_regs(bt, eip, esp);
+ else if (KDUMP_DUMPFILE())
+ get_kdump_regs(bt, eip, esp);
+ else if (DISKDUMP_DUMPFILE())
+ get_diskdump_regs(bt, eip, esp);
+ else if (KVMDUMP_DUMPFILE())
+ get_kvmdump_regs(bt, eip, esp);
+ else if (LKCD_DUMPFILE())
+ get_lkcd_regs(bt, eip, esp);
+ else if (XENDUMP_DUMPFILE())
+ get_xendump_regs(bt, eip, esp);
+ else if (SADUMP_DUMPFILE())
+ get_sadump_regs(bt, eip, esp);
+ else if (VMSS_DUMPFILE())
+ get_vmware_vmss_regs(bt, eip, esp);
+ else if (REMOTE_PAUSED()) {
+ if (!is_task_active(bt->task) || !get_remote_regs(bt, eip, esp))
+ machdep->get_stack_frame(bt, eip, esp);
+ } else
+ machdep->get_stack_frame(bt, eip, esp);
+
+ bt->flags &= ~BT_NO_PRINT_REGS;
+
+ bt->instptr = *eip;
+ bt->stkptr = *esp;
+}
+
/*
* Store the head of the kernel module list for future use.
diff --git a/ppc64.c b/ppc64.c
index e8930a139e0d..870b0fbb49ce 100644
--- a/ppc64.c
+++ b/ppc64.c
@@ -55,6 +55,8 @@ static void ppc64_set_bt_emergency_stack(enum emergency_stack_type
type,
static char * ppc64_check_eframe(struct ppc64_pt_regs *);
static void ppc64_print_eframe(char *, struct ppc64_pt_regs *,
struct bt_info *);
+static int ppc64_get_cpu_reg(int cpu, int regno, const char *name, int size,
+ void *value);
static void parse_cmdline_args(void);
static int ppc64_paca_percpu_offset_init(int);
static void ppc64_init_cpu_info(void);
@@ -704,6 +706,8 @@ ppc64_init(int when)
error(FATAL, "cannot malloc hwirqstack buffer space.");
}
+ machdep->get_cpu_reg = ppc64_get_cpu_reg;
+
ppc64_init_paca_info();
if (!machdep->hz) {
@@ -2501,6 +2505,105 @@ ppc64_print_eframe(char *efrm_str, struct ppc64_pt_regs *regs,
ppc64_print_nip_lr(regs, 1);
}
+static int
+ppc64_get_cpu_reg(int cpu, int regno, const char *name, int size,
+ void *value)
+{
+ struct bt_info bt_info, bt_setup;
+ struct task_context *tc;
+ ulong task;
+ struct ppc64_pt_regs *pt_regs;
+ ulong ip, sp;
+
+ if (LIVE()) {
+ /* doesn't support reading registers in live dump */
+ return FALSE;
+ }
What do you think about removing the LIVE() check in v10? According to my tests (with my
x86_64 patchsets), with ppc64 arbitrary task stack unwinding support, we can view the
stack unwinding of non-active tasks in live debugging:
crash> sys
KERNEL: /usr/lib/debug/lib/modules/5.14.0-425.el9.ppc64le/vmlinux
DUMPFILE: /proc/kcore
CPUS: 8
DATE: Tue Feb 27 04:19:36 EST 2024
UPTIME: 01:11:56
LOAD AVERAGE: 0.26, 0.09, 1.70
TASKS: 196
NODENAME: ibm-p9z-26-lp13.virt.pnr.lab.eng.rdu2.redhat.com
RELEASE: 5.14.0-425.el9.ppc64le
VERSION: #1 SMP Wed Feb 21 15:29:04 EST 2024
MACHINE: ppc64le (2900 Mhz)
MEMORY: 8 GB
crash> set 1
PID: 1
COMMAND: "systemd"
TASK: c0000000035fc900 [THREAD_INFO: c0000000035fc900]
CPU: 1
STATE: TASK_INTERRUPTIBLE
crash> bt
PID: 1 TASK: c0000000035fc900 CPU: 1 COMMAND: "systemd"
#0 [c00000000369fa60] __schedule at c000000000fc3c58
#1 [c00000000369fb20] schedule at c000000000fc411c
#2 [c00000000369fb50] schedule_hrtimeout_range_clock at c000000000fcd2a4
#3 [c00000000369fc00] ep_poll at c00000000063640c
#4 [c00000000369fcf0] do_epoll_wait at c000000000636584
#5 [c00000000369fd40] sys_epoll_wait at c000000000636608
#6 [c00000000369fdb0] system_call_exception at c00000000002e994
#7 [c00000000369fe10] system_call_vectored_common at c00000000000bfe8
crash> gdb bt
#0 0xc000000000fc3c58 in context_switch (rf=0xc00000000369fac0, next=0x0, prev=0x0,
rq=0x0) at kernel/sched/core.c:5409
#1 __schedule (sched_mode=sched_mode@entry=0) at kernel/sched/core.c:6737
#2 0xc000000000fc411c in schedule_loop (sched_mode=<optimized out>) at
kernel/sched/core.c:6807
#3 schedule () at kernel/sched/core.c:6816
#4 0xc000000000fcd2a4 in schedule_hrtimeout_range_clock (expires=<optimized out>,
delta=<optimized out>, mode=<optimized out>, clock_id=<optimized out>)
at kernel/time/hrtimer.c:2297
#5 0xc00000000063640c in ep_poll (ep=0xc000000034b32e80, events=0x0,
events@entry=0x1001d23a9e0, maxevents=maxevents@entry=82, timeout=timeout@entry=0x0) at
fs/eventpoll.c:1904
#6 0xc000000000636584 in do_epoll_wait (epfd=epfd@entry=4,
events=events@entry=0x1001d23a9e0, maxevents=maxevents@entry=82, to=0x0) at
fs/eventpoll.c:2288
#7 0xc000000000636608 in __do_sys_epoll_wait (timeout=<optimized out>,
maxevents=82, events=0x1001d23a9e0, epfd=4) at fs/eventpoll.c:2300
#8 __se_sys_epoll_wait (epfd=4, events=1100000504288, maxevents=82, timeout=<optimized
out>) at fs/eventpoll.c:2295
#9 0xc00000000002e994 in system_call_exception (r3=4, r4=1100000504288, r5=82, r6=-1,
r7=140735513318112, r8=140735513287592, r0=<optimized out>, regs=0xc00000000369fe80)
at arch/powerpc/kernel/interrupt.c:221
#10 0xc00000000000bfe8 in system_call_vectored_common () at
arch/powerpc/kernel/interrupt_64.S:209
Backtrace stopped: previous frame inner to this frame (corrupt stack?)
However, for active tasks in live mode the stack unwind will fail, but I think that
doesn't matter, because neither "bt" nor "gdb bt" can show their stacks:
crash> ps
PID PPID CPU TASK ST %MEM VSZ RSS COMM
0 0 0 c000000002af6380 RU 0.0 0 0 [swapper/0]
0 0 1 c0000000035f9000 RU 0.0 0 0 [swapper/1]
0 0 2 c0000000035f0180 RU 0.0 0 0 [swapper/2]
0 0 3 c0000000035c5580 RU 0.0 0 0 [swapper/3]
0 0 4 c0000000035fac80 RU 0.0 0 0 [swapper/4]
0 0 5 c0000000035c7200 RU 0.0 0 0 [swapper/5]
0 0 6 c0000000035c0000 RU 0.0 0 0 [swapper/6]
0 0 7 c0000000035dc800 RU 0.0 0 0 [swapper/7]
1 0 1 c0000000035fc900 IN 0.2 182272 14016 systemd
crash> set c0000000035f0180
PID: 0
COMMAND: "swapper/2"
TASK: c0000000035f0180 (1 of 8) [THREAD_INFO: c0000000035f0180]
CPU: 2
STATE: TASK_RUNNING (ACTIVE)
crash> bt
PID: 0 TASK: c0000000035f0180 CPU: 2 COMMAND: "swapper/2"
(active)
crash> gdb bt
#0 0xc000000003847d50 in ?? ()
gdb: invalid kernel virtual address: fffffffffffffffc type: "gdb_readmem callback"
gdb: invalid kernel virtual address: fffffffffffffff8 type: "gdb_readmem callback"
gdb: invalid kernel virtual address: fffffffffffffffc type: "gdb_readmem callback"
gdb: invalid kernel virtual address: fffffffffffffff8 type: "gdb_readmem callback"
#1 0x0000000000000000 in ?? ()
What do you think?
Thanks,
Tao Liu
+
+ /* Currently only handling registers available in ppc64_pt_regs:
+ *
+ * 0-31: r0-r31
+ * 64: pc/nip
+ * 65: msr
+ *
+ * 67: lr
+ * 68: ctr
+ */
+ switch (regno) {
+ case PPC64_R0_REGNUM ... PPC64_R31_REGNUM:
+
+ case PPC64_PC_REGNUM:
+ case PPC64_MSR_REGNUM:
+ case PPC64_LR_REGNUM:
+ case PPC64_CTR_REGNUM:
+ break;
+
+ default:
+ // return false if we can't get that register
+ if (CRASHDEBUG(1))
+ error(WARNING, "unsupported register, regno=%d\n", regno);
+ return FALSE;
+ }
+
+ task = get_active_task(cpu);
+ tc = task_to_context(task);
+ BZERO(&bt_setup, sizeof(struct bt_info));
+ clone_bt_info(&bt_setup, &bt_info, tc);
+ fill_stackbuf(&bt_info);
+
+ // reusing the get_dumpfile_regs function to get pt regs structure
+ get_dumpfile_regs(&bt_info, &sp, &ip);
+ pt_regs = (struct ppc64_pt_regs *)bt_info.machdep;
+
+ if (!pt_regs) {
+ error(WARNING, "pt_regs not available for cpu %d\n", cpu);
+ return FALSE;
+ }
+
+ switch (regno) {
+ case PPC64_R0_REGNUM ... PPC64_R31_REGNUM:
+ if (size != sizeof(pt_regs->gpr[regno]))
+ return FALSE; // size mismatch
+
+ memcpy(value, &pt_regs->gpr[regno], size);
+ break;
+
+ case PPC64_PC_REGNUM:
+ if (size != sizeof(pt_regs->nip))
+ return FALSE; // size mismatch
+
+ memcpy(value, &pt_regs->nip, size);
+ break;
+
+ case PPC64_MSR_REGNUM:
+ if (size != sizeof(pt_regs->msr))
+ return FALSE; // size mismatch
+
+ memcpy(value, &pt_regs->msr, size);
+ break;
+
+ case PPC64_LR_REGNUM:
+ if (size != sizeof(pt_regs->link))
+ return FALSE; // size mismatch
+
+ memcpy(value, &pt_regs->link, size);
+ break;
+
+ case PPC64_CTR_REGNUM:
+ if (size != sizeof(pt_regs->ctr))
+ return FALSE; // size mismatch
+
+ memcpy(value, &pt_regs->ctr, size);
+ break;
+ }
+
+ /* free buffer allocated by fill_stackbuf */
+ if (bt_info.stackbuf)
+ FREEBUF(bt_info.stackbuf);
+
+ return TRUE;
+}
+
/*
* For vmcore typically saved with KDump or FADump, get SP and IP values
* from the saved ptregs.
@@ -2613,9 +2716,11 @@ ppc64_get_dumpfile_stack_frame(struct bt_info *bt_in, ulong *nip,
ulong *ksp)
pt_regs = (struct ppc64_pt_regs *)bt->machdep;
ur_nip = pt_regs->nip;
ur_ksp = pt_regs->gpr[1];
- /* Print the collected regs for panic task. */
- ppc64_print_regs(pt_regs);
- ppc64_print_nip_lr(pt_regs, 1);
+ if (!(bt->flags & BT_NO_PRINT_REGS)) {
+ /* Print the collected regs for panic task. */
+ ppc64_print_regs(pt_regs);
+ ppc64_print_nip_lr(pt_regs, 1);
+ }
} else if ((pc->flags & KDUMP) ||
((pc->flags & DISKDUMP) &&
(*diskdump_flags & KDUMP_CMPRS_LOCAL))) {
--
2.43.0