This patch will add stack unwinding support for x86_64 CPU arch. It
follows the tech path of ppc64_get_stack_frame() & ppc64_get_cpu_reg()
in ppc64.c, to get and consume the cpu regs.
One different case is, we need to handle inactive_task_frame structure
specifically in x86_64.
If inactive_task_frame structure enabled, for inactive tasks, 7 regs can
be get from inactive_task_frame in stack, and sp need to rewind back to
skip inactive_task_frame structure (See code comments in
x86_64.c:x86_64_get_stack_frame()); for active tasks, we get regs by the
original way.
If inactive_task_frame structure is not enabled, for inactive tasks, the
stack frame is organized as linked list, and sp/bp can be get from stack;
for active tasks, we get regs by the original way.
Signed-off-by: Tao Liu <ltao(a)redhat.com>
---
defs.h | 19 +++-
x86_64.c | 291 ++++++++++++++++++++++++++++++++++++++++++++++++++-----
2 files changed, 283 insertions(+), 27 deletions(-)
diff --git a/defs.h b/defs.h
index 8450aea..ed52cc3 100644
--- a/defs.h
+++ b/defs.h
@@ -2241,6 +2241,20 @@ struct offset_table { /* stash of commonly-used
offsets */
long mnt_namespace_nr_mounts;
long mount_mnt_node;
long log_caller_id;
+ long inactive_task_frame_r15;
+ long inactive_task_frame_r14;
+ long inactive_task_frame_r13;
+ long inactive_task_frame_r12;
+ long inactive_task_frame_flags;
+ long inactive_task_frame_si;
+ long inactive_task_frame_di;
+ long inactive_task_frame_bx;
+ long thread_struct_es;
+ long thread_struct_ds;
+ long thread_struct_fsbase;
+ long thread_struct_gsbase;
+ long thread_struct_fs;
+ long thread_struct_gs;
};
struct size_table { /* stash of commonly-used sizes */
@@ -8008,7 +8022,7 @@ extern int have_full_symbols(void);
/*
* Register numbers must be in sync with gdb/features/i386/64bit-core.c
- * to make crash_target->fetch_registers() ---> machdep->get_cpu_reg()
+ * to make crash_target->fetch_registers() ---> machdep->get_current_task_reg()
* working properly.
*/
enum x86_64_regnum {
@@ -8052,6 +8066,9 @@ enum x86_64_regnum {
FOSEG_REGNUM,
FOOFF_REGNUM,
FOP_REGNUM,
+ FS_BASE_REGNUM = 152,
+ GS_BASE_REGNUM,
+ ORIG_RAX_REGNUM,
LAST_REGNUM
};
diff --git a/x86_64.c b/x86_64.c
index 0c21eb8..4ba0b40 100644
--- a/x86_64.c
+++ b/x86_64.c
@@ -126,7 +126,7 @@ static int x86_64_get_framesize(struct bt_info *, ulong, ulong, char
*);
static void x86_64_framesize_debug(struct bt_info *);
static void x86_64_get_active_set(void);
static int x86_64_get_kvaddr_ranges(struct vaddr_range *);
-static int x86_64_get_cpu_reg(int, int, const char *, int, void *);
+static int x86_64_get_current_task_reg(int, const char *, int, void *);
static int x86_64_verify_paddr(uint64_t);
static void GART_init(void);
static void x86_64_exception_stacks_init(void);
@@ -143,6 +143,29 @@ struct machine_specific x86_64_machine_specific = { 0 };
static const char *exception_functions_orig[];
static const char *exception_functions_5_8[];
+/* Use this hardwired version -- sometimes the
+ * debuginfo doesn't pick this up even though
+ * it exists in the kernel; it shouldn't change.
+ */
+struct x86_64_user_regs_struct {
+ unsigned long r15, r14, r13, r12, bp, bx;
+ unsigned long r11, r10, r9, r8, ax, cx, dx;
+ unsigned long si, di, orig_ax, ip, cs;
+ unsigned long flags, sp, ss, fs_base;
+ unsigned long gs_base, ds, es, fs, gs;
+};
+
+struct user_regs_bitmap_struct {
+ struct {
+ unsigned long r15, r14, r13, r12, bp, bx;
+ unsigned long r11, r10, r9, r8, ax, cx, dx;
+ unsigned long si, di, orig_ax, ip, cs;
+ unsigned long flags, sp, ss, fs_base;
+ unsigned long gs_base, ds, es, fs, gs;
+ };
+ ulong bitmap;
+};
+
/*
* Do all necessary machine-specific setup here. This is called several
* times during initialization.
@@ -195,7 +218,7 @@ x86_64_init(int when)
machdep->machspec->irq_eframe_link = UNINITIALIZED;
machdep->machspec->irq_stack_gap = UNINITIALIZED;
machdep->get_kvaddr_ranges = x86_64_get_kvaddr_ranges;
- machdep->get_cpu_reg = x86_64_get_cpu_reg;
+ machdep->get_current_task_reg = x86_64_get_current_task_reg;
if (machdep->cmdline_args[0])
parse_cmdline_args();
if ((string = pc->read_vmcoreinfo("relocate"))) {
@@ -500,6 +523,12 @@ x86_64_init(int when)
MEMBER_OFFSET_INIT(thread_struct_rsp, "thread_struct", "sp");
if (INVALID_MEMBER(thread_struct_rsp0))
MEMBER_OFFSET_INIT(thread_struct_rsp0, "thread_struct", "sp0");
+ MEMBER_OFFSET_INIT(thread_struct_es, "thread_struct", "es");
+ MEMBER_OFFSET_INIT(thread_struct_ds, "thread_struct", "ds");
+ MEMBER_OFFSET_INIT(thread_struct_fsbase, "thread_struct",
"fsbase");
+ MEMBER_OFFSET_INIT(thread_struct_gsbase, "thread_struct",
"gsbase");
+ MEMBER_OFFSET_INIT(thread_struct_fs, "thread_struct", "fs");
+ MEMBER_OFFSET_INIT(thread_struct_gs, "thread_struct", "gs");
STRUCT_SIZE_INIT(tss_struct, "tss_struct");
MEMBER_OFFSET_INIT(tss_struct_ist, "tss_struct", "ist");
if (INVALID_MEMBER(tss_struct_ist)) {
@@ -583,17 +612,6 @@ x86_64_init(int when)
"user_regs_struct", "r15");
STRUCT_SIZE_INIT(user_regs_struct, "user_regs_struct");
if (!VALID_STRUCT(user_regs_struct)) {
- /* Use this hardwired version -- sometimes the
- * debuginfo doesn't pick this up even though
- * it exists in the kernel; it shouldn't change.
- */
- struct x86_64_user_regs_struct {
- unsigned long r15, r14, r13, r12, bp, bx;
- unsigned long r11, r10, r9, r8, ax, cx, dx;
- unsigned long si, di, orig_ax, ip, cs;
- unsigned long flags, sp, ss, fs_base;
- unsigned long gs_base, ds, es, fs, gs;
- };
ASSIGN_SIZE(user_regs_struct) =
sizeof(struct x86_64_user_regs_struct);
ASSIGN_OFFSET(user_regs_struct_rip) =
@@ -891,7 +909,7 @@ x86_64_dump_machdep_table(ulong arg)
fprintf(fp, " is_page_ptr: x86_64_is_page_ptr()\n");
fprintf(fp, " verify_paddr: x86_64_verify_paddr()\n");
fprintf(fp, " get_kvaddr_ranges: x86_64_get_kvaddr_ranges()\n");
- fprintf(fp, " get_cpu_reg: x86_64_get_cpu_reg()\n");
+ fprintf(fp, "get_current_task_reg: x86_64_get_current_task_reg()\n");
fprintf(fp, " init_kernel_pgd: x86_64_init_kernel_pgd()\n");
fprintf(fp, "clear_machdep_cache: x86_64_clear_machdep_cache()\n");
fprintf(fp, " xendump_p2m_create: %s\n", PVOPS_XEN() ?
@@ -4971,22 +4989,124 @@ x86_64_eframe_verify(struct bt_info *bt, long kvaddr, long cs,
long ss,
return FALSE;
}
+#define GET_REG_FROM_INACTIVE_TASK_FRAME(reg) \
+({ \
+ ulong offset, reg_value = 0, rsp; \
+ if (VALID_MEMBER(inactive_task_frame_bp)) { \
+ offset = OFFSET(task_struct_thread) + OFFSET(thread_struct_rsp); \
+ readmem(bt->task + offset, KVADDR, &rsp, \
+ sizeof(ulong), "thread_struct.rsp", FAULT_ON_ERROR); \
+ readmem(rsp + OFFSET(inactive_task_frame_##reg), KVADDR, ®_value, \
+ sizeof(ulong), "inactive_task_frame.##reg", FAULT_ON_ERROR); \
+ } \
+ reg_value; \
+})
+
/*
* Get a stack frame combination of pc and ra from the most relevent spot.
*/
static void
x86_64_get_stack_frame(struct bt_info *bt, ulong *pcp, ulong *spp)
{
- if (bt->flags & BT_DUMPFILE_SEARCH)
- return x86_64_get_dumpfile_stack_frame(bt, pcp, spp);
+ struct user_regs_bitmap_struct *ur_bitmap;
+ ulong pcp_save = *pcp;
+ ulong spp_save = *spp;
+ ulong sp;
if (bt->flags & BT_SKIP_IDLE)
bt->flags &= ~BT_SKIP_IDLE;
+ if (pcp)
+ *pcp = x86_64_get_pc(bt);
+ if (spp)
+ sp = *spp = x86_64_get_sp(bt);
+ ur_bitmap = (struct user_regs_bitmap_struct *)GETBUF(sizeof(*ur_bitmap));
+ memset(ur_bitmap, 0, sizeof(struct user_regs_bitmap_struct));
+
+ if (VALID_MEMBER(inactive_task_frame_bp)) {
+ if (!is_task_active(bt->task)) {
+ /*
+ * For inactive tasks in live and dumpfile, regs can be
+ * get from inactive_task_frame struct.
+ */
+ ur_bitmap->r15 = GET_REG_FROM_INACTIVE_TASK_FRAME(r15);
+ ur_bitmap->r14 = GET_REG_FROM_INACTIVE_TASK_FRAME(r14);
+ ur_bitmap->r13 = GET_REG_FROM_INACTIVE_TASK_FRAME(r13);
+ ur_bitmap->r12 = GET_REG_FROM_INACTIVE_TASK_FRAME(r12);
+ ur_bitmap->bx = GET_REG_FROM_INACTIVE_TASK_FRAME(bx);
+ ur_bitmap->bp = GET_REG_FROM_INACTIVE_TASK_FRAME(bp);
+ /*
+ For inactive tasks:
+ crash> task -x 1|grep sp
+ sp = 0xffffc90000013d00
+ crash> rd ffffc90000013d00 32
+ ffffc90000013d00: ffff888104dad4a8 0000000000000000 r15,r14
+ ffffc90000013d10: ffff888100280000 ffff888100216500 r13,r12
+ ffffc90000013d20: ffff888100217018 ffff88817fd2c800 rbx,rbp
+ ffffc90000013d30: ffffffff81a6a1b3 ffffc90000013de0 saved_rip,...
+ ffffc90000013d40: ffff888100000004 99ccbf53ea493000
+ ffffc90000013d50: ffff888100216500 ffff888100216500
+
+ crash> dis __schedule
+ ...
+ 0xffffffff81a6a1ab <__schedule+507>: mov %r13,%rsi
+ 0xffffffff81a6a1ae <__schedule+510>: call 0xffffffff81003490
<__switch_to_asm>
+ 0xffffffff81a6a1b3 <__schedule+515>: mov %rax,%rdi <<=== saved_rip
+ ...
+ crash> dis __switch_to_asm
+ 0xffffffff81003490 <__switch_to_asm>: push %rbp
+ 0xffffffff81003491 <__switch_to_asm+1>: push %rbx
+ 0xffffffff81003492 <__switch_to_asm+2>: push %r12
+ 0xffffffff81003494 <__switch_to_asm+4>: push %r13
+ 0xffffffff81003496 <__switch_to_asm+6>: push %r14
+ 0xffffffff81003498 <__switch_to_asm+8>: push %r15
+ 0xffffffff8100349a <__switch_to_asm+10>: mov %rsp,0x14d8(%rdi)
+ ...
+ Now saved_rip = ffffffff81a6a1b3, and we are starting
+ the stack unwind at saved_rip, which is function __schedule()
+ instead of function __switch_to_asm(), so the stack pointer should
+ be rewind from ffffc90000013d00 back to ffffc90000013d38,
+ aka *spp += 7 * reg_len. Otherwise we are unwinding function
+ __schedule() but with __switch_to_asm()'s stack frame, which
+ will fail.
+ */
+ sp += 7 * sizeof(unsigned long);
+ ur_bitmap->bitmap += 0x3f;
+ } else {
+ /*
+ * For active tasks in dumpfile, we get regs through the
+ * original way. For active tasks in live, we only get
+ * ip and sp in the end of the function.
+ */
+ if (bt->flags & BT_DUMPFILE_SEARCH) {
+ FREEBUF(ur_bitmap);
+ bt->need_free = FALSE;
+ *pcp = pcp_save;
+ *spp = spp_save;
+ return x86_64_get_dumpfile_stack_frame(bt, pcp, spp);
+ }
+ }
+ } else {
+ if (!is_task_active(bt->task)) {
+ readmem(*spp, KVADDR, &(ur_bitmap->bp),
+ sizeof(ulong), "ur_bitmap->bp", FAULT_ON_ERROR);
+ ur_bitmap->bitmap += 0x10;
+ } else {
+ if (bt->flags & BT_DUMPFILE_SEARCH) {
+ FREEBUF(ur_bitmap);
+ bt->need_free = FALSE;
+ *pcp = pcp_save;
+ *spp = spp_save;
+ return x86_64_get_dumpfile_stack_frame(bt, pcp, spp);
+ }
+ }
+ }
- if (pcp)
- *pcp = x86_64_get_pc(bt);
- if (spp)
- *spp = x86_64_get_sp(bt);
+ ur_bitmap->ip = *pcp;
+ ur_bitmap->sp = sp;
+ ur_bitmap->bitmap += 0x50000;
+
+ bt->machdep = ur_bitmap;
+ bt->need_free = TRUE;
}
/*
@@ -6458,6 +6578,14 @@ x86_64_ORC_init(void)
MEMBER_OFFSET_INIT(inactive_task_frame_bp, "inactive_task_frame",
"bp");
MEMBER_OFFSET_INIT(inactive_task_frame_ret_addr, "inactive_task_frame",
"ret_addr");
+ MEMBER_OFFSET_INIT(inactive_task_frame_r15, "inactive_task_frame",
"r15");
+ MEMBER_OFFSET_INIT(inactive_task_frame_r14, "inactive_task_frame",
"r14");
+ MEMBER_OFFSET_INIT(inactive_task_frame_r13, "inactive_task_frame",
"r13");
+ MEMBER_OFFSET_INIT(inactive_task_frame_r12, "inactive_task_frame",
"r12");
+ MEMBER_OFFSET_INIT(inactive_task_frame_flags, "inactive_task_frame",
"flags");
+ MEMBER_OFFSET_INIT(inactive_task_frame_si, "inactive_task_frame",
"si");
+ MEMBER_OFFSET_INIT(inactive_task_frame_di, "inactive_task_frame",
"di");
+ MEMBER_OFFSET_INIT(inactive_task_frame_bx, "inactive_task_frame",
"bx");
orc->has_signal = MEMBER_EXISTS("orc_entry", "signal"); /* added
at 6.3 */
orc->has_end = MEMBER_EXISTS("orc_entry", "end"); /* removed at
6.4 */
@@ -9071,17 +9199,128 @@ x86_64_get_kvaddr_ranges(struct vaddr_range *vrp)
return cnt;
}
+#define REG_CASE(R, r) \
+ case R##_REGNUM: \
+ if (size != sizeof(ur_bitmap->r)) { \
+ ret = FALSE; break; \
+ } else { \
+ memcpy(value, &ur_bitmap->r, size); \
+ ret = TRUE; break; \
+ }
+
static int
-x86_64_get_cpu_reg(int cpu, int regno, const char *name,
+x86_64_get_current_task_reg(int regno, const char *name,
int size, void *value)
{
- if (regno >= LAST_REGNUM)
- return FALSE;
+ struct bt_info bt_info, bt_setup;
+ struct task_context *tc;
+ struct user_regs_bitmap_struct *ur_bitmap;
+ ulong ip, sp;
+ bool ret = FALSE;
- if (VMSS_DUMPFILE())
- return vmware_vmss_get_cpu_reg(cpu, regno, name, size, value);
+ switch (regno) {
+ case RAX_REGNUM ... GS_REGNUM:
+ case FS_BASE_REGNUM ... ORIG_RAX_REGNUM:
+ break;
+ default:
+ return FALSE;
+ }
- return FALSE;
+ tc = CURRENT_CONTEXT();
+ if (!tc)
+ return FALSE;
+
+ if (VMSS_DUMPFILE())
+ return vmware_vmss_get_cpu_reg(tc->processor, regno, name, size, value);
+
+ BZERO(&bt_setup, sizeof(struct bt_info));
+ clone_bt_info(&bt_setup, &bt_info, tc);
+ fill_stackbuf(&bt_info);
+
+ // reusing the get_dumpfile_regs function to get pt regs structure
+ get_dumpfile_regs(&bt_info, &sp, &ip);
+ if (bt_info.stackbuf)
+ FREEBUF(bt_info.stackbuf);
+ ur_bitmap = (struct user_regs_bitmap_struct *)bt_info.machdep;
+ if (!ur_bitmap)
+ return FALSE;
+
+ /* Get all registers from elf notes*/
+ if (!bt_info.need_free) {
+ goto get_all;
+ }
+
+ /* Get subset registers from stack frame*/
+ switch (ur_bitmap->bitmap) {
+ case 0x5003f:
+ switch (regno) {
+ case R12_REGNUM ... R15_REGNUM:
+ case RBP_REGNUM:
+ case RBX_REGNUM:
+ case RIP_REGNUM:
+ case RSP_REGNUM:
+ break;
+ default:
+ return FALSE;
+ }
+ break;
+ case 0x50010:
+ switch (regno) {
+ case RBP_REGNUM:
+ case RIP_REGNUM:
+ case RSP_REGNUM:
+ break;
+ default:
+ return FALSE;
+ }
+ break;
+ case 0x50000:
+ switch (regno) {
+ case RIP_REGNUM:
+ case RSP_REGNUM:
+ break;
+ default:
+ return FALSE;
+ }
+ break;
+ }
+
+get_all:
+ switch (regno) {
+ REG_CASE(RAX, ax);
+ REG_CASE(RBX, bx);
+ REG_CASE(RCX, cx);
+ REG_CASE(RDX, dx);
+ REG_CASE(RSI, si);
+ REG_CASE(RDI, di);
+ REG_CASE(RBP, bp);
+ REG_CASE(RSP, sp);
+ REG_CASE(R8, r8);
+ REG_CASE(R9, r9);
+ REG_CASE(R10, r10);
+ REG_CASE(R11, r11);
+ REG_CASE(R12, r12);
+ REG_CASE(R13, r13);
+ REG_CASE(R14, r14);
+ REG_CASE(R15, r15);
+ REG_CASE(RIP, ip);
+ REG_CASE(EFLAGS, flags);
+ REG_CASE(CS, cs);
+ REG_CASE(SS, ss);
+ REG_CASE(DS, ds);
+ REG_CASE(ES, es);
+ REG_CASE(FS, fs);
+ REG_CASE(GS, gs);
+ REG_CASE(FS_BASE, fs_base);
+ REG_CASE(GS_BASE, gs_base);
+ REG_CASE(ORIG_RAX, orig_ax);
+ }
+
+ if (bt_info.need_free) {
+ FREEBUF(ur_bitmap);
+ bt_info.need_free = FALSE;
+ }
+ return ret;
}
/*
--
2.40.1