We have encountered the following edge case for the ORC unwinder, which
results in incorrect stack unwinding:
$ objdump -d vmlinux
ffffffff8100be70 <panic>:
ffffffff8100be70: f3 0f 1e fa endbr64
ffffffff8100be74: e8 c7 b9 0d 00 call ffffffff810e7840
<__fentry__>
ffffffff8100be79: 55 push %rbp
ffffffff8100be7a: 49 89 fa mov %rdi,%r10
ffffffff8100be7d: 48 89 e5 mov %rsp,%rbp
ffffffff8100be80: 48 83 ec 50 sub $0x50,%rsp
...
ffffffff8100bed2: 48 89 44 24 10 mov %rax,0x10(%rsp)
ffffffff8100bed7: e8 f4 fc ff ff call ffffffff8100bbd0 <vpanic>
ffffffff8100bedc <nmi_panic.cold>:
ffffffff8100bedc: 48 c7 c7 6e c2 8b 82 mov $0xffffffff828bc26e,%rdi
ffffffff8100bee3: e8 88 ff ff ff call ffffffff8100be70 <panic>
$ objtool --dump=orc vmlinux
...
ffffffff8100be7a:type:call sp:sp+16 bp:prevsp-16 signal:0
ffffffff8100be80:type:call sp:bp+16 bp:prevsp-16 signal:0
ffffffff8100bedc:type:call sp:sp+8 bp:(und) signal:0
ffffffff8100bee8:type:call sp:sp+16 bp:(und) signal:0
crash> gdb bt
#0 0xffffffff8126b3e4 in crash_setup_regs
#1 0xffffffff8126b7a2 in __crash_kexec
#2 0xffffffff8100bca9 in vpanic
#3 0xffffffff8100bedc in panic
#4 0xffffffff81903a9a in sysrq_handle_crash
...
crash> bt -r
...
ffffc9000126fc40: machine_kexec+184 sysrq_showstate_op
ffffc9000126fc50: .LC4+109 __crash_kexec+114
ffffc9000126fc60: 0000000000000000 0000000000000007
ffffc9000126fc70: 0000000000000063 ffffc9000126fd48
ffffc9000126fc80: .LC4+109 sysrq_showstate_op
ffffc9000126fc90: 0000000000000002 _printk_rb_static_infos+64944
ffffc9000126fca0: _printk_rb_static_descs+17712 0000000000000000
ffffc9000126fcb0: 0000000000000000 0000000000000000
ffffc9000126fcc0: 0000000000000001 0000000000000000
ffffc9000126fcd0: ffffc9000126fc60 0000000000000000
ffffc9000126fce0: crash_setup_regs+116 0000000000000010
ffffc9000126fcf0: 0000000000000046 ffffc9000126fc58
ffffc9000126fd00: 0000000000000018 a9e74447ad8b9200
ffffc9000126fd10: sysrq_showstate_op vpanic+217
ffffc9000126fd20: sysrq_crash_op ffffc9000126fd98
ffffc9000126fd30: 0000000000000000 0000000000000063
ffffc9000126fd40: panic+108 0000000000000008
ffffc9000126fd50: ffffc9000126fda8 ffffc9000126fd68
ffffc9000126fd60: a9e74447ad8b9200 a9e74447ad8b9200
ffffc9000126fd70: ffff88813bc1d1c0 0000000000000000
ffffc9000126fd80: 0000000000000000 0000000000000000
ffffc9000126fd90: _printk_rb_static_descs+786408 0000000000000000
ffffc9000126fda0: sysrq_handle_crash+26 __handle_sysrq.cold+159
Within the stack, we can identify the following stackframes:
1. rsp ffffc9000126fc40, rip (machine_kexec+184)
2. rsp ffffc9000126fc58, rip (__crash_kexec+114)
3. rsp ffffc9000126fd10, rip (vpanic+217)
4. rsp ffffc9000126fd40, rip (panic+108)
Before the fix, crash passed the rip directly to orc_find() to calculate
each framesize. This is incorrect, because the rip is the value we found
on the stack: as we all know, the "call" instruction pushes the address
of the next instruction onto the stack, rather than the address of the
"call" instruction itself. So for the (panic+108, or ffffffff8100bedc)
case, the ORC unwinder finds that the frame of 0xffffffff8100bedc is
sp:sp+8. This is incorrect, because 0xffffffff8100bedc belongs to a
different function, namely nmi_panic(), rather than panic(). So we should
use (rip - 5, or ffffffff8100bed7) for orc_find(), which gives sp:bp+16
for unwinding the stack frame. We can confirm this by reading the
disassembly at ffffffff8100be70 ~ ffffffff8100be80.
This is an edge case because the address following the "call" instruction
is located in a different function, which leads orc_find() to return a
wrong framesize.
This patch fixes this by checking the instruction immediately preceding
the rip we read from the stack. If that instruction is a "call", we pass
the address of the "call" to orc_find(), to ensure that we ORC-unwind the
correct function.
Signed-off-by: Tao Liu <ltao(a)redhat.com>
---
x86_64.c | 34 +++++++++++++++++++++++++++++++++-
1 file changed, 33 insertions(+), 1 deletion(-)
diff --git a/x86_64.c b/x86_64.c
index b2cddbf..70bc1da 100644
--- a/x86_64.c
+++ b/x86_64.c
@@ -8887,7 +8887,38 @@ x86_64_get_framesize(struct bt_info *bt, ulong textaddr, ulong rsp,
char *stack_
if ((sp->value >= kt->init_begin) && (sp->value <
kt->init_end))
return 0;
- if ((machdep->flags & ORC) && (korc = orc_find(textaddr))) {
+ if (machdep->flags & ORC) {
+ /*
+ * Disassemble & check if the 1st previous instruction of textaddr
+ * is "call". If it does, the orc should unwind against the
+ * address of the "call" inst, rather than the textaddr itself.
+ */
+ open_tmpfile2();
+ sprintf(buf, "x/-1i 0x%lx", textaddr);
+ if (!gdb_pass_through(buf, pc->tmpfile2, GNU_RETURN_ON_ERROR)) {
+ close_tmpfile2();
+ bt->flags |= BT_FRAMESIZE_DISABLE;
+ return 0;
+ }
+ rewind(pc->tmpfile2);
+ if (fgets(buf, BUFSIZE, pc->tmpfile2)) {
+ c = parse_line(buf, arglist);
+ for (int i = 0; i < c; i++) {
+ if (!strcmp(arglist[i], "call")) {
+ reterror = 0;
+ ulong tmp_val = htol(arglist[0],
+ RETURN_ON_ERROR, &reterror);
+ if (!reterror)
+ textaddr = tmp_val;
+ break;
+ }
+ }
+ }
+ close_tmpfile2();
+
+ if ((korc = orc_find(textaddr)) == NULL)
+ goto out_orc;
+
if (CRASHDEBUG(1)) {
struct ORC_data *orc = &machdep->machspec->orc;
fprintf(fp,
@@ -8939,6 +8970,7 @@ x86_64_get_framesize(struct bt_info *bt, ulong textaddr, ulong rsp,
char *stack_
}
}
+out_orc:
framesize = max = 0;
max_instructions = textaddr - sp->value;
instr = arg = -1;
--
2.47.0