Kernel commit 3f3c8b8c4b2a34776c3470142a7c8baafcda6eb0 changed the NMI stack
layout, adding 12 more values on the stack. The fix has two parts:
1. Determine if this kernel has the nested NMI layout and set a
machine-specific flag (NESTED_NMI) if it does.
2. When backtracing an NMI stack, use the saved values instead of those
found at the top of stack.
Additionally, kernel commit 28696f434fef0efa97534b59986ad33b9c4df7f8 changed
the stack layout again, swapping the location of the "saved" and "copied"
registers. This can be detected automatically, because the "copied" registers
contain either a copy of the "saved" registers, or point to repeat_nmi. So,
if repeat_nmi is found as the return address, assume that this is the old
layout and adjust the stack pointer again.
Without the patch, wrong register values are shown in the NMI backtrace.
Signed-off-by: Petr Tesarik <ptesarik(a)suse.cz>
---
defs.h | 1 +
x86_64.c | 73 +++++++++++++++++++++++++++++++++++++++++++++++++---------------
2 files changed, 57 insertions(+), 17 deletions(-)
diff --git a/defs.h b/defs.h
index 711b154..4054de4 100644
--- a/defs.h
+++ b/defs.h
@@ -5123,6 +5123,7 @@ struct machine_specific {
#define VM_XEN_RHEL4 (0x100)
#define FRAMEPOINTER (0x200)
#define GART_REGION (0x400)
+#define NESTED_NMI (0x800)
#define VM_FLAGS (VM_ORIG|VM_2_6_11|VM_XEN|VM_XEN_RHEL4)
diff --git a/x86_64.c b/x86_64.c
index 5364c30..fd384ac 100644
--- a/x86_64.c
+++ b/x86_64.c
@@ -468,6 +468,8 @@ x86_64_init(int when)
else
x86_64_per_cpu_init();
x86_64_ist_init();
+ if (symbol_exists("repeat_nmi"))
+ machdep->flags |= NESTED_NMI;
machdep->in_alternate_stack = x86_64_in_alternate_stack;
if ((machdep->machspec->irqstack = (char *)
malloc(machdep->machspec->stkinfo.isize)) == NULL)
@@ -3009,6 +3011,8 @@ in_exception_stack:
}
stacktop = bt->stacktop - SIZE(pt_regs);
+ if ((machdep->flags & NESTED_NMI) && estack_index == NMI_STACK)
+ stacktop -= 12*sizeof(ulong);
bt->flags &= ~BT_FRAMESIZE_DISABLE;
@@ -3046,21 +3050,37 @@ in_exception_stack:
}
cs = x86_64_exception_frame(EFRAME_PRINT|EFRAME_CS, 0,
- bt->stackbuf + (bt->stacktop - bt->stackbase) -
- SIZE(pt_regs), bt, ofp);
+ bt->stackbuf + (stacktop - bt->stackbase),
+ bt, ofp);
if (!BT_REFERENCE_CHECK(bt))
fprintf(fp, "--- <%s exception stack> ---\n",
x86_64_exception_stacks[estack_index]);
- /*
- * stack = (unsigned long *) estack_end[-2];
+ /*
+ * Find the CPU-saved, or handler-saved registers
*/
up = (ulong *)(&bt->stackbuf[bt->stacktop - bt->stackbase]);
- up -= 2;
- rsp = bt->stkptr = *up;
- up -= 3;
- bt->instptr = *up;
+ up -= 5;
+ if ((machdep->flags & NESTED_NMI) &&
+ estack_index == NMI_STACK &&
+ bt->stkptr <= bt->stacktop - 17*sizeof(ulong)) {
+ up -= 12;
+ /* Copied and saved regs are swapped in pre-3.8 kernels */
+ if (*up == symbol_value("repeat_nmi"))
+ up += 5;
+ }
+
+ /* Registers (as saved by CPU):
+ *
+ * up[4] SS
+ * up[3] RSP
+ * up[2] RFLAGS
+ * up[1] CS
+ * up[0] RIP
+ */
+ rsp = bt->stkptr = up[3];
+ bt->instptr = up[0];
if (cs & 3)
done = TRUE; /* user-mode exception */
else
@@ -3513,27 +3533,46 @@ in_exception_stack:
}
stacktop = bt->stacktop - SIZE(pt_regs);
-
+ if ((machdep->flags & NESTED_NMI) &&
+ estack_index == NMI_STACK)
+ stacktop -= 12*sizeof(ulong);
+
if (!done) {
level = dwarf_backtrace(bt, level, stacktop);
done = TRUE;
}
cs = x86_64_exception_frame(EFRAME_PRINT|EFRAME_CS, 0,
- bt->stackbuf + (bt->stacktop - bt->stackbase) -
- SIZE(pt_regs), bt, ofp);
+ bt->stackbuf + (stacktop - bt->stackbase),
+ bt, ofp);
if (!BT_REFERENCE_CHECK(bt))
fprintf(fp, "--- <exception stack> ---\n");
- /*
- * stack = (unsigned long *) estack_end[-2];
+ /*
+ * Find the CPU-saved, or handler-saved registers
*/
up = (ulong *)(&bt->stackbuf[bt->stacktop - bt->stackbase]);
- up -= 2;
- rsp = bt->stkptr = *up;
- up -= 3;
- bt->instptr = *up;
+ up -= 5;
+ if ((machdep->flags & NESTED_NMI) &&
+ estack_index == NMI_STACK &&
+ bt->stkptr <= bt->stacktop - 17*sizeof(ulong)) {
+ up -= 12;
+ /* Copied and saved regs are swapped in pre-3.8 kernels */
+ if (*up == symbol_value("repeat_nmi"))
+ up += 5;
+ }
+
+ /* Registers (as saved by CPU):
+ *
+ * up[4] SS
+ * up[3] RSP
+ * up[2] RFLAGS
+ * up[1] CS
+ * up[0] RIP
+ */
+ rsp = bt->stkptr = up[3];
+ bt->instptr = up[0];
if (cs & 3)
done = TRUE; /* user-mode exception */
else
--
1.8.4.5