With kernel patch [1], x86_64 adds extra padding at the top of the kernel
stack; as a result, pt_regs is shifted down by the size of that padding.
Without this patch, the values of registers read from pt_regs will be
incorrect.
Though TOP_OF_KERNEL_STACK_PADDING is currently configured by
Kconfig, according to kernel code comment [2], the value may be made
dynamic later. In addition, there might be systems compiled without
Kconfig available. So in this patch, we calculate the value of
TOP_OF_KERNEL_STACK_PADDING.
The calculation is as follows:
1) in startup_64(), there is a lea instruction as:
leaq (__end_init_task - TOP_OF_KERNEL_STACK_PADDING - PTREGS_SIZE)(%rip), %rsp
2) in rewind_stack_and_make_dead(), there is a lea instruction as:
leaq -PTREGS_SIZE(%rax), %rsp
The two disassembled instructions look like:
1) 0xffffffff93a0007d <startup_64+3>: lea 0x1e03ec4(%rip),%rsp #
0xffffffff95803f48
^^^^^^^^^^^^^^^^^^^^
2) 0xffffffff93a0465a <rewind_stack_and_make_dead+10>: lea -0xa8(%rax),%rsp
^^^^
0xffffffff95803f48 is the value of (__end_init_task -
TOP_OF_KERNEL_STACK_PADDING - PTREGS_SIZE), and 0xa8 is the value of
PTREGS_SIZE. __end_init_task can be obtained by symbol lookup, so
TOP_OF_KERNEL_STACK_PADDING = __end_init_task - 0xffffffff95803f48 - 0xa8.
[1]:
https://lore.kernel.org/all/170668568261.398.10403890006820046961.tip-bot...
[2]:
https://elixir.bootlin.com/linux/v6.9.1/source/arch/x86/include/asm/threa...
Signed-off-by: Tao Liu <ltao(a)redhat.com>
---
x86_64.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 82 insertions(+), 2 deletions(-)
diff --git a/x86_64.c b/x86_64.c
index 0c21eb8..43a31c2 100644
--- a/x86_64.c
+++ b/x86_64.c
@@ -137,6 +137,7 @@ static orc_entry *orc_find(ulong);
static orc_entry *orc_module_find(ulong);
static ulong ip_table_to_vaddr(ulong);
static void orc_dump(ulong);
+static long top_of_kernel_stack_padding(void);
struct machine_specific x86_64_machine_specific = { 0 };
@@ -4089,7 +4090,8 @@ in_exception_stack:
user_mode_eframe = bt->stacktop - SIZE(pt_regs);
if (last_process_stack_eframe < user_mode_eframe)
x86_64_exception_frame(EFRAME_PRINT, 0, bt->stackbuf +
- (bt->stacktop - bt->stackbase) - SIZE(pt_regs),
+ (bt->stacktop - bt->stackbase) - SIZE(pt_regs) -
+ top_of_kernel_stack_padding(),
bt, ofp);
}
@@ -4410,7 +4412,8 @@ in_exception_stack:
user_mode_eframe = bt->stacktop - SIZE(pt_regs);
if (last_process_stack_eframe < user_mode_eframe)
x86_64_exception_frame(EFRAME_PRINT, 0, bt->stackbuf +
- (bt->stacktop - bt->stackbase) - SIZE(pt_regs),
+ (bt->stacktop - bt->stackbase) - SIZE(pt_regs) -
+ top_of_kernel_stack_padding(),
bt, ofp);
}
@@ -9541,4 +9544,81 @@ x86_64_swp_offset(ulong entry)
return SWP_OFFSET(entry);
}
+/*
+ * Compute TOP_OF_KERNEL_STACK_PADDING by disassembling two instructions:
+ *
+ *   startup_64:
+ *     leaq (__end_init_task - TOP_OF_KERNEL_STACK_PADDING - PTREGS_SIZE)(%rip), %rsp
+ *   rewind_stack_and_make_dead:
+ *     leaq -PTREGS_SIZE(%rax), %rsp
+ *
+ * which gives:
+ *
+ *   padding = __end_init_task - <startup_64 lea target> - PTREGS_SIZE
+ *
+ * The result is cached in a static after the first completed call.  If
+ * either instruction or the __end_init_task symbol cannot be found, or
+ * the computed value is negative, the padding is taken to be 0.
+ */
+static long
+top_of_kernel_stack_padding(void)
+{
+	char buf1[BUFSIZE];
+	char *cursor, *paren;
+	long final_value;
+	long ptregs_size_value = 0;
+	long padding = 0;	/* result for every early exit below */
+	char *arglist[MAXARGS];
+	struct syment *sp;
+	bool found = FALSE;
+
+	static long kernel_stack_padding = -1;
+
+	if (kernel_stack_padding >= 0)
+		return kernel_stack_padding;
+
+	/*
+	 * startup_64:
+	 * ...
+	 * mov %rsi,%r15
+	 * leaq (__end_init_task - TOP_OF_KERNEL_STACK_PADDING - PTREGS_SIZE)(%rip), %rsp
+	 */
+	sprintf(buf1, "disass /r startup_64");
+	open_tmpfile2();
+	if (!gdb_pass_through(buf1, pc->tmpfile2, GNU_RETURN_ON_ERROR))
+		goto out;
+
+	cursor = NULL;
+	rewind(pc->tmpfile2);
+	while (fgets(buf1, BUFSIZE, pc->tmpfile2)) {
+		/* machine code of "mov %rsi,%r15" */
+		if (!strstr(buf1, "49 89 f7"))
+			continue;
+		/* the lea of interest is the line right after the mov */
+		if (fgets(buf1, BUFSIZE, pc->tmpfile2))
+			cursor = strstr(buf1, "# 0x");
+		break;
+	}
+	if (!cursor)
+		goto out;
+
+	/*
+	 * cursor points at "# 0x<address>", so the second token is the
+	 * resolved lea target.
+	 */
+	parse_line(cursor, arglist);
+	final_value = stol(arglist[1], FAULT_ON_ERROR, NULL);
+
+	/*
+	 * rewind_stack_and_make_dead:
+	 * ...
+	 * leaq -PTREGS_SIZE(%rax), %rsp
+	 *
+	 * Close and reopen the temporary file first: only rewinding it
+	 * would leave the tail of the startup_64 output behind whenever
+	 * this disassembly is the shorter one, and that tail would then
+	 * be scanned as well.
+	 */
+	close_tmpfile2();
+	open_tmpfile2();
+	sprintf(buf1, "disass rewind_stack_and_make_dead");
+	if (!gdb_pass_through(buf1, pc->tmpfile2, GNU_RETURN_ON_ERROR))
+		goto out;
+
+	rewind(pc->tmpfile2);
+	while (fgets(buf1, BUFSIZE, pc->tmpfile2)) {
+		/* find leaq -PTREGS_SIZE(%rax), %rsp */
+		if (!strstr(buf1, "lea") || !(cursor = strstr(buf1, "-0x")))
+			continue;
+		parse_line(cursor, arglist);
+		/* cut "-0xNN(%rax),%rsp" down to "-0xNN" */
+		paren = strchr(arglist[0], '(');
+		if (!paren)
+			continue;	/* not the operand form we expect */
+		*paren = '\0';
+		/* "+ 1" skips the leading '-' */
+		ptregs_size_value = stol(arglist[0] + 1, FAULT_ON_ERROR, NULL);
+		found = TRUE;
+		break;
+	}
+	if (!found)
+		goto out;
+
+	sp = symbol_search("__end_init_task");
+	if (!sp)
+		goto out;
+
+	padding = sp->value - final_value - ptregs_size_value;
+	/*
+	 * A negative result means the instructions matched above were not
+	 * the expected ones; it would also defeat the ">= 0" cache check.
+	 */
+	if (padding < 0)
+		padding = 0;
+out:
+	close_tmpfile2();
+	kernel_stack_padding = padding;
+	return kernel_stack_padding;
+}
+
#endif /* X86_64 */
--
2.40.1