Hi, Tao
Thank you for the fix.
On 5/23/24 12:06 PM, devel-request(a)lists.crash-utility.osci.io wrote:
Date: Thu, 23 May 2024 12:06:03 +0800
From: Tao Liu<ltao(a)redhat.com>
Subject: [Crash-utility] [PATCH] x86_64: Add
top_of_kernel_stack_padding for kernel stack
To:devel@lists.crash-utility.osci.io
Cc: Tao Liu<ltao(a)redhat.com>
Message-ID:<20240523040603.10304-1-ltao@redhat.com>
Content-Type: text/plain; charset="US-ASCII"; x-default=true
With kernel patch [1], x86_64 will add extra padding for kernel stack,
as a result, the pt_regs will be shift down by the offset of padding.
Without the patch, the values of registers read from pt_regs will be
incorrect.
Though currently the TOP_OF_KERNEL_STACK_PADDING is configured by
Kconfig, according to kernel code comment [2], the value may be made
dynamicly later. In addition there might be systems compiled without
Kconfig avaliable. So in this patch, we will calculate the value of
TOP_OF_KERNEL_STACK_PADDING.
The calculation is as follows:
1) in startup_64(), there is a lea instruction as:
leaq (__end_init_task - TOP_OF_KERNEL_STACK_PADDING - PTREGS_SIZE)(%rip), %rsp
2) in rewind_stack_and_make_dead(), there is a lea instruction as:
leaq -PTREGS_SIZE(%rax), %rsp
The disassembled 2 instructions will be like:
1) 0xffffffff93a0007d <startup_64+3>: lea 0x1e03ec4(%rip),%rsp #
0xffffffff95803f48
^^^^^^^^^^^^^^^^^^^^
2) 0xffffffff93a0465a <rewind_stack_and_make_dead+10>: lea -0xa8(%rax),%rsp
^^^^
0xffffffff95803f48 is the value of (__end_init_task -
TOP_OF_KERNEL_STACK_PADDING - PTREGS_SIZE), and 0xa8 is the value of
PTREGS_SIZE, __end_init_task can be get by symbol reading.
Calculating the value of TOP_OF_KERNEL_STACK_PADDING, which looks good, but it heavily
relies on compiler.
Normally we would use this way unless there is no other choice.
How about the following changes? Although it doesn't handle the case that the value is
dynamic, let's see
how to change in the kernel in future, and then consider how to reflect it in
crash-utility.
diff --git a/defs.h b/defs.h
index 01f316e67dde..42d875965256 100644
--- a/defs.h
+++ b/defs.h
@@ -2414,6 +2414,7 @@ struct size_table { /* stash of commonly-used sizes */
long maple_tree;
long maple_node;
long module_memory;
+ long fred_frame;
};
struct array_table {
diff --git a/kernel.c b/kernel.c
index 1728b70c1b5c..cd3d6044cc9a 100644
--- a/kernel.c
+++ b/kernel.c
@@ -668,6 +668,7 @@ kernel_init()
STRUCT_SIZE_INIT(softirq_state, "softirq_state");
STRUCT_SIZE_INIT(softirq_action, "softirq_action");
STRUCT_SIZE_INIT(desc_struct, "desc_struct");
+ STRUCT_SIZE_INIT(fred_frame, "fred_frame");
STRUCT_SIZE_INIT(char_device_struct, "char_device_struct");
if (VALID_STRUCT(char_device_struct)) {
diff --git a/x86_64.c b/x86_64.c
index 0c21eb827e4a..6777c93e6b47 100644
--- a/x86_64.c
+++ b/x86_64.c
@@ -4086,10 +4086,11 @@ in_exception_stack:
if (!irq_eframe && !is_kernel_thread(bt->tc->task) &&
(GET_STACKBASE(bt->tc->task) == bt->stackbase)) {
+ long stack_padding_size = SIZE(fred_frame) > 0 ? (2*8) : 0;
user_mode_eframe = bt->stacktop - SIZE(pt_regs);
if (last_process_stack_eframe < user_mode_eframe)
x86_64_exception_frame(EFRAME_PRINT, 0, bt->stackbuf +
- (bt->stacktop - bt->stackbase) - SIZE(pt_regs),
+ (bt->stacktop - stack_padding_size - bt->stackbase) - SIZE(pt_regs),
bt, ofp);
}
@@ -4407,10 +4408,11 @@ in_exception_stack:
if (!irq_eframe && !is_kernel_thread(bt->tc->task) &&
(GET_STACKBASE(bt->tc->task) == bt->stackbase)) {
+ long stack_padding_size = SIZE(fred_frame) > 0 ? (2*8) : 0;
user_mode_eframe = bt->stacktop - SIZE(pt_regs);
if (last_process_stack_eframe < user_mode_eframe)
x86_64_exception_frame(EFRAME_PRINT, 0, bt->stackbuf +
- (bt->stacktop - bt->stackbase) - SIZE(pt_regs),
+ (bt->stacktop - stack_padding_size - bt->stackbase) - SIZE(pt_regs),
bt, ofp);
}
Thanks
Lianbo
[
1]:https://lore.kernel.org/all/170668568261.398.10403890006820046961.tip-...
[
2]:https://elixir.bootlin.com/linux/v6.9.1/source/arch/x86/include/asm/th...
Signed-off-by: Tao Liu<ltao(a)redhat.com>
---
x86_64.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 82 insertions(+), 2 deletions(-)
diff --git a/x86_64.c b/x86_64.c
index 0c21eb8..43a31c2 100644
--- a/x86_64.c
+++ b/x86_64.c
@@ -137,6 +137,7 @@ static orc_entry *orc_find(ulong);
static orc_entry *orc_module_find(ulong);
static ulong ip_table_to_vaddr(ulong);
static void orc_dump(ulong);
+static long top_of_kernel_stack_padding(void);
struct machine_specific x86_64_machine_specific = { 0 };
@@ -4089,7 +4090,8 @@ in_exception_stack:
user_mode_eframe = bt->stacktop - SIZE(pt_regs);
if (last_process_stack_eframe < user_mode_eframe)
x86_64_exception_frame(EFRAME_PRINT, 0, bt->stackbuf +
- (bt->stacktop - bt->stackbase) - SIZE(pt_regs),
+ (bt->stacktop - bt->stackbase) - SIZE(pt_regs) -
+ top_of_kernel_stack_padding(),
bt, ofp);
}
@@ -4410,7 +4412,8 @@ in_exception_stack:
user_mode_eframe = bt->stacktop - SIZE(pt_regs);
if (last_process_stack_eframe < user_mode_eframe)
x86_64_exception_frame(EFRAME_PRINT, 0, bt->stackbuf +
- (bt->stacktop - bt->stackbase) - SIZE(pt_regs),
+ (bt->stacktop - bt->stackbase) - SIZE(pt_regs) -
+ top_of_kernel_stack_padding(),
bt, ofp);
}
@@ -9541,4 +9544,81 @@ x86_64_swp_offset(ulong entry)
return SWP_OFFSET(entry);
}
+static long
+top_of_kernel_stack_padding(void)
+{
+ char buf1[BUFSIZE];
+ char *cursor;
+ long final_value, ptregs_size_value;
+ char *arglist[MAXARGS];
+ bool found = FALSE;
+
+ static long kernel_stack_padding = -1;
+
+ if (kernel_stack_padding >= 0)
+ return kernel_stack_padding;
+
+ /*
+ * startup_64:
+ * ...
+ * mov %rsi,%r15
+ * leaq (__end_init_task - TOP_OF_KERNEL_STACK_PADDING - PTREGS_SIZE)(%rip), %rsp
+ */
+ sprintf(buf1, "disass /r startup_64");
+ open_tmpfile2();
+ if (!gdb_pass_through(buf1, pc->tmpfile2, GNU_RETURN_ON_ERROR)) {
+ kernel_stack_padding = 0;
+ goto out;
+ }
+
+ rewind(pc->tmpfile2);
+ while (fgets(buf1, BUFSIZE, pc->tmpfile2) && !found) {
+ // machine code of "mov %rsi,%r15"
+ if (strstr(buf1, "49 89 f7"))
+ found = TRUE;
+ }
+ if (!found || !(cursor = strstr(buf1, "# 0x"))) {
+ kernel_stack_padding = 0;
+ goto out;
+ }
+
+ parse_line(cursor, arglist);
+ final_value = stol(arglist[1], FAULT_ON_ERROR, NULL);
+
+ /*
+ * rewind_stack_and_make_dead:
+ * ...
+ * leaq -PTREGS_SIZE(%rax), %rsp
+ */
+ found = FALSE;
+ rewind(pc->tmpfile2);
+ sprintf(buf1, "disass rewind_stack_and_make_dead");
+ if (!gdb_pass_through(buf1, pc->tmpfile2, GNU_RETURN_ON_ERROR)) {
+ kernel_stack_padding = 0;
+ goto out;
+ }
+ rewind(pc->tmpfile2);
+ while (fgets(buf1, BUFSIZE, pc->tmpfile2)) {
+ // find leaq -PTREGS_SIZE(%rax), %rsp
+ if (strstr(buf1, "lea") && (cursor = strstr(buf1, "-0x")))
{
+ parse_line(cursor, arglist);
+ char *p = strchr(arglist[0], '(');
+ *p = '\0';
+ ptregs_size_value = stol(arglist[0] + 1, FAULT_ON_ERROR, NULL);
+ found = TRUE;
+ break;
+ }
+ }
+ if (!found) {
+ kernel_stack_padding = 0;
+ goto out;
+ }
+
+ struct syment *s = symbol_search("__end_init_task");
+ kernel_stack_padding = s->value - final_value - ptregs_size_value;
+out:
+ close_tmpfile2();
+ return kernel_stack_padding;
+}
+
#endif /* X86_64 */
-- 2.40.1