Hi Lianbo,
On Mon, May 27, 2024 at 11:30 AM Lianbo Jiang <lijiang(a)redhat.com> wrote:
Hi, Tao
Thank you for the fix.
On 5/23/24 12:06 PM, devel-request(a)lists.crash-utility.osci.io wrote:
> Date: Thu, 23 May 2024 12:06:03 +0800
> From: Tao Liu<ltao(a)redhat.com>
> Subject: [Crash-utility] [PATCH] x86_64: Add
> top_of_kernel_stack_padding for kernel stack
> To:devel@lists.crash-utility.osci.io
> Cc: Tao Liu<ltao(a)redhat.com>
> Message-ID:<20240523040603.10304-1-ltao@redhat.com>
> Content-Type: text/plain; charset="US-ASCII"; x-default=true
>
> With kernel patch [1], x86_64 will add extra padding for kernel stack,
> as a result, the pt_regs will be shift down by the offset of padding.
> Without the patch, the values of registers read from pt_regs will be
> incorrect.
>
> Though currently the TOP_OF_KERNEL_STACK_PADDING is configured by
> Kconfig, according to kernel code comment [2], the value may be made
> dynamicly later. In addition there might be systems compiled without
> Kconfig avaliable. So in this patch, we will calculate the value of
> TOP_OF_KERNEL_STACK_PADDING.
>
> The calculation is as follows:
>
> 1) in startup_64(), there is a lea instruction as:
> leaq (__end_init_task - TOP_OF_KERNEL_STACK_PADDING - PTREGS_SIZE)(%rip), %rsp
>
> 2) in rewind_stack_and_make_dead(), there is a lea instruction as:
> leaq -PTREGS_SIZE(%rax), %rsp
>
> The disassembled 2 instructions will be like:
>
> 1) 0xffffffff93a0007d <startup_64+3>: lea 0x1e03ec4(%rip),%rsp
# 0xffffffff95803f48
>
^^^^^^^^^^^^^^^^^^^^
> 2) 0xffffffff93a0465a <rewind_stack_and_make_dead+10>: lea
-0xa8(%rax),%rsp
> ^^^^
> 0xffffffff95803f48 is the value of (__end_init_task -
> TOP_OF_KERNEL_STACK_PADDING - PTREGS_SIZE), and 0xa8 is the value of
> PTREGS_SIZE, __end_init_task can be get by symbol reading.
Calculating the value of TOP_OF_KERNEL_STACK_PADDING, which looks good, but it heavily
relies on compiler.
Normally we would use this way unless there is no other choice.
How about the following changes? Although it doesn't handle the case that the value
is dynamic, let's see
how to change in the kernel in future, and then consider how to reflect it in
crash-utility.
Sure, looks good to me, so let's go with this, and update it later
when kernel changes.
Thanks,
Tao Liu
diff --git a/defs.h b/defs.h
index 01f316e67dde..42d875965256 100644
--- a/defs.h
+++ b/defs.h
@@ -2414,6 +2414,7 @@ struct size_table { /* stash of commonly-used sizes */
long maple_tree;
long maple_node;
long module_memory;
+ long fred_frame;
};
struct array_table {
diff --git a/kernel.c b/kernel.c
index 1728b70c1b5c..cd3d6044cc9a 100644
--- a/kernel.c
+++ b/kernel.c
@@ -668,6 +668,7 @@ kernel_init()
STRUCT_SIZE_INIT(softirq_state, "softirq_state");
STRUCT_SIZE_INIT(softirq_action, "softirq_action");
STRUCT_SIZE_INIT(desc_struct, "desc_struct");
+ STRUCT_SIZE_INIT(fred_frame, "fred_frame");
STRUCT_SIZE_INIT(char_device_struct, "char_device_struct");
if (VALID_STRUCT(char_device_struct)) {
diff --git a/x86_64.c b/x86_64.c
index 0c21eb827e4a..6777c93e6b47 100644
--- a/x86_64.c
+++ b/x86_64.c
@@ -4086,10 +4086,11 @@ in_exception_stack:
if (!irq_eframe && !is_kernel_thread(bt->tc->task) &&
(GET_STACKBASE(bt->tc->task) == bt->stackbase)) {
+ long stack_padding_size = SIZE(fred_frame) > 0 ? (2*8) : 0;
user_mode_eframe = bt->stacktop - SIZE(pt_regs);
if (last_process_stack_eframe < user_mode_eframe)
x86_64_exception_frame(EFRAME_PRINT, 0, bt->stackbuf +
- (bt->stacktop - bt->stackbase) - SIZE(pt_regs),
+ (bt->stacktop - stack_padding_size - bt->stackbase)
- SIZE(pt_regs),
bt, ofp);
}
@@ -4407,10 +4408,11 @@ in_exception_stack:
if (!irq_eframe && !is_kernel_thread(bt->tc->task) &&
(GET_STACKBASE(bt->tc->task) == bt->stackbase)) {
+ long stack_padding_size = SIZE(fred_frame) > 0 ? (2*8) : 0;
user_mode_eframe = bt->stacktop - SIZE(pt_regs);
if (last_process_stack_eframe < user_mode_eframe)
x86_64_exception_frame(EFRAME_PRINT, 0, bt->stackbuf +
- (bt->stacktop - bt->stackbase) - SIZE(pt_regs),
+ (bt->stacktop - stack_padding_size - bt->stackbase)
- SIZE(pt_regs),
bt, ofp);
}
Thanks
Lianbo
>
[
1]:https://lore.kernel.org/all/170668568261.398.10403890006820046961.tip-...
>
[
2]:https://elixir.bootlin.com/linux/v6.9.1/source/arch/x86/include/asm/th...
>
> Signed-off-by: Tao Liu<ltao(a)redhat.com>
> ---
> x86_64.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--
> 1 file changed, 82 insertions(+), 2 deletions(-)
>
> diff --git a/x86_64.c b/x86_64.c
> index 0c21eb8..43a31c2 100644
> --- a/x86_64.c
> +++ b/x86_64.c
> @@ -137,6 +137,7 @@ static orc_entry *orc_find(ulong);
> static orc_entry *orc_module_find(ulong);
> static ulong ip_table_to_vaddr(ulong);
> static void orc_dump(ulong);
> +static long top_of_kernel_stack_padding(void);
>
> struct machine_specific x86_64_machine_specific = { 0 };
>
> @@ -4089,7 +4090,8 @@ in_exception_stack:
> user_mode_eframe = bt->stacktop - SIZE(pt_regs);
> if (last_process_stack_eframe < user_mode_eframe)
> x86_64_exception_frame(EFRAME_PRINT, 0, bt->stackbuf +
> - (bt->stacktop - bt->stackbase) - SIZE(pt_regs),
> + (bt->stacktop - bt->stackbase) - SIZE(pt_regs)
-
> + top_of_kernel_stack_padding(),
> bt, ofp);
> }
>
> @@ -4410,7 +4412,8 @@ in_exception_stack:
> user_mode_eframe = bt->stacktop - SIZE(pt_regs);
> if (last_process_stack_eframe < user_mode_eframe)
> x86_64_exception_frame(EFRAME_PRINT, 0, bt->stackbuf +
> - (bt->stacktop - bt->stackbase) - SIZE(pt_regs),
> + (bt->stacktop - bt->stackbase) - SIZE(pt_regs)
-
> + top_of_kernel_stack_padding(),
> bt, ofp);
> }
>
> @@ -9541,4 +9544,81 @@ x86_64_swp_offset(ulong entry)
> return SWP_OFFSET(entry);
> }
>
> +static long
> +top_of_kernel_stack_padding(void)
> +{
> + char buf1[BUFSIZE];
> + char *cursor;
> + long final_value, ptregs_size_value;
> + char *arglist[MAXARGS];
> + bool found = FALSE;
> +
> + static long kernel_stack_padding = -1;
> +
> + if (kernel_stack_padding >= 0)
> + return kernel_stack_padding;
> +
> + /*
> + * startup_64:
> + * ...
> + * mov %rsi,%r15
> + * leaq (__end_init_task - TOP_OF_KERNEL_STACK_PADDING - PTREGS_SIZE)(%rip),
%rsp
> + */
> + sprintf(buf1, "disass /r startup_64");
> + open_tmpfile2();
> + if (!gdb_pass_through(buf1, pc->tmpfile2, GNU_RETURN_ON_ERROR)) {
> + kernel_stack_padding = 0;
> + goto out;
> + }
> +
> + rewind(pc->tmpfile2);
> + while (fgets(buf1, BUFSIZE, pc->tmpfile2) && !found) {
> + // machine code of "mov %rsi,%r15"
> + if (strstr(buf1, "49 89 f7"))
> + found = TRUE;
> + }
> + if (!found || !(cursor = strstr(buf1, "# 0x"))) {
> + kernel_stack_padding = 0;
> + goto out;
> + }
> +
> + parse_line(cursor, arglist);
> + final_value = stol(arglist[1], FAULT_ON_ERROR, NULL);
> +
> + /*
> + * rewind_stack_and_make_dead:
> + * ...
> + * leaq -PTREGS_SIZE(%rax), %rsp
> + */
> + found = FALSE;
> + rewind(pc->tmpfile2);
> + sprintf(buf1, "disass rewind_stack_and_make_dead");
> + if (!gdb_pass_through(buf1, pc->tmpfile2, GNU_RETURN_ON_ERROR)) {
> + kernel_stack_padding = 0;
> + goto out;
> + }
> + rewind(pc->tmpfile2);
> + while (fgets(buf1, BUFSIZE, pc->tmpfile2)) {
> + // find leaq -PTREGS_SIZE(%rax), %rsp
> + if (strstr(buf1, "lea") && (cursor = strstr(buf1,
"-0x"))) {
> + parse_line(cursor, arglist);
> + char *p = strchr(arglist[0], '(');
> + *p = '\0';
> + ptregs_size_value = stol(arglist[0] + 1, FAULT_ON_ERROR,
NULL);
> + found = TRUE;
> + break;
> + }
> + }
> + if (!found) {
> + kernel_stack_padding = 0;
> + goto out;
> + }
> +
> + struct syment *s = symbol_search("__end_init_task");
> + kernel_stack_padding = s->value - final_value - ptregs_size_value;
> +out:
> + close_tmpfile2();
> + return kernel_stack_padding;
> +}
> +
> #endif /* X86_64 */
> -- 2.40.1