Date: Fri, 31 May 2024 13:14:02 +0800
From: Tao Liu <ltao@redhat.com>
Subject: [Crash-utility] Re: [PATCH] x86_64: Add
top_of_kernel_stack_padding for kernel stack
To: Lianbo Jiang <lijiang@redhat.com>
Cc: devel@lists.crash-utility.osci.io
Message-ID:
<CAO7dBbWZgeiJBN2sv-3e00TSU2uO_kMbSZ_NGVCft6xzdj3EMA@mail.gmail.com>
Content-Type: text/plain; charset="UTF-8"
Hi Lianbo,
On Mon, May 27, 2024 at 11:30 AM Lianbo Jiang <lijiang@redhat.com> wrote:
>
> Hi, Tao
>
> Thank you for the fix.
>
> On 5/23/24 12:06 PM, devel-request@lists.crash-utility.osci.io wrote:
> > Date: Thu, 23 May 2024 12:06:03 +0800
> > From: Tao Liu<ltao@redhat.com>
> > Subject: [Crash-utility] [PATCH] x86_64: Add
> > top_of_kernel_stack_padding for kernel stack
> > To:devel@lists.crash-utility.osci.io
> > Cc: Tao Liu<ltao@redhat.com>
> > Message-ID:<20240523040603.10304-1-ltao@redhat.com>
> > Content-Type: text/plain; charset="US-ASCII"; x-default=true
> >
> > With kernel patch [1], x86_64 will add extra padding for kernel stack,
> > as a result, the pt_regs will be shift down by the offset of padding.
> > Without the patch, the values of registers read from pt_regs will be
> > incorrect.
> >
> > Though currently the TOP_OF_KERNEL_STACK_PADDING is configured by
> > Kconfig, according to kernel code comment [2], the value may be made
> > dynamicly later. In addition there might be systems compiled without
> > Kconfig avaliable. So in this patch, we will calculate the value of
> > TOP_OF_KERNEL_STACK_PADDING.
> >
> > The calculation is as follows:
> >
> > 1) in startup_64(), there is a lea instruction as:
> > leaq (__end_init_task - TOP_OF_KERNEL_STACK_PADDING - PTREGS_SIZE)(%rip), %rsp
> >
> > 2) in rewind_stack_and_make_dead(), there is a lea instruction as:
> > leaq -PTREGS_SIZE(%rax), %rsp
> >
> > The disassembled 2 instructions will be like:
> >
> > 1) 0xffffffff93a0007d <startup_64+3>: lea 0x1e03ec4(%rip),%rsp # 0xffffffff95803f48
> > ^^^^^^^^^^^^^^^^^^^^
> > 2) 0xffffffff93a0465a <rewind_stack_and_make_dead+10>: lea -0xa8(%rax),%rsp
> > ^^^^
> > 0xffffffff95803f48 is the value of (__end_init_task -
> > TOP_OF_KERNEL_STACK_PADDING - PTREGS_SIZE), and 0xa8 is the value of
> > PTREGS_SIZE, __end_init_task can be get by symbol reading.
>
> Calculating the value of TOP_OF_KERNEL_STACK_PADDING, which looks good, but it heavily relies on compiler.
> Normally we would use this way unless there is no other choice.
>
> How about the following changes? Although it doesn't handle the case that the value is dynamic, let's see
> how to change in the kernel in future, and then consider how to reflect it in crash-utility.
>
Sure, looks good to me, so let's go with this, and update it later
when kernel changes.
Ok. Thanks, Tao.
Applied with minor changes:
Lianbo
Thanks,
Tao Liu
>
> diff --git a/defs.h b/defs.h
> index 01f316e67dde..42d875965256 100644
> --- a/defs.h
> +++ b/defs.h
> @@ -2414,6 +2414,7 @@ struct size_table { /* stash of commonly-used sizes */
> long maple_tree;
> long maple_node;
> long module_memory;
> + long fred_frame;
> };
>
> struct array_table {
> diff --git a/kernel.c b/kernel.c
> index 1728b70c1b5c..cd3d6044cc9a 100644
> --- a/kernel.c
> +++ b/kernel.c
> @@ -668,6 +668,7 @@ kernel_init()
> STRUCT_SIZE_INIT(softirq_state, "softirq_state");
> STRUCT_SIZE_INIT(softirq_action, "softirq_action");
> STRUCT_SIZE_INIT(desc_struct, "desc_struct");
> + STRUCT_SIZE_INIT(fred_frame, "fred_frame");
>
> STRUCT_SIZE_INIT(char_device_struct, "char_device_struct");
> if (VALID_STRUCT(char_device_struct)) {
> diff --git a/x86_64.c b/x86_64.c
> index 0c21eb827e4a..6777c93e6b47 100644
> --- a/x86_64.c
> +++ b/x86_64.c
> @@ -4086,10 +4086,11 @@ in_exception_stack:
>
> if (!irq_eframe && !is_kernel_thread(bt->tc->task) &&
> (GET_STACKBASE(bt->tc->task) == bt->stackbase)) {
> + long stack_padding_size = SIZE(fred_frame) > 0 ? (2*8) : 0;
> user_mode_eframe = bt->stacktop - SIZE(pt_regs);
> if (last_process_stack_eframe < user_mode_eframe)
> x86_64_exception_frame(EFRAME_PRINT, 0, bt->stackbuf +
> - (bt->stacktop - bt->stackbase) - SIZE(pt_regs),
> + (bt->stacktop - stack_padding_size - bt->stackbase) - SIZE(pt_regs),
> bt, ofp);
> }
>
> @@ -4407,10 +4408,11 @@ in_exception_stack:
>
> if (!irq_eframe && !is_kernel_thread(bt->tc->task) &&
> (GET_STACKBASE(bt->tc->task) == bt->stackbase)) {
> + long stack_padding_size = SIZE(fred_frame) > 0 ? (2*8) : 0;
> user_mode_eframe = bt->stacktop - SIZE(pt_regs);
> if (last_process_stack_eframe < user_mode_eframe)
> x86_64_exception_frame(EFRAME_PRINT, 0, bt->stackbuf +
> - (bt->stacktop - bt->stackbase) - SIZE(pt_regs),
> + (bt->stacktop - stack_padding_size - bt->stackbase) - SIZE(pt_regs),
> bt, ofp);
> }
>
> Thanks
> Lianbo
>
> > [1]:https://lore.kernel.org/all/170668568261.398.10403890006820046961.tip-bot2@tip-bot2/
> > [2]:https://elixir.bootlin.com/linux/v6.9.1/source/arch/x86/include/asm/thread_info.h#L34
> >
> > Signed-off-by: Tao Liu<ltao@redhat.com>
> > ---
> > x86_64.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--
> > 1 file changed, 82 insertions(+), 2 deletions(-)
> >
> > diff --git a/x86_64.c b/x86_64.c
> > index 0c21eb8..43a31c2 100644
> > --- a/x86_64.c
> > +++ b/x86_64.c
> > @@ -137,6 +137,7 @@ static orc_entry *orc_find(ulong);
> > static orc_entry *orc_module_find(ulong);
> > static ulong ip_table_to_vaddr(ulong);
> > static void orc_dump(ulong);
> > +static long top_of_kernel_stack_padding(void);
> >
> > struct machine_specific x86_64_machine_specific = { 0 };
> >
> > @@ -4089,7 +4090,8 @@ in_exception_stack:
> > user_mode_eframe = bt->stacktop - SIZE(pt_regs);
> > if (last_process_stack_eframe < user_mode_eframe)
> > x86_64_exception_frame(EFRAME_PRINT, 0, bt->stackbuf +
> > - (bt->stacktop - bt->stackbase) - SIZE(pt_regs),
> > + (bt->stacktop - bt->stackbase) - SIZE(pt_regs) -
> > + top_of_kernel_stack_padding(),
> > bt, ofp);
> > }
> >
> > @@ -4410,7 +4412,8 @@ in_exception_stack:
> > user_mode_eframe = bt->stacktop - SIZE(pt_regs);
> > if (last_process_stack_eframe < user_mode_eframe)
> > x86_64_exception_frame(EFRAME_PRINT, 0, bt->stackbuf +
> > - (bt->stacktop - bt->stackbase) - SIZE(pt_regs),
> > + (bt->stacktop - bt->stackbase) - SIZE(pt_regs) -
> > + top_of_kernel_stack_padding(),
> > bt, ofp);
> > }
> >
> > @@ -9541,4 +9544,81 @@ x86_64_swp_offset(ulong entry)
> > return SWP_OFFSET(entry);
> > }
> >
> > +static long
> > +top_of_kernel_stack_padding(void)
> > +{
> > + char buf1[BUFSIZE];
> > + char *cursor;
> > + long final_value, ptregs_size_value;
> > + char *arglist[MAXARGS];
> > + bool found = FALSE;
> > +
> > + static long kernel_stack_padding = -1;
> > +
> > + if (kernel_stack_padding >= 0)
> > + return kernel_stack_padding;
> > +
> > + /*
> > + * startup_64:
> > + * ...
> > + * mov %rsi,%r15
> > + * leaq (__end_init_task - TOP_OF_KERNEL_STACK_PADDING - PTREGS_SIZE)(%rip), %rsp
> > + */
> > + sprintf(buf1, "disass /r startup_64");
> > + open_tmpfile2();
> > + if (!gdb_pass_through(buf1, pc->tmpfile2, GNU_RETURN_ON_ERROR)) {
> > + kernel_stack_padding = 0;
> > + goto out;
> > + }
> > +
> > + rewind(pc->tmpfile2);
> > + while (fgets(buf1, BUFSIZE, pc->tmpfile2) && !found) {
> > + // machine code of "mov %rsi,%r15"
> > + if (strstr(buf1, "49 89 f7"))
> > + found = TRUE;
> > + }
> > + if (!found || !(cursor = strstr(buf1, "# 0x"))) {
> > + kernel_stack_padding = 0;
> > + goto out;
> > + }
> > +
> > + parse_line(cursor, arglist);
> > + final_value = stol(arglist[1], FAULT_ON_ERROR, NULL);
> > +
> > + /*
> > + * rewind_stack_and_make_dead:
> > + * ...
> > + * leaq -PTREGS_SIZE(%rax), %rsp
> > + */
> > + found = FALSE;
> > + rewind(pc->tmpfile2);
> > + sprintf(buf1, "disass rewind_stack_and_make_dead");
> > + if (!gdb_pass_through(buf1, pc->tmpfile2, GNU_RETURN_ON_ERROR)) {
> > + kernel_stack_padding = 0;
> > + goto out;
> > + }
> > + rewind(pc->tmpfile2);
> > + while (fgets(buf1, BUFSIZE, pc->tmpfile2)) {
> > + // find leaq -PTREGS_SIZE(%rax), %rsp
> > + if (strstr(buf1, "lea") && (cursor = strstr(buf1, "-0x"))) {
> > + parse_line(cursor, arglist);
> > + char *p = strchr(arglist[0], '(');
> > + *p = '\0';
> > + ptregs_size_value = stol(arglist[0] + 1, FAULT_ON_ERROR, NULL);
> > + found = TRUE;
> > + break;
> > + }
> > + }
> > + if (!found) {
> > + kernel_stack_padding = 0;
> > + goto out;
> > + }
> > +
> > + struct syment *s = symbol_search("__end_init_task");
> > + kernel_stack_padding = s->value - final_value - ptregs_size_value;
> > +out:
> > + close_tmpfile2();
> > + return kernel_stack_padding;
> > +}
> > +
> > #endif /* X86_64 */
> > -- 2.40.1
>