On Fri, May 31, 2024 at 5:21 PM <devel-request@lists.crash-utility.osci.io> wrote:
Date: Fri, 31 May 2024 13:14:02 +0800
From: Tao Liu <ltao@redhat.com>
Subject: [Crash-utility] Re: [PATCH] x86_64: Add
        top_of_kernel_stack_padding for kernel stack
To: Lianbo Jiang <lijiang@redhat.com>
Cc: devel@lists.crash-utility.osci.io
Message-ID:
        <CAO7dBbWZgeiJBN2sv-3e00TSU2uO_kMbSZ_NGVCft6xzdj3EMA@mail.gmail.com>
Content-Type: text/plain; charset="UTF-8"

Hi Lianbo,

On Mon, May 27, 2024 at 11:30 AM Lianbo Jiang <lijiang@redhat.com> wrote:
>
> Hi, Tao
>
> Thank you for the fix.
>
> On 5/23/24 12:06 PM, devel-request@lists.crash-utility.osci.io wrote:
> > Date: Thu, 23 May 2024 12:06:03 +0800
> > From: Tao Liu<ltao@redhat.com>
> > Subject: [Crash-utility] [PATCH] x86_64: Add
> >       top_of_kernel_stack_padding for kernel stack
> > To:devel@lists.crash-utility.osci.io
> > Cc: Tao Liu<ltao@redhat.com>
> > Message-ID:<20240523040603.10304-1-ltao@redhat.com>
> > Content-Type: text/plain; charset="US-ASCII"; x-default=true
> >
> > With kernel patch [1], x86_64 will add extra padding for kernel stack,
> > as a result, the pt_regs will be shift down by the offset of padding.
> > Without the patch, the values of registers read from pt_regs will be
> > incorrect.
> >
> > Though currently the TOP_OF_KERNEL_STACK_PADDING is configured by
> > Kconfig, according to kernel code comment [2], the value may be made
> > dynamicly later. In addition there might be systems compiled without
> > Kconfig avaliable. So in this patch, we will calculate the value of
> > TOP_OF_KERNEL_STACK_PADDING.
> >
> > The calculation is as follows:
> >
> > 1) in startup_64(), there is a lea instruction as:
> >     leaq (__end_init_task - TOP_OF_KERNEL_STACK_PADDING - PTREGS_SIZE)(%rip), %rsp
> >
> > 2) in rewind_stack_and_make_dead(), there is a lea instruction as:
> >     leaq      -PTREGS_SIZE(%rax), %rsp
> >
> > The disassembled 2 instructions will be like:
> >
> > 1) 0xffffffff93a0007d <startup_64+3>:      lea    0x1e03ec4(%rip),%rsp        # 0xffffffff95803f48
> >                                                                                ^^^^^^^^^^^^^^^^^^^^
> > 2) 0xffffffff93a0465a <rewind_stack_and_make_dead+10>:     lea    -0xa8(%rax),%rsp
> >                                                                     ^^^^
> > 0xffffffff95803f48 is the value of (__end_init_task -
> > TOP_OF_KERNEL_STACK_PADDING - PTREGS_SIZE), and 0xa8 is the value of
> > PTREGS_SIZE, __end_init_task can be get by symbol reading.
>
> Calculating the value of TOP_OF_KERNEL_STACK_PADDING, which looks good, but it heavily relies on compiler.
> Normally we would use this way unless there is no other choice.
>
> How about the following changes? Although it doesn't handle the case that the value is dynamic, let's see
> how to change in the kernel in future, and then consider how to reflect it in crash-utility.
>
Sure, looks good to me, so let's go with this, and update it later
when kernel changes.

Ok. Thanks, Tao.

Applied with minor changes:

https://github.com/crash-utility/crash/commit/48764a14bc5856f0b0bb30685336c68b832154fc

Lianbo
 

Thanks,
Tao Liu

>
> diff --git a/defs.h b/defs.h
> index 01f316e67dde..42d875965256 100644
> --- a/defs.h
> +++ b/defs.h
> @@ -2414,6 +2414,7 @@ struct size_table {         /* stash of commonly-used sizes */
>         long maple_tree;
>         long maple_node;
>         long module_memory;
> +       long fred_frame;
>   };
>
>   struct array_table {
> diff --git a/kernel.c b/kernel.c
> index 1728b70c1b5c..cd3d6044cc9a 100644
> --- a/kernel.c
> +++ b/kernel.c
> @@ -668,6 +668,7 @@ kernel_init()
>         STRUCT_SIZE_INIT(softirq_state, "softirq_state");
>         STRUCT_SIZE_INIT(softirq_action, "softirq_action");
>         STRUCT_SIZE_INIT(desc_struct, "desc_struct");
> +       STRUCT_SIZE_INIT(fred_frame, "fred_frame");
>
>         STRUCT_SIZE_INIT(char_device_struct, "char_device_struct");
>         if (VALID_STRUCT(char_device_struct)) {
> diff --git a/x86_64.c b/x86_64.c
> index 0c21eb827e4a..6777c93e6b47 100644
> --- a/x86_64.c
> +++ b/x86_64.c
> @@ -4086,10 +4086,11 @@ in_exception_stack:
>
>           if (!irq_eframe && !is_kernel_thread(bt->tc->task) &&
>               (GET_STACKBASE(bt->tc->task) == bt->stackbase)) {
> +               long stack_padding_size = SIZE(fred_frame) > 0 ? (2*8) : 0;
>                 user_mode_eframe = bt->stacktop - SIZE(pt_regs);
>                 if (last_process_stack_eframe < user_mode_eframe)
>                         x86_64_exception_frame(EFRAME_PRINT, 0, bt->stackbuf +
> -                               (bt->stacktop - bt->stackbase) - SIZE(pt_regs),
> +                               (bt->stacktop - stack_padding_size - bt->stackbase) - SIZE(pt_regs),
>                                 bt, ofp);
>         }
>
> @@ -4407,10 +4408,11 @@ in_exception_stack:
>
>           if (!irq_eframe && !is_kernel_thread(bt->tc->task) &&
>               (GET_STACKBASE(bt->tc->task) == bt->stackbase)) {
> +               long stack_padding_size = SIZE(fred_frame) > 0 ? (2*8) : 0;
>                 user_mode_eframe = bt->stacktop - SIZE(pt_regs);
>                 if (last_process_stack_eframe < user_mode_eframe)
>                         x86_64_exception_frame(EFRAME_PRINT, 0, bt->stackbuf +
> -                               (bt->stacktop - bt->stackbase) - SIZE(pt_regs),
> +                               (bt->stacktop - stack_padding_size - bt->stackbase) - SIZE(pt_regs),
>                                 bt, ofp);
>         }
>
> Thanks
> Lianbo
>
> > [1]:https://lore.kernel.org/all/170668568261.398.10403890006820046961.tip-bot2@tip-bot2/
> > [2]:https://elixir.bootlin.com/linux/v6.9.1/source/arch/x86/include/asm/thread_info.h#L34
> >
> > Signed-off-by: Tao Liu<ltao@redhat.com>
> > ---
> >   x86_64.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--
> >   1 file changed, 82 insertions(+), 2 deletions(-)
> >
> > diff --git a/x86_64.c b/x86_64.c
> > index 0c21eb8..43a31c2 100644
> > --- a/x86_64.c
> > +++ b/x86_64.c
> > @@ -137,6 +137,7 @@ static orc_entry *orc_find(ulong);
> >   static orc_entry *orc_module_find(ulong);
> >   static ulong ip_table_to_vaddr(ulong);
> >   static void orc_dump(ulong);
> > +static long top_of_kernel_stack_padding(void);
> >
> >   struct machine_specific x86_64_machine_specific = { 0 };
> >
> > @@ -4089,7 +4090,8 @@ in_exception_stack:
> >               user_mode_eframe = bt->stacktop - SIZE(pt_regs);
> >               if (last_process_stack_eframe < user_mode_eframe)
> >                       x86_64_exception_frame(EFRAME_PRINT, 0, bt->stackbuf +
> > -                             (bt->stacktop - bt->stackbase) - SIZE(pt_regs),
> > +                             (bt->stacktop - bt->stackbase) - SIZE(pt_regs) -
> > +                             top_of_kernel_stack_padding(),
> >                               bt, ofp);
> >       }
> >
> > @@ -4410,7 +4412,8 @@ in_exception_stack:
> >               user_mode_eframe = bt->stacktop - SIZE(pt_regs);
> >               if (last_process_stack_eframe < user_mode_eframe)
> >                       x86_64_exception_frame(EFRAME_PRINT, 0, bt->stackbuf +
> > -                             (bt->stacktop - bt->stackbase) - SIZE(pt_regs),
> > +                             (bt->stacktop - bt->stackbase) - SIZE(pt_regs) -
> > +                             top_of_kernel_stack_padding(),
> >                               bt, ofp);
> >       }
> >
> > @@ -9541,4 +9544,81 @@ x86_64_swp_offset(ulong entry)
> >       return SWP_OFFSET(entry);
> >   }
> >
> > +static long
> > +top_of_kernel_stack_padding(void)
> > +{
> > +     char buf1[BUFSIZE];
> > +     char *cursor;
> > +     long final_value, ptregs_size_value;
> > +     char *arglist[MAXARGS];
> > +     bool found = FALSE;
> > +
> > +     static long kernel_stack_padding = -1;
> > +
> > +     if (kernel_stack_padding >= 0)
> > +             return kernel_stack_padding;
> > +
> > +     /*
> > +     * startup_64:
> > +     * ...
> > +     * mov %rsi,%r15
> > +     * leaq  (__end_init_task - TOP_OF_KERNEL_STACK_PADDING - PTREGS_SIZE)(%rip), %rsp
> > +     */
> > +     sprintf(buf1, "disass /r startup_64");
> > +     open_tmpfile2();
> > +     if (!gdb_pass_through(buf1, pc->tmpfile2, GNU_RETURN_ON_ERROR)) {
> > +             kernel_stack_padding = 0;
> > +             goto out;
> > +     }
> > +
> > +     rewind(pc->tmpfile2);
> > +     while (fgets(buf1, BUFSIZE, pc->tmpfile2) && !found) {
> > +             // machine code of "mov %rsi,%r15"
> > +             if (strstr(buf1, "49 89 f7"))
> > +                     found = TRUE;
> > +     }
> > +     if (!found || !(cursor = strstr(buf1, "# 0x"))) {
> > +             kernel_stack_padding = 0;
> > +             goto out;
> > +     }
> > +
> > +     parse_line(cursor, arglist);
> > +     final_value = stol(arglist[1], FAULT_ON_ERROR, NULL);
> > +
> > +     /*
> > +     * rewind_stack_and_make_dead:
> > +     * ...
> > +     * leaq  -PTREGS_SIZE(%rax), %rsp
> > +     */
> > +     found = FALSE;
> > +     rewind(pc->tmpfile2);
> > +     sprintf(buf1, "disass rewind_stack_and_make_dead");
> > +     if (!gdb_pass_through(buf1, pc->tmpfile2, GNU_RETURN_ON_ERROR)) {
> > +             kernel_stack_padding = 0;
> > +             goto out;
> > +     }
> > +     rewind(pc->tmpfile2);
> > +     while (fgets(buf1, BUFSIZE, pc->tmpfile2)) {
> > +             // find leaq -PTREGS_SIZE(%rax), %rsp
> > +             if (strstr(buf1, "lea") && (cursor = strstr(buf1, "-0x"))) {
> > +                     parse_line(cursor, arglist);
> > +                     char *p = strchr(arglist[0], '(');
> > +                     *p = '\0';
> > +                     ptregs_size_value = stol(arglist[0] + 1, FAULT_ON_ERROR, NULL);
> > +                     found = TRUE;
> > +                     break;
> > +             }
> > +     }
> > +     if (!found) {
> > +             kernel_stack_padding = 0;
> > +             goto out;
> > +     }
> > +
> > +     struct syment *s = symbol_search("__end_init_task");
> > +     kernel_stack_padding = s->value - final_value - ptregs_size_value;
> > +out:
> > +     close_tmpfile2();
> > +     return kernel_stack_padding;
> > +}
> > +
> >   #endif  /* X86_64 */
> > -- 2.40.1
>