Hi Yeping,
Thanks for the patch.
I have thought about the issue again, and I have a few questions:
1) What is your kernel version? I checked the latest kernel source
(6.10), and the related code has not changed:
* #define IRQ_STACK_SIZE THREAD_SIZE
*
* and finding a solid usage of THREAD_SIZE is hard, but:
*
* union thread_union {
* ...
* unsigned long stack[THREAD_SIZE/sizeof(long)];
* };
So I guess we can still get THREAD_SIZE with the existing crash code, right?
if (MEMBER_EXISTS("thread_union", "stack")) {
if ((sz = MEMBER_SIZE("thread_union", "stack")) > 0)
ms->irq_stack_size = sz;
Could you please debug with your vmcore to find out why the code does
not enter the above if conditions, and instead falls through to your
arm64_set_irq_stack_size() check?
2) The commit message seems to be mis-formatted in your attached patch.
Thanks,
Tao Liu
On Wed, Jul 17, 2024 at 5:16 PM yp z <wonderzyp@gmail.com> wrote:
>
> Hi Tao,
> Thank you for your suggestions on the patch. I have followed them and prepared a new patch. Please help review it, thanks.
>
> From 831701099a7097662ddec9de464131ad50c7134b Mon Sep 17 00:00:00 2001
> From: wonderzyp <wonderzyp@qq.com>
> Date: Wed, 17 Jul 2024 11:02:06 +0800
> Subject: [PATCH] A segfault issue was observed on KASAN enabled arm64 kernel
> due to the incorrect irq_stack_size, see the following stack trace: > (gdb)
> bt > #0 0x00005635ac2b166b in arm64_unwind_frame (frame=0x7ffdaf35cb70,
> bt=0x7ffdaf35d430) > at arm64.c:2821 > #1 arm64_back_trace_cmd
> (bt=0x7ffdaf35d430) at arm64.c:3306 > #2 0x00005635ac27b108 in back_trace
> (bt=bt@entry=0x7ffdaf35d430) at kernel.c:3239 > #3 0x00005635ac2880ae in
> cmd_bt () at kernel.c:2863 > #4 0x00005635ac1f16dc in exec_command () at
> main.c:893 > #5 0x00005635ac1f192a in main_loop () at main.c:840 > #6
> 0x00005635ac50df81 in captured_main (data=<optimized out>) at main.c:1284 >
> #7 gdb_main (args=<optimized out>) at main.c:1313 > #8 0x00005635ac50e000
> in gdb_main_entry (argc=<optimized out>, argv=<optimized out>) > at
> main.c:1338 > #9 0x00005635ac1ea2a5 in main (argc=5, argv=0x7ffdaf35dde8) at
> main.c:721
>
> The issue was caused by not setting irq_stack_size correctly, and this patch will fix the issue by two ways:
> 1. if CONFIG_IKCONFIG is set, calculate the irq_stack_size according to kernel source code
> 2. if CONFIG_IKCONFIG is not set, get THREAD_SHIFT value by disassembling the tbnz instruction to calculate the irq_stack_size
>
> Signed-off-by: Yeping.Zheng <yeping.zheng@nio.com>
> ---
> arm64.c | 86 +++++++++++++++++++++++++++++++++++++++++++++++++++++++--
> 1 file changed, 84 insertions(+), 2 deletions(-)
>
> diff --git a/arm64.c b/arm64.c
> index b3040d7..8347ba1 100644
> --- a/arm64.c
> +++ b/arm64.c
> @@ -93,6 +93,7 @@ static void arm64_calc_VA_BITS(void);
> static int arm64_is_uvaddr(ulong, struct task_context *);
> static void arm64_calc_KERNELPACMASK(void);
> static int arm64_get_vmcoreinfo(unsigned long *vaddr, const char *label, int base);
> +static ulong arm64_set_irq_stack_size(struct machine_specific *ms);
>
> struct kernel_range {
> unsigned long modules_vaddr, modules_end;
> @@ -2223,8 +2224,10 @@ arm64_irq_stack_init(void)
> if (MEMBER_EXISTS("thread_union", "stack")) {
> if ((sz = MEMBER_SIZE("thread_union", "stack")) > 0)
> ms->irq_stack_size = sz;
> - } else
> - ms->irq_stack_size = ARM64_IRQ_STACK_SIZE;
> + } else {
> + ulong res = arm64_set_irq_stack_size(ms);
> + ms->irq_stack_size = (res > 0) ? res : ARM64_IRQ_STACK_SIZE;
> + }
>
> machdep->flags |= IRQ_STACKS;
>
> @@ -4921,6 +4924,85 @@ static void arm64_calc_KERNELPACMASK(void)
> }
> }
>
> +static ulong arm64_set_irq_stack_size(struct machine_specific *ms)
> +{
> + char *string;
> + int ret;
> + int kasan_thread_shift = 0;
> + int min_thread_shift;
> + ulong arm64_page_shift;
> + ulong thread_shift = 0;
> + ulong thread_size;
> + struct syment *sp;
> + const char* tbnz_str = "tbnz";
> +
> + if (kt->ikconfig_flags & IKCONFIG_AVAIL) {
> + if ((ret = get_kernel_config("CONFIG_KASAN_GENERIC", NULL) == IKCONFIG_Y) ||
> + (ret = get_kernel_config("CONFIG_KASAN_SW_TAGS", NULL) == IKCONFIG_Y)) {
> + kasan_thread_shift = 1;
> + }
> + min_thread_shift = 14 + kasan_thread_shift;
> +
> + if ((ret = get_kernel_config("CONFIG_VMAP_STACK", NULL)) == IKCONFIG_Y){
> + if ((ret = get_kernel_config("CONFIG_ARM64_PAGE_SHIFT", &string)) == IKCONFIG_STR){
> + arm64_page_shift = atol(string);
> + }
> + if (min_thread_shift < arm64_page_shift){
> + thread_shift = arm64_page_shift;
> + } else {
> + thread_shift = min_thread_shift;
> + }
> + }
> + } else {
> +
> + if (!(sp = symbol_search("vectors"))) {
> + return -1;
> + }
> +
> + const char* tbnz_str = "tbnz";
> + struct gnu_request *req;
> + req = (struct gnu_request *)GETBUF(sizeof(struct gnu_request));
> + req->command = GNU_PASS_THROUGH;
> + req->buf = GETBUF(BUFSIZE);
> + strcat(req->buf, "x/1024i ");
> +
> + char tmp[100];
> + sprintf(tmp, "0x%lx", sp->value);
> + strcat(req->buf, tmp);
> + req->flags = (GNU_RETURN_ON_ERROR);
> + open_tmpfile();
> + req->fp = pc->tmpfile;
> + gdb_interface(req);
> +
> +
> + rewind(pc->tmpfile);
> + char line[BUFSIZE];
> + while (fgets(line, BUFSIZE, pc->tmpfile) != NULL) {
> + // printf("the buffer is not null\n");
> + char* tbnz_pos = strstr(line, tbnz_str);
> + if (tbnz_pos != NULL) {
> + char* thread_shift_pos = strstr(tbnz_pos, "#");
> + if (sscanf(thread_shift_pos + 1, "%ld", &thread_shift) == 1) {
> + if (CRASHDEBUG(1)){
> + error(INFO, "Detect thread shift via tbnz %ld\n", thread_shift);
> + }
> + break;
> + }
> + }
> + }
> + close_tmpfile();
> + FREEBUF(req->buf);
> + FREEBUF(req);
> + }
> +
> + if (thread_shift == 0) {
> + return -1;
> + }
> +
> + thread_size = ((1UL) << thread_shift);
> + return thread_size;
> +}
> +
> #endif /* ARM64 */
>
>
> --
> 2.25.1
>
>
> Tao Liu <ltao@redhat.com> 于2024年7月16日周二 16:22写道:
>>
>> Hi Yeping,
>>
>> Thanks for the fix.
>>
>> On Thu, Jul 11, 2024 at 1:38 PM <wonderzyp@gmail.com> wrote:
>> >
>> > When using the crash tool to parse an ARM64 dump file with KASAN enabled, I found that the bt -a command causes the tool to crash. The following is the backtrace information.
>> >
>> > (gdb) bt
>> > #0 0x00005635ac2b166b in arm64_unwind_frame (frame=0x7ffdaf35cb70, bt=0x7ffdaf35d430)
>> > at arm64.c:2821
>> > #1 arm64_back_trace_cmd (bt=0x7ffdaf35d430) at arm64.c:3306
>> > #2 0x00005635ac27b108 in back_trace (bt=bt@entry=0x7ffdaf35d430) at kernel.c:3239
>> > #3 0x00005635ac2880ae in cmd_bt () at kernel.c:2863
>> > #4 0x00005635ac1f16dc in exec_command () at main.c:893
>> > #5 0x00005635ac1f192a in main_loop () at main.c:840
>> > #6 0x00005635ac50df81 in captured_main (data=<optimized out>) at main.c:1284
>> > #7 gdb_main (args=<optimized out>) at main.c:1313
>> > #8 0x00005635ac50e000 in gdb_main_entry (argc=<optimized out>, argv=<optimized out>)
>> > at main.c:1338
>> > #9 0x00005635ac1ea2a5 in main (argc=5, argv=0x7ffdaf35dde8) at main.c:721
>> > Eventually, I found that it may be caused by not setting irq_stack_size properly, and I provide this patch to solve it.
>> >
>>
>> Could you please re-draft your commit message? The original one looks
>> informal. E.g:
>>
>> A segfault issue was observed on KASAN enabled arm64 kernel due to the
>> incorrect irq_stack_size, see the following stack trace:
>> ...
>> The issue was caused by ...., and this patch will fix the issue by ....
>>
>> >
>> > From 34b28aa8c11e77d20adec4f7705a14d239c8a55f Mon Sep 17 00:00:00 2001
>> > From: wonderzyp <wonderzyp@qq.com>
>> > Date: Mon, 8 Jul 2024 20:11:38 +0800
>> > Subject: [PATCH 1131/1131] set_arm64_irq_stack_size
>> >
>> > Signed-off-by: Yeping Zheng <wonderzyp@gmail.com>
>> > ---
>> > arm64.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++--
>> > 1 file changed, 47 insertions(+), 2 deletions(-)
>> >
>> > diff --git a/arm64.c b/arm64.c
>> > index b3040d7..39d891b 100644
>> > --- a/arm64.c
>> > +++ b/arm64.c
>> > @@ -93,6 +93,7 @@ static void arm64_calc_VA_BITS(void);
>> > static int arm64_is_uvaddr(ulong, struct task_context *);
>> > static void arm64_calc_KERNELPACMASK(void);
>> > static int arm64_get_vmcoreinfo(unsigned long *vaddr, const char *label, int base);
>> > +static ulong arm64_set_irq_stack_size(struct machine_specific *ms);
>> >
>> > struct kernel_range {
>> > unsigned long modules_vaddr, modules_end;
>> > @@ -2223,8 +2224,14 @@ arm64_irq_stack_init(void)
>> > if (MEMBER_EXISTS("thread_union", "stack")) {
>> > if ((sz = MEMBER_SIZE("thread_union", "stack")) > 0)
>> > ms->irq_stack_size = sz;
>> > - } else
>> > - ms->irq_stack_size = ARM64_IRQ_STACK_SIZE;
>> > + } else {
>> > + ulong res = arm64_set_irq_stack_size(ms);
>> > + if (res > 0){
>> > + ms->irq_stack_size = res;
>> > + } else {
>> > + ms->irq_stack_size = ARM64_IRQ_STACK_SIZE;
>> > + }
>> > + }
>> >
>> > machdep->flags |= IRQ_STACKS;
>> >
>> > @@ -4921,6 +4928,44 @@ static void arm64_calc_KERNELPACMASK(void)
>> > }
>> > }
>> >
>> > +static ulong arm64_set_irq_stack_size(struct machine_specific *ms)
>> > +{
>> > + char *string;
>> > + int ret;
>> > + int KASAN_THREAD_SHIFT = 0;
>> > + int MIN_THREAD_SHIFT;
>> > + ulong ARM64_PAGE_SHIFT;
>> > + ulong THREAD_SHIFT = 0;
>> > + ulong THREAD_SIZE;
>>
>> I guess upper-case variable names are not encouraged, even though
>> these variables come from the kernel config file.
>>
>> > + if (kt->ikconfig_flags & IKCONFIG_AVAIL) {
>> > + if ((ret = get_kernel_config("CONFIG_KASAN_GENERIC", NULL) == IKCONFIG_Y) ||
>> > + (ret = get_kernel_config("CONFIG_KASAN_SW_TAGS", NULL) == IKCONFIG_Y)) {
>> > + KASAN_THREAD_SHIFT = 1;
>> > + }
>> > + }
>> > + MIN_THREAD_SHIFT = 14 + KASAN_THREAD_SHIFT;
>> > +
>> > + if (kt->ikconfig_flags & IKCONFIG_AVAIL) {
>>
>> Could the if condition be merged with the prior one?
>>
>> > + if ((ret = get_kernel_config("CONFIG_VMAP_STACK", NULL)) == IKCONFIG_Y){
>> > + if ((ret = get_kernel_config("CONFIG_ARM64_PAGE_SHIFT", &string)) == IKCONFIG_STR){
>> > + ARM64_PAGE_SHIFT = atol(string);
>> > + }
>> > + if (MIN_THREAD_SHIFT < ARM64_PAGE_SHIFT){
>> > + THREAD_SHIFT = ARM64_PAGE_SHIFT;
>> > + } else {
>> > + THREAD_SHIFT = MIN_THREAD_SHIFT;
>> > + }
>> > + }
>> > + }
>> > +
>> > + if (THREAD_SHIFT == 0) {
>> > + return -1;
>> > + }
>> > +
>> > + THREAD_SIZE = ((1UL) << THREAD_SHIFT);
>> > + return THREAD_SIZE;
>> > +}
>>
>> I'm OK with the approach above, since it directly came from the kernel
>> source. However I'm not a fan of checking kernel configs, there might
>> be kernels which are compiled without CONFIG_IKCONFIG.
>>
>> Could we add an approach here, to get the value from disassembly when
>> CONFIG_IKCONFIG is negative?
>>
>> kernel source: arch/arm64/kernel/entry.S:
>>
>> .macro kernel_ventry, el:req, ht:req, regsize:req, label:req
>> ....
>> add sp, sp, x0 // sp' = sp + x0
>> sub x0, sp, x0 // x0' = sp' - x0 = (sp + x0) - x0 = sp
>> tbnz x0, #THREAD_SHIFT, 0f <<<<<<<<
>>
>> $ objdump -d vmlinux
>> ...
>> ffff800080010800 <vectors>:
>> ffff800080010800: d10543ff sub sp, sp, #0x150
>> ffff800080010804: 8b2063ff add sp, sp, x0
>> ffff800080010808: cb2063e0 sub x0, sp, x0
>> ffff80008001080c: 37800080 tbnz w0, #16,
>> ffff80008001081c <vectors+0x1c> <<<<<<<<<<
>>
>> It is easy to get the THREAD_SHIFT value by disassembling the tbnz
>> instruction. What do you think @Lianbo Jiang
>>
>> Thanks,
>> Tao Liu
>>
>> > +
>> > #endif /* ARM64 */
>> >
>> >
>> > --
>> > 2.25.1
>> > --
>> > Crash-utility mailing list -- devel@lists.crash-utility.osci.io
>> > To unsubscribe send an email to devel-leave@lists.crash-utility.osci.io
>> > https://${domain_name}/admin/lists/devel.lists.crash-utility.osci.io/
>> > Contribution Guidelines: https://github.com/crash-utility/crash/wiki
>>