On 2022/12/2 15:44, HAGIO KAZUHITO(萩尾 一仁) wrote:
 On 2022/12/01 16:01, Ding Hui wrote:
> We found that the "bt" command on a KASAN kernel vmcore displays truncated
> backtraces like this:
>
> crash> bt
> PID: 4131   TASK: ffff8001521df000  CPU: 3   COMMAND: "bash"
>    #0 [ffff2000224b0cb0] machine_kexec_prepare at ffff2000200bff4c
>
> After digging into the root cause, it turns out that arm64_in_kdump_text()
> found the wrong bt->bptr in the "machine_kexec" branch.
>
> Disassemble machine_kexec() of KASAN vmlinux (gcc 7.3.0):
>
> crash> dis -x machine_kexec
> 0xffff2000200bff50 <machine_kexec>:     stp     x29, x30, [sp,#-208]!
> 0xffff2000200bff54 <machine_kexec+0x4>: mov     x29, sp
> 0xffff2000200bff58 <machine_kexec+0x8>: stp     x19, x20, [sp,#16]
> 0xffff2000200bff5c <machine_kexec+0xc>: str     x24, [sp,#56]
> 0xffff2000200bff60 <machine_kexec+0x10>:        str     x26, [sp,#72]
> 0xffff2000200bff64 <machine_kexec+0x14>:        mov     x2, #0x8ab3
> 0xffff2000200bff68 <machine_kexec+0x18>:        add     x1, x29, #0x70
> 0xffff2000200bff6c <machine_kexec+0x1c>:        lsr     x1, x1, #3
> 0xffff2000200bff70 <machine_kexec+0x20>:        movk    x2, #0x41b5, lsl #16
> 0xffff2000200bff74 <machine_kexec+0x24>:        mov     x19, #0x200000000000
> 0xffff2000200bff78 <machine_kexec+0x28>:        adrp    x3, 0xffff2000224b0000
> 0xffff2000200bff7c <machine_kexec+0x2c>:        movk    x19, #0xdfff, lsl #48
> 0xffff2000200bff80 <machine_kexec+0x30>:        add     x3, x3, #0xcb0
> 0xffff2000200bff84 <machine_kexec+0x34>:        add     x4, x1, x19
> 0xffff2000200bff88 <machine_kexec+0x38>:        stp     x2, x3, [x29,#112]
> 0xffff2000200bff8c <machine_kexec+0x3c>:        adrp    x2, 0xffff2000200bf000 <swsusp_arch_resume+0x1e8>
> 0xffff2000200bff90 <machine_kexec+0x40>:        add     x2, x2, #0xf50
> 0xffff2000200bff94 <machine_kexec+0x44>:        str     x2, [x29,#128]
> 0xffff2000200bff98 <machine_kexec+0x48>:        mov     w2, #0xf1f1f1f1
> 0xffff2000200bff9c <machine_kexec+0x4c>:        str     w2, [x1,x19]
> 0xffff2000200bffa0 <machine_kexec+0x50>:        mov     w2, #0xf200
> 0xffff2000200bffa4 <machine_kexec+0x54>:        mov     w1, #0xf3f3f3f3
> 0xffff2000200bffa8 <machine_kexec+0x58>:        movk    w2, #0xf2f2, lsl #16
> 0xffff2000200bffac <machine_kexec+0x5c>:        stp     w2, w1, [x4,#4]
>
> We notice that:
> 1. machine_kexec() start address is 0xffff2000200bff50
> 2. the instruction at machine_kexec+0x44 stores the same value
>      0xffff2000200bff50 (computed as 0xffff2000200bf000 + 0xf50)
>      into stack position [x29,#128].
>
> When arm64_in_kdump_text() searches the stack for the LR, it encounters
> 0xffff2000200bff50 first, so it gets the wrong bt->bptr.
>
> We know that the real LR is always greater than the start address
 
 Seems true.
 
 One question, do you see which kernel code stores that value?
  
Actually, no C code stores that value. The source code looks like this:
void machine_kexec(struct kimage *kimage)
{
	phys_addr_t reboot_code_buffer_phys;
	void *reboot_code_buffer;
	bool in_kexec_crash = (kimage == kexec_crash_image);
	bool stuck_cpus = cpus_are_stuck_in_kernel();
	BUG_ON(!in_kexec_crash && (stuck_cpus || (num_online_cpus() > 1)));
	WARN(in_kexec_crash && (stuck_cpus || smp_crash_stop_failed()),
		"Some CPUs may be stale, kdump will be unreliable.\n");
...
The point is CONFIG_KASAN=y
I compared the gcc args used to compile machine_kexec.o with KASAN 
enabled [1] and with KASAN enabled but KASAN_SANITIZE_machine_kexec.o := n 
set [2]; the difference is:
[1]: -fsanitize=kernel-address -fasan-shadow-offset=0xdfff200000000000 
--param asan-globals=1   --param 
asan-instrumentation-with-call-threshold=10000   --param asan-stack=1
[2]: -fno-builtin
If I remove `--param asan-stack=1` but keep the other asan args when compiling 
machine_kexec.o, those assembly statements disappear.
-- 
Thanks,
- Ding Hui
 Thanks,
 Kazu
 
 
> of a function, so let's fix it by changing the search condition to
> (*ptr > xxx_start) && (*ptr < xxx_end).
>
> Signed-off-by: Ding Hui <dinghui(a)sangfor.com.cn>
> ---
>    arm64.c | 18 +++++++++---------
>    1 file changed, 9 insertions(+), 9 deletions(-)
>
> diff --git a/arm64.c b/arm64.c
> index c3e26a3..7e8a7db 100644
> --- a/arm64.c
> +++ b/arm64.c
> @@ -3479,7 +3479,7 @@ arm64_in_kdump_text(struct bt_info *bt, struct arm64_stackframe
*frame)
>    	ms = machdep->machspec;
>    	for (ptr = start - 8; ptr >= base; ptr--) {
>    		if (bt->flags & BT_OPT_BACK_TRACE) {
> -			if ((*ptr >= ms->crash_kexec_start) &&
> +			if ((*ptr > ms->crash_kexec_start) &&
>    			    (*ptr < ms->crash_kexec_end) &&
>    			    INSTACK(*(ptr - 1), bt)) {
>    				bt->bptr = ((ulong)(ptr - 1) - (ulong)base)
> @@ -3488,7 +3488,7 @@ arm64_in_kdump_text(struct bt_info *bt, struct arm64_stackframe
*frame)
>    					fprintf(fp, "%lx: %lx (crash_kexec)\n", bt->bptr, *ptr);
>    				return TRUE;
>    			}
> -			if ((*ptr >= ms->crash_save_cpu_start) &&
> +			if ((*ptr > ms->crash_save_cpu_start) &&
>    			    (*ptr < ms->crash_save_cpu_end) &&
>    			    INSTACK(*(ptr - 1), bt)) {
>    				bt->bptr = ((ulong)(ptr - 1) - (ulong)base)
> @@ -3498,14 +3498,14 @@ arm64_in_kdump_text(struct bt_info *bt, struct
arm64_stackframe *frame)
>    				return TRUE;
>    			}
>    		} else {
> -			if ((*ptr >= ms->machine_kexec_start) && (*ptr <
ms->machine_kexec_end)) {
> +			if ((*ptr > ms->machine_kexec_start) && (*ptr <
ms->machine_kexec_end)) {
>    				bt->bptr = ((ulong)ptr - (ulong)base)
>    					   + task_to_stackbase(bt->tc->task);
>    				if (CRASHDEBUG(1))
>    					fprintf(fp, "%lx: %lx (machine_kexec)\n", bt->bptr, *ptr);
>    				return TRUE;
>    			}
> -			if ((*ptr >= ms->crash_kexec_start) && (*ptr <
ms->crash_kexec_end)) {
> +			if ((*ptr > ms->crash_kexec_start) && (*ptr <
ms->crash_kexec_end)) {
>    				/*
>    				 *  Stash the first crash_kexec frame in case the machine_kexec
>    				 *  frame is not found.
> @@ -3519,7 +3519,7 @@ arm64_in_kdump_text(struct bt_info *bt, struct arm64_stackframe
*frame)
>    				}
>    				continue;
>    			}
> -			if ((*ptr >= ms->crash_save_cpu_start) && (*ptr <
ms->crash_save_cpu_end)) {
> +			if ((*ptr > ms->crash_save_cpu_start) && (*ptr <
ms->crash_save_cpu_end)) {
>    				bt->bptr = ((ulong)ptr - (ulong)base)
>    					   + task_to_stackbase(bt->tc->task);
>    				if (CRASHDEBUG(1))
> @@ -3566,7 +3566,7 @@ arm64_in_kdump_text_on_irq_stack(struct bt_info *bt)
>    
>    	for (ptr = start - 8; ptr >= base; ptr--) {
>    		if (bt->flags & BT_OPT_BACK_TRACE) {
> -			if ((*ptr >= ms->crash_kexec_start) &&
> +			if ((*ptr > ms->crash_kexec_start) &&
>    			    (*ptr < ms->crash_kexec_end) &&
>    			    INSTACK(*(ptr - 1), bt)) {
>    				bt->bptr = ((ulong)(ptr - 1) - (ulong)base) + stackbase;
> @@ -3576,7 +3576,7 @@ arm64_in_kdump_text_on_irq_stack(struct bt_info *bt)
>    				FREEBUF(stackbuf);
>    				return TRUE;
>    			}
> -			if ((*ptr >= ms->crash_save_cpu_start) &&
> +			if ((*ptr > ms->crash_save_cpu_start) &&
>    			    (*ptr < ms->crash_save_cpu_end) &&
>    			    INSTACK(*(ptr - 1), bt)) {
>    				bt->bptr = ((ulong)(ptr - 1) - (ulong)base) + stackbase;
> @@ -3587,7 +3587,7 @@ arm64_in_kdump_text_on_irq_stack(struct bt_info *bt)
>    				return TRUE;
>    			}
>    		} else {
> -			if ((*ptr >= ms->crash_kexec_start) && (*ptr <
ms->crash_kexec_end)) {
> +			if ((*ptr > ms->crash_kexec_start) && (*ptr <
ms->crash_kexec_end)) {
>    				bt->bptr = ((ulong)ptr - (ulong)base) + stackbase;
>    				if (CRASHDEBUG(1))
>    					fprintf(fp, "%lx: %lx (crash_kexec on IRQ stack)\n",
> @@ -3595,7 +3595,7 @@ arm64_in_kdump_text_on_irq_stack(struct bt_info *bt)
>    				FREEBUF(stackbuf);
>    				return TRUE;
>    			}
> -			if ((*ptr >= ms->crash_save_cpu_start) && (*ptr <
ms->crash_save_cpu_end)) {
> +			if ((*ptr > ms->crash_save_cpu_start) && (*ptr <
ms->crash_save_cpu_end)) {
>    				bt->bptr = ((ulong)ptr - (ulong)base) + stackbase;
>    				if (CRASHDEBUG(1))
>    					fprintf(fp, "%lx: %lx (crash_save_cpu on IRQ stack)\n",