Thank you for the comment, Kazu.

On Thu, Feb 2, 2023 at 5:16 PM HAGIO KAZUHITO(萩尾 一仁) <k-hagio-ab@nec.com> wrote:
Hi Lianbo,

thanks for the patch.

On 2023/02/02 16:01, Lianbo Jiang wrote:
> Currently, the "bt" command may print a bogus exception frame
> and the rest frame will be truncated on x86 64 when using the
> "virsh send-key <kvm guest> KEY_LEFTALT KEY_SYSRQ KEY_C" command
> to trigger a panic from the KVM host. For example:
>
>    crash> bt
>    PID: 0        TASK: ffff9e7a47e32f00  CPU: 3    COMMAND: "swapper/3"
>     #0 [ffffba7900118bb8] machine_kexec at ffffffff87e5c2c7
>     #1 [ffffba7900118c08] __crash_kexec at ffffffff87f9500d
>     #2 [ffffba7900118cd0] panic at ffffffff87edfff9
>     #3 [ffffba7900118d50] sysrq_handle_crash at ffffffff883ce2c1
>     #4 [ffffba7900118d58] __handle_sysrq.cold.15 at ffffffff883ceb56
>     #5 [ffffba7900118d88] sysrq_filter at ffffffff883ce9a2
>     #6 [ffffba7900118dc0] input_to_handler at ffffffff884fb0bf
>     #7 [ffffba7900118df8] input_pass_values at ffffffff884fc1b7
>     #8 [ffffba7900118e20] input_handle_event at ffffffff884fe278
>     #9 [ffffba7900118e50] input_event at ffffffff884fe74b
>     #10 [ffffba7900118e88] atkbd_interrupt at ffffffff88504e2f
>     #11 [ffffba7900118ee0] serio_interrupt at ffffffff884f7516
>     #12 [ffffba7900118f10] i8042_interrupt at ffffffff884f8b04
>     #13 [ffffba7900118f50] __handle_irq_event_percpu at ffffffff87f51430
>     #14 [ffffba7900118f90] handle_irq_event_percpu at ffffffff87f51590
>     #15 [ffffba7900118fb8] handle_irq_event at ffffffff87f51616
>     #16 [ffffba7900118fd8] handle_edge_irq at ffffffff87f559f2
>     #17 [ffffba7900118ff0] asm_call_on_stack at ffffffff88800fa2
>     --- <IRQ stack> ---
>     #18 [ffffba790008bda0] asm_call_on_stack at ffffffff88800fa2
>         RIP: ffffffffffffffff  RSP: 0000000000000124  RFLAGS: 00000003
>         RAX: 0000000000000000  RBX: 0000000000000001  RCX: 0000000000000000
>         RDX: ffffffff88800c1e  RSI: 0000000000000000  RDI: 0000000000000000
>         RBP: 0000000000000001   R8: 0000000000000000   R9: 0000000000000000
>         R10: 0000000000000000  R11: ffffffff88760555  R12: ffffba790008be08
>         R13: ffffffff87f18002  R14: ffff9e7a47e32f00  R15: ffff9e7bb6198e00
>         ORIG_RAX: 0000000000000000  CS: 0003  SS: 0000
>    bt: WARNING: possibly bogus exception frame
>    crash>

Could I have the bt result with the patch?

 
Sure. I tested it on upstream kernel with the patch:

crash> bt
PID: 0        TASK: ffff9e7a47e32f00  CPU: 3    COMMAND: "swapper/3"
 #0 [ffffba7900118bb8] machine_kexec at ffffffff87e5c2c7
 #1 [ffffba7900118c08] __crash_kexec at ffffffff87f9500d
 #2 [ffffba7900118cd0] panic at ffffffff87edfff9
 #3 [ffffba7900118d50] sysrq_handle_crash at ffffffff883ce2c1
 #4 [ffffba7900118d58] __handle_sysrq.cold.15 at ffffffff883ceb56
 #5 [ffffba7900118d88] sysrq_filter at ffffffff883ce9a2
 #6 [ffffba7900118dc0] input_to_handler at ffffffff884fb0bf
 #7 [ffffba7900118df8] input_pass_values at ffffffff884fc1b7
 #8 [ffffba7900118e20] input_handle_event at ffffffff884fe278
 #9 [ffffba7900118e50] input_event at ffffffff884fe74b
#10 [ffffba7900118e88] atkbd_interrupt at ffffffff88504e2f
#11 [ffffba7900118ee0] serio_interrupt at ffffffff884f7516
#12 [ffffba7900118f10] i8042_interrupt at ffffffff884f8b04
#13 [ffffba7900118f50] __handle_irq_event_percpu at ffffffff87f51430
#14 [ffffba7900118f90] handle_irq_event_percpu at ffffffff87f51590
#15 [ffffba7900118fb8] handle_irq_event at ffffffff87f51616
#16 [ffffba7900118fd8] handle_edge_irq at ffffffff87f559f2
#17 [ffffba7900118ff0] asm_call_on_stack at ffffffff88800fa2
--- <IRQ stack> ---
#18 [ffffba790008bd50] asm_call_on_stack at ffffffff88800fa2
    [exception RIP: common_interrupt+181]
    RIP: ffffffff88760555  RSP: 0000000000000000  RFLAGS: 00000000
    RAX: ffff9e7bb6198e00  RBX: ffff9e7bb6198e00  RCX: ffff9e7a47e32f00
    RDX: ffffffff87f18002  RSI: ffffba790008be08  RDI: 0000000000000001
    RBP: 0000000d25808fab   R8: ffffffff87f17d1c   R9: ffffffff87e29ec5
    R10: ffffffff87e63dcd  R11: ffff9e7a47e32f00  R12: ffffffff8808ea76
    R13: ffffffff8808e1d2  R14: 0000000000000046  R15: ffffffff87f762f1
    ORIG_RAX: 0000000000000001  CS: 0000  SS: 0000
#19 [ffffba790008be00] asm_common_interrupt at ffffffff88800c1e
#20 [ffffba790008be88] default_idle at ffffffff8876ffc5
#21 [ffffba790008bed8] do_idle at ffffffff87f1982d
#22 [ffffba790008bf20] cpu_startup_entry at ffffffff87f19a29
#23 [ffffba790008bf30] start_secondary at ffffffff87e5118f
#24 [ffffba790008bf50] secondary_startup_64 at ffffffff87e000e6
crash> 
 
I've still not learned the idtentry and etc. enough, but at least,
a wrong exception frame is still shown at my end.  This is a RHEL9.1
vmcore that reproduces the bogus exception frame error.

crash> bt
PID: 0        TASK: ffffffff8881a940  CPU: 0    COMMAND: "swapper/0"
  #0 [ffffa5ab40003bb0] machine_kexec at ffffffff86e6973d
  #1 [ffffa5ab40003c00] __crash_kexec at ffffffff86fbe29d
  #2 [ffffa5ab40003cc8] panic at ffffffff8788cb6e
...
#16 [ffffa5ab40003fc8] __common_interrupt at ffffffff86e2666e
#17 [ffffa5ab40003ff0] common_interrupt at ffffffff878d83ee
--- <IRQ stack> ---
#18 [ffffffff88803d80] common_interrupt at ffffffff878d83ee
     [exception RIP: unknown or invalid address]             <<<-- invalid
     RIP: 0000000000000000  RSP: 000000007d3187b0  RFLAGS: ffffffff87a00ce2
     RAX: ffffffff86e71564  RBX: 0000000000000c56  RCX: ffffffff878da876
     RDX: 0000000000000000  RSI: ffffffff878dab3e  RDI: ffffffff878d8384
     RBP: 0000000000000000   R8: 0000000000000000   R9: 000000134ab46fed
     R10: ffff9143bbc2afc0  R11: ffff9143b9f2d540  R12: 0000000000000000
     R13: ffffffff86f390ae  R14: ffff9143b9f2d540  R15: ffff9143b9f2d540
     ORIG_RAX: 0000000000000000  CS: 0000  SS: 7d3187a8
#19 [ffffffff88803e98] default_idle at ffffffff878e8570
#20 [ffffffff88803ec0] default_idle_call at ffffffff878e86d3
#21 [ffffffff88803ec8] cpuidle_idle_call at ffffffff86f4ac3d
#22 [ffffffff88803f00] do_idle at ffffffff86f4ad1b
...

crash> dis default_idle
0xffffffff878e8560 <default_idle>:      nopl   0x0(%rax,%rax,1) [FTRACE NOP]
0xffffffff878e8565 <default_idle+5>:    xchg   %ax,%ax
0xffffffff878e8567 <default_idle+7>:    verw   0x51b2c2(%rip)        # 0xffffffff87e03830 <ds.0>
0xffffffff878e856e <default_idle+14>:   sti
0xffffffff878e856f <default_idle+15>:   hlt
0xffffffff878e8570 <default_idle+16>:   ret

I guess "0xffffffff878e8570 <default_idle+16>" should be the exception
RIP here..


The above address of RIP is zero. Maybe this is a specific RHEL9 vmcore issue, or it
is related to some kernel specific config? I will dig into the details later.

>
> Kernel commit fa5e5c409213 ("x86/entry: Use idtentry for interrupts")
> change causes the current issue, crash needs to adjust the value of
> irq eframe.
>
> Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
> ---
>   x86_64.c | 15 +++++++++++----
>   1 file changed, 11 insertions(+), 4 deletions(-)
>
> diff --git a/x86_64.c b/x86_64.c
> index 7a5d6f050c89..6504d16fb17e 100644
> --- a/x86_64.c
> +++ b/x86_64.c
> @@ -4647,10 +4647,15 @@ x86_64_exception_frame(ulong flags, ulong kvaddr, char *local,
>       r14 = ULONG(pt_regs_buf + ms->pto.r14);
>       r15 = ULONG(pt_regs_buf + ms->pto.r15);
>   
> -        verified = x86_64_eframe_verify(bt,
> -             kvaddr ? kvaddr : (local - bt->stackbuf) + bt->stackbase,
> -             cs, ss, rip, rsp, rflags);
> -
> +     /*
> +      * Do not verify when the flags is not set to EFRAME_VERIFY.
> +      */
> +     if (symbol_search("asm_common_interrupt") && !(flags & EFRAME_VERIFY))
> +             verified = TRUE;
> +     else
> +             verified = x86_64_eframe_verify(bt,
> +                     kvaddr ? kvaddr : (local - bt->stackbuf) + bt->stackbase,
> +                     cs, ss, rip, rsp, rflags);
>       /*
>        *  If it's print-if-verified request, don't print bogus eframes.
>        */
> @@ -6577,6 +6582,8 @@ x86_64_irq_eframe_link(ulong stkref, struct bt_info *bt, FILE *ofp)
>               return stkref;
>   
>       irq_eframe = stkref - machdep->machspec->irq_eframe_link;
> +     if (symbol_search("asm_common_interrupt"))
> +             irq_eframe -= 80;

So, where does this 80 come from?

crash> dis asm_common_interrupt
0xffffffff88800c00 <asm_common_interrupt>:      data16 xchg %ax,%ax
0xffffffff88800c03 <asm_common_interrupt+3>:    call   0xffffffff88801130 <error_entry>
0xffffffff88800c08 <asm_common_interrupt+8>:    mov    %rsp,%rdi
0xffffffff88800c0b <asm_common_interrupt+11>:   mov    0x78(%rsp),%rsi
0xffffffff88800c10 <asm_common_interrupt+16>:   movq   $0xffffffffffffffff,0x78(%rsp)
0xffffffff88800c19 <asm_common_interrupt+25>:   call   0xffffffff887604a0 <common_interrupt>
0xffffffff88800c1e <asm_common_interrupt+30>:   jmp    0xffffffff88801210 <error_return>
crash>

The rsp offset is 0x78, because the irq_eframe_link is initialized to 40, so the irq eframe offset needs to
subtract 80( 0x78 = 120 = 40 + 80).

Thanks.
Lianbo


Thanks,
Kazu