Thanks for the comments.
-----Original Message-----
在 2021年04月06日 16:46, HAGIO KAZUHITO(萩尾 一仁) 写道:
> Fix for 'bt' command and options on Linux 5.8-rc1 or later kernels
> that contain merge commit 076f14be7fc942e112c94c841baec44124275cd0.
> The merged patches changed the name of exception functions that
> have been used by the crash utility to check the exception frame.
> Without the patch, the command and options cannot display it.
>
> Before:
> crash> bt
> PID: 8752 TASK: ffff8f80cb244380 CPU: 2 COMMAND: "insmod"
> #0 [ffffa3e40187f9f8] machine_kexec at ffffffffab25d267
> #1 [ffffa3e40187fa48] __crash_kexec at ffffffffab38e2ed
> #2 [ffffa3e40187fb10] crash_kexec at ffffffffab38f1dd
> #3 [ffffa3e40187fb28] oops_end at ffffffffab222cbd
> #4 [ffffa3e40187fb48] do_trap at ffffffffab21fea1
> #5 [ffffa3e40187fb90] do_error_trap at ffffffffab21ff75
> #6 [ffffa3e40187fbd0] exc_invalid_op at ffffffffabb76a2c
> #7 [ffffa3e40187fbf0] asm_exc_invalid_op at ffffffffabc00a72
> #8 [ffffa3e40187fc78] init_module at ffffffffc042b018 [invalid]
> #9 [ffffa3e40187fca0] init_module at ffffffffc042b018 [invalid]
> #10 [ffffa3e40187fca8] do_one_initcall at ffffffffab202806
> #11 [ffffa3e40187fd18] do_init_module at ffffffffab3888ba
> #12 [ffffa3e40187fd38] load_module at ffffffffab38afde
>
> After:
> crash> bt
> PID: 8752 TASK: ffff8f80cb244380 CPU: 2 COMMAND: "insmod"
> #0 [ffffa3e40187f9f8] machine_kexec at ffffffffab25d267
> #1 [ffffa3e40187fa48] __crash_kexec at ffffffffab38e2ed
> #2 [ffffa3e40187fb10] crash_kexec at ffffffffab38f1dd
> #3 [ffffa3e40187fb28] oops_end at ffffffffab222cbd
> #4 [ffffa3e40187fb48] do_trap at ffffffffab21fea1
> #5 [ffffa3e40187fb90] do_error_trap at ffffffffab21ff75
> #6 [ffffa3e40187fbd0] exc_invalid_op at ffffffffabb76a2c
> #7 [ffffa3e40187fbf0] asm_exc_invalid_op at ffffffffabc00a72
> [exception RIP: init_module+24]
> RIP: ffffffffc042b018 RSP: ffffa3e40187fca8 RFLAGS: 00010246
> RAX: 000000000000001c RBX: 0000000000000000 RCX: 0000000000000000
> RDX: 0000000000000000 RSI: ffff8f80fbd18000 RDI: ffff8f80fbd18000
> RBP: ffffffffc042b000 R8: 000000000000029d R9: 000000000000002c
> R10: 0000000000000000 R11: ffffa3e40187fb58 R12: ffffffffc042d018
> R13: ffffa3e40187fdf0 R14: ffffffffc042d000 R15: ffffa3e40187fe90
> ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018
> #8 [ffffa3e40187fca0] init_module at ffffffffc042b018 [invalid]
> #9 [ffffa3e40187fca8] do_one_initcall at ffffffffab202806
> #10 [ffffa3e40187fd18] do_init_module at ffffffffab3888ba
> #11 [ffffa3e40187fd38] load_module at ffffffffab38afde
>
> Signed-off-by: Kazuhito Hagio <k-hagio-ab(a)nec.com>
> ---
> defs.h | 1 +
> x86_64.c | 43 ++++++++++++++++++++++++++++++++++++++++---
> 2 files changed, 41 insertions(+), 3 deletions(-)
>
> diff --git a/defs.h b/defs.h
> index c29b3fa..f9c711c 100644
> --- a/defs.h
> +++ b/defs.h
> @@ -6026,6 +6026,7 @@ struct machine_specific {
> ulong cpu_entry_area_start;
> ulong cpu_entry_area_end;
> ulong page_offset_force;
> + char **exception_functions;
> };
>
> #define KSYMS_START (0x1)
> diff --git a/x86_64.c b/x86_64.c
> index f5b2f7b..3f4a969 100644
> --- a/x86_64.c
> +++ b/x86_64.c
> @@ -139,6 +139,9 @@ static void orc_dump(ulong);
>
> struct machine_specific x86_64_machine_specific = { 0 };
>
> +static const char *exception_functions_orig[];
> +static const char *exception_functions_5_8[];
> +
> /*
> * Do all necessary machine-specific setup here. This is called several
> * times during initialization.
> @@ -735,6 +738,12 @@ x86_64_init(int when)
> STRUCT_SIZE_INIT(percpu_data, "percpu_data");
>
> GART_init();
> +
> + if (kernel_symbol_exists("asm_exc_divide_error"))
This symbol may also change in the future, so I'm not sure whether it would be
better to use the kernel version instead. That would also be consistent with
the name exception_functions_5_8. For example:
if (THIS_KERNEL_VERSION >= LINUX(5,8,0))
...
Hmm, but the symbol names in exception_functions_5_8 have to be updated anyway
when they change, and searching for the symbol also provides "backport"
compatibility.
So I thought this way would be better.
> + machdep->machspec->exception_functions = (char **)exception_functions_5_8;
> + else
> + machdep->machspec->exception_functions = (char **)exception_functions_orig;
> +
> break;
>
> case POST_VM:
> @@ -1104,6 +1113,12 @@ x86_64_dump_machdep_table(ulong arg)
> fprintf(fp, "%016lx\n", (ulong)ms->cpu_entry_area_end);
> else
> fprintf(fp, "(unused)\n");
> +
> + fprintf(fp, " excpetion_functions: ");
> + if (ms->exception_functions == (char **)exception_functions_5_8)
> + fprintf(fp, "excpetion_functions_5_8\n");
> + else
> + fprintf(fp, "excpetion_functions_orig\n");
> }
>
> /*
> @@ -3086,7 +3101,7 @@ text_lock_function(char *name, struct bt_info *bt, ulong locktext)
> * zeroentry xen_debug do_debug
> * zeroentry xen_int3 do_int3
> */
> -static const char *exception_functions[] = {
> +static const char *exception_functions_orig[] = {
> "invalid_TSS",
> "segment_not_present",
> "alignment_check",
> @@ -3109,6 +3124,28 @@ static const char *exception_functions[] = {
> NULL,
> };
>
> +static const char *exception_functions_5_8[] = {
> + "asm_exc_invalid_tss",
> + "asm_exc_segment_not_present",
> + "asm_exc_alignment_check",
> + "asm_exc_general_protection",
> + "asm_exc_page_fault",
> + "asm_exc_divide_error",
> + "asm_exc_overflow",
> + "asm_exc_bounds",
> + "asm_exc_invalid_op",
> + "asm_exc_device_not_available",
> + "asm_exc_coproc_segment_overrun",
> + "asm_exc_spurious_interrupt_bug",
> + "asm_exc_coprocessor_error",
> + "asm_exc_simd_coprocessor_error",
> + "xen_asm_exc_stack_segment",
> + "xen_asm_exc_xen_hypervisor_callback",
> + "xen_asm_exc_debug",
As for asm_exc_debug, it seems that it is no longer a Xen-specific exception.
Can we remove the "xen_" prefix and use "asm_exc_debug" instead?
Thanks, will fix.
BTW: I didn't see asm_exc_nmi and asm_exc_double_fault in the above exception
table. Were they missed, or are they unnecessary?
My understanding is that interrupts/exceptions like NMI and double fault
have their own dedicated exception stacks, so the crash utility does not
need their functions in the table to print the exception frame:
crash> bt
PID: 0 TASK: ffffffffbca14840 CPU: 0 COMMAND: "swapper/0"
#0 [fffffe000000bd20] machine_kexec at ffffffffbb064a1e
#1 [fffffe000000bd70] __crash_kexec at ffffffffbb18ebaa
#2 [fffffe000000be30] panic at ffffffffbb0dc5eb
#3 [fffffe000000beb0] unknown_nmi_error.cold at ffffffffbb02c6b1
#4 [fffffe000000bec8] exc_nmi at ffffffffbbb63e87
#5 [fffffe000000bef0] end_repeat_nmi at ffffffffbbc014db
[exception RIP: native_safe_halt+14]
RIP: ffffffffbbb7384e RSP: ffffffffbca03ea0 RFLAGS: 00000246
RAX: ffffffffbbb73480 RBX: 0000000000000000 RCX: 0000000000000001
RDX: 0000000000000000 RSI: 0000000000000083 RDI: 0000000000000000
RBP: 0000000000000000 R8: ffff9e1b7bc1d5e0 R9: 0000000000000201
R10: 00000e565d5115ef R11: 0000000001dafec8 R12: 0000000000000000
R13: 0000000000000000 R14: 00000000000000a5 R15: 0000000000000000
ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018
--- <NMI exception stack> ---
#6 [ffffffffbca03ea0] native_safe_halt at ffffffffbbb7384e
Actually, the old exception function table does not have the functions
for NMI and double fault either.
Thanks,
Kazu
Thanks.
Lianbo
> + "xen_asm_exc_int3",
> + NULL,
> +};
> +
> /*
> * print one entry of a stack trace
> */
> @@ -3185,8 +3222,8 @@ x86_64_print_stack_entry(struct bt_info *bt, FILE *ofp, int level,
> if ((THIS_KERNEL_VERSION >= LINUX(2,6,29)) &&
> (eframe_check == -1) && offset &&
> !(bt->flags & (BT_EXCEPTION_FRAME|BT_START|BT_SCHEDULE))) {
> - for (i = 0; exception_functions[i]; i++) {
> - if (STREQ(name, exception_functions[i])) {
> + for (i = 0; machdep->machspec->exception_functions[i]; i++) {
> + if (STREQ(name, machdep->machspec->exception_functions[i])) {
> eframe_check = 8;
> break;
> }
>