在 2021年04月15日 18:06, HAGIO KAZUHITO(萩尾 一仁) 写道:
Thanks for the comments.
-----Original Message-----
> 在 2021年04月06日 16:46, HAGIO KAZUHITO(萩尾 一仁) 写道:
>> Fix for 'bt' command and options on Linux 5.8-rc1 or later kernels
>> that contain merge commit 076f14be7fc942e112c94c841baec44124275cd0.
>> The merged patches changed the name of exception functions that
>> have been used by the crash utility to check the exception frame.
>> Without the patch, the command and options cannot display it.
>>
>> Before:
>> crash> bt
>> PID: 8752 TASK: ffff8f80cb244380 CPU: 2 COMMAND: "insmod"
>> #0 [ffffa3e40187f9f8] machine_kexec at ffffffffab25d267
>> #1 [ffffa3e40187fa48] __crash_kexec at ffffffffab38e2ed
>> #2 [ffffa3e40187fb10] crash_kexec at ffffffffab38f1dd
>> #3 [ffffa3e40187fb28] oops_end at ffffffffab222cbd
>> #4 [ffffa3e40187fb48] do_trap at ffffffffab21fea1
>> #5 [ffffa3e40187fb90] do_error_trap at ffffffffab21ff75
>> #6 [ffffa3e40187fbd0] exc_invalid_op at ffffffffabb76a2c
>> #7 [ffffa3e40187fbf0] asm_exc_invalid_op at ffffffffabc00a72
>> #8 [ffffa3e40187fc78] init_module at ffffffffc042b018 [invalid]
>> #9 [ffffa3e40187fca0] init_module at ffffffffc042b018 [invalid]
>> #10 [ffffa3e40187fca8] do_one_initcall at ffffffffab202806
>> #11 [ffffa3e40187fd18] do_init_module at ffffffffab3888ba
>> #12 [ffffa3e40187fd38] load_module at ffffffffab38afde
>>
>> After:
>> crash> bt
>> PID: 8752 TASK: ffff8f80cb244380 CPU: 2 COMMAND: "insmod"
>> #0 [ffffa3e40187f9f8] machine_kexec at ffffffffab25d267
>> #1 [ffffa3e40187fa48] __crash_kexec at ffffffffab38e2ed
>> #2 [ffffa3e40187fb10] crash_kexec at ffffffffab38f1dd
>> #3 [ffffa3e40187fb28] oops_end at ffffffffab222cbd
>> #4 [ffffa3e40187fb48] do_trap at ffffffffab21fea1
>> #5 [ffffa3e40187fb90] do_error_trap at ffffffffab21ff75
>> #6 [ffffa3e40187fbd0] exc_invalid_op at ffffffffabb76a2c
>> #7 [ffffa3e40187fbf0] asm_exc_invalid_op at ffffffffabc00a72
>> [exception RIP: init_module+24]
>> RIP: ffffffffc042b018 RSP: ffffa3e40187fca8 RFLAGS: 00010246
>> RAX: 000000000000001c RBX: 0000000000000000 RCX: 0000000000000000
>> RDX: 0000000000000000 RSI: ffff8f80fbd18000 RDI: ffff8f80fbd18000
>> RBP: ffffffffc042b000 R8: 000000000000029d R9: 000000000000002c
>> R10: 0000000000000000 R11: ffffa3e40187fb58 R12: ffffffffc042d018
>> R13: ffffa3e40187fdf0 R14: ffffffffc042d000 R15: ffffa3e40187fe90
>> ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018
>> #8 [ffffa3e40187fca0] init_module at ffffffffc042b018 [invalid]
>> #9 [ffffa3e40187fca8] do_one_initcall at ffffffffab202806
>> #10 [ffffa3e40187fd18] do_init_module at ffffffffab3888ba
>> #11 [ffffa3e40187fd38] load_module at ffffffffab38afde
>>
>> Signed-off-by: Kazuhito Hagio <k-hagio-ab(a)nec.com>
>> ---
>> defs.h | 1 +
>> x86_64.c | 43 ++++++++++++++++++++++++++++++++++++++++---
>> 2 files changed, 41 insertions(+), 3 deletions(-)
>>
>> diff --git a/defs.h b/defs.h
>> index c29b3fa..f9c711c 100644
>> --- a/defs.h
>> +++ b/defs.h
>> @@ -6026,6 +6026,7 @@ struct machine_specific {
>> ulong cpu_entry_area_start;
>> ulong cpu_entry_area_end;
>> ulong page_offset_force;
>> + char **exception_functions;
>> };
>>
>> #define KSYMS_START (0x1)
>> diff --git a/x86_64.c b/x86_64.c
>> index f5b2f7b..3f4a969 100644
>> --- a/x86_64.c
>> +++ b/x86_64.c
>> @@ -139,6 +139,9 @@ static void orc_dump(ulong);
>>
>> struct machine_specific x86_64_machine_specific = { 0 };
>>
>> +static const char *exception_functions_orig[];
>> +static const char *exception_functions_5_8[];
>> +
>> /*
>> * Do all necessary machine-specific setup here. This is called several
>> * times during initialization.
>> @@ -735,6 +738,12 @@ x86_64_init(int when)
>> STRUCT_SIZE_INIT(percpu_data, "percpu_data");
>>
>> GART_init();
>> +
>> + if (kernel_symbol_exists("asm_exc_divide_error"))
>
> This symbol may also change in the future, so I'm not sure whether it would be better to use the kernel
> version instead. That would be consistent with the name exception_functions_5_8. For example:
>
> if (THIS_KERNEL_VERSION >= LINUX(5,8,0))
> ...
Hmm, but the symbol name in exception_functions_5_8 has to be updated anyway
when it's changed, and the symbol search can provide "backport"
compatibility.
So I thought this way would be better.
Seems reasonable.
>
>
>> + machdep->machspec->exception_functions = (char
**)exception_functions_5_8;
>> + else
>> + machdep->machspec->exception_functions = (char
**)exception_functions_orig;
>> +
>> break;
>>
>> case POST_VM:
>> @@ -1104,6 +1113,12 @@ x86_64_dump_machdep_table(ulong arg)
>> fprintf(fp, "%016lx\n", (ulong)ms->cpu_entry_area_end);
>> else
>> fprintf(fp, "(unused)\n");
>> +
>> + fprintf(fp, " excpetion_functions: ");
>> + if (ms->exception_functions == (char **)exception_functions_5_8)
>> + fprintf(fp, "excpetion_functions_5_8\n");
>> + else
>> + fprintf(fp, "excpetion_functions_orig\n");
>> }
>>
>> /*
>> @@ -3086,7 +3101,7 @@ text_lock_function(char *name, struct bt_info *bt, ulong
locktext)
>> * zeroentry xen_debug do_debug
>> * zeroentry xen_int3 do_int3
>> */
>> -static const char *exception_functions[] = {
>> +static const char *exception_functions_orig[] = {
>> "invalid_TSS",
>> "segment_not_present",
>> "alignment_check",
>> @@ -3109,6 +3124,28 @@ static const char *exception_functions[] = {
>> NULL,
>> };
>>
>> +static const char *exception_functions_5_8[] = {
>> + "asm_exc_invalid_tss",
>> + "asm_exc_segment_not_present",
>> + "asm_exc_alignment_check",
>> + "asm_exc_general_protection",
>> + "asm_exc_page_fault",
>> + "asm_exc_divide_error",
>> + "asm_exc_overflow",
>> + "asm_exc_bounds",
>> + "asm_exc_invalid_op",
>> + "asm_exc_device_not_available",
>> + "asm_exc_coproc_segment_overrun",
>> + "asm_exc_spurious_interrupt_bug",
>> + "asm_exc_coprocessor_error",
>> + "asm_exc_simd_coprocessor_error",
>> + "xen_asm_exc_stack_segment",
>> + "xen_asm_exc_xen_hypervisor_callback",
>> + "xen_asm_exc_debug"
>
> As for asm_exc_debug, it seems that it is no longer a Xen-specific exception. Can we remove the
> "xen_" prefix and use "asm_exc_debug" instead?
Thanks, will fix.
>>
> BTW: I didn't see asm_exc_nmi and asm_exc_double_fault in the above exception table. Are they missing
> or unnecessary?
My understanding is that for interrupts/exceptions like NMI and
double fault that have their specific exception stack, the crash
utility does not need their functions in the table to print the
exception frame:
OK, that answered my questions. Thanks for the explanation.
crash> bt
PID: 0 TASK: ffffffffbca14840 CPU: 0 COMMAND: "swapper/0"
#0 [fffffe000000bd20] machine_kexec at ffffffffbb064a1e
#1 [fffffe000000bd70] __crash_kexec at ffffffffbb18ebaa
#2 [fffffe000000be30] panic at ffffffffbb0dc5eb
#3 [fffffe000000beb0] unknown_nmi_error.cold at ffffffffbb02c6b1
#4 [fffffe000000bec8] exc_nmi at ffffffffbbb63e87
#5 [fffffe000000bef0] end_repeat_nmi at ffffffffbbc014db
[exception RIP: native_safe_halt+14]
RIP: ffffffffbbb7384e RSP: ffffffffbca03ea0 RFLAGS: 00000246
RAX: ffffffffbbb73480 RBX: 0000000000000000 RCX: 0000000000000001
RDX: 0000000000000000 RSI: 0000000000000083 RDI: 0000000000000000
RBP: 0000000000000000 R8: ffff9e1b7bc1d5e0 R9: 0000000000000201
R10: 00000e565d5115ef R11: 0000000001dafec8 R12: 0000000000000000
R13: 0000000000000000 R14: 00000000000000a5 R15: 0000000000000000
ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018
--- <NMI exception stack> ---
#6 [ffffffffbca03ea0] native_safe_halt at ffffffffbbb7384e
Actually the old exception function table does not have the functions
for NMI and double fault.
Thanks,
Kazu
>
>
> Thanks.
> Lianbo
>
>> + "xen_asm_exc_int3"
>> + NULL,
>> +};
>> +
>> /*
>> * print one entry of a stack trace
>> */
>> @@ -3185,8 +3222,8 @@ x86_64_print_stack_entry(struct bt_info *bt, FILE *ofp, int
level,
>> if ((THIS_KERNEL_VERSION >= LINUX(2,6,29)) &&
>> (eframe_check == -1) && offset &&
>> !(bt->flags & (BT_EXCEPTION_FRAME|BT_START|BT_SCHEDULE))) {
>> - for (i = 0; exception_functions[i]; i++) {
>> - if (STREQ(name, exception_functions[i])) {
>> + for (i = 0; machdep->machspec->exception_functions[i]; i++) {
>> + if (STREQ(name, machdep->machspec->exception_functions[i])) {
>> eframe_check = 8;
>> break;
>> }
>>