Hi Qi,
thanks for the update.
On 2022/05/19 22:48, Qi Zheng wrote:
When we use crash to troubleshoot softlockup and other problems,
we often use the 'bt -a' command to print the stacks of running
processes on all CPUs. But now some servers have hundreds of CPUs
(such as AMD machines), which causes the 'bt -a' command to output
a lot of process stacks. And many of these stacks are the stacks
of the idle process, which are not needed by us.
Therefore, in order to reduce this part of the interference information,
this patch adds the -n option to the bt command. When we specify
'-n idle' (meaning no idle), the stack of the idle process will be
filtered out, thus speeding up our troubleshooting.
And like option '-a' of bt, option '-n idle' works only for dumpfiles
captured by kdump.
This is a bit different from '-a', so I will change it to:
And the option works only for crash dumps captured by kdump.
because '-a' works for live dumps but '-n idle' doesn't, as shown
here:
crash> bt -a -n idle
PID: 0 TASK: ffffffff82212780 CPU: 0 COMMAND: "swapper/0"
[exception RIP: native_safe_halt+2]
RIP: ffffffff81820c92 RSP: ffffffff82203e90 RFLAGS: 00000246
RAX: ffffffff818209c0 RBX: 0000000000000000 RCX: 0000000000000001
RDX: 0000000000000001 RSI: 0000000000000087 RDI: 0000000000000000
RBP: 0000000000000000 R8: 00007584cc726bcd R9: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
R13: 0000000000000000 R14: 000000007f46e168 R15: 000000007ff922a0
CS: 0010 SS: 0018
#0 [ffffffff82203e90] default_idle at ffffffff818209da
#1 [ffffffff82203eb0] do_idle at ffffffff810de691
#2 [ffffffff82203ef0] cpu_startup_entry at ffffffff810de8ff
#3 [ffffffff82203f10] start_kernel at ffffffff827821eb
#4 [ffffffff82203f50] secondary_startup_64 at ffffffff810000e7
...
And with the following patch,
Acked-by: Kazuhito Hagio <k-hagio-ab(a)nec.com>
--- a/help.c
+++ b/help.c
@@ -1909,7 +1909,7 @@ char *help_bt[] = {
"bt",
"backtrace",
"[-a|-c cpu(s)|-g|-r|-t|-T|-l|-e|-E|-f|-F|-o|-O|-v|-p] [-R ref] [-s [-x|d]]"
-"\n [-I ip] [-S sp] [pid | task]",
+"\n [-I ip] [-S sp] [-n idle] [pid | task]",
" Display a kernel stack backtrace. If no arguments are given, the stack",
" trace of the current context will be displayed.\n",
" -a displays the stack traces of the active task on each CPU.",
No need to repost, we can fix these when merging.
Thanks,
Kazu
>
> The command output is as follows:
> crash> bt -a -n idle
> [...]
> PID: 0 TASK: ffff889ff93f5a00 CPU: 5 COMMAND: "swapper/5"
>
> PID: 0 TASK: ffff889ff93f0000 CPU: 6 COMMAND: "swapper/6"
>
> PID: 0 TASK: ffff889ff8c30000 CPU: 7 COMMAND: "swapper/7"
>
> PID: 0 TASK: ffff889ff8c34380 CPU: 8 COMMAND: "swapper/8"
>
> PID: 0 TASK: ffff889ff8c32d00 CPU: 9 COMMAND: "swapper/9"
>
> PID: 0 TASK: ffff889ff8c31680 CPU: 10 COMMAND: "swapper/10"
>
> PID: 0 TASK: ffff889ff8c35a00 CPU: 11 COMMAND: "swapper/11"
>
> PID: 0 TASK: ffff889ff8c3c380 CPU: 12 COMMAND: "swapper/12"
>
> PID: 150773 TASK: ffff889fe85a1680 CPU: 13 COMMAND: "bash"
> #0 [ffffc9000d35bcd0] machine_kexec at ffffffff8105a407
> #1 [ffffc9000d35bd28] __crash_kexec at ffffffff8113033d
> #2 [ffffc9000d35bdf0] panic at ffffffff81081930
> #3 [ffffc9000d35be70] sysrq_handle_crash at ffffffff814e38d1
> #4 [ffffc9000d35be78] __handle_sysrq.cold.12 at ffffffff814e4175
> #5 [ffffc9000d35bea8] write_sysrq_trigger at ffffffff814e404b
> #6 [ffffc9000d35beb8] proc_reg_write at ffffffff81330d86
> #7 [ffffc9000d35bed0] vfs_write at ffffffff812a72d5
> #8 [ffffc9000d35bf00] ksys_write at ffffffff812a7579
> #9 [ffffc9000d35bf38] do_syscall_64 at ffffffff81004259
> RIP: 00007fa7abcdc274 RSP: 00007fffa731f678 RFLAGS: 00000246
> RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007fa7abcdc274
> RDX: 0000000000000002 RSI: 0000563ca51ee6d0 RDI: 0000000000000001
> RBP: 0000563ca51ee6d0 R8: 000000000000000a R9: 00007fa7abd6be80
> R10: 000000000000000a R11: 0000000000000246 R12: 00007fa7abdad760
> R13: 0000000000000002 R14: 00007fa7abda8760 R15: 0000000000000002
> ORIG_RAX: 0000000000000001 CS: 0033 SS: 002b
> [...]
>
> Signed-off-by: Qi Zheng <zhengqi.arch(a)bytedance.com>
> ---
> defs.h | 1 +
> help.c | 30 ++++++++++++++++++++++++++++++
> kernel.c | 11 ++++++++++-
> x86_64.c | 8 ++++++++
> 4 files changed, 49 insertions(+), 1 deletion(-)
>
> diff --git a/defs.h b/defs.h
> index a6735d0..96a7429 100644
> --- a/defs.h
> +++ b/defs.h
> @@ -5830,6 +5830,7 @@ ulong cpu_map_addr(const char *type);
> #define BT_SHOW_ALL_REGS (0x2000000000000ULL)
> #define BT_REGS_NOT_FOUND (0x4000000000000ULL)
> #define BT_OVERFLOW_STACK (0x8000000000000ULL)
> +#define BT_SKIP_IDLE (0x10000000000000ULL)
> #define BT_SYMBOL_OFFSET (BT_SYMBOLIC_ARGS)
>
> #define BT_REF_HEXVAL (0x1)
> diff --git a/help.c b/help.c
> index 51a0fe3..6d8dc4f 100644
> --- a/help.c
> +++ b/help.c
> @@ -1915,6 +1915,8 @@ char *help_bt[] = {
> " -a displays the stack traces of the active task on each CPU.",
> " (only applicable to crash dumps)",
> " -A same as -a, but also displays vector registers (S390X only).",
> +" -n idle filter the stack of idle tasks (x86_64).",
> +" (only applicable to crash dumps)",
> " -p display the stack trace of the panic task only.",
> " (only applicable to crash dumps)",
> " -c cpu display the stack trace of the active task on one or more CPUs,",
> @@ -2004,6 +2006,34 @@ char *help_bt[] = {
> " DS: 002b ESI: bfffc8a0 ES: 002b EDI: 00000000 ",
> " SS: 002b ESP: bfffc82c EBP: bfffd224 ",
> " CS: 0023 EIP: 400d032e ERR: 0000008e EFLAGS: 00000246
",
> +" ",
> +" Display the stack trace of the active task(s) when the kernel
panicked,",
> +" and filter out the stack of the idle tasks:",
> +" %s> bt -a -n idle",
> +" [...]",
> +" PID: 0 TASK: ffff889ff8c35a00 CPU: 11 COMMAND:
\"swapper/11\"",
> +" ",
> +" PID: 0 TASK: ffff889ff8c3c380 CPU: 12 COMMAND:
\"swapper/12\"",
> +" ",
> +" PID: 150773 TASK: ffff889fe85a1680 CPU: 13 COMMAND:
\"bash\"",
> +" #0 [ffffc9000d35bcd0] machine_kexec at ffffffff8105a407",
> +" #1 [ffffc9000d35bd28] __crash_kexec at ffffffff8113033d",
> +" #2 [ffffc9000d35bdf0] panic at ffffffff81081930",
> +" #3 [ffffc9000d35be70] sysrq_handle_crash at ffffffff814e38d1",
> +" #4 [ffffc9000d35be78] __handle_sysrq.cold.12 at ffffffff814e4175",
> +" #5 [ffffc9000d35bea8] write_sysrq_trigger at ffffffff814e404b",
> +" #6 [ffffc9000d35beb8] proc_reg_write at ffffffff81330d86",
> +" #7 [ffffc9000d35bed0] vfs_write at ffffffff812a72d5",
> +" #8 [ffffc9000d35bf00] ksys_write at ffffffff812a7579",
> +" #9 [ffffc9000d35bf38] do_syscall_64 at ffffffff81004259",
> +" RIP: 00007fa7abcdc274 RSP: 00007fffa731f678 RFLAGS: 00000246",
> +" RAX: ffffffffffffffda RBX: 0000000000000002 RCX:
00007fa7abcdc274",
> +" RDX: 0000000000000002 RSI: 0000563ca51ee6d0 RDI:
0000000000000001",
> +" RBP: 0000563ca51ee6d0 R8: 000000000000000a R9:
00007fa7abd6be80",
> +" R10: 000000000000000a R11: 0000000000000246 R12:
00007fa7abdad760",
> +" R13: 0000000000000002 R14: 00007fa7abda8760 R15:
0000000000000002",
> +" ORIG_RAX: 0000000000000001 CS: 0033 SS: 002b",
> +" [...]",
> "\n Display the stack trace of the active task on CPU 0 and 1:\n",
> " %s> bt -c 0,1",
> " PID: 0 TASK: ffffffff81a8d020 CPU: 0 COMMAND:
\"swapper\"",
> diff --git a/kernel.c b/kernel.c
> index d0921cf..acfacaf 100644
> --- a/kernel.c
> +++ b/kernel.c
> @@ -2503,7 +2503,7 @@ cmd_bt(void)
> if (kt->flags & USE_OPT_BT)
> bt->flags |= BT_OPT_BACK_TRACE;
>
> - while ((c = getopt(argcnt, args, "D:fFI:S:c:aAloreEgstTdxR:Ovp")) != EOF) {
> + while ((c = getopt(argcnt, args, "D:fFI:S:c:n:aAloreEgstTdxR:Ovp")) != EOF) {
> switch (c)
> {
> case 'f':
> @@ -2672,6 +2672,11 @@ cmd_bt(void)
> active++;
> break;
>
> + case 'n':
> + if (machine_type("X86_64") && STREQ(optarg, "idle"))
> + bt->flags |= BT_SKIP_IDLE;
> + break;
> +
> case 'r':
> bt->flags |= BT_RAW;
> break;
> @@ -3092,6 +3097,10 @@ back_trace(struct bt_info *bt)
> } else
> machdep->get_stack_frame(bt, &eip, &esp);
>
> + /* skip idle task stack */
> + if (bt->flags & BT_SKIP_IDLE)
> + return;
> +
> if (bt->flags & BT_KSTACKP) {
> bt->stkptr = esp;
> return;
> diff --git a/x86_64.c b/x86_64.c
> index ecaefd2..cfafbcc 100644
> --- a/x86_64.c
> +++ b/x86_64.c
> @@ -4918,6 +4918,9 @@ x86_64_get_stack_frame(struct bt_info *bt, ulong *pcp, ulong *spp)
> if (bt->flags & BT_DUMPFILE_SEARCH)
> return x86_64_get_dumpfile_stack_frame(bt, pcp, spp);
>
> + if (bt->flags & BT_SKIP_IDLE)
> + bt->flags &= ~BT_SKIP_IDLE;
> +
> if (pcp)
> *pcp = x86_64_get_pc(bt);
> if (spp)
> @@ -4960,6 +4963,9 @@ x86_64_get_dumpfile_stack_frame(struct bt_info *bt_in, ulong *rip, ulong *rsp)
> estack = -1;
> panic = FALSE;
>
> + if (bt_in->flags & BT_SKIP_IDLE)
> + bt_in->flags &= ~BT_SKIP_IDLE;
> +
> panic_task = tt->panic_task == bt->task ? TRUE : FALSE;
>
> if (panic_task && bt->machdep) {
> @@ -5098,6 +5104,8 @@ next_sysrq:
> if (!panic_task && STREQ(sym, "crash_nmi_callback")) {
> *rip = *up;
> *rsp = bt->stackbase + ((char *)(up) - bt->stackbuf);
> + if ((bt->flags & BT_SKIP_IDLE) && is_idle_thread(bt->task))
> + bt_in->flags |= BT_SKIP_IDLE;
> return;
> }
>