On 2022/5/20 10:26 AM, HAGIO KAZUHITO(萩尾 一仁) wrote:
 Hi Qi,
 
 thanks for the update.
 
 On 2022/05/19 22:48, Qi Zheng wrote:
> When we use crash to troubleshoot softlockups and other problems,
> we often use the 'bt -a' command to print the stacks of the running
> processes on all CPUs. But some servers now have hundreds of CPUs
> (such as AMD machines), which causes the 'bt -a' command to output
> a lot of process stacks. Many of these are the stacks of idle
> tasks, which we do not need.
>
> Therefore, to reduce this noise, this patch adds the -n option to
> the bt command. When '-n idle' (meaning "no idle") is specified,
> the stacks of the idle tasks are filtered out, which speeds up
> troubleshooting.
>
 
> And like option '-a' of bt, option '-n idle' works only for dumpfiles
> captured by kdump.
 
 It's a bit different from '-a', will change to:
 
     And the option works only for crash dumps captured by kdump.
 
 because '-a' works for live dumps but '-n idle' doesn't work like this:
Got it.
 
 crash> bt -a -n idle
 PID: 0      TASK: ffffffff82212780  CPU: 0   COMMAND: "swapper/0"
       [exception RIP: native_safe_halt+2]
       RIP: ffffffff81820c92  RSP: ffffffff82203e90  RFLAGS: 00000246
       RAX: ffffffff818209c0  RBX: 0000000000000000  RCX: 0000000000000001
       RDX: 0000000000000001  RSI: 0000000000000087  RDI: 0000000000000000
       RBP: 0000000000000000   R8: 00007584cc726bcd   R9: 0000000000000000
       R10: 0000000000000000  R11: 0000000000000000  R12: 0000000000000000
       R13: 0000000000000000  R14: 000000007f46e168  R15: 000000007ff922a0
       CS: 0010  SS: 0018
    #0 [ffffffff82203e90] default_idle at ffffffff818209da
    #1 [ffffffff82203eb0] do_idle at ffffffff810de691
    #2 [ffffffff82203ef0] cpu_startup_entry at ffffffff810de8ff
    #3 [ffffffff82203f10] start_kernel at ffffffff827821eb
    #4 [ffffffff82203f50] secondary_startup_64 at ffffffff810000e7
 ...
 
 And with the following patch,
 
 Acked-by: Kazuhito Hagio <k-hagio-ab(a)nec.com> 
Thanks.
 
 --- a/help.c
 +++ b/help.c
 @@ -1909,7 +1909,7 @@ char *help_bt[] = {
    "bt",
    "backtrace",
    "[-a|-c cpu(s)|-g|-r|-t|-T|-l|-e|-E|-f|-F|-o|-O|-v|-p] [-R ref] [-s
[-x|d]]"
 -"\n     [-I ip] [-S sp] [pid | task]",
 +"\n     [-I ip] [-S sp] [-n idle] [pid | task]",
    "  Display a kernel stack backtrace.  If no arguments are given, the
stack",
    "  trace of the current context will be displayed.\n",
    "       -a  displays the stack traces of the active task on each CPU.",
 
 No need to repost, we can fix these when merging. 
Thank you very much! :)
 
 Thanks,
 Kazu
 
>
> The command output is as follows:
> crash> bt -a -n idle
> [...]
> PID: 0      TASK: ffff889ff93f5a00  CPU: 5   COMMAND: "swapper/5"
>
> PID: 0      TASK: ffff889ff93f0000  CPU: 6   COMMAND: "swapper/6"
>
> PID: 0      TASK: ffff889ff8c30000  CPU: 7   COMMAND: "swapper/7"
>
> PID: 0      TASK: ffff889ff8c34380  CPU: 8   COMMAND: "swapper/8"
>
> PID: 0      TASK: ffff889ff8c32d00  CPU: 9   COMMAND: "swapper/9"
>
> PID: 0      TASK: ffff889ff8c31680  CPU: 10  COMMAND: "swapper/10"
>
> PID: 0      TASK: ffff889ff8c35a00  CPU: 11  COMMAND: "swapper/11"
>
> PID: 0      TASK: ffff889ff8c3c380  CPU: 12  COMMAND: "swapper/12"
>
> PID: 150773  TASK: ffff889fe85a1680  CPU: 13  COMMAND: "bash"
>    #0 [ffffc9000d35bcd0] machine_kexec at ffffffff8105a407
>    #1 [ffffc9000d35bd28] __crash_kexec at ffffffff8113033d
>    #2 [ffffc9000d35bdf0] panic at ffffffff81081930
>    #3 [ffffc9000d35be70] sysrq_handle_crash at ffffffff814e38d1
>    #4 [ffffc9000d35be78] __handle_sysrq.cold.12 at ffffffff814e4175
>    #5 [ffffc9000d35bea8] write_sysrq_trigger at ffffffff814e404b
>    #6 [ffffc9000d35beb8] proc_reg_write at ffffffff81330d86
>    #7 [ffffc9000d35bed0] vfs_write at ffffffff812a72d5
>    #8 [ffffc9000d35bf00] ksys_write at ffffffff812a7579
>    #9 [ffffc9000d35bf38] do_syscall_64 at ffffffff81004259
>       RIP: 00007fa7abcdc274  RSP: 00007fffa731f678  RFLAGS: 00000246
>       RAX: ffffffffffffffda  RBX: 0000000000000002  RCX: 00007fa7abcdc274
>       RDX: 0000000000000002  RSI: 0000563ca51ee6d0  RDI: 0000000000000001
>       RBP: 0000563ca51ee6d0   R8: 000000000000000a   R9: 00007fa7abd6be80
>       R10: 000000000000000a  R11: 0000000000000246  R12: 00007fa7abdad760
>       R13: 0000000000000002  R14: 00007fa7abda8760  R15: 0000000000000002
>       ORIG_RAX: 0000000000000001  CS: 0033  SS: 002b
> [...]
>
> Signed-off-by: Qi Zheng <zhengqi.arch(a)bytedance.com>
> ---
>    defs.h   |  1 +
>    help.c   | 30 ++++++++++++++++++++++++++++++
>    kernel.c | 11 ++++++++++-
>    x86_64.c |  8 ++++++++
>    4 files changed, 49 insertions(+), 1 deletion(-)
>
> diff --git a/defs.h b/defs.h
> index a6735d0..96a7429 100644
> --- a/defs.h
> +++ b/defs.h
> @@ -5830,6 +5830,7 @@ ulong cpu_map_addr(const char *type);
>    #define BT_SHOW_ALL_REGS  (0x2000000000000ULL)
>    #define BT_REGS_NOT_FOUND (0x4000000000000ULL)
>    #define BT_OVERFLOW_STACK (0x8000000000000ULL)
> +#define BT_SKIP_IDLE     (0x10000000000000ULL)
>    #define BT_SYMBOL_OFFSET   (BT_SYMBOLIC_ARGS)
>    
>    #define BT_REF_HEXVAL         (0x1)
> diff --git a/help.c b/help.c
> index 51a0fe3..6d8dc4f 100644
> --- a/help.c
> +++ b/help.c
> @@ -1915,6 +1915,8 @@ char *help_bt[] = {
>    "       -a  displays the stack traces of the active task on each CPU.",
>    "           (only applicable to crash dumps)",
>    "       -A  same as -a, but also displays vector registers (S390X
only).",
> +"  -n idle  filter the stack of idle tasks (x86_64).",
> +"           (only applicable to crash dumps)",
>    "       -p  display the stack trace of the panic task only.",
>    "           (only applicable to crash dumps)",
>    "   -c cpu  display the stack trace of the active task on one or more
CPUs,",
> @@ -2004,6 +2006,34 @@ char *help_bt[] = {
>    "       DS:  002b      ESI: bfffc8a0  ES:  002b      EDI: 00000000 ",
>    "       SS:  002b      ESP: bfffc82c  EBP: bfffd224 ",
>    "       CS:  0023      EIP: 400d032e  ERR: 0000008e  EFLAGS: 00000246 
",
> +" ",
> +"  Display the stack trace of the active task(s) when the kernel
panicked,",
> +"  and filter out the stack of the idle tasks:",
> +"    %s> bt -a -n idle",
> +"    [...]",
> +"    PID: 0      TASK: ffff889ff8c35a00  CPU: 11  COMMAND:
\"swapper/11\"",
> +" ",
> +"    PID: 0      TASK: ffff889ff8c3c380  CPU: 12  COMMAND:
\"swapper/12\"",
> +" ",
> +"    PID: 150773  TASK: ffff889fe85a1680  CPU: 13  COMMAND:
\"bash\"",
> +"    #0 [ffffc9000d35bcd0] machine_kexec at ffffffff8105a407",
> +"    #1 [ffffc9000d35bd28] __crash_kexec at ffffffff8113033d",
> +"    #2 [ffffc9000d35bdf0] panic at ffffffff81081930",
> +"    #3 [ffffc9000d35be70] sysrq_handle_crash at ffffffff814e38d1",
> +"    #4 [ffffc9000d35be78] __handle_sysrq.cold.12 at ffffffff814e4175",
> +"    #5 [ffffc9000d35bea8] write_sysrq_trigger at ffffffff814e404b",
> +"    #6 [ffffc9000d35beb8] proc_reg_write at ffffffff81330d86",
> +"    #7 [ffffc9000d35bed0] vfs_write at ffffffff812a72d5",
> +"    #8 [ffffc9000d35bf00] ksys_write at ffffffff812a7579",
> +"    #9 [ffffc9000d35bf38] do_syscall_64 at ffffffff81004259",
> +"       RIP: 00007fa7abcdc274  RSP: 00007fffa731f678  RFLAGS: 00000246",
> +"       RAX: ffffffffffffffda  RBX: 0000000000000002  RCX:
00007fa7abcdc274",
> +"       RDX: 0000000000000002  RSI: 0000563ca51ee6d0  RDI:
0000000000000001",
> +"       RBP: 0000563ca51ee6d0   R8: 000000000000000a   R9:
00007fa7abd6be80",
> +"       R10: 000000000000000a  R11: 0000000000000246  R12:
00007fa7abdad760",
> +"       R13: 0000000000000002  R14: 00007fa7abda8760  R15:
0000000000000002",
> +"       ORIG_RAX: 0000000000000001  CS: 0033  SS: 002b",
> +"    [...]",
>    "\n  Display the stack trace of the active task on CPU 0 and 1:\n",
>    "    %s> bt -c 0,1",
>    "    PID: 0      TASK: ffffffff81a8d020  CPU: 0   COMMAND:
\"swapper\"",
> diff --git a/kernel.c b/kernel.c
> index d0921cf..acfacaf 100644
> --- a/kernel.c
> +++ b/kernel.c
> @@ -2503,7 +2503,7 @@ cmd_bt(void)
>    	if (kt->flags & USE_OPT_BT)
>    		bt->flags |= BT_OPT_BACK_TRACE;
>    
> -	while ((c = getopt(argcnt, args, "D:fFI:S:c:aAloreEgstTdxR:Ovp")) != EOF)
{
> +	while ((c = getopt(argcnt, args, "D:fFI:S:c:n:aAloreEgstTdxR:Ovp")) !=
EOF) {
>                    switch (c)
>    		{
>    		case 'f':
> @@ -2672,6 +2672,11 @@ cmd_bt(void)
>    			active++;
>    			break;
>    
> +		case 'n':
> +			if (machine_type("X86_64") && STREQ(optarg, "idle"))
> +				bt->flags |= BT_SKIP_IDLE;
> +			break;
> +
>    		case 'r':
>    			bt->flags |= BT_RAW;
>    			break;
> @@ -3092,6 +3097,10 @@ back_trace(struct bt_info *bt)
>    	} else
>                    machdep->get_stack_frame(bt, &eip, &esp);
>    
> +	/* skip idle task stack */
> +	if (bt->flags & BT_SKIP_IDLE)
> +		return;
> +
>    	if (bt->flags & BT_KSTACKP) {
>    		bt->stkptr = esp;
>    		return;
> diff --git a/x86_64.c b/x86_64.c
> index ecaefd2..cfafbcc 100644
> --- a/x86_64.c
> +++ b/x86_64.c
> @@ -4918,6 +4918,9 @@ x86_64_get_stack_frame(struct bt_info *bt, ulong *pcp, ulong
*spp)
>    	if (bt->flags & BT_DUMPFILE_SEARCH)
>    		return x86_64_get_dumpfile_stack_frame(bt, pcp, spp);
>    
> +	if (bt->flags & BT_SKIP_IDLE)
> +		bt->flags &= ~BT_SKIP_IDLE;
> +
>            if (pcp)
>                    *pcp = x86_64_get_pc(bt);
>            if (spp)
> @@ -4960,6 +4963,9 @@ x86_64_get_dumpfile_stack_frame(struct bt_info *bt_in, ulong
*rip, ulong *rsp)
>    	estack = -1;
>    	panic = FALSE;
>    
> +	if (bt_in->flags & BT_SKIP_IDLE)
> +		bt_in->flags &= ~BT_SKIP_IDLE;
> +
>    	panic_task = tt->panic_task == bt->task ? TRUE : FALSE;
>    
>    	if (panic_task && bt->machdep) {
> @@ -5098,6 +5104,8 @@ next_sysrq:
>                    if (!panic_task && STREQ(sym,
"crash_nmi_callback")) {
>                            *rip = *up;
>                            *rsp = bt->stackbase + ((char *)(up) -
bt->stackbuf);
> +			if ((bt->flags & BT_SKIP_IDLE) && is_idle_thread(bt->task))
> +				bt_in->flags |= BT_SKIP_IDLE;
>                            return;
>                    }
>     
-- 
Thanks,
Qi