Hi lianbo,
On Thu, Jun 19, 2025 at 2:37 PM lijiang <lijiang(a)redhat.com> wrote:
Thank you for working on this.
On Tue, Jun 3, 2025 at 1:15 PM <devel-request(a)lists.crash-utility.osci.io> wrote:
>
> Date: Tue, 3 Jun 2025 17:11:35 +1200
> From: Tao Liu <ltao(a)redhat.com>
> Subject: [Crash-utility] [PATCH v3 2/5] Call cmd_bt silently after
> "set pid"
> To: devel(a)lists.crash-utility.osci.io
> Cc: Alexey Makhalov <alexey.makhalov(a)broadcom.com>
> Message-ID: <20250603051138.59896-3-ltao(a)redhat.com>
> Content-Type: text/plain; charset="US-ASCII"; x-default=true
>
> Cmd bt will list multi-stacks of one task. After we "set <pid>"
switch
> context to one task, we first need a bt call to detect the multi-stacks,
> however we don't want any console output from it, so a nullfp is used for
> output receive. The silent bt call is only triggered once as part of task
> context switch by cmd set.
>
> A array of user_regs pointers is reserved for each supported arch. If one
> extra stack found, a user_regs structure will be allocated for storing regs
> value of the stack.
>
> Co-developed-by: Alexey Makhalov <alexey.makhalov(a)broadcom.com>
> Co-developed-by: Tao Liu <ltao(a)redhat.com>
> Signed-off-by: Tao Liu <ltao(a)redhat.com>
> ---
> arm64.c | 4 ++++
> crash_target.c | 7 +++++++
> kernel.c | 43 +++++++++++++++++++++++++++++++++++++++++++
> ppc64.c | 4 ++++
> task.c | 4 ++--
> x86_64.c | 3 +++
> 6 files changed, 63 insertions(+), 2 deletions(-)
>
> diff --git a/arm64.c b/arm64.c
> index 1cdde5f..8291301 100644
> --- a/arm64.c
> +++ b/arm64.c
> @@ -126,6 +126,10 @@ struct user_regs_bitmap_struct {
> ulong bitmap[32];
> };
>
> +#define MAX_EXCEPTION_STACKS 7
> +ulong extra_stacks_idx = 0;
> +struct user_regs_bitmap_struct *extra_stacks_regs[MAX_EXCEPTION_STACKS] = {0};
> +
> static inline bool is_mte_kvaddr(ulong addr)
> {
> /* check for ARM64_MTE enabled */
> diff --git a/crash_target.c b/crash_target.c
> index 71998ef..ad1480c 100644
> --- a/crash_target.c
> +++ b/crash_target.c
> @@ -31,6 +31,9 @@ extern "C" int crash_get_current_task_reg (int regno,
const char *regname,
> extern "C" int gdb_change_thread_context (void);
> extern "C" int gdb_add_substack (int);
> extern "C" void crash_get_current_task_info(unsigned long *pid, char
**comm);
> +#if defined (X86_64) || defined (ARM64) || defined (PPC64)
> +extern "C" void silent_call_bt(void);
> +#endif
>
> /* The crash target. */
>
> @@ -164,6 +167,10 @@ gdb_change_thread_context (void)
> /* 3rd, refresh regcache for tid 0 */
> target_fetch_registers(get_thread_regcache(inferior_thread()), -1);
> reinit_frame_cache();
> +#if defined (X86_64) || defined (ARM64) || defined (PPC64)
> + /* 4th, invoke bt silently to refresh the additional stacks */
> + silent_call_bt();
> +#endif
> return TRUE;
> }
>
> diff --git a/kernel.c b/kernel.c
> index b8d3b79..b15c32c 100644
> --- a/kernel.c
> +++ b/kernel.c
> @@ -12002,3 +12002,46 @@ int get_linux_banner_from_vmlinux(char *buf, size_t size)
>
> return TRUE;
> }
> +
> +#if defined(X86_64) || defined(ARM64) || defined(PPC64)
> +extern ulong extra_stacks_idx;
> +extern void *extra_stacks_regs[];
> +void silent_call_bt(void)
> +{
> + jmp_buf main_loop_env_save;
> + unsigned long long flags_save = pc->flags;
> + FILE *fp_save = fp;
> + FILE *error_fp_save = pc->error_fp;
> + /* Redirect all cmd_bt() outputs into null */
> + fp = pc->nullfp;
> + pc->error_fp = pc->nullfp;
> +
> + for (int i = 0; i < extra_stacks_idx; i++) {
> + /* Note: GETBUF/FREEBUF is not applicable for extra_stacks_regs,
> + because we are reserving extra_stacks_regs by cmd_bt()
> + for later use. But GETBUF/FREEBUF is designed for use only
> + within one cmd. See process_command_line() -> restore_sanity()
> + -> free_all_bufs(). So we use malloc/free instead. */
> + free(extra_stacks_regs[i]);
> + extra_stacks_regs[i] = NULL;
> + }
> + /* Prepare args used by cmd_bt() */
> + sprintf(pc->command_line, "bt\n");
> + argcnt = parse_line(pc->command_line, args);
> + optind = 1;
> + pc->flags |= RUNTIME;
Is this unnecessary? The flag 'RUNTIME' is set only once when initializing(see
the main_loop()), and will never be cleared.
Yes, the flag is needed.
The RUNTIME is a must for error(FATAL) to enter RESTART(), then get
captured by setjmp() in the following patch.
The original pc->flags |= RUNTIME is happend after task_init(). But in
task_init(), the set_context(,,TRUE) is used to switch context to
panic task, and silient_bt() will be called then. In other words, if
not set, silent_bt() is called without RUNTIME flag, so any FATAL
error cannot be captured.
Again: a warning observed
gcc -c -g -DX86_64 -DLZO -DGDB_16_2 kernel.c -I./gdb-16.2/bfd -I./gdb-16.2/include -Wall
-O2 -Wstrict-prototypes -Wmissing-prototypes -fstack-protector -Wformat-security
kernel.c:12009:6: warning: no previous prototype for ‘silent_call_bt’
[-Wmissing-prototypes]
12009 | void silent_call_bt(void)
| ^~~~~~~~~~~~~~
OK, will get it updated in v4.
Thanks,
Tao Liu
Thanks
Lianbo
> +
> + /* Catch error FATAL generated by cmd_bt() if any */
> + memcpy(&main_loop_env_save, &pc->main_loop_env, sizeof(jmp_buf));
> + if (setjmp(pc->main_loop_env)) {
> + goto out;
> + }
> + cmd_bt();
> +out:
> + /* Restore all */
> + memcpy(&pc->main_loop_env, &main_loop_env_save, sizeof(jmp_buf));
> + pc->flags = flags_save;
> + fp = fp_save;
> + pc->error_fp = error_fp_save;
> +}
> +#endif
> diff --git a/ppc64.c b/ppc64.c
> index 7ac12fe..532eb3f 100644
> --- a/ppc64.c
> +++ b/ppc64.c
> @@ -80,6 +80,10 @@ struct user_regs_bitmap_struct {
> ulong bitmap[32];
> };
>
> +#define MAX_EXCEPTION_STACKS 7
> +ulong extra_stacks_idx = 0;
> +struct user_regs_bitmap_struct *extra_stacks_regs[MAX_EXCEPTION_STACKS] = {0};
> +
> static int is_opal_context(ulong sp, ulong nip)
> {
> uint64_t opal_start, opal_end;
> diff --git a/task.c b/task.c
> index e07b479..ec04b55 100644
> --- a/task.c
> +++ b/task.c
> @@ -3062,7 +3062,7 @@ sort_context_array(void)
> curtask = CURRENT_TASK();
> qsort((void *)tt->context_array, (size_t)tt->running_tasks,
> sizeof(struct task_context), sort_by_pid);
> - set_context(curtask, NO_PID, TRUE);
> + set_context(curtask, NO_PID, FALSE);
>
> sort_context_by_task();
> }
> @@ -3109,7 +3109,7 @@ sort_context_array_by_last_run(void)
> curtask = CURRENT_TASK();
> qsort((void *)tt->context_array, (size_t)tt->running_tasks,
> sizeof(struct task_context), sort_by_last_run);
> - set_context(curtask, NO_PID, TRUE);
> + set_context(curtask, NO_PID, FALSE);
>
> sort_context_by_task();
> }
> diff --git a/x86_64.c b/x86_64.c
> index a46fb9d..ee23d8b 100644
> --- a/x86_64.c
> +++ b/x86_64.c
> @@ -160,6 +160,9 @@ struct user_regs_bitmap_struct {
> ulong bitmap[32];
> };
>
> +ulong extra_stacks_idx = 0;
> +struct user_regs_bitmap_struct *extra_stacks_regs[MAX_EXCEPTION_STACKS] = {0};
> +
> /*
> * Do all necessary machine-specific setup here. This is called several
> * times during initialization.
> --
> 2.47.0
--
Crash-utility mailing list -- devel(a)lists.crash-utility.osci.io
To unsubscribe send an email to devel-leave(a)lists.crash-utility.osci.io
https://${domain_name}/admin/lists/devel.lists.crash-utility.osci.io/
Contribution Guidelines:
https://github.com/crash-utility/crash/wiki