[PATCH] arm64: Fix broken/incomplete gdb backtrace and unify output format
by lipengfei28@xiaomi.com
From 4d832a33ebd80bd109cc5a47f98c6b35fdcbd956 Mon Sep 17 00:00:00 2001
From: lipengfei28 <lipengfei28(a)xiaomi.com>
Date: Fri, 23 Jan 2026 16:24:07 +0800
Subject: [PATCH] arm64: Fix broken/incomplete gdb backtrace and unify output
format
This patch fixes multiple issues with 'gdb bt' on ARM64, where the backtrace
would be interrupted, contain garbage threads, or display fragmented output.
1. Fix Out-of-Bounds Read in Exception Frame Handling:
In `arm64_print_exception_frame`, the code previously used `memcpy` to copy
`sizeof(struct arm64_pt_regs)` bytes from a `struct arm64_stackframe *` source.
Since `stackframe` is significantly smaller than `pt_regs`, this caused an
out-of-bounds read, populating the GDB thread registers with stack garbage
(often resulting in invalid addresses like -3/0xff...fd).
This is fixed by manually copying only the valid registers (PC, SP, FP, etc.)
and properly initializing the bitmap.
2. Bridge the Gap Between IRQ and Process Stacks:
Previously, GDB unwinding would stop at `call_on_irq_stack` because it could
not automatically unwind through the assembly trampoline back to the process
stack.
Modified `arm64_switch_stack` (and the overflow variant) to "peek" one frame
ahead (reading the saved FP/PC of the caller) before registering the new
GDB substack. This effectively bridges the discontinuity, allowing GDB to
show frames like `do_interrupt_handler` that were previously missing.
3. Unify and Format GDB Output:
Modified `gdb_interface.c` to:
- Strip "Thread <id>" headers to present a continuous backtrace similar to
the native `crash bt`.
- Renumber stack frames sequentially (e.g., #0 to #30) instead of resetting
at each stack switch.
- Add indentation/alignment for frames where GDB omits the address (e.g.,
inline functions) to improve readability.
4. Prevent Invalid Thread Creation:
Added checks to ensure a GDB substack is only created if the Program Counter
(PC) is non-zero, preventing the display of "corrupt" or empty threads.
Tested on: Android 6.x ARM64
Signed-off-by: lipengfei28 <lipengfei28(a)xiaomi.com>
---
arm64.c | 117 +++++++++++++++++++++++++++++++++++++------
gdb_interface.c | 130 +++++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 229 insertions(+), 18 deletions(-)
diff --git a/arm64.c b/arm64.c
index c125655..f842739 100644
--- a/arm64.c
+++ b/arm64.c
@@ -3026,6 +3026,9 @@ static char *arm64_exception_functions[] = {
"do_el0_irq_bp_hardening",
"do_sp_pc_abort",
"handle_bad_stack",
+ "el1h_64_sync",
+ "el1h_64_irq",
+ "el1h_64_error",
NULL
};
@@ -3123,6 +3126,11 @@ arm64_print_stackframe_entry(struct bt_info *bt, int level, struct arm64_stackfr
fprintf(ofp, "\n");
+ if (STREQ(name, "el1h_64_irq") || STREQ(name, "el1h_64_sync"))
+ if (arm64_is_kernel_exception_frame(bt, frame->sp)) {
+ arm64_print_exception_frame(bt, frame->sp, KERNEL_MODE, ofp);
+ }
+
if (bt->flags & BT_LINE_NUMBERS) {
get_line_number(branch_pc, buf, FALSE);
if (strlen(buf))
@@ -3775,11 +3783,12 @@ arm64_back_trace_cmd(struct bt_info *bt)
REG_SEQ(arm64_pt_regs, pc));
SET_BIT(extra_stacks_regs[extra_stacks_idx]->bitmap,
REG_SEQ(arm64_pt_regs, sp));
- if (!bt->machdep ||
+ if (extra_stacks_regs[extra_stacks_idx]->ur.pc &&
+ (!bt->machdep ||
(extra_stacks_regs[extra_stacks_idx]->ur.sp !=
((struct user_regs_bitmap_struct *)(bt->machdep))->ur.sp &&
extra_stacks_regs[extra_stacks_idx]->ur.pc !=
- ((struct user_regs_bitmap_struct *)(bt->machdep))->ur.pc)) {
+ ((struct user_regs_bitmap_struct *)(bt->machdep))->ur.pc))) {
gdb_add_substack (extra_stacks_idx++);
}
}
@@ -3925,11 +3934,12 @@ arm64_back_trace_cmd_v2(struct bt_info *bt)
REG_SEQ(arm64_pt_regs, pc));
SET_BIT(extra_stacks_regs[extra_stacks_idx]->bitmap,
REG_SEQ(arm64_pt_regs, sp));
- if (!bt->machdep ||
+ if (extra_stacks_regs[extra_stacks_idx]->ur.pc &&
+ (!bt->machdep ||
(extra_stacks_regs[extra_stacks_idx]->ur.sp !=
((struct user_regs_bitmap_struct *)(bt->machdep))->ur.sp &&
extra_stacks_regs[extra_stacks_idx]->ur.pc !=
- ((struct user_regs_bitmap_struct *)(bt->machdep))->ur.pc)) {
+ ((struct user_regs_bitmap_struct *)(bt->machdep))->ur.pc))) {
gdb_add_substack (extra_stacks_idx++);
}
}
@@ -4263,8 +4273,40 @@ arm64_switch_stack(struct bt_info *bt, struct arm64_stackframe *frame, FILE *ofp
if (frame->fp == 0)
return USER_MODE;
- if (!(machdep->flags & UNW_4_14))
+ if (!(machdep->flags & UNW_4_14)) {
arm64_print_exception_frame(bt, frame->sp, KERNEL_MODE, ofp);
+ } else {
+ if (!extra_stacks_regs[extra_stacks_idx]) {
+ extra_stacks_regs[extra_stacks_idx] = (struct user_regs_bitmap_struct *)
+ malloc(sizeof(struct user_regs_bitmap_struct));
+ }
+ memset(extra_stacks_regs[extra_stacks_idx], 0, sizeof(struct user_regs_bitmap_struct));
+ struct user_regs_bitmap_struct *ur_ptr = extra_stacks_regs[extra_stacks_idx];
+
+ ulong next_fp = GET_STACK_ULONG(frame->fp);
+ ulong next_pc = GET_STACK_ULONG(frame->fp + 8);
+ ulong next_sp = frame->fp + 16;
+
+ if (is_kernel_text(next_pc | ms->CONFIG_ARM64_KERNELPACMASK))
+ next_pc |= ms->CONFIG_ARM64_KERNELPACMASK;
+
+ ur_ptr->ur.pc = next_pc;
+ ur_ptr->ur.sp = next_sp;
+ ur_ptr->ur.regs[29] = next_fp;
+
+ SET_BIT(ur_ptr->bitmap, REG_SEQ(arm64_pt_regs, pc));
+ SET_BIT(ur_ptr->bitmap, REG_SEQ(arm64_pt_regs, sp));
+ SET_BIT(ur_ptr->bitmap, REG_SEQ(arm64_pt_regs, regs[0]) + 29);
+
+ if (ur_ptr->ur.pc &&
+ (!bt->machdep ||
+ (ur_ptr->ur.sp !=
+ ((struct user_regs_bitmap_struct *)(bt->machdep))->ur.sp &&
+ ur_ptr->ur.pc !=
+ ((struct user_regs_bitmap_struct *)(bt->machdep))->ur.pc))) {
+ gdb_add_substack (extra_stacks_idx++);
+ }
+ }
return KERNEL_MODE;
}
@@ -4300,8 +4342,40 @@ arm64_switch_stack_from_overflow(struct bt_info *bt, struct arm64_stackframe *fr
if (frame->fp == 0)
return USER_MODE;
- if (!(machdep->flags & UNW_4_14))
+ if (!(machdep->flags & UNW_4_14)) {
arm64_print_exception_frame(bt, frame->sp, KERNEL_MODE, ofp);
+ } else {
+ if (!extra_stacks_regs[extra_stacks_idx]) {
+ extra_stacks_regs[extra_stacks_idx] = (struct user_regs_bitmap_struct *)
+ malloc(sizeof(struct user_regs_bitmap_struct));
+ }
+ memset(extra_stacks_regs[extra_stacks_idx], 0, sizeof(struct user_regs_bitmap_struct));
+ struct user_regs_bitmap_struct *ur_ptr = extra_stacks_regs[extra_stacks_idx];
+
+ ulong next_fp = GET_STACK_ULONG(frame->fp);
+ ulong next_pc = GET_STACK_ULONG(frame->fp + 8);
+ ulong next_sp = frame->fp + 16;
+
+ if (is_kernel_text(next_pc | ms->CONFIG_ARM64_KERNELPACMASK))
+ next_pc |= ms->CONFIG_ARM64_KERNELPACMASK;
+
+ ur_ptr->ur.pc = next_pc;
+ ur_ptr->ur.sp = next_sp;
+ ur_ptr->ur.regs[29] = next_fp;
+
+ SET_BIT(ur_ptr->bitmap, REG_SEQ(arm64_pt_regs, pc));
+ SET_BIT(ur_ptr->bitmap, REG_SEQ(arm64_pt_regs, sp));
+ SET_BIT(ur_ptr->bitmap, REG_SEQ(arm64_pt_regs, regs[0]) + 29);
+
+ if (ur_ptr->ur.pc &&
+ (!bt->machdep ||
+ (ur_ptr->ur.sp !=
+ ((struct user_regs_bitmap_struct *)(bt->machdep))->ur.sp &&
+ ur_ptr->ur.pc !=
+ ((struct user_regs_bitmap_struct *)(bt->machdep))->ur.pc))) {
+ gdb_add_substack (extra_stacks_idx++);
+ }
+ }
return KERNEL_MODE;
}
@@ -4556,16 +4630,27 @@ arm64_print_exception_frame(struct bt_info *bt, ulong pt_regs, int mode, FILE *o
}
memset(extra_stacks_regs[extra_stacks_idx], 0,
sizeof(struct user_regs_bitmap_struct));
- memcpy(&extra_stacks_regs[extra_stacks_idx]->ur, regs,
- sizeof(struct arm64_pt_regs));
- for (int i = 0; i < sizeof(struct arm64_pt_regs)/sizeof(long); i++)
- SET_BIT(extra_stacks_regs[extra_stacks_idx]->bitmap, i);
- if (!bt->machdep ||
- (extra_stacks_regs[extra_stacks_idx]->ur.sp !=
- ((struct user_regs_bitmap_struct *)(bt->machdep))->ur.sp &&
- extra_stacks_regs[extra_stacks_idx]->ur.pc !=
- ((struct user_regs_bitmap_struct *)(bt->machdep))->ur.pc)) {
- gdb_add_substack (extra_stacks_idx++);
+ {
+ struct user_regs_bitmap_struct *ur_ptr = extra_stacks_regs[extra_stacks_idx];
+ int i;
+
+ ur_ptr->ur.pc = regs->pc;
+ ur_ptr->ur.sp = regs->sp;
+ ur_ptr->ur.pstate = regs->pstate;
+ for (i = 0; i < 31; i++)
+ ur_ptr->ur.regs[i] = regs->regs[i];
+
+ for (i = 0; i < 34; i++)
+ SET_BIT(ur_ptr->bitmap, i);
+
+ if (ur_ptr->ur.pc &&
+ (!bt->machdep ||
+ (ur_ptr->ur.sp !=
+ ((struct user_regs_bitmap_struct *)(bt->machdep))->ur.sp &&
+ ur_ptr->ur.pc !=
+ ((struct user_regs_bitmap_struct *)(bt->machdep))->ur.pc))) {
+ gdb_add_substack (extra_stacks_idx++);
+ }
}
}
}
diff --git a/gdb_interface.c b/gdb_interface.c
index 9f76f85..d14144e 100644
--- a/gdb_interface.c
+++ b/gdb_interface.c
@@ -16,6 +16,7 @@
*/
#include "defs.h"
+#include <ctype.h>
#if !defined(GDB_10_2) && !defined(GDB_16_2)
static void exit_after_gdb_info(void);
@@ -779,22 +780,53 @@ strip_redirection(char *buf)
/*
* Command for passing strings directly to gdb.
*/
+static void format_and_print_gdb_bt(FILE *input_fp);
+
void
cmd_gdb(void)
{
char buf[BUFSIZE];
char **argv;
+ int i;
argv = STREQ(args[0], "gdb") ? &args[1] : &args[0];
if (*argv == NULL)
cmd_usage(pc->curcmd, SYNOPSIS);
+ if (STREQ(*argv, "bt")) {
+ FILE *tmp_fp;
+
+ if ((tmp_fp = tmpfile()) == NULL) {
+ error(FATAL, "cannot create temporary file for GDB output\n");
+ }
+
+ strcpy(buf, "thread apply all bt");
+ /* Append any arguments that were passed to 'bt' */
+ /* args[0] is "gdb", args[1] is "bt", so options start at index 2 */
+ for (i = 2; i < argcnt; i++) {
+ strcat(buf, " ");
+ strcat(buf, args[i]);
+ }
+
+ if (!gdb_pass_through(buf, tmp_fp, GNU_RETURN_ON_ERROR)) {
+ fclose(tmp_fp);
+ error(INFO, "gdb request failed: %s\n", buf);
+ return;
+ }
+
+ rewind(tmp_fp);
+ format_and_print_gdb_bt(tmp_fp);
+
+ fclose(tmp_fp);
+ return;
+ }
+
if (STREQ(*argv, "set") && argv[1]) {
/*
* Intercept set commands in case something has to be done
- * here or elsewhere.
- */
+ * here or elsewhere.
+ */
if (STREQ(argv[1], "gdb")) {
cmd_set();
return;
@@ -821,6 +853,100 @@ cmd_gdb(void)
}
}
+#define MAX_BT_LINES 1024
+#define MAX_THREAD_BLOCKS 64
+
+/*
+ * Helper function to parse 'thread apply all bt' output, reverse the
+ * thread blocks, and print the result.
+ */
+static void format_and_print_gdb_bt(FILE *input_fp)
+{
+ char *lines[MAX_BT_LINES];
+ int line_count = 0;
+ int thread_starts[MAX_THREAD_BLOCKS];
+ int thread_count = 0;
+ char line_buffer[BUFSIZE];
+ int i, j;
+ int global_frame_cnt = 0;
+
+ // Read all lines into memory
+ while (fgets(line_buffer, BUFSIZE, input_fp) && line_count < MAX_BT_LINES) {
+ if (strstr(line_buffer, "Thread ") == line_buffer) {
+ if (thread_count < MAX_THREAD_BLOCKS) {
+ thread_starts[thread_count++] = line_count;
+ }
+ }
+ lines[line_count] = strdup(line_buffer);
+ if (!lines[line_count]) {
+ error(FATAL, "strdup failed while reading gdb output\n");
+ }
+ line_count++;
+ }
+
+ if (thread_count == 0) { // If no threads, just print everything as is.
+ for (i = 0; i < line_count; i++) {
+ fputs(lines[i], fp);
+ }
+ } else {
+ // Iterate threads in reverse order to fix the display order
+ for (i = thread_count - 1; i >= 0; i--) {
+ int start_line = thread_starts[i];
+ int end_line = (i == thread_count - 1) ? line_count : thread_starts[i+1];
+
+ // Print thread frames, stripping header and renumbering
+ for (j = start_line; j < end_line; j++) {
+ char *line = lines[j];
+ // Skip "Thread " line
+ if (strncmp(line, "Thread ", 7) == 0) continue;
+ // Skip "Backtrace stopped" or similar noise if desired,
+ // but usually GDB prints "Backtrace stopped" at the very end.
+ // We'll keep it for info unless it's in the middle of our merge.
+ // Actually, duplicate frames or stops might appear.
+ // For now, simple renumbering.
+
+ char *ptr = line;
+ while (*ptr == ' ' || *ptr == '\t') ptr++;
+
+ if (*ptr == '#') {
+ // Frame line: #0 0x...
+ char *rest = ptr + 1;
+ while (isdigit(*rest)) rest++; // Skip old number
+
+ // Parse content to check for address
+ char *p = rest;
+ while (*p == ' ' || *p == '\t') p++;
+
+ int has_addr = (strncmp(p, "0x", 2) == 0);
+
+ // Print number
+ fprintf(fp, "#%-3d", global_frame_cnt++);
+
+ // Align if address is missing
+ if (!has_addr) {
+ // Align function name with frames that have addresses
+ // 64-bit: 0x... (18 chars) + " in " (4 chars) = 22 chars
+ // 32-bit: 0x... (10 chars) + " in " (4 chars) = 14 chars
+ int width = (machdep->bits == 64) ? 22 : 14;
+ for (int k = 0; k < width; k++) fputc(' ', fp);
+ }
+
+ fputs(rest, fp);
+ } else {
+ // Other lines (e.g. variable info, code)
+ fputs(line, fp);
+ }
+ }
+ }
+ }
+
+ // Cleanup
+ for (i = 0; i < line_count; i++) {
+ free(lines[i]);
+ }
+}
+
+
/*
* The gdb target_xfer_memory() has a hook installed to re-route
* all memory accesses back here; reads of 1 or 4 bytes come primarily
--
2.34.1
2 weeks, 6 days
[PATCH v3 0/4] xarray: add large folio support
by Huang Shijie
The Linux kernel supports large folios for the page cache by default,
but the current CRASH does not support large folios.
As a result, we may hit errors when a large folio is encountered,
such as those reported in this email:
https://www.spinics.net/linux/fedora/redhat-crash-utility/msg11238.html
------------------------------
files: page_to_nid: invalid page: 0
files: page_to_nid: invalid page: 0
files: page_to_nid: invalid page: 0
files: page_to_nid: invalid page: 10
files: page_to_nid: invalid page: 10
files: page_to_nid: invalid page: 10
files: page_to_nid: invalid page: 20
files: page_to_nid: invalid page: 20
files: page_to_nid: invalid page: 20
files: page_to_nid: invalid page: 30
files: page_to_nid: invalid page: 30
files: page_to_nid: invalid page: 30
files: page_to_nid: invalid page: 40
files: page_to_nid: invalid page: 40
files: page_to_nid: invalid page: 40
files: page_to_nid: invalid page: 50
files: page_to_nid: invalid page: 50
files: page_to_nid: invalid page: 50
files: page_to_nid: invalid page: 60
files: page_to_nid: invalid page: 60
------------------------------
The first 3 patches add large folio support to CRASH.
The last patch is a new version of an old patch:
it adds the "files -n" command.
v2-->v3:
Rewrote folio_order() in patch 2 to work with different
kernel versions.
v1-->v2:
1.) Rebase the kernel to later 7.0-rc1(merge window)
2.) Fixed a bug in patch 3: the latest kernel supports folios
whose page order is bigger than 5:
"xarray: add large folio support"
Huang Shijie (4):
xarray: add a new parameter for do_xarray
add folio_order function
xarray: add large folio support
add "files -n" command for an inode
bpf.c | 8 +++---
defs.h | 14 +++++++++-
dev.c | 4 +--
diskdump.c | 10 +++++--
filesys.c | 77 ++++++++++++++++++++++++++++++++++++++++++++++--------
help.c | 24 ++++++++++++++++-
ipcs.c | 4 +--
kernel.c | 4 +--
memory.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++++--
symbols.c | 6 +++++
task.c | 4 +--
tools.c | 16 +++++++++---
12 files changed, 213 insertions(+), 32 deletions(-)
--
2.43.0
3 months, 1 week
Re: [PATCH] Fix dis -lr xxx+0x1 not showing target address instruction
by Lianbo Jiang
On 1/28/26 9:42 AM, devel-request(a)lists.crash-utility.osci.io wrote:
> Date: Mon, 15 Dec 2025 17:15:15 +0800
> From: neilfsun<loyou85(a)gmail.com>
> Subject: [Crash-utility] [PATCH] Fix dis -lr xxx+0x1 not showing
> target address instruction
> To:devel@lists.crash-utility.osci.io
> Cc: neilfsun<neilfsun(a)tencent.com>, Feng Sun<loyou85(a)gmail.com>
> Message-ID:<20251215091515.36428-1-neilfsun(a)tencent.com>
>
> When using "dis -lr xxx+0x1", it is not correctly shown, for example:
> crash> dis -lr rb_next+0x1
> /kernel/lib/rbtree.c: 445
> 0xffffffff8133ff60 <rb_next>: push %rbp
>
> However, dis -lr rb_next+0x4 is correctly shown,
> crash> dis -lr rb_next+0x4
> /kernel/lib/rbtree.c: 445
> 0xffffffff8133ff60 <rb_next>: push %rbp
> /kernel/lib/rbtree.c: 448
> 0xffffffff8133ff61 <rb_next+0x1>: mov (%rdi),%rdx
> /kernel/lib/rbtree.c: 445
> 0xffffffff8133ff64 <rb_next+0x4>: mov %rsp,%rbp
>
> The reverse mode only disassembled (target - function_start) bytes, which
> was insufficient to reach the target instruction.
>
> Signed-off-by: neilfsun<neilfsun(a)tencent.com>
> Signed-off-by: Feng Sun<loyou85(a)gmail.com>
> ---
> kernel.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/kernel.c b/kernel.c
> index bb148d0..93ea7a6 100644
> --- a/kernel.c
> +++ b/kernel.c
> @@ -2120,7 +2120,7 @@ cmd_dis(void)
>
> if (reverse)
> sprintf(buf5, "x/%ldi 0x%lx",
> - (target - req->addr) ? target - req->addr : 1,
> + req->addr2 - req->addr,
This will always ask gdb to disassemble a lot of asm instructions, can
you help try this one?
diff --git a/kernel.c b/kernel.c
index ccc4b3d..550f61a 100644
--- a/kernel.c
+++ b/kernel.c
@@ -2124,8 +2124,7 @@ cmd_dis(void)
open_tmpfile();
if (reverse)
- sprintf(buf5, "x/%ldi 0x%lx",
- (target - req->addr) ? target -
req->addr : 1,
+ sprintf(buf5, "x/%ldi 0x%lx", roundup(abs(target
- req->addr), 2),
req->addr);
else
sprintf(buf5, "x/%ldi 0x%lx",
Thanks
Lianbo
> req->addr);
> else
> sprintf(buf5, "x/%ldi 0x%lx",
> -- 2.50.1
3 months, 2 weeks
[PATCH] Fix external command output not redirected to pipe
by neilfsun
When executing external commands (cat, etc.) with pipes,
the command output was not properly redirected to the pipe.
crash> cat /tmp/testfile
line1
line2
line3
crash> cat /tmp/testfile | head -1
line1
line2
line3
crash>
Signed-off-by: neilfsun <neilfsun(a)tencent.com>
Signed-off-by: Feng Sun <loyou85(a)gmail.com>
---
main.c | 17 +++++++++++++++--
1 file changed, 15 insertions(+), 2 deletions(-)
diff --git a/main.c b/main.c
index 71bcc15..95d455a 100644
--- a/main.c
+++ b/main.c
@@ -1034,6 +1034,8 @@ is_external_command(void)
int i;
char *cmd;
char command[BUFSIZE];
+ FILE *pipe;
+ char buf[BUFSIZE];
cmd = args[0];
@@ -1057,8 +1059,19 @@ is_external_command(void)
else
strcat(command, args[i]);
}
- if (system(command) == -1)
- perror(command);
+
+ if (pc->redirect & REDIRECT_TO_PIPE) {
+ if ((pipe = popen(command, "r")) == NULL) {
+ error(INFO, "cannot execute: %s\n", command);
+ return TRUE;
+ }
+ while (fgets(buf, BUFSIZE, pipe))
+ fputs(buf, fp);
+ pclose(pipe);
+ } else {
+ if (system(command) == -1)
+ perror(command);
+ }
return TRUE;
}
--
2.50.1
3 months, 2 weeks
[PATCH] Fix for "help -r/-D" to display register values and notes[]
by Lianbo Jiang
Currently, when executing the "help -r" command on the aarch64
architecture, only the register values of crashing cpu are displayed,
but the register values of other cpus are not shown with the following
messages:
crash> help -r
CPU 0:
help: registers not collected for cpu 0
CPU 1:
X0: 4000000000000000 X1: ffffc8c40d453d30 X2: ffff37436ecd0000
X3: ffffc8c40d475340 X4: 00000000000968a4 X5: 4000000000000000
X6: 4000000000000000 X7: 7fffffffffffffff X8: 0000016f2a240000
X9: ffffc8c40b0c64fc X10: 0000000000001000 X11: 00000000000000c0
X12: 0000000000000000 X13: 0000000000000072 X14: 0000000000000072
X15: 0000aaab0f98a320 X16: 0000000000000000 X17: 0000000000000000
X18: 0000000000000000 X19: 0000000000000000 X20: 0000000000000000
X21: ffffc8c40d475340 X22: ffffc8c40d460b80 X23: 00000000000000c0
X24: 00000007bf68b1b0 X25: 00000007b9284000 X26: 00000007bf68b1a8
X27: 00000007bbc95a40 X28: 00000007bbc6805a X29: ffffc8c40d453d80
LR: ffffc8c40bebd5d8 SP: ffffc8c40d453d80 PC: ffffc8c40bebd5e8
PSTATE: 60400005 FPVALID: 00000000
CPU 2:
help: registers not collected for cpu 2
CPU 3:
help: registers not collected for cpu 3
crash> help -D
...
num_prstatus_notes: 4
notes[0]: 0
notes[1]: dafa380 (NT_PRSTATUS)
si.signo: 0 si.code: 0 si.errno: 0
cursig: 0 sigpend: 0 sighold: 0
pid: 0 ppid: 0 pgrp: 0 sid:0
utime: 0.000000 stime: 0.000000
cutime: 0.000000 cstime: 0.000000
X0: 4000000000000000 X1: ffffc8c40d453d30 X2: ffff37436ecd0000
X3: ffffc8c40d475340 X4: 00000000000968a4 X5: 4000000000000000
X6: 4000000000000000 X7: 7fffffffffffffff X8: 0000016f2a240000
X9: ffffc8c40b0c64fc X10: 0000000000001000 X11: 00000000000000c0
X12: 0000000000000000 X13: 0000000000000072 X14: 0000000000000072
X15: 0000aaab0f98a320 X16: 0000000000000000 X17: 0000000000000000
X18: 0000000000000000 X19: 0000000000000000 X20: 0000000000000000
X21: ffffc8c40d475340 X22: ffffc8c40d460b80 X23: 00000000000000c0
X24: 00000007bf68b1b0 X25: 00000007b9284000 X26: 00000007bf68b1a8
X27: 00000007bbc95a40 X28: 00000007bbc6805a X29: ffffc8c40d453d80
LR: ffffc8c40bebd5d8 SP: ffffc8c40d453d80 PC: ffffc8c40bebd5e8
PSTATE: 60400005 FPVALID: 00000000
notes[2]: 0
notes[3]: 0
...
To fix the current issue, let's map to present cpus instead of online
cpus.
Signed-off-by: Lianbo Jiang <lijiang(a)redhat.com>
---
diskdump.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/diskdump.c b/diskdump.c
index 0ff87822732a..6c81c52e58b6 100644
--- a/diskdump.c
+++ b/diskdump.c
@@ -181,12 +181,12 @@ map_cpus_to_prstatus_kdump_cmprs(void)
BZERO(dd->nt_prstatus_percpu, size);
/*
- * Re-populate the array with the notes mapping to online cpus
+ * Re-populate the array with the notes mapping to present cpus
*/
nrcpus = (kt->kernel_NR_CPUS ? kt->kernel_NR_CPUS : NR_CPUS);
for (i = 0, j = 0; i < nrcpus; i++) {
- if (in_cpu_map(ONLINE_MAP, i) && machdep->is_cpu_prstatus_valid(i)) {
+ if (in_cpu_map(PRESENT_MAP, i) && machdep->is_cpu_prstatus_valid(i)) {
dd->nt_prstatus_percpu[i] = nt_ptr[j++];
dd->num_prstatus_notes =
MAX(dd->num_prstatus_notes, i+1);
--
2.51.1
3 months, 2 weeks
[PATCH v2] Reapply "vmcoreinfo: read vmcoreinfo using 'vmcoreinfo_data' when unavailable in elf note"
by Shivang Upadhyay
Commit 7636c13 ("vmcoreinfo: read vmcoreinfo using 'vmcoreinfo_data'
when unavailable in elf note") moved the vmcoreinfo reading to always
read from memory instead of relying on diskdump/netdump's local
handlers. This was later reverted to fix a regression with X86_64 KASLR
images.
Reintroduce the `vmcoreinfo_read_from_memory` as fallback to
diskdump/netdump vmcores.
Sometimes, vmcoreinfo_data is found within the primary format handler,
but the symbol is not available. Pairing this case with vmcoreinfo missing
from memory, we can hit the following warning.
$ ~/crash-dev/crash.aft vmcore vmlinux.gz
...
GNU gdb (GDB) 16.2
Copyright (C) 2024 Free Software Foundation, Inc.
...
Type "apropos word" to search for commands related to "word"...
crash.aft: cannot read vmcoreinfo_data <<--------------
KERNEL: vmlinux.gz
DUMPFILE: vmcore [PARTIAL DUMP]
...
crash.aft>
To fix this, we will only fall back to `vmcoreinfo_read_from_memory`
when the vmcoreinfo_data is not found within the format handler.
This reverts commit 72e2776caf1ca41dffcc8aba11c55c636565725b.
Cc: Aditya Gupta <adityag(a)linux.ibm.com>
Cc: Tao Liu <ltao(a)redhat.com>
Cc: Misbah Anjum N <misanjum(a)linux.ibm.com>
Signed-off-by: Shivang Upadhyay <shivangu(a)linux.ibm.com>
---
defs.h | 1 +
diskdump.c | 5 +++++
kernel.c | 17 ++++++++++++-----
netdump.c | 5 +++--
4 files changed, 21 insertions(+), 7 deletions(-)
diff --git a/defs.h b/defs.h
index cfdfa08..a6f4372 100644
--- a/defs.h
+++ b/defs.h
@@ -6236,6 +6236,7 @@ void dump_kernel_table(int);
void dump_bt_info(struct bt_info *, char *where);
void dump_log(int);
void parse_kernel_version(char *);
+char *vmcoreinfo_read_from_memory(const char *);
#define LOG_LEVEL(v) ((v) & 0x07)
#define SHOW_LOG_LEVEL (0x1)
diff --git a/diskdump.c b/diskdump.c
index 0ff8782..fc26915 100644
--- a/diskdump.c
+++ b/diskdump.c
@@ -2389,6 +2389,7 @@ vmcoreinfo_read_string(const char *key)
off_t offset;
char keybuf[BUFSIZE];
const off_t failed = (off_t)-1;
+ bool info_found = false;
if (dd->header->header_version < 3)
return NULL;
@@ -2421,6 +2422,7 @@ vmcoreinfo_read_string(const char *key)
}
buf[size_vmcoreinfo] = '\n';
+ info_found = true;
if ((p1 = strstr(buf, keybuf))) {
p2 = p1 + strlen(keybuf);
@@ -2434,6 +2436,9 @@ err:
if (buf)
free(buf);
+ if (!info_found && value_string == NULL)
+ return vmcoreinfo_read_from_memory(key);
+
return value_string;
}
diff --git a/kernel.c b/kernel.c
index ccc4b3d..8781d6a 100644
--- a/kernel.c
+++ b/kernel.c
@@ -100,7 +100,6 @@ static ulong dump_audit_skb_queue(ulong);
static ulong __dump_audit(char *);
static void dump_audit(void);
static void dump_printk_safe_seq_buf(int);
-static char *vmcoreinfo_read_string(const char *);
static void check_vmcoreinfo(void);
static int is_pvops_xen(void);
static int get_linux_banner_from_vmlinux(char *, size_t);
@@ -12031,8 +12030,8 @@ dump_printk_safe_seq_buf(int msg_flags)
* Returns a string (that has to be freed by the caller) that contains the
* value for key or NULL if the key has not been found.
*/
-static char *
-vmcoreinfo_read_string(const char *key)
+char *
+vmcoreinfo_read_from_memory(const char *key)
{
char *buf, *value_string, *p1, *p2;
size_t value_length;
@@ -12042,6 +12041,14 @@ vmcoreinfo_read_string(const char *key)
buf = value_string = NULL;
+ if (!(pc->flags & GDB_INIT)) {
+ /*
+ * GDB interface hasn't been initialised yet, so can't
+ * access vmcoreinfo_data
+ */
+ return NULL;
+ }
+
switch (get_symbol_type("vmcoreinfo_data", NULL, NULL))
{
case TYPE_CODE_PTR:
@@ -12097,10 +12104,10 @@ check_vmcoreinfo(void)
switch (get_symbol_type("vmcoreinfo_data", NULL, NULL))
{
case TYPE_CODE_PTR:
- pc->read_vmcoreinfo = vmcoreinfo_read_string;
+ pc->read_vmcoreinfo = vmcoreinfo_read_from_memory;
break;
case TYPE_CODE_ARRAY:
- pc->read_vmcoreinfo = vmcoreinfo_read_string;
+ pc->read_vmcoreinfo = vmcoreinfo_read_from_memory;
break;
}
}
diff --git a/netdump.c b/netdump.c
index ba1c6c4..452ef72 100644
--- a/netdump.c
+++ b/netdump.c
@@ -1977,8 +1977,9 @@ vmcoreinfo_read_string(const char *key)
size_vmcoreinfo = 0;
}
- if (!vmcoreinfo)
- return NULL;
+ if (!vmcoreinfo) {
+ return vmcoreinfo_read_from_memory(key);
+ }
/* the '+ 1' is the equal sign */
for (i = 0; i < (int)(size_vmcoreinfo - key_length + 1); i++) {
--
2.52.0
3 months, 2 weeks
[PATCH] Fix file redirection not working for external commands in input files
by neilfsun
When executing external commands (echo, cat, etc.) from input files
with file redirection (>), the output was not properly redirected to
the file but instead printed to stdout.
crash> cat crashrc
echo hi > 1
cat 1
crash> <crashrc
crash> echo hi > 1
hi
crash> cat 1
crash>
When file redirection was used, the code fell through to system() which
outputs to stdout instead of using the global fp that was correctly set
by setup_redirect().
Signed-off-by: neilfsun <neilfsun(a)tencent.com>
Signed-off-by: Feng Sun <loyou85(a)gmail.com>
---
main.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/main.c b/main.c
index 95d455a..2427768 100644
--- a/main.c
+++ b/main.c
@@ -1060,7 +1060,7 @@ is_external_command(void)
strcat(command, args[i]);
}
- if (pc->redirect & REDIRECT_TO_PIPE) {
+ if (pc->redirect & (REDIRECT_TO_PIPE|REDIRECT_TO_FILE)) {
if ((pipe = popen(command, "r")) == NULL) {
error(INFO, "cannot execute: %s\n", command);
return TRUE;
--
2.52.0
3 months, 2 weeks
[PATCH] Fix pipe parsing to correctly handle quotes
by neilfsun
When parsing command lines with pipes, the previous logic incorrectly
handled nested quote types. The string detection used a simple
boolean toggle that would flip state for ANY quote character (single or
double), causing pipes inside quoted strings to be incorrectly recognized
as command separators. Without the fix, the following command hangs:
crash> help -t | awk '/panic_processor/{print "p cpu_info:"$2" | grep phys_proc_id"}'
Signed-off-by: neilfsun <neilfsun(a)tencent.com>
Signed-off-by: Feng Sun <loyou85(a)gmail.com>
---
cmdline.c | 26 +++++++++++++++-----------
1 file changed, 15 insertions(+), 11 deletions(-)
diff --git a/cmdline.c b/cmdline.c
index e9fe92a..43af77c 100644
--- a/cmdline.c
+++ b/cmdline.c
@@ -486,7 +486,7 @@ setup_redirect(int origin)
char *p, which;
int append;
int expression;
- int string;
+ char string;
int ret ATTRIBUTE_UNUSED;
FILE *pipe;
FILE *ofile;
@@ -506,7 +506,7 @@ setup_redirect(int origin)
pc->redirect |= REDIRECT_SHELL_COMMAND;
expression = 0;
- string = FALSE;
+ string = 0;
while (*p) {
if (*p == '(')
@@ -514,10 +514,12 @@ setup_redirect(int origin)
if (*p == ')')
expression--;
- if ((*p == '"') || (*p == '\''))
- string = !string;
+ if (*p == '\'' && string != '"')
+ string = (string == '\'') ? 0 : '\'';
+ else if (*p == '"' && string != '\'')
+ string = (string == '"') ? 0 : '"';
- if (!(expression || string) &&
+ if (!(expression || string) &&
((*p == '|') || (*p == '!'))) {
which = *p;
*p = NULLCHAR;
@@ -674,16 +676,18 @@ int
multiple_pipes(char **input)
{
char *p, *found;
- int quote;
+ char quote;
found = NULL;
- quote = FALSE;
+ quote = 0;
for (p = *input; *p; p++) {
- if ((*p == '\'') || (*p == '"')) {
- quote = !quote;
- continue;
- } else if (quote)
+ if (*p == '\'' && quote != '"')
+ quote = (quote == '\'') ? 0 : '\'';
+ else if (*p == '"' && quote != '\'')
+ quote = (quote == '"') ? 0 : '"';
+
+ if (quote)
continue;
if (*p == '|') {
--
2.52.0
3 months, 2 weeks
[PATCH v2] memory: Handle crash failure in linux-next caused by struct kmem_cache changes
by Mikhail Zaslonko
Since kernel commit cf338283652f ("slab: remove struct kmem_cache_cpu")
in linux-next there is no more 'cpu_slab' member in struct kmem_cache.
Pick 'cpu_sheaves' as an alternative kmem_cache member for kernel-next to
identify the SLUB case.
Without the patch, crash fails to start on kernel-next with the error
message:
crash: invalid structure member offset: kmem_cache_s_num
FILE: memory.c LINE: 9988 FUNCTION: kmem_cache_init()
This fix:
- allows crash to start with no errors on kernel-next.
- makes the 'kmem -s' command work again on kernel-next.
- throws an error message for the 'kmem -S' command if no 'cpu_slab' member
is present in struct kmem_cache:
kmem: -S not supported for this kernel
Signed-off-by: Mikhail Zaslonko <zaslonko(a)linux.ibm.com>
---
memory.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/memory.c b/memory.c
index cbc8d2f..17423a5 100644
--- a/memory.c
+++ b/memory.c
@@ -672,6 +672,7 @@ vm_init(void)
} else if (!VALID_STRUCT(kmem_slab_s) &&
!VALID_STRUCT(slab_s) &&
!MEMBER_EXISTS("kmem_cache", "cpu_slab") &&
+ !MEMBER_EXISTS("kmem_cache", "cpu_sheaves") &&
(VALID_STRUCT(slab) || (vt->flags & SLAB_OVERLOAD_PAGE))) {
vt->flags |= PERCPU_KMALLOC_V2;
@@ -816,7 +817,7 @@ vm_init(void)
if (INVALID_MEMBER(page_first_page))
ANON_MEMBER_OFFSET_INIT(page_first_page, "page", "first_page");
- } else if (MEMBER_EXISTS("kmem_cache", "cpu_slab") &&
+ } else if ((MEMBER_EXISTS("kmem_cache", "cpu_slab") || MEMBER_EXISTS("kmem_cache", "cpu_sheaves")) &&
STRUCT_EXISTS("kmem_cache_node")) {
vt->flags |= KMALLOC_SLUB;
@@ -5270,6 +5271,9 @@ cmd_kmem(void)
if (sflag || Sflag || rflag || !(vt->flags & KMEM_CACHE_INIT))
kmem_cache_init();
+ if (Sflag && !MEMBER_EXISTS("kmem_cache", "cpu_slab"))
+ error(FATAL, "-S not supported for this kernel\n");
+
while (args[optind]) {
if (hexadecimal(args[optind], 0)) {
value[spec_addr++] =
@@ -20401,7 +20405,6 @@ get_cpu_slab_ptr(struct meminfo *si, int cpu, ulong *cpu_freelist)
default:
cpu_slab_ptr = 0;
- error(FATAL, "cannot determine location of kmem_cache.cpu_slab page\n");
}
return cpu_slab_ptr;
--
2.52.0
3 months, 2 weeks
[PATCH 0/4] xarray: add large folio support
by Huang Shijie
The Linux kernel supports large folios for the page cache by default,
but the current CRASH does not support large folios.
As a result, we may hit errors when a large folio is encountered,
such as those reported in this email:
https://www.spinics.net/linux/fedora/redhat-crash-utility/msg11238.html
------------------------------
files: page_to_nid: invalid page: 0
files: page_to_nid: invalid page: 0
files: page_to_nid: invalid page: 0
files: page_to_nid: invalid page: 10
files: page_to_nid: invalid page: 10
files: page_to_nid: invalid page: 10
files: page_to_nid: invalid page: 20
files: page_to_nid: invalid page: 20
files: page_to_nid: invalid page: 20
files: page_to_nid: invalid page: 30
files: page_to_nid: invalid page: 30
files: page_to_nid: invalid page: 30
files: page_to_nid: invalid page: 40
files: page_to_nid: invalid page: 40
files: page_to_nid: invalid page: 40
files: page_to_nid: invalid page: 50
files: page_to_nid: invalid page: 50
files: page_to_nid: invalid page: 50
files: page_to_nid: invalid page: 60
files: page_to_nid: invalid page: 60
------------------------------
The first 3 patches add large folio support to CRASH.
The last patch is a new version of an old patch:
it adds the "files -n" command.
Huang Shijie (4):
xarray: add a new parameter for do_xarray
add folio_order function
xarray: add large folio support
add "files -n" command for an inode
bpf.c | 8 +++---
defs.h | 10 ++++++-
dev.c | 4 +--
diskdump.c | 10 +++++--
filesys.c | 77 ++++++++++++++++++++++++++++++++++++++++++++++--------
help.c | 24 ++++++++++++++++-
ipcs.c | 4 +--
kernel.c | 4 +--
memory.c | 26 ++++++++++++++++--
symbols.c | 2 ++
task.c | 4 +--
tools.c | 10 ++++---
12 files changed, 151 insertions(+), 32 deletions(-)
--
2.43.0
3 months, 2 weeks