Hi Guanyou,
On Sat, Nov 2, 2024 at 1:35 AM Guanyou Chen <chenguanyou9338(a)gmail.com> wrote:
Hi Lianbo, Tao
Remove the offline status check: we can query the registers of
each CPU at any time and obtain its stack.
CPU 0: [OFFLINE]
X0: 0000000000000000 X1: 0000000000000000 X2: 0000000000000000
X3: 000000000003fcbc X4: 0000000000000001 X5: 0000000000000000
X6: 0000000000000000 X7: 0000000000000000 X8: 00000000ffffffff
X9: ffffffc009e6ae48 X10: ffffffc009e6ae20 X11: 0000000000000000
X12: 0000000000000002 X13: 0000000000000004 X14: 0000000000000000
X15: 0000000000004000 X16: 00000000f90f05f6 X17: 00000000f90f05f6
X18: 0000000000000000 X19: 0000000000000002 X20: ffffffc009e3b008
X21: ffffffc00a01d020 X22: ffffffc009f798f0 X23: 0000000060001000
X24: 0000000000000000 X25: 0000000000000000 X26: 0000000000000000
X27: 0000000000000000 X28: ffffff8111eecb00 X29: ffffffc008003f50
LR: ffffffc00802df88 SP: ffffffc008003f40 PC: ffffffc00802df94
PSTATE: 024003c5 FPVALID: 00000000
crash> bt -c 0
PID: 1842 TASK: ffffff8111eecb00 CPU: 0 COMMAND: "android.bg"
00 [ffffffc008003f50] ipi_handler at ffffffc00802df90
01 [ffffffc008003f90] handle_percpu_devid_irq at ffffffc008146f50
02 [ffffffc008003fd0] generic_handle_domain_irq at ffffffc00813f484
03 [ffffffc008003fe0] gic_handle_irq at ffffffc008010140
--- <IRQ stack> ---
04 [ffffffc019c3be20] call_on_irq_stack at ffffffc008016ed4
05 [ffffffc019c3be40] do_interrupt_handler at ffffffc008019cb4
06 [ffffffc019c3be60] el0_interrupt at ffffffc008f7b848
07 [ffffffc019c3be90] __el0_irq_handler_common at ffffffc008f7b368
08 [ffffffc019c3bea0] el0t_64_irq_handler at ffffffc008f7b344
09 [ffffffc019c3bfe0] el0t_64_irq at ffffffc008011720
PC: 0000000072415108 LR: 00000000724150d0 SP: 0000007691d2bfa0
X29: 00000000734f60e0 X28: 000000001a2fa678 X27: 0000000000000063
X26: 000000001a2fa678 X25: 000000001a2fa678 X24: 000000001a7bb718
X23: 000000001a7ba198 X22: 000000001a7ba190 X21: b4000076f9a828c8
X20: 0000000000000000 X19: b4000076f9a82800 X18: 000000768d68a000
X17: 00000000708f89f8 X16: 00000000000000f0 X15: 0000000000000000
X14: 0000007691d2bca0 X13: 0000000080100000 X12: 0000000000000000
X11: 0000000000000000 X10: 0000000000000000 X9: 9636716211228cd4
X8: 9636716211228cd4 X7: 0000000000000010 X6: 000000001a7bb728
X5: 0000000070845200 X4: 0000000018a40d38 X3: 00000000707e8f98
X2: 000000001a2fa678 X1: 000000001a7ba198 X0: 0000000070847aa8
ORIG_X0: 00000000ffffff9c SYSCALLNO: ffffffff PSTATE: 60001000
Signed-off-by: Guanyou.Chen <chenguanyou(a)xiaomi.com>
---
netdump.c | 15 +++++----------
1 file changed, 5 insertions(+), 10 deletions(-)
diff --git a/netdump.c b/netdump.c
index 435793b..455f90e 100644
--- a/netdump.c
+++ b/netdump.c
@@ -101,7 +101,7 @@ map_cpus_to_prstatus(void)
nrcpus = (kt->kernel_NR_CPUS ? kt->kernel_NR_CPUS : NR_CPUS);
for (i = 0; i < nrcpus; i++) {
- if (in_cpu_map(ONLINE_MAP, i) && machdep->is_cpu_prstatus_valid(i)) {
+ if (machdep->is_cpu_prstatus_valid(i)) {
nd->nt_prstatus_percpu[i] = nt_ptr[i];
This patch depends on your previous "bugfix map cpus register"
patch. I'm not sure exactly how the two patches relate, but they
don't seem to be independent, so please send them together as a
single patchset.
However, this patch causes a regression: with the
in_cpu_map(ONLINE_MAP, i) check removed from in front of
machdep->is_cpu_prstatus_valid(i), crash fails as shown in the
following output and stack trace:
...
WARNING: cpu 2027: invalid NT_PRSTATUS note (n_type != NT_PRSTATUS)
WARNING: cpu 2028: invalid NT_PRSTATUS note (n_type != NT_PRSTATUS)
malloc_bp[1999]: 585a3c0
smallest: 32
largest: 65536
embedded: 2032
max_embedded: 2032
mallocs: 2000
frees: 0
reqs/total: 2063/837500
average size: 406
crash: cannot allocate any more memory!
...
(gdb) bt
#0 getbuf (reqsize=368) at tools.c:6130
#1 0x000000000065be0b in have_crash_notes (cpu=2029) at diskdump.c:123
#2 0x000000000065bf57 in diskdump_is_cpu_prstatus_valid (cpu=2029) at
diskdump.c:155
#3 0x000000000064b055 in map_cpus_to_prstatus () at netdump.c:104
...
The reason is that kt->kernel_NR_CPUS can be large (5120 in this case).
Without the in_cpu_map() filter, machdep->is_cpu_prstatus_valid() is
called for every one of those CPUs, and the getbuf() calls it makes
exhaust the memory buffer.
Thanks,
Tao Liu
nd->num_prstatus_notes =
MAX(nd->num_prstatus_notes, i+1);
@@ -2998,15 +2998,10 @@ dump_registers_for_elf_dumpfiles(void)
return;
}
- for (c = 0; c < kt->cpus; c++) {
- if (check_offline_cpu(c)) {
- fprintf(fp, "%sCPU %d: [OFFLINE]\n", c ? "\n" :
"", c);
- continue;
- }
-
- fprintf(fp, "%sCPU %d:\n", c ? "\n" : "",
c);
- display_regs_from_elf_notes(c, fp);
- }
+ for (c = 0; c < kt->cpus; c++) {
+ fprintf(fp, "%sCPU %d: %s\n", c ? "\n" : "", c,
check_offline_cpu(c) ? "[OFFLINE]" : "[ONLINE]");
+ display_regs_from_elf_notes(c, fp);
+ }
}
struct x86_64_user_regs_struct {
--
2.34.1
Guanyou.
Thanks.