There's no NMI on ARM. Hence, stopping the non-panicking CPUs from the
panicking CPU via IPI can fail easily if interrupts are being masked
in those moment. Moreover, crash_notes are not initialized for such
unstopped CPUs and the corresponding NT_PRSTATUS notes are not
attached to vmcore. However, crash utility never takes it
consideration such uninitialized crash_notes and then ends with
mapping different NT_PRSTATUS to actually unstopped CPUs. This corrupt
mapping can result crash utility into segmentation fault in the
operations where register values in NT_PRSTATUS notes are used.
For example:
crash> bt 1408
PID: 1408 TASK: ffff000003e22200 CPU: 2 COMMAND: "repro"
Segmentation fault (core dumped)
crash> help -D
diskdump_data:
filename: 127.0.0.1-2023-05-26-02:21:27/vmcore-ld1
flags: 46 (KDUMP_CMPRS_LOCAL|ERROR_EXCLUDED|LZO_SUPPORTED)
...snip...
notes_buf: 1815df0
num_vmcoredd_notes: 0
num_prstatus_notes: 5
notes[0]: 1815df0 (NT_PRSTATUS)
si.signo: 0 si.code: 0 si.errno: 0
...snip...
PSTATE: 80400005 FPVALID: 00000000
notes[4]: 1808f10 (NT_PRSTATUS)
Segmentation fault (core dumped)
To fix this issue, let's map NT_PRSTATUS to some CPU only if the
corresponding crash_notes is checked to be initialized.
Signed-off-by: HATAYAMA Daisuke <d.hatayama(a)fujitsu.com>
---
defs.h | 1 +
diskdump.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++-
netdump.c | 5 ++++-
3 files changed, 55 insertions(+), 2 deletions(-)
diff --git a/defs.h b/defs.h
index bfa07c3..655de55 100644
--- a/defs.h
+++ b/defs.h
@@ -7113,6 +7113,7 @@ int dumpfile_is_split(void);
void show_split_dumpfiles(void);
void x86_process_elf_notes(void *, unsigned long);
void *diskdump_get_prstatus_percpu(int);
+int have_crash_notes(int cpu);
void map_cpus_to_prstatus_kdump_cmprs(void);
void diskdump_display_regs(int, FILE *);
void process_elf32_notes(void *, ulong);
diff --git a/diskdump.c b/diskdump.c
index 94bca4d..af65816 100644
--- a/diskdump.c
+++ b/diskdump.c
@@ -101,6 +101,55 @@ int dumpfile_is_split(void)
return KDUMP_SPLIT();
}
+int have_crash_notes(int cpu)
+{
+ ulong crash_notes, notes_ptr;
+ char *buf, *p;
+ Elf64_Nhdr *note = NULL;
+
+ if (!readmem(symbol_value("crash_notes"),
+ KVADDR,
+ &crash_notes,
+ sizeof(crash_notes),
+ "crash_notes",
+ RETURN_ON_ERROR)) {
+ error(WARNING, "cannot read \"crash_notes\"\n");
+ return FALSE;
+ }
+
+ if ((kt->flags & SMP) && (kt->flags & PER_CPU_OFF))
+ notes_ptr = crash_notes + kt->__per_cpu_offset[cpu];
+ else
+ notes_ptr = crash_notes;
+
+ buf = GETBUF(SIZE(note_buf));
+
+ if (!readmem(notes_ptr,
+ KVADDR,
+ buf,
+ SIZE(note_buf),
+ "note_buf_t",
+ RETURN_ON_ERROR)) {
+ error(WARNING, "cpu %d: cannot read NT_PRSTATUS note\n", cpu);
+ return FALSE;
+ }
+
+ note = (Elf64_Nhdr *)buf;
+ p = buf + sizeof(Elf64_Nhdr);
+
+ if (note->n_type != NT_PRSTATUS) {
+ error(WARNING, "cpu %d: invalid NT_PRSTATUS note (n_type != NT_PRSTATUS)\n",
cpu);
+ return FALSE;
+ }
+
+ if (!STRNEQ(p, "CORE")) {
+ error(WARNING, "cpu %d: invalid NT_PRSTATUS note (name !=
\"CORE\")\n", cpu);
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
void
map_cpus_to_prstatus_kdump_cmprs(void)
{
@@ -131,7 +180,7 @@ map_cpus_to_prstatus_kdump_cmprs(void)
nrcpus = (kt->kernel_NR_CPUS ? kt->kernel_NR_CPUS : NR_CPUS);
for (i = 0, j = 0; i < nrcpus; i++) {
- if (in_cpu_map(ONLINE_MAP, i)) {
+ if (in_cpu_map(ONLINE_MAP, i) && (!symbol_exists("crash_notes") ||
have_crash_notes(i))) {
dd->nt_prstatus_percpu[i] = nt_ptr[j++];
dd->num_prstatus_notes =
MAX(dd->num_prstatus_notes, i+1);
diff --git a/netdump.c b/netdump.c
index 4eba66c..9500cf4 100644
--- a/netdump.c
+++ b/netdump.c
@@ -99,8 +99,11 @@ map_cpus_to_prstatus(void)
nrcpus = (kt->kernel_NR_CPUS ? kt->kernel_NR_CPUS : NR_CPUS);
for (i = 0, j = 0; i < nrcpus; i++) {
- if (in_cpu_map(ONLINE_MAP, i))
+ if (in_cpu_map(ONLINE_MAP, i) && (!symbol_exists("crash_notes") ||
have_crash_notes(i))) {
nd->nt_prstatus_percpu[i] = nt_ptr[j++];
+ nd->num_prstatus_notes =
+ MAX(nd->num_prstatus_notes, i+1);
+ }
}
FREEBUF(nt_ptr);
--
2.25.1