Hi Dave,
Above your suggestion, I made changes for patch v2.
Best regards,
Qiwu
-----Original Message-----
From: Dave Anderson <anderson(a)redhat.com>
Sent: Wednesday, December 11, 2019 12:51 AM
To: qiwuchen55(a)gmail.com
Cc: crash-utility(a)redhat.com; 陈启武 <chenqiwu(a)xiaomi.com>
Subject: [External Mail]Re: [PATCH] Bugfix and optimization for ARM64 getting crash
notes
----- Original Message -----
From: chenqiwu <chenqiwu(a)xiaomi.com>
1) ARM64 call arm64_get_crash_notes() to retrieve active task
registers when POST_VM before calling map_cpus_to_prstatus() to remap
the NT_PRSTATUS elf notes to the online cpus. It's better to call
arm64_get_crash_notes() when POST_INIT.
2) arm64_get_crash_notes() check the sanity of NT_PRSTATUS notes only
for online cpus. If one cpu contains invalid note, it's better to
continue finding the crash notes for other online cpus.
So we can extract the backtraces for the online cpus which contain
valid note by using command "bt -a".
3) map_cpus_to_prstatus() remap the NT_PRSTATUS notes only to the
online cpus. Make sure there must be a one-to-one relationship between
the number of online cpus and the number of notes.
The code in map_cpus_to_prstatus() and map_cpus_to_prstatus_kdump_cmprs()
has been in place forever. Both the nd->nt_prstatus_percpu[] and
dd->nt_prstatus_percpu[] arrays are per-cpu regardless whether
they are online or offline. However, since kdump only creates NT_PRSTATUS notes for
on-line cpus, the "i" index is needed for each cpu, and the "j" index
is needed for the existing NT_PRSTATUS notes. If a cpu is offline, its
nt_prstatus_percpu[] entry will be zeroed out.
I'm not arguing that the arm64 online-cpu handling may be suspect, but your patch
should not be making changes to architectural-neutral code unless the issue affects all
architectures. So please leave those two functions alone.
Dave
Signed-off-by: chenqiwu <chenqiwu(a)xiaomi.com>
---
arm64.c | 49 +++++++++++++++++++++++++++++--------------------
diskdump.c | 9 +++------
netdump.c | 4 ++--
3 files changed, 34 insertions(+), 28 deletions(-)
diff --git a/arm64.c b/arm64.c
index 233029d..cbad461 100644
--- a/arm64.c
+++ b/arm64.c
@@ -458,7 +458,7 @@ arm64_init(int when)
arm64_stackframe_init()
break;
-case POST_VM:
+case POST_INIT:
/*
* crash_notes contains machine specific information about the
* crash. In particular, it contains CPU registers at the time @@
-3587,7 +3587,7 @@ arm64_get_crash_notes(void)
ulong offset;
char *buf, *p;
ulong *notes_ptrs;
-ulong i;
+ulong i, j;
if (!symbol_exists("crash_notes"))
return FALSE;
@@ -3620,12 +3620,12 @@ arm64_get_crash_notes(void)
if (!(ms->panic_task_regs = calloc((size_t)kt->cpus, sizeof(struct
arm64_pt_regs))))
error(FATAL, "cannot calloc panic_task_regs space\n");
-for (i = 0; i < kt->cpus; i++) {
-
+for (i = 0, j = 0; i < kt->cpus; i++) {
if (!readmem(notes_ptrs[i], KVADDR, buf, SIZE(note_buf),
"note_buf_t", RETURN_ON_ERROR)) {
-error(WARNING, "failed to read note_buf_t\n");
-goto fail;
+error(WARNING, "cpu#%d: failed to read note_buf_t\n", i);
+++j;
+continue;
}
/*
@@ -3655,19 +3655,29 @@ arm64_get_crash_notes(void)
note->n_descsz == notesz)
BCOPY((char *)note, buf, notesz);
} else {
-error(WARNING,
-"cannot find NT_PRSTATUS note for cpu: %d\n", i);
+if (CRASHDEBUG(1))
+error(WARNING,
+"cpu#%d: cannot find NT_PRSTATUS note\n", i);
+++j;
continue;
}
}
+/*
+ * Check the sanity of NT_PRSTATUS note only for each online cpu.
+ * If this cpu has invalid note, continue to find the crash notes
+ * for other online cpus.
+ */
if (note->n_type != NT_PRSTATUS) {
-error(WARNING, "invalid note (n_type != NT_PRSTATUS)\n");
-goto fail;
+error(WARNING, "cpu#%d: invalid note (n_type != NT_PRSTATUS)\n", i);
+++j;
+continue;
}
-if (p[0] != 'C' || p[1] != 'O' || p[2] != 'R' || p[3] !=
'E') {
-error(WARNING, "invalid note (name != \"CORE\"\n");
-goto fail;
+
+if (!STRNEQ(p, "CORE")) {
+error(WARNING, "cpu#%d: invalid note (name != \"CORE\")\n", i);
+++j;
+continue;
}
/*
@@ -3684,14 +3694,13 @@ arm64_get_crash_notes(void)
FREEBUF(buf);
FREEBUF(notes_ptrs);
-return TRUE;
-fail:
-FREEBUF(buf);
-FREEBUF(notes_ptrs);
-free(ms->panic_task_regs);
-ms->panic_task_regs = NULL;
-return FALSE;
+if (j == kt->cpus) {
+free(ms->panic_task_regs);
+ms->panic_task_regs = NULL;
+return FALSE;
+}
+return TRUE;
}
static void
diff --git a/diskdump.c b/diskdump.c
index e88243e..12d8e9c 100644
--- a/diskdump.c
+++ b/diskdump.c
@@ -130,12 +130,9 @@ map_cpus_to_prstatus_kdump_cmprs(void)
*/
nrcpus = (kt->kernel_NR_CPUS ? kt->kernel_NR_CPUS : NR_CPUS);
-for (i = 0, j = 0; i < nrcpus; i++) {
-if (in_cpu_map(ONLINE_MAP, i)) {
-dd->nt_prstatus_percpu[i] = nt_ptr[j++];
-dd->num_prstatus_notes =
-MAX(dd->num_prstatus_notes, i+1);
-}
+for (i = 0; i < nrcpus; i++) {
+if (in_cpu_map(ONLINE_MAP, i))
+dd->nt_prstatus_percpu[i] = nt_ptr[i];
}
FREEBUF(nt_ptr);
diff --git a/netdump.c b/netdump.c
index 406416a..849638a 100644
--- a/netdump.c
+++ b/netdump.c
@@ -97,9 +97,9 @@ map_cpus_to_prstatus(void)
*/
nrcpus = (kt->kernel_NR_CPUS ? kt->kernel_NR_CPUS : NR_CPUS);
-for (i = 0, j = 0; i < nrcpus; i++) {
+for (i = 0; i < nrcpus; i++) {
if (in_cpu_map(ONLINE_MAP, i))
-nd->nt_prstatus_percpu[i] = nt_ptr[j++];
+nd->nt_prstatus_percpu[i] = nt_ptr[i];
}
FREEBUF(nt_ptr);
--
1.9.1
#/******本邮件及其附件含有小米公司的保密信息,仅限于发送给上面地址中列出的个人或群组。禁止任何其他人以任何形式使用(包括但不限于全部或部分地泄露、复制、或散发)本邮件中的信息。如果您错收了本邮件,请您立即电话或邮件通知发件人并删除本邮件!
This e-mail and its attachments contain confidential information from XIAOMI, which is
intended only for the person or entity whose address is listed above. Any use of the
information contained herein in any way (including, but not limited to, total or partial
disclosure, reproduction, or dissemination) by persons other than the intended
recipient(s) is prohibited. If you receive this e-mail in error, please notify the sender
by phone or email immediately and delete it!******/#