On Sat, Oct 14, 2023 at 9:39 PM <crash-utility-request@redhat.com> wrote:
Date: Sat, 14 Oct 2023 19:09:30 +0530
From: Aditya Gupta <adityag@linux.ibm.com>
To: crash-utility@redhat.com, <lijiang@redhat.com>
Cc: Hari Bathini <hbathini@linux.ibm.com>, Mahesh J Salgaonkar
        <mahesh@linux.ibm.com>, Sourabh Jain <sourabhjain@linux.ibm.com>,
        d.hatayama@fujitsu.com
Subject: [Crash-utility] [PATCH RESEND v2] diskdump: add hook for
        additional checks on prstatus notes validity
Message-ID: <20231014133930.147343-1-adityag@linux.ibm.com>
Content-Type: text/plain; charset="US-ASCII"; x-default=true

Upstream crash reports these warnings on PowerPC64:

    WARNING: cpu 0 invalid NT_PRSTATUS note (n_type != NT_PRSTATUS)
    ...

Apart from these warnings, register values are also invalid.

This warning was found in the commit:

    commit db8c030857b4 ("diskdump/netdump: fix segmentation fault
    caused by failure of stopping CPUs")

With above commit, crash checks whether 'crash_notes' is initialised,
before mapping PRSTATUS notes.

But some architectures such as PowerPC64, in fadump case
(firmware-assisted dump), don't populate 'crash_notes' since the
registers are already stored in the cpu notes in the vmcore.

Instead of checking 'crash_notes' for all architectures, introduce
a machdep hook ('is_cpu_prstatus_valid'), for architectures to
decide validity checks for PRSTATUS notes

A default hook ('diskdump_is_cpu_prstatus_valid') has also been provided
for all architectures other than PowerPC64, which checks if 'crash_notes'
for a given cpu is valid, maintaining the current behaviour

PowerPC64 doesn't utilise 'crash_notes' to get register values, so no
additional checks are required

Fixes: db8c030857b4 ("diskdump/netdump: fix segmentation fault caused by failure of stopping CPUs")
Signed-off-by: Aditya Gupta <adityag@linux.ibm.com>

---
Testing
=======

NOTE: To test this on PowerPC64 with upstream kernel dump, AND on system
with Radix MMU, following patch will also be needed to be applied:

Link: https://listman.redhat.com/archives/crash-utility/2023-September/010961.html

This is due to change in vmemmap address mapping for Radix MMU, since
following patch in the kernel:

    368a0590d954: (powerpc/book3s64/vmemmap: switch radix to use a
    different vmemmap handling function)

More details about the change are in the linked patch. Basically what
changed is, the address mapping for vmemmap address is now in kernel
page table, in case of Radix MMU, instead of 'vmemmap_list' which is currently
used in crash.

Git Tree for Testing
====================

1. With this patch (diskdump: add hook for additional ...) applied:

https://github.com/adi-g15-ibm/crash/tree/bugzilla-203256-list-v2

2. With both this and the linked patch (ppc64: do page traversal ...) applied:

https://github.com/adi-g15-ibm/crash/tree/bugzilla-203256-withupstreamradix

Changelog
=========

V2
+ added fix for netdump also, same as diskdump, as that was also modified by
db8c030857b4
+ fixed compile warnings


Thank  you for the update,  Aditya.

For the v2: Ack.

Thanks
Lianbo

---

---
 defs.h     |  2 ++
 diskdump.c | 15 ++++++++++++---
 netdump.c  |  5 ++---
 ppc64.c    | 10 ++++++++++
 4 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/defs.h b/defs.h
index 96a7a2a31471..23dee48759fb 100644
--- a/defs.h
+++ b/defs.h
@@ -1075,6 +1075,7 @@ struct machdep_table {
         void (*show_interrupts)(int, ulong *);
        int (*is_page_ptr)(ulong, physaddr_t *);
        int (*get_cpu_reg)(int, int, const char *, int, void *);
+       int (*is_cpu_prstatus_valid)(int cpu);
 };

 /*
@@ -7181,6 +7182,7 @@ int dumpfile_is_split(void);
 void show_split_dumpfiles(void);
 void x86_process_elf_notes(void *, unsigned long);
 void *diskdump_get_prstatus_percpu(int);
+int diskdump_is_cpu_prstatus_valid(int cpu);
 int have_crash_notes(int cpu);
 void map_cpus_to_prstatus_kdump_cmprs(void);
 void diskdump_display_regs(int, FILE *);
diff --git a/diskdump.c b/diskdump.c
index 2c284ff3f97f..ad9a00b08ce1 100644
--- a/diskdump.c
+++ b/diskdump.c
@@ -142,13 +142,22 @@ int have_crash_notes(int cpu)
        return TRUE;
 }

+int diskdump_is_cpu_prstatus_valid(int cpu)
+{
+       static int crash_notes_exists = -1;
+
+       if (crash_notes_exists == -1)
+               crash_notes_exists = kernel_symbol_exists("crash_notes");
+
+       return (!crash_notes_exists || have_crash_notes(cpu));
+}
+
 void
 map_cpus_to_prstatus_kdump_cmprs(void)
 {
        void **nt_ptr;
        int online, i, j, nrcpus;
        size_t size;
-       int crash_notes_exists;

        if (pc->flags2 & QEMU_MEM_DUMP_COMPRESSED)  /* notes exist for all cpus */
                goto resize_note_pointers;
@@ -171,10 +180,9 @@ map_cpus_to_prstatus_kdump_cmprs(void)
         *  Re-populate the array with the notes mapping to online cpus
         */
        nrcpus = (kt->kernel_NR_CPUS ? kt->kernel_NR_CPUS : NR_CPUS);
-       crash_notes_exists = kernel_symbol_exists("crash_notes");

        for (i = 0, j = 0; i < nrcpus; i++) {
-               if (in_cpu_map(ONLINE_MAP, i) && (!crash_notes_exists || have_crash_notes(i))) {
+               if (in_cpu_map(ONLINE_MAP, i) && machdep->is_cpu_prstatus_valid(i)) {
                        dd->nt_prstatus_percpu[i] = nt_ptr[j++];
                        dd->num_prstatus_notes =
                                MAX(dd->num_prstatus_notes, i+1);
@@ -1076,6 +1084,7 @@ diskdump_init(char *unused, FILE *fptr)
        if (!DISKDUMP_VALID() && !KDUMP_CMPRS_VALID())
                return FALSE;

+       machdep->is_cpu_prstatus_valid = diskdump_is_cpu_prstatus_valid;
        dd->ofp = fptr;
        return TRUE;
 }
diff --git a/netdump.c b/netdump.c
index 61ddeaa08831..390786364959 100644
--- a/netdump.c
+++ b/netdump.c
@@ -75,7 +75,6 @@ map_cpus_to_prstatus(void)
        void **nt_ptr;
        int online, i, j, nrcpus;
        size_t size;
-       int crash_notes_exists;

        if (pc->flags2 & QEMU_MEM_DUMP_ELF)  /* notes exist for all cpus */
                return;
@@ -98,10 +97,9 @@ map_cpus_to_prstatus(void)
         *  Re-populate the array with the notes mapping to online cpus
         */
        nrcpus = (kt->kernel_NR_CPUS ? kt->kernel_NR_CPUS : NR_CPUS);
-       crash_notes_exists = kernel_symbol_exists("crash_notes");

        for (i = 0, j = 0; i < nrcpus; i++) {
-               if (in_cpu_map(ONLINE_MAP, i) && (!crash_notes_exists || have_crash_notes(i))) {
+               if (in_cpu_map(ONLINE_MAP, i) && machdep->is_cpu_prstatus_valid(i)) {
                        nd->nt_prstatus_percpu[i] = nt_ptr[j++];
                        nd->num_prstatus_notes =
                                MAX(nd->num_prstatus_notes, i+1);
@@ -735,6 +733,7 @@ netdump_init(char *unused, FILE *fptr)
        if (!VMCORE_VALID())
                return FALSE;

+       machdep->is_cpu_prstatus_valid = diskdump_is_cpu_prstatus_valid;
        nd->ofp = fptr;

        check_dumpfile_size(pc->dumpfile);
diff --git a/ppc64.c b/ppc64.c
index fc34006f4863..5a8ef9e58173 100644
--- a/ppc64.c
+++ b/ppc64.c
@@ -298,6 +298,15 @@ struct machine_specific book3e_machine_specific = {
        .is_vmaddr = book3e_is_vmaddr,
 };

+/**
+ * No additional checks are required on PPC64, for checking if PRSTATUS notes
+ * is valid
+ */
+static int ppc64_is_cpu_prstatus_valid(int cpu)
+{
+       return TRUE;
+}
+
 #define SKIBOOT_BASE                   0x30000000

 /*
@@ -400,6 +409,7 @@ ppc64_init(int when)
                machdep->value_to_symbol = generic_machdep_value_to_symbol;
                machdep->get_kvaddr_ranges = ppc64_get_kvaddr_ranges;
                machdep->init_kernel_pgd = NULL;
+               machdep->is_cpu_prstatus_valid = ppc64_is_cpu_prstatus_valid;

                if (symbol_exists("vmemmap_populate")) {
                        if (symbol_exists("vmemmap")) {
--
2.41.0