On Tue, Oct 17, 2023 at 06:11:20PM +0800, lijiang wrote:
 On Sat, Oct 14, 2023 at 9:39 PM
<crash-utility-request(a)redhat.com> wrote:
 
 > Date: Sat, 14 Oct 2023 19:09:30 +0530
 > From: Aditya Gupta <adityag(a)linux.ibm.com>
 > To: crash-utility(a)redhat.com, <lijiang(a)redhat.com>
 > Cc: Hari Bathini <hbathini(a)linux.ibm.com>, Mahesh J Salgaonkar
 >         <mahesh(a)linux.ibm.com>, Sourabh Jain
<sourabhjain(a)linux.ibm.com>,
 >         d.hatayama(a)fujitsu.com
 > Subject: [Crash-utility] [PATCH RESEND v2] diskdump: add hook for
 >         additional checks on prstatus notes validity
 > Message-ID: <20231014133930.147343-1-adityag(a)linux.ibm.com>
 > Content-Type: text/plain; charset="US-ASCII"; x-default=true
 >
 > Upstream crash reports these warnings on PowerPC64:
 >
 >     WARNING: cpu 0 invalid NT_PRSTATUS note (n_type != NT_PRSTATUS)
 >     ...
 >
 > Apart from these warnings, register values are also invalid.
 >
 > This warning was found in the commit:
 >
 >     commit db8c030857b4 ("diskdump/netdump: fix segmentation fault
 >     caused by failure of stopping CPUs")
 >
 > With above commit, crash checks whether 'crash_notes' is initialised,
 > before mapping PRSTATUS notes.
 >
 > But some architectures such as PowerPC64, in fadump case
 > (firmware-assisted dump), don't populate 'crash_notes' since the
 > registers are already stored in the cpu notes in the vmcore.
 >
 > Instead of checking 'crash_notes' for all architectures, introduce
 > a machdep hook ('is_cpu_prstatus_valid'), for architectures to
 > decide validity checks for PRSTATUS notes
 >
 > A default hook ('diskdump_is_cpu_prstatus_valid') has also been provided
 > for all architectures other than PowerPC64, which checks if 'crash_notes'
 > for a given cpu is valid, maintaining the current behaviour
 >
 > PowerPC64 doesn't utilise 'crash_notes' to get register values, so no
 > additional checks are required
 >
 > Fixes: db8c030857b4 ("diskdump/netdump: fix segmentation fault caused by
 > failure of stopping CPUs")
 > Signed-off-by: Aditya Gupta <adityag(a)linux.ibm.com>
 >
 > ---
 > Testing
 > =======
 >
 > NOTE: To test this on PowerPC64 with upstream kernel dump, AND on system
 > with Radix MMU, following patch will also be needed to be applied:
 >
 > Link:
 > 
https://listman.redhat.com/archives/crash-utility/2023-September/010961.html
 >
 > This is due to change in vmemmap address mapping for Radix MMU, since
 > following patch in the kernel:
 >
 >     368a0590d954: (powerpc/book3s64/vmemmap: switch radix to use a
 >     different vmemmap handling function)
 >
 > More details about the change are in the linked patch. Basically what
 > changed is, the address mapping for vmemmap address is now in kernel
 > page table, in case of Radix MMU, instead of 'vmemmap_list' which is
 > currently
 > used in crash.
 >
 > Git Tree for Testing
 > ====================
 >
 > 1. With this patch (diskdump: add hook for additional ...) applied:
 >
 > 
https://github.com/adi-g15-ibm/crash/tree/bugzilla-203256-list-v2
 >
 > 2. With both this and the linked patch (ppc64: do page traversal ...)
 > applied:
 >
 > 
https://github.com/adi-g15-ibm/crash/tree/bugzilla-203256-withupstreamradix
 >
 > Changelog
 > =========
 >
 > V2
 > + added fix for netdump also, same as diskdump, as that was also modified
 > by
 > db8c030857b4
 > + fixed compile warnings
 >
 >
 Thank  you for the update,  Aditya.
 
 For the v2: Ack. 
Thanks for the reviews Lianbo.
Thanks
Aditya Gupta
 
 Thanks
 Lianbo
 
 ---
 >
 > ---
 >  defs.h     |  2 ++
 >  diskdump.c | 15 ++++++++++++---
 >  netdump.c  |  5 ++---
 >  ppc64.c    | 10 ++++++++++
 >  4 files changed, 26 insertions(+), 6 deletions(-)
 >
 > diff --git a/defs.h b/defs.h
 > index 96a7a2a31471..23dee48759fb 100644
 > --- a/defs.h
 > +++ b/defs.h
 > @@ -1075,6 +1075,7 @@ struct machdep_table {
 >          void (*show_interrupts)(int, ulong *);
 >         int (*is_page_ptr)(ulong, physaddr_t *);
 >         int (*get_cpu_reg)(int, int, const char *, int, void *);
 > +       int (*is_cpu_prstatus_valid)(int cpu);
 >  };
 >
 >  /*
 > @@ -7181,6 +7182,7 @@ int dumpfile_is_split(void);
 >  void show_split_dumpfiles(void);
 >  void x86_process_elf_notes(void *, unsigned long);
 >  void *diskdump_get_prstatus_percpu(int);
 > +int diskdump_is_cpu_prstatus_valid(int cpu);
 >  int have_crash_notes(int cpu);
 >  void map_cpus_to_prstatus_kdump_cmprs(void);
 >  void diskdump_display_regs(int, FILE *);
 > diff --git a/diskdump.c b/diskdump.c
 > index 2c284ff3f97f..ad9a00b08ce1 100644
 > --- a/diskdump.c
 > +++ b/diskdump.c
 > @@ -142,13 +142,22 @@ int have_crash_notes(int cpu)
 >         return TRUE;
 >  }
 >
 > +int diskdump_is_cpu_prstatus_valid(int cpu)
 > +{
 > +       static int crash_notes_exists = -1;
 > +
 > +       if (crash_notes_exists == -1)
 > +               crash_notes_exists = kernel_symbol_exists("crash_notes");
 > +
 > +       return (!crash_notes_exists || have_crash_notes(cpu));
 > +}
 > +
 >  void
 >  map_cpus_to_prstatus_kdump_cmprs(void)
 >  {
 >         void **nt_ptr;
 >         int online, i, j, nrcpus;
 >         size_t size;
 > -       int crash_notes_exists;
 >
 >         if (pc->flags2 & QEMU_MEM_DUMP_COMPRESSED)  /* notes exist for all
 > cpus */
 >                 goto resize_note_pointers;
 > @@ -171,10 +180,9 @@ map_cpus_to_prstatus_kdump_cmprs(void)
 >          *  Re-populate the array with the notes mapping to online cpus
 >          */
 >         nrcpus = (kt->kernel_NR_CPUS ? kt->kernel_NR_CPUS : NR_CPUS);
 > -       crash_notes_exists = kernel_symbol_exists("crash_notes");
 >
 >         for (i = 0, j = 0; i < nrcpus; i++) {
 > -               if (in_cpu_map(ONLINE_MAP, i) && (!crash_notes_exists ||
 > have_crash_notes(i))) {
 > +               if (in_cpu_map(ONLINE_MAP, i) &&
 > machdep->is_cpu_prstatus_valid(i)) {
 >                         dd->nt_prstatus_percpu[i] = nt_ptr[j++];
 >                         dd->num_prstatus_notes =
 >                                 MAX(dd->num_prstatus_notes, i+1);
 > @@ -1076,6 +1084,7 @@ diskdump_init(char *unused, FILE *fptr)
 >         if (!DISKDUMP_VALID() && !KDUMP_CMPRS_VALID())
 >                 return FALSE;
 >
 > +       machdep->is_cpu_prstatus_valid = diskdump_is_cpu_prstatus_valid;
 >         dd->ofp = fptr;
 >         return TRUE;
 >  }
 > diff --git a/netdump.c b/netdump.c
 > index 61ddeaa08831..390786364959 100644
 > --- a/netdump.c
 > +++ b/netdump.c
 > @@ -75,7 +75,6 @@ map_cpus_to_prstatus(void)
 >         void **nt_ptr;
 >         int online, i, j, nrcpus;
 >         size_t size;
 > -       int crash_notes_exists;
 >
 >         if (pc->flags2 & QEMU_MEM_DUMP_ELF)  /* notes exist for all cpus */
 >                 return;
 > @@ -98,10 +97,9 @@ map_cpus_to_prstatus(void)
 >          *  Re-populate the array with the notes mapping to online cpus
 >          */
 >         nrcpus = (kt->kernel_NR_CPUS ? kt->kernel_NR_CPUS : NR_CPUS);
 > -       crash_notes_exists = kernel_symbol_exists("crash_notes");
 >
 >         for (i = 0, j = 0; i < nrcpus; i++) {
 > -               if (in_cpu_map(ONLINE_MAP, i) && (!crash_notes_exists ||
 > have_crash_notes(i))) {
 > +               if (in_cpu_map(ONLINE_MAP, i) &&
 > machdep->is_cpu_prstatus_valid(i)) {
 >                         nd->nt_prstatus_percpu[i] = nt_ptr[j++];
 >                         nd->num_prstatus_notes =
 >                                 MAX(nd->num_prstatus_notes, i+1);
 > @@ -735,6 +733,7 @@ netdump_init(char *unused, FILE *fptr)
 >         if (!VMCORE_VALID())
 >                 return FALSE;
 >
 > +       machdep->is_cpu_prstatus_valid = diskdump_is_cpu_prstatus_valid;
 >         nd->ofp = fptr;
 >
 >         check_dumpfile_size(pc->dumpfile);
 > diff --git a/ppc64.c b/ppc64.c
 > index fc34006f4863..5a8ef9e58173 100644
 > --- a/ppc64.c
 > +++ b/ppc64.c
 > @@ -298,6 +298,15 @@ struct machine_specific book3e_machine_specific = {
 >         .is_vmaddr = book3e_is_vmaddr,
 >  };
 >
 > +/**
 > + * No additional checks are required on PPC64, for checking if PRSTATUS
 > notes
 > + * is valid
 > + */
 > +static int ppc64_is_cpu_prstatus_valid(int cpu)
 > +{
 > +       return TRUE;
 > +}
 > +
 >  #define SKIBOOT_BASE                   0x30000000
 >
 >  /*
 > @@ -400,6 +409,7 @@ ppc64_init(int when)
 >                 machdep->value_to_symbol = generic_machdep_value_to_symbol;
 >                 machdep->get_kvaddr_ranges = ppc64_get_kvaddr_ranges;
 >                 machdep->init_kernel_pgd = NULL;
 > +               machdep->is_cpu_prstatus_valid =
 > ppc64_is_cpu_prstatus_valid;
 >
 >                 if (symbol_exists("vmemmap_populate")) {
 >                         if (symbol_exists("vmemmap")) {
 > --
 > 2.41.0
 >