On Thu, Jan 9, 2025 at 6:41 AM <devel-request(a)lists.crash-utility.osci.io>
wrote:
 Date: Thu, 9 Jan 2025 11:39:18 +1300
 From: Tao Liu <ltao(a)redhat.com>
 Subject: [Crash-utility] Re: [PATCH v3] "kmem -i" extremely slow on
         dumps from large memory systems
 To: "Aureau, Georges (Kernel Tools ERT)" <georges.aureau(a)hpe.com>
 Cc: "devel(a)lists.crash-utility.osci.io"
         <devel(a)lists.crash-utility.osci.io>
 Message-ID:
         <CAO7dBbULiC-3S+1a5O8NzT7PqaAos_ocFjQew6wWR8MSv0LbLw(a)mail.gmail.com>
 Content-Type: text/plain; charset="UTF-8"
 Hi Aureau,
 Thanks for the improvements. LGTM, so ack.
 
Applied (with minor changes):
https://github.com/crash-utility/crash/commit/b39f8b558f9c71f91ae5f623a06...
Thanks
Lianbo
 Thanks,
 Tao Liu
 On Thu, Jan 9, 2025 at 3:38 AM Aureau, Georges (Kernel Tools ERT)
 <georges.aureau(a)hpe.com> wrote:
 >
 > The "kmem -i" command is extremely slow (appears to hang) on dumps from large memory systems.
 >
 > For example, on a 120GB crash dump from a high-end server with 32TB of RAM (i.e. 8 Giga pages),
 > the "kmem -i" command takes over 50 minutes to execute on a DL380 Gen10. To report basic
 > general memory usage figures, we should only be reading global counters, without having to walk
 > the full flat/sparse mem map page table. Hence, dump_kmeminfo() should first read the globals,
 > and then only call dump_mem_map() if important information (i.e. slabs or total RAM) is missing.
 >
 > Signed-off-by: Georges Aureau <georges.aureau(a)hpe.com>
 > --
 >  help.c   |  4 +++-
 >  memory.c | 72
 +++++++++++++++++++++++++++++++++++++++++++++++-----------------
 >  2 files changed, 56 insertions(+), 20 deletions(-)
 >
 > diff --git a/help.c b/help.c
 > index 4c30047..8bd74ac 100644
 > --- a/help.c
 > +++ b/help.c
 > @@ -6825,7 +6825,9 @@ char *help_kmem[] = {
 >  "        -F  same as -f, but also dumps all pages linked to that
 header.",
 >  "        -c  walks through the page_hash_table and verifies
 page_cache_size.",
 >  "        -C  same as -c, but also dumps all pages in the
 page_hash_table.",
 > -"        -i  displays general memory usage information",
 > +"        -i  displays general memory usage information.",
 > +"            Note: SHARED is no longer printed (too slow on large
 memory systems)",
 > +"            unless specifying \"-i=shared\".",
 >  "        -v  displays the mapped virtual memory regions allocated by
 vmalloc().",
 >  "        -V  displays the kernel vm_stat table if it exists, or in more
 recent",
 >  "            kernels, the vm_zone_stat, vm_node_stat and vm_numa_stat
 tables,",
 > diff --git a/memory.c b/memory.c
 > index 8c01ed0..ea00ef1 100644
 > --- a/memory.c
 > +++ b/memory.c
 > @@ -237,7 +237,7 @@ static void dump_vmlist(struct meminfo *);
 >  static void dump_vmap_area(struct meminfo *);
 >  static int get_vmap_area_list_from_nodes(ulong **);
 >  static int dump_page_lists(struct meminfo *);
 > -static void dump_kmeminfo(void);
 > +static void dump_kmeminfo(struct meminfo *);
 >  static int page_to_phys(ulong, physaddr_t *);
 >  static void display_memory(ulonglong, long, ulong, int, void *);
 >  static char *show_opt_string(struct searchinfo *);
 > @@ -5107,7 +5107,7 @@ cmd_kmem(void)
 >         BZERO(&value[0], sizeof(ulonglong)*MAXARGS);
 >         pc->curcmd_flags &= ~HEADER_PRINTED;
 >
 > -        while ((c = getopt(argcnt, args,
 "gI:sS::rFfm:pvczCinl:L:PVoh")) != EOF) {
 > +        while ((c = getopt(argcnt, args,
 "gI:sS::rFfm:pvczCi::nl:L:PVoh")) != EOF) {
 >                  switch(c)
 >                 {
 >                 case 'V':
 > @@ -5124,6 +5124,9 @@ cmd_kmem(void)
 >
 >                 case 'i':
 >                         iflag = 1;
 > +                       if (optarg && strcmp(optarg, "=shared") ==
0) {
 > +                               meminfo.flags = GET_SHARED_PAGES;
 > +                       }
 >                         break;
 >
 >                 case 'h':
 > @@ -5410,7 +5413,7 @@ cmd_kmem(void)
 >         }
 >
 >         if (iflag == 1)
 > -               dump_kmeminfo();
 > +               dump_kmeminfo(&meminfo);
 >
 >         if (pflag == 1)
 >                 dump_mem_map(&meminfo);
 > @@ -8515,7 +8518,7 @@ bailout:
 >  char *kmeminfo_hdr = "                 PAGES        TOTAL
 PERCENTAGE\n";
 >
 >  static void
 > -dump_kmeminfo(void)
 > +dump_kmeminfo(struct meminfo *mi)
 >  {
 >         int i, len;
 >         ulong totalram_pages;
 > @@ -8546,18 +8549,23 @@ dump_kmeminfo(void)
 >          ulong get_buffers;
 >          ulong get_slabs;
 >         char buf[BUFSIZE];
 > +       ulong flags;
 >
 > -
 > -       BZERO(&meminfo, sizeof(struct meminfo));
 > -       meminfo.flags = GET_ALL;
 > -       dump_mem_map(&meminfo);
 > -       get_totalram = meminfo.get_totalram;
 > -       shared_pages = meminfo.get_shared;
 > -       get_buffers = meminfo.get_buffers;
 > -       get_slabs = meminfo.get_slabs;
 > +       /*
 > +        * By default, we will no longer call dump_mem_map() as this is
 too
 > +        * slow for large memory systems. If we have to call it (eg.
 missing
 > +        * important information such as slabs or total ram), we will
 also
 > +        * collect shared pages. Otherwise, we won't print shared pages
 unless
 > +        * the caller explicitly requested shared pages ("kmem
 -i=shared").
 > +        */
 > +       flags = mi->flags;
 > +       shared_pages = 0;
 > +       get_totalram = 0;
 > +       get_buffers = 0;
 > +       get_slabs = 0;
 >
 >         /*
 > -        *  If vm_stat array exists, override page search info.
 > +        *  If vm_stat array does not exist, then set mem map flag.
 >          */
 >         if (vm_stat_init()) {
 >                 if (dump_vm_stat("NR_SLAB", &nr_slab, 0))
 > @@ -8572,9 +8580,10 @@ dump_kmeminfo(void)
 >                         if (dump_vm_stat("NR_SLAB_UNRECLAIMABLE_B",
 &nr_slab, 0))
 >                                 get_slabs += nr_slab;
 >                 }
 > +       } else {
 > +               flags |= GET_SLAB_PAGES;
 >         }
 >
 > -       fprintf(fp, "%s", kmeminfo_hdr);
 >         /*
 >          *  Get total RAM based upon how the various versions of
 si_meminfo()
 >           *  have done it, latest to earliest:
 > @@ -8586,9 +8595,32 @@ dump_kmeminfo(void)
 >             symbol_exists("_totalram_pages")) {
 >                 totalram_pages = vt->totalram_pages ?
 >                         vt->totalram_pages : get_totalram;
 > -       } else
 > -               totalram_pages = get_totalram;
 > +       } else {
 > +               flags |= GET_TOTALRAM_PAGES;
 > +               totalram_pages = 0;
 > +       }
 >
 > +       /*
 > +        * If the caller wants shared pages or if we are missing
 important data
 > +        * (ie. slab or totalram) then go through the slow
 dump_mem_map() path.
 > +        */
 > +       if (flags) {
 > +               BZERO(&meminfo, sizeof(struct meminfo));
 > +               meminfo.flags = GET_ALL;
 > +               dump_mem_map(&meminfo);
 > +               /* Update the missing information */
 > +               if (flags & GET_SLAB_PAGES) {
 > +                       get_slabs = meminfo.get_slabs;
 > +               }
 > +               if (flags & GET_TOTALRAM_PAGES) {
 > +                       get_totalram = meminfo.get_totalram;
 > +                       totalram_pages = get_totalram;
 > +               }
 > +               shared_pages = meminfo.get_shared;
 > +               get_buffers = meminfo.get_buffers;
 > +       }
 > +
 > +       fprintf(fp, "%s", kmeminfo_hdr);
 >         fprintf(fp, "%13s  %7ld  %11s         ----\n", "TOTAL
MEM",
 >                 totalram_pages, pages_to_size(totalram_pages, buf));
 >
 > @@ -8613,9 +8645,11 @@ dump_kmeminfo(void)
 >           *  differently than the kernel -- it just tallies the
 non-reserved
 >           *  pages that have a count of greater than 1.
 >          */
 > -        pct = (shared_pages * 100)/totalram_pages;
 > -        fprintf(fp, "%13s  %7ld  %11s  %3ld%% of TOTAL MEM\n",
 > -               "SHARED", shared_pages, pages_to_size(shared_pages,
 buf), pct);
 > +       if (flags & GET_SHARED_PAGES) {
 > +               pct = (shared_pages * 100)/totalram_pages;
 > +               fprintf(fp, "%13s  %7ld  %11s  %3ld%% of TOTAL MEM\n",
 > +                       "SHARED", shared_pages,
 pages_to_size(shared_pages, buf), pct);
 > +       }
 >
 >         subtract_buffer_pages = 0;
 >         if (symbol_exists("buffermem_pages")) {