On Thu, Jan 9, 2025 at 6:41 AM <devel-request(a)lists.crash-utility.osci.io>
wrote:
Date: Thu, 9 Jan 2025 11:39:18 +1300
From: Tao Liu <ltao(a)redhat.com>
Subject: [Crash-utility] Re: [PATCH v3] "kmem -i" extremely slow on
dumps from large memory systems
To: "Aureau, Georges (Kernel Tools ERT)" <georges.aureau(a)hpe.com>
Cc: "devel(a)lists.crash-utility.osci.io"
<devel(a)lists.crash-utility.osci.io>
Message-ID:
<
CAO7dBbULiC-3S+1a5O8NzT7PqaAos_ocFjQew6wWR8MSv0LbLw(a)mail.gmail.com>
Content-Type: text/plain; charset="UTF-8"
Hi Aureau,
Thanks for the improvements. LGTM, so ack.
Applied(with minor changes):
https://github.com/crash-utility/crash/commit/b39f8b558f9c71f91ae5f623a06...
Thanks
Lianbo
Thanks,
Tao Liu
On Thu, Jan 9, 2025 at 3:38 AM Aureau, Georges (Kernel Tools ERT)
<georges.aureau(a)hpe.com> wrote:
>
> The "kmem -i" command is extremely slow (appears to hang) on dumps from
large memory systems.
>
> For example, on 120GB crash dump from a high-end server with 32TB of RAM
(ie. 8Giga Pages),
> the "kmem -i" command is taking over 50 minutes to execute on a DL380
Gen10. To report basic
> general memory usage figures, we should only be reading global counters,
without having to walk
> the full flat/sparse mem map page table. Hence, dump_kmeminfo() should
first be reading globals,
> and then only call dump_mem_map() if important information (ie. slabs or
total ram) is missing.
>
> Signed-off-by: Georges Aureau <georges.aureau(a)hpe.com>
> --
> help.c | 4 +++-
> memory.c | 72
+++++++++++++++++++++++++++++++++++++++++++++++-----------------
> 2 files changed, 56 insertions(+), 20 deletions(-)
>
> diff --git a/help.c b/help.c
> index 4c30047..8bd74ac 100644
> --- a/help.c
> +++ b/help.c
> @@ -6825,7 +6825,9 @@ char *help_kmem[] = {
> " -F same as -f, but also dumps all pages linked to that
header.",
> " -c walks through the page_hash_table and verifies
page_cache_size.",
> " -C same as -c, but also dumps all pages in the
page_hash_table.",
> -" -i displays general memory usage information",
> +" -i displays general memory usage information.",
> +" Note: SHARED is no longer printed (too slow on large
memory systems)",
> +" unless specifying \"-i=shared\".",
> " -v displays the mapped virtual memory regions allocated by
vmalloc().",
> " -V displays the kernel vm_stat table if it exists, or in more
recent",
> " kernels, the vm_zone_stat, vm_node_stat and vm_numa_stat
tables,",
> diff --git a/memory.c b/memory.c
> index 8c01ed0..ea00ef1 100644
> --- a/memory.c
> +++ b/memory.c
> @@ -237,7 +237,7 @@ static void dump_vmlist(struct meminfo *);
> static void dump_vmap_area(struct meminfo *);
> static int get_vmap_area_list_from_nodes(ulong **);
> static int dump_page_lists(struct meminfo *);
> -static void dump_kmeminfo(void);
> +static void dump_kmeminfo(struct meminfo *);
> static int page_to_phys(ulong, physaddr_t *);
> static void display_memory(ulonglong, long, ulong, int, void *);
> static char *show_opt_string(struct searchinfo *);
> @@ -5107,7 +5107,7 @@ cmd_kmem(void)
> BZERO(&value[0], sizeof(ulonglong)*MAXARGS);
> pc->curcmd_flags &= ~HEADER_PRINTED;
>
> - while ((c = getopt(argcnt, args,
"gI:sS::rFfm:pvczCinl:L:PVoh")) != EOF) {
> + while ((c = getopt(argcnt, args,
"gI:sS::rFfm:pvczCi::nl:L:PVoh")) != EOF) {
> switch(c)
> {
> case 'V':
> @@ -5124,6 +5124,9 @@ cmd_kmem(void)
>
> case 'i':
> iflag = 1;
> + if (optarg && strcmp(optarg, "=shared") ==
0) {
> + meminfo.flags = GET_SHARED_PAGES;
> + }
> break;
>
> case 'h':
> @@ -5410,7 +5413,7 @@ cmd_kmem(void)
> }
>
> if (iflag == 1)
> - dump_kmeminfo();
> + dump_kmeminfo(&meminfo);
>
> if (pflag == 1)
> dump_mem_map(&meminfo);
> @@ -8515,7 +8518,7 @@ bailout:
> char *kmeminfo_hdr = " PAGES TOTAL
PERCENTAGE\n";
>
> static void
> -dump_kmeminfo(void)
> +dump_kmeminfo(struct meminfo *mi)
> {
> int i, len;
> ulong totalram_pages;
> @@ -8546,18 +8549,23 @@ dump_kmeminfo(void)
> ulong get_buffers;
> ulong get_slabs;
> char buf[BUFSIZE];
> + ulong flags;
>
> -
> - BZERO(&meminfo, sizeof(struct meminfo));
> - meminfo.flags = GET_ALL;
> - dump_mem_map(&meminfo);
> - get_totalram = meminfo.get_totalram;
> - shared_pages = meminfo.get_shared;
> - get_buffers = meminfo.get_buffers;
> - get_slabs = meminfo.get_slabs;
> + /*
> + * By default, we will no longer call dump_mem_map() as this is
too
> + * slow for large memory systems. If we have to call it (eg.
missing
> + * important information such as slabs or total ram), we will
also
> + * collect shared pages. Otherwise, we won't print shared pages
unless
> + * the caller explicitly requested shared pages ("kmem
-i=shared").
> + */
> + flags = mi->flags;
> + shared_pages = 0;
> + get_totalram = 0;
> + get_buffers = 0;
> + get_slabs = 0;
>
> /*
> - * If vm_stat array exists, override page search info.
> + * If vm_stat array does not exists, then set mem map flag.
> */
> if (vm_stat_init()) {
> if (dump_vm_stat("NR_SLAB", &nr_slab, 0))
> @@ -8572,9 +8580,10 @@ dump_kmeminfo(void)
> if (dump_vm_stat("NR_SLAB_UNRECLAIMABLE_B",
&nr_slab, 0))
> get_slabs += nr_slab;
> }
> + } else {
> + flags |= GET_SLAB_PAGES;
> }
>
> - fprintf(fp, "%s", kmeminfo_hdr);
> /*
> * Get total RAM based upon how the various versions of
si_meminfo()
> * have done it, latest to earliest:
> @@ -8586,9 +8595,32 @@ dump_kmeminfo(void)
> symbol_exists("_totalram_pages")) {
> totalram_pages = vt->totalram_pages ?
> vt->totalram_pages : get_totalram;
> - } else
> - totalram_pages = get_totalram;
> + } else {
> + flags |= GET_TOTALRAM_PAGES;
> + totalram_pages = 0;
> + }
>
> + /*
> + * If the caller wants shared pages or if we are missing
important data
> + * (ie. slab or totalram) then go through the slow
dump_mem_map() path.
> + */
> + if (flags) {
> + BZERO(&meminfo, sizeof(struct meminfo));
> + meminfo.flags = GET_ALL;
> + dump_mem_map(&meminfo);
> + /* Update the missing information */
> + if (flags & GET_SLAB_PAGES) {
> + get_slabs = meminfo.get_slabs;
> + }
> + if (flags & GET_TOTALRAM_PAGES) {
> + get_totalram = meminfo.get_totalram;
> + totalram_pages = get_totalram;
> + }
> + shared_pages = meminfo.get_shared;
> + get_buffers = meminfo.get_buffers;
> + }
> +
> + fprintf(fp, "%s", kmeminfo_hdr);
> fprintf(fp, "%13s %7ld %11s ----\n", "TOTAL
MEM",
> totalram_pages, pages_to_size(totalram_pages, buf));
>
> @@ -8613,9 +8645,11 @@ dump_kmeminfo(void)
> * differently than the kernel -- it just tallies the
non-reserved
> * pages that have a count of greater than 1.
> */
> - pct = (shared_pages * 100)/totalram_pages;
> - fprintf(fp, "%13s %7ld %11s %3ld%% of TOTAL MEM\n",
> - "SHARED", shared_pages, pages_to_size(shared_pages,
buf), pct);
> + if (flags & GET_SHARED_PAGES) {
> + pct = (shared_pages * 100)/totalram_pages;
> + fprintf(fp, "%13s %7ld %11s %3ld%% of TOTAL MEM\n",
> + "SHARED", shared_pages,
pages_to_size(shared_pages, buf), pct);
> + }
>
> subtract_buffer_pages = 0;
> if (symbol_exists("buffermem_pages")) {