The "kmem -i" command is extremely slow (appears to hang) on dumps from large
memory systems.
For example, on 120GB crash dump from a high-end server with 32TB of RAM (ie. 8Giga
Pages),
the "kmem -i" command is taking over 50 minutes to execute on a DL380 Gen10. To
report basic
general memory usage figures, we should only be reading global counters, without having to
walk
the full flat/sparse mem map page table. Hence, dump_kmeminfo() should first be reading
globals,
and then only call dump_mem_map() if important information (ie. slabs or total ram) is
missing.
Signed-off-by: Georges Aureau <georges.aureau(a)hpe.com>
--
help.c | 4 +++-
memory.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++-----------------
2 files changed, 56 insertions(+), 20 deletions(-)
diff --git a/help.c b/help.c
index 4c30047..8bd74ac 100644
--- a/help.c
+++ b/help.c
@@ -6825,7 +6825,9 @@ char *help_kmem[] = {
" -F same as -f, but also dumps all pages linked to that header.",
" -c walks through the page_hash_table and verifies page_cache_size.",
" -C same as -c, but also dumps all pages in the page_hash_table.",
-" -i displays general memory usage information",
+" -i displays general memory usage information.",
+" Note: SHARED is no longer printed (too slow on large memory
systems)",
+" unless specifying \"-i=shared\".",
" -v displays the mapped virtual memory regions allocated by
vmalloc().",
" -V displays the kernel vm_stat table if it exists, or in more
recent",
" kernels, the vm_zone_stat, vm_node_stat and vm_numa_stat
tables,",
diff --git a/memory.c b/memory.c
index 8c01ed0..ea00ef1 100644
--- a/memory.c
+++ b/memory.c
@@ -237,7 +237,7 @@ static void dump_vmlist(struct meminfo *);
static void dump_vmap_area(struct meminfo *);
static int get_vmap_area_list_from_nodes(ulong **);
static int dump_page_lists(struct meminfo *);
-static void dump_kmeminfo(void);
+static void dump_kmeminfo(struct meminfo *);
static int page_to_phys(ulong, physaddr_t *);
static void display_memory(ulonglong, long, ulong, int, void *);
static char *show_opt_string(struct searchinfo *);
@@ -5107,7 +5107,7 @@ cmd_kmem(void)
BZERO(&value[0], sizeof(ulonglong)*MAXARGS);
pc->curcmd_flags &= ~HEADER_PRINTED;
- while ((c = getopt(argcnt, args, "gI:sS::rFfm:pvczCinl:L:PVoh")) !=
EOF) {
+ while ((c = getopt(argcnt, args, "gI:sS::rFfm:pvczCi::nl:L:PVoh")) !=
EOF) {
switch(c)
{
case 'V':
@@ -5124,6 +5124,9 @@ cmd_kmem(void)
case 'i':
iflag = 1;
+ if (optarg && strcmp(optarg, "=shared") == 0) {
+ meminfo.flags = GET_SHARED_PAGES;
+ }
break;
case 'h':
@@ -5410,7 +5413,7 @@ cmd_kmem(void)
}
if (iflag == 1)
- dump_kmeminfo();
+ dump_kmeminfo(&meminfo);
if (pflag == 1)
dump_mem_map(&meminfo);
@@ -8515,7 +8518,7 @@ bailout:
char *kmeminfo_hdr = " PAGES TOTAL PERCENTAGE\n";
static void
-dump_kmeminfo(void)
+dump_kmeminfo(struct meminfo *mi)
{
int i, len;
ulong totalram_pages;
@@ -8546,18 +8549,23 @@ dump_kmeminfo(void)
ulong get_buffers;
ulong get_slabs;
char buf[BUFSIZE];
+ ulong flags;
-
- BZERO(&meminfo, sizeof(struct meminfo));
- meminfo.flags = GET_ALL;
- dump_mem_map(&meminfo);
- get_totalram = meminfo.get_totalram;
- shared_pages = meminfo.get_shared;
- get_buffers = meminfo.get_buffers;
- get_slabs = meminfo.get_slabs;
+ /*
+ * By default, we will no longer call dump_mem_map() as this is too
+ * slow for large memory systems. If we have to call it (eg. missing
+ * important information such as slabs or total ram), we will also
+ * collect shared pages. Otherwise, we won't print shared pages unless
+ * the caller explicitly requested shared pages ("kmem -i=shared").
+ */
+ flags = mi->flags;
+ shared_pages = 0;
+ get_totalram = 0;
+ get_buffers = 0;
+ get_slabs = 0;
/*
- * If vm_stat array exists, override page search info.
+ * If vm_stat array does not exists, then set mem map flag.
*/
if (vm_stat_init()) {
if (dump_vm_stat("NR_SLAB", &nr_slab, 0))
@@ -8572,9 +8580,10 @@ dump_kmeminfo(void)
if (dump_vm_stat("NR_SLAB_UNRECLAIMABLE_B", &nr_slab, 0))
get_slabs += nr_slab;
}
+ } else {
+ flags |= GET_SLAB_PAGES;
}
- fprintf(fp, "%s", kmeminfo_hdr);
/*
* Get total RAM based upon how the various versions of si_meminfo()
* have done it, latest to earliest:
@@ -8586,9 +8595,32 @@ dump_kmeminfo(void)
symbol_exists("_totalram_pages")) {
totalram_pages = vt->totalram_pages ?
vt->totalram_pages : get_totalram;
- } else
- totalram_pages = get_totalram;
+ } else {
+ flags |= GET_TOTALRAM_PAGES;
+ totalram_pages = 0;
+ }
+ /*
+ * If the caller wants shared pages or if we are missing important data
+ * (ie. slab or totalram) then go through the slow dump_mem_map() path.
+ */
+ if (flags) {
+ BZERO(&meminfo, sizeof(struct meminfo));
+ meminfo.flags = GET_ALL;
+ dump_mem_map(&meminfo);
+ /* Update the missing information */
+ if (flags & GET_SLAB_PAGES) {
+ get_slabs = meminfo.get_slabs;
+ }
+ if (flags & GET_TOTALRAM_PAGES) {
+ get_totalram = meminfo.get_totalram;
+ totalram_pages = get_totalram;
+ }
+ shared_pages = meminfo.get_shared;
+ get_buffers = meminfo.get_buffers;
+ }
+
+ fprintf(fp, "%s", kmeminfo_hdr);
fprintf(fp, "%13s %7ld %11s ----\n", "TOTAL MEM",
totalram_pages, pages_to_size(totalram_pages, buf));
@@ -8613,9 +8645,11 @@ dump_kmeminfo(void)
* differently than the kernel -- it just tallies the non-reserved
* pages that have a count of greater than 1.
*/
- pct = (shared_pages * 100)/totalram_pages;
- fprintf(fp, "%13s %7ld %11s %3ld%% of TOTAL MEM\n",
- "SHARED", shared_pages, pages_to_size(shared_pages, buf), pct);
+ if (flags & GET_SHARED_PAGES) {
+ pct = (shared_pages * 100)/totalram_pages;
+ fprintf(fp, "%13s %7ld %11s %3ld%% of TOTAL MEM\n",
+ "SHARED", shared_pages, pages_to_size(shared_pages, buf), pct);
+ }
subtract_buffer_pages = 0;
if (symbol_exists("buffermem_pages")) {