Hi, Shijie
Thank you for the update.

On Wed, Mar 30, 2022 at 8:00 PM <crash-utility-request@redhat.com> wrote:
Date: Wed, 30 Mar 2022 19:03:23 +0000
From: Huang Shijie <shijie@os.amperecomputing.com>
To: k-hagio-ab@nec.com, lijiang@redhat.com
Cc: patches@amperecomputing.com, zwang@amperecomputing.com,
        darren@os.amperecomputing.com, crash-utility@redhat.com, Huang Shijie
        <shijie@os.amperecomputing.com>
Subject: [Crash-utility] [PATCH v2] diskdump: Optimize the boot time
Message-ID: <20220330190323.2420144-1-shijie@os.amperecomputing.com>
Content-Type: text/plain

1.) The vmcore file maybe very big.

    For example, I have a vmcore file which is over 23G,
    and the panic kernel had 767.6G memory,
    its max_sect_len is 4468736.

    Current code costs too much time to do the following loop:
    ..............................................
        for (i = 1; i < max_sect_len + 1; i++) {
                dd->valid_pages[i] = dd->valid_pages[i - 1];
                for (j = 0; j < BITMAP_SECT_LEN; j++, pfn++)
                        if (page_is_dumpable(pfn))
                                dd->valid_pages[i]++;
    ..............................................

    For my case, it costs about 56 seconds to finish the
    big loop.

    This patch moves the hweightXX macros to defs.h,
    and uses hweight64 to optimize the loop.

    For my vmcore, the loop only costs about one second now.

2.) Tests result:
  # cat ./commands.txt
      quit

 Before:

  #echo  3 > /proc/sys/vm/drop_caches;
  #time ./crash -i ./commands.txt /root/t/vmlinux /root/t/vmcore > /dev/null 2>&1
   ............................
        real    1m54.259s
        user    1m12.494s
        sys     0m3.857s
   ............................

 After this patch:

  #echo  3 > /proc/sys/vm/drop_caches;
  #time ./crash -i ./commands.txt /root/t/vmlinux /root/t/vmcore > /dev/null 2>&1
   ............................
        real    0m55.217s
        user    0m15.114s
        sys     0m3.560s
   ............................

Signed-off-by: Huang Shijie <shijie@os.amperecomputing.com>
---
v1 --> v2:
        1.) change u64 to ulonglong.
        2.) compile this patch in x86_64.

---
 defs.h     | 20 ++++++++++++++++++++
 diskdump.c | 12 +++++++++---
 sbitmap.c  | 19 -------------------
 3 files changed, 29 insertions(+), 22 deletions(-)

diff --git a/defs.h b/defs.h
index 81ac049..1e8360d 100644
--- a/defs.h
+++ b/defs.h
@@ -4531,6 +4531,26 @@ struct machine_specific {
 #define NUM_IN_BITMAP(bitmap, x) (bitmap[(x)/BITS_PER_LONG] & NUM_TO_BIT(x))
 #define SET_BIT(bitmap, x) (bitmap[(x)/BITS_PER_LONG] |= NUM_TO_BIT(x))

+static inline unsigned int __const_hweight8(unsigned long w)
+{
+       return
+               (!!((w) & (1ULL << 0))) +
+               (!!((w) & (1ULL << 1))) +
+               (!!((w) & (1ULL << 2))) +
+               (!!((w) & (1ULL << 3))) +
+               (!!((w) & (1ULL << 4))) +
+               (!!((w) & (1ULL << 5))) +
+               (!!((w) & (1ULL << 6))) +
+               (!!((w) & (1ULL << 7)));
+}
+
+#define __const_hweight16(w) (__const_hweight8(w)  + __const_hweight8((w)  >> 8))
+#define __const_hweight32(w) (__const_hweight16(w) + __const_hweight16((w) >> 16))
+#define __const_hweight64(w) (__const_hweight32(w) + __const_hweight32((w) >> 32))
+
+#define hweight32(w) __const_hweight32(w)
+#define hweight64(w) __const_hweight64(w)
+

No need to move the above code from sbitmap.c to defs.h, a simple way is to implement a new function in sbitmap.c and add its definition in defs.h, that will
be easy to call it in diskdump.c. For example:

diff --git a/defs.h b/defs.h
index 81ac0498dac7..0c5115e71f1c 100644
--- a/defs.h
+++ b/defs.h
@@ -5894,6 +5894,7 @@ typedef bool (*sbitmap_for_each_fn)(unsigned int idx, void *p);
 void sbitmap_for_each_set(const struct sbitmap_context *sc,
  sbitmap_for_each_fn fn, void *data);
 void sbitmap_context_load(ulong addr, struct sbitmap_context *sc);
+unsigned long get_hweight64(unsigned long w);
 
 /* sbitmap_queue helpers */
 typedef bool (*sbitmapq_for_each_fn)(unsigned int idx, ulong addr, void *p);
diff --git a/sbitmap.c b/sbitmap.c
index 286259f71d64..628cc00c0b6b 100644
--- a/sbitmap.c
+++ b/sbitmap.c
@@ -71,6 +71,11 @@ static inline unsigned int __const_hweight8(unsigned long w)
 
 #define BIT(nr) (1UL << (nr))
 
+unsigned long get_hweight64(unsigned long w)
+{
+ return hweight64(w);
+}
+
 static inline unsigned long min(unsigned long a, unsigned long b)
 {
  return (a < b) ? a : b;

//diskdump.c
...
dd->valid_pages[i] += get_hweight64(tmp);
...

How about the above suggestions? Shijie and Kazu.

Thanks.
Lianbo

 /*
  *  precision lengths for fprintf
  */
diff --git a/diskdump.c b/diskdump.c
index d567427..ff1e9a3 100644
--- a/diskdump.c
+++ b/diskdump.c
@@ -547,6 +547,7 @@ read_dump_header(char *file)
        ulong pfn;
        int i, j, max_sect_len;
        int is_split = 0;
+       ulonglong tmp, *bitmap;

        if (block_size < 0)
                return FALSE;
@@ -899,11 +900,16 @@ restart:

        dd->valid_pages = calloc(sizeof(ulong), max_sect_len + 1);
        dd->max_sect_len = max_sect_len;
+
+       /* It is safe to convert it to (ulonglong *). */
+       bitmap = (ulonglong *)dd->dumpable_bitmap;
        for (i = 1; i < max_sect_len + 1; i++) {
                dd->valid_pages[i] = dd->valid_pages[i - 1];
-               for (j = 0; j < BITMAP_SECT_LEN; j++, pfn++)
-                       if (page_is_dumpable(pfn))
-                               dd->valid_pages[i]++;
+               for (j = 0; j < BITMAP_SECT_LEN; j += 64, pfn += 64) {
+                       tmp = bitmap[pfn >> 6];
+                       if (tmp)
+                               dd->valid_pages[i] += hweight64(tmp);
+               }
        }

         return TRUE;
diff --git a/sbitmap.c b/sbitmap.c
index 286259f..96a61e6 100644
--- a/sbitmap.c
+++ b/sbitmap.c
@@ -49,25 +49,6 @@ struct sbitmapq_data {

 static uint sb_flags = 0;

-static inline unsigned int __const_hweight8(unsigned long w)
-{
-       return
-               (!!((w) & (1ULL << 0))) +
-               (!!((w) & (1ULL << 1))) +
-               (!!((w) & (1ULL << 2))) +
-               (!!((w) & (1ULL << 3))) +
-               (!!((w) & (1ULL << 4))) +
-               (!!((w) & (1ULL << 5))) +
-               (!!((w) & (1ULL << 6))) +
-               (!!((w) & (1ULL << 7)));
-}
-
-#define __const_hweight16(w) (__const_hweight8(w)  + __const_hweight8((w)  >> 8))
-#define __const_hweight32(w) (__const_hweight16(w) + __const_hweight16((w) >> 16))
-#define __const_hweight64(w) (__const_hweight32(w) + __const_hweight32((w) >> 32))
-
-#define hweight32(w) __const_hweight32(w)
-#define hweight64(w) __const_hweight64(w)

 #define BIT(nr)                        (1UL << (nr))

--
2.30.2



------------------------------

Subject: Digest Footer

--
Crash-utility mailing list
Crash-utility@redhat.com
https://listman.redhat.com/mailman/listinfo/crash-utility


------------------------------

End of Crash-utility Digest, Vol 198, Issue 45
**********************************************