HI Lianbo,
On Mon, Apr 11, 2022 at 11:40:22AM +0800, lijiang wrote:
 Hi, Shijie
 Thank you for the update.
 
 On Wed, Mar 30, 2022 at 8:00 PM <crash-utility-request(a)redhat.com> wrote:
 
 > Date: Wed, 30 Mar 2022 19:03:23 +0000
 > From: Huang Shijie <shijie(a)os.amperecomputing.com>
 > To: k-hagio-ab(a)nec.com, lijiang(a)redhat.com
 > Cc: patches(a)amperecomputing.com, zwang(a)amperecomputing.com,
 >         darren(a)os.amperecomputing.com, crash-utility(a)redhat.com, Huang
 > Shijie
 >         <shijie(a)os.amperecomputing.com>
 > Subject: [Crash-utility] [PATCH v2] diskdump: Optimize the boot time
 > Message-ID: <20220330190323.2420144-1-shijie(a)os.amperecomputing.com>
 > Content-Type: text/plain
 >
 > 1.) The vmcore file maybe very big.
 >
 >     For example, I have a vmcore file which is over 23G,
 >     and the panic kernel had 767.6G memory,
 >     its max_sect_len is 4468736.
 >
 >     Current code costs too much time to do the following loop:
 >     ..............................................
 >         for (i = 1; i < max_sect_len + 1; i++) {
 >                 dd->valid_pages[i] = dd->valid_pages[i - 1];
 >                 for (j = 0; j < BITMAP_SECT_LEN; j++, pfn++)
 >                         if (page_is_dumpable(pfn))
 >                                 dd->valid_pages[i]++;
 >     ..............................................
 >
 >     For my case, it costs about 56 seconds to finish the
 >     big loop.
 >
 >     This patch moves the hweightXX macros to defs.h,
 >     and uses hweight64 to optimize the loop.
 >
 >     For my vmcore, the loop only costs about one second now.
 >
 > 2.) Tests result:
 >   # cat ./commands.txt
 >       quit
 >
 >  Before:
 >
 >   #echo  3 > /proc/sys/vm/drop_caches;
 >   #time ./crash -i ./commands.txt /root/t/vmlinux /root/t/vmcore >
 > /dev/null 2>&1
 >    ............................
 >         real    1m54.259s
 >         user    1m12.494s
 >         sys     0m3.857s
 >    ............................
 >
 >  After this patch:
 >
 >   #echo  3 > /proc/sys/vm/drop_caches;
 >   #time ./crash -i ./commands.txt /root/t/vmlinux /root/t/vmcore >
 > /dev/null 2>&1
 >    ............................
 >         real    0m55.217s
 >         user    0m15.114s
 >         sys     0m3.560s
 >    ............................
 >
 > Signed-off-by: Huang Shijie <shijie(a)os.amperecomputing.com>
 > ---
 > v1 --> v2:
 >         1.) change u64 to ulonglong.
 >         2.) compile this patch in x86_64.
 >
 > ---
 >  defs.h     | 20 ++++++++++++++++++++
 >  diskdump.c | 12 +++++++++---
 >  sbitmap.c  | 19 -------------------
 >  3 files changed, 29 insertions(+), 22 deletions(-)
 >
 > diff --git a/defs.h b/defs.h
 > index 81ac049..1e8360d 100644
 > --- a/defs.h
 > +++ b/defs.h
 > @@ -4531,6 +4531,26 @@ struct machine_specific {
 >  #define NUM_IN_BITMAP(bitmap, x) (bitmap[(x)/BITS_PER_LONG] &
 > NUM_TO_BIT(x))
 >  #define SET_BIT(bitmap, x) (bitmap[(x)/BITS_PER_LONG] |= NUM_TO_BIT(x))
 >
 > +static inline unsigned int __const_hweight8(unsigned long w)
 > +{
 > +       return
 > +               (!!((w) & (1ULL << 0))) +
 > +               (!!((w) & (1ULL << 1))) +
 > +               (!!((w) & (1ULL << 2))) +
 > +               (!!((w) & (1ULL << 3))) +
 > +               (!!((w) & (1ULL << 4))) +
 > +               (!!((w) & (1ULL << 5))) +
 > +               (!!((w) & (1ULL << 6))) +
 > +               (!!((w) & (1ULL << 7)));
 > +}
 > +
 > +#define __const_hweight16(w) (__const_hweight8(w)  +
 > __const_hweight8((w)  >> 8))
 > +#define __const_hweight32(w) (__const_hweight16(w) +
 > __const_hweight16((w) >> 16))
 > +#define __const_hweight64(w) (__const_hweight32(w) +
 > __const_hweight32((w) >> 32))
 > +
 > +#define hweight32(w) __const_hweight32(w)
 > +#define hweight64(w) __const_hweight64(w)
 > +
 >
 
 No need to move the above code from sbitmap.c to defs.h, a simple way is to
 implement a new function in sbitmap.c and add its definition in defs.h,
 that will
 be easy to call it in diskdump.c. For example:
 
 diff --git a/defs.h b/defs.h
 index 81ac0498dac7..0c5115e71f1c 100644
 --- a/defs.h
 +++ b/defs.h
 @@ -5894,6 +5894,7 @@ typedef bool (*sbitmap_for_each_fn)(unsigned int idx,
 void *p);
  void sbitmap_for_each_set(const struct sbitmap_context *sc,
   sbitmap_for_each_fn fn, void *data);
  void sbitmap_context_load(ulong addr, struct sbitmap_context *sc);
 +unsigned long get_hweight64(unsigned long w);
 
  /* sbitmap_queue helpers */
  typedef bool (*sbitmapq_for_each_fn)(unsigned int idx, ulong addr, void
 *p);
 diff --git a/sbitmap.c b/sbitmap.c
 index 286259f71d64..628cc00c0b6b 100644
 --- a/sbitmap.c
 +++ b/sbitmap.c
 @@ -71,6 +71,11 @@ static inline unsigned int __const_hweight8(unsigned
 long w)
 
  #define BIT(nr) (1UL << (nr))
 
 +unsigned long get_hweight64(unsigned long w)
 +{
 + return hweight64(w);
 +}
 +
  static inline unsigned long min(unsigned long a, unsigned long b)
  {
   return (a < b) ? a : b;
 
 //diskdump.c
 ...
 dd->valid_pages[i] += get_hweight64(tmp);
 ...
 
 How about the above suggestions? Shijie and Kazu. 
I moved these functions(hweightXX) to defs.h, because I thought they maybe used
by other code in future.
But anyway, it depends on you and Kazu to make the decision.
Thanks
Huang Shijie