-----Original Message-----
1.) The vmcore file may be very big.
For example, I have a vmcore file that is over 23G;
the panicked kernel had 767.6G of memory,
and its max_sect_len is 4468736.
The current code takes too much time in the following loop:
..............................................
for (i = 1; i < max_sect_len + 1; i++) {
dd->valid_pages[i] = dd->valid_pages[i - 1];
for (j = 0; j < BITMAP_SECT_LEN; j++, pfn++)
if (page_is_dumpable(pfn))
dd->valid_pages[i]++;
..............................................
In my case, it takes about 56 seconds to finish the
big loop.
This patch moves the hweightXX macros to defs.h,
and uses hweight64 to optimize the loop.
For my vmcore, the loop now takes only about one second.
2.) Test results:
# cat ./commands.txt
quit
Before:
#echo 3 > /proc/sys/vm/drop_caches;
#time ./crash -i ./commands.txt /root/t/vmlinux /root/t/vmcore > /dev/null 2>&1
............................
real 1m54.259s
user 1m12.494s
sys 0m3.857s
............................
After this patch:
#echo 3 > /proc/sys/vm/drop_caches;
#time ./crash -i ./commands.txt /root/t/vmlinux /root/t/vmcore > /dev/null 2>&1
............................
real 0m55.217s
user 0m15.114s
sys 0m3.560s
............................
Thank you for the improvement!
As far as I tested on x86_64, it did not give such a big gain, but judging by
the user time, it will on arm64. Lianbo, can you reproduce this on arm64?
With a 192GB x86_64 dumpfile, it is slightly improved:
$ time echo quit | ./crash vmlinux dump >/dev/null
real 0m5.632s
user 0m5.545s
sys 0m0.185s
$ time echo quit | ./crash vmlinux dump >/dev/null
real 0m5.547s
user 0m5.477s
sys 0m0.170s
Signed-off-by: Huang Shijie <shijie(a)os.amperecomputing.com>
---
defs.h | 20 ++++++++++++++++++++
diskdump.c | 12 +++++++++---
sbitmap.c | 19 -------------------
3 files changed, 29 insertions(+), 22 deletions(-)
diff --git a/defs.h b/defs.h
index 81ac049..1e8360d 100644
--- a/defs.h
+++ b/defs.h
@@ -4531,6 +4531,26 @@ struct machine_specific {
#define NUM_IN_BITMAP(bitmap, x) (bitmap[(x)/BITS_PER_LONG] & NUM_TO_BIT(x))
#define SET_BIT(bitmap, x) (bitmap[(x)/BITS_PER_LONG] |= NUM_TO_BIT(x))
+static inline unsigned int __const_hweight8(unsigned long w)
+{
+ return
+ (!!((w) & (1ULL << 0))) +
+ (!!((w) & (1ULL << 1))) +
+ (!!((w) & (1ULL << 2))) +
+ (!!((w) & (1ULL << 3))) +
+ (!!((w) & (1ULL << 4))) +
+ (!!((w) & (1ULL << 5))) +
+ (!!((w) & (1ULL << 6))) +
+ (!!((w) & (1ULL << 7)));
+}
+
+#define __const_hweight16(w) (__const_hweight8(w) + __const_hweight8((w) >> 8))
+#define __const_hweight32(w) (__const_hweight16(w) + __const_hweight16((w) >> 16))
+#define __const_hweight64(w) (__const_hweight32(w) + __const_hweight32((w) >> 32))
+
+#define hweight32(w) __const_hweight32(w)
+#define hweight64(w) __const_hweight64(w)
+
/*
* precision lengths for fprintf
*/
diff --git a/diskdump.c b/diskdump.c
index d567427..d30db9d 100644
--- a/diskdump.c
+++ b/diskdump.c
@@ -547,6 +547,7 @@ read_dump_header(char *file)
ulong pfn;
int i, j, max_sect_len;
int is_split = 0;
+ u64 tmp, *bitmap;
$ make warn
...
cc -c -g -DX86_64 -DLZO -DSNAPPY -DZSTD -DGDB_10_2 diskdump.c -Wall -O2 -Wstrict-prototypes -Wmissing-prototypes -fstack-protector -Wformat-security
diskdump.c: In function ‘read_dump_header’:
diskdump.c:550:2: error: unknown type name ‘u64’
u64 tmp, *bitmap;
^~~
diskdump.c:905:12: error: ‘u64’ undeclared (first use in this function); did you mean ‘a64l’?
bitmap = (u64 *)dd->dumpable_bitmap;
^~~
a64l
It looks like u64 is defined only on arm and arm64; please use ulonglong,
which is commonly used in crash.
Otherwise, looks good to me.
Thanks,
Kazu
if (block_size < 0)
return FALSE;
@@ -899,11 +900,16 @@ restart:
dd->valid_pages = calloc(sizeof(ulong), max_sect_len + 1);
dd->max_sect_len = max_sect_len;
+
+ /* It is safe to convert it to (u64*). */
+ bitmap = (u64 *)dd->dumpable_bitmap;
for (i = 1; i < max_sect_len + 1; i++) {
dd->valid_pages[i] = dd->valid_pages[i - 1];
- for (j = 0; j < BITMAP_SECT_LEN; j++, pfn++)
- if (page_is_dumpable(pfn))
- dd->valid_pages[i]++;
+ for (j = 0; j < BITMAP_SECT_LEN; j += 64, pfn += 64) {
+ tmp = bitmap[pfn >> 6];
+ if (tmp)
+ dd->valid_pages[i] += hweight64(tmp);
+ }
}
return TRUE;
diff --git a/sbitmap.c b/sbitmap.c
index 286259f..96a61e6 100644
--- a/sbitmap.c
+++ b/sbitmap.c
@@ -49,25 +49,6 @@ struct sbitmapq_data {
static uint sb_flags = 0;
-static inline unsigned int __const_hweight8(unsigned long w)
-{
- return
- (!!((w) & (1ULL << 0))) +
- (!!((w) & (1ULL << 1))) +
- (!!((w) & (1ULL << 2))) +
- (!!((w) & (1ULL << 3))) +
- (!!((w) & (1ULL << 4))) +
- (!!((w) & (1ULL << 5))) +
- (!!((w) & (1ULL << 6))) +
- (!!((w) & (1ULL << 7)));
-}
-
-#define __const_hweight16(w) (__const_hweight8(w) + __const_hweight8((w) >> 8))
-#define __const_hweight32(w) (__const_hweight16(w) + __const_hweight16((w) >> 16))
-#define __const_hweight64(w) (__const_hweight32(w) + __const_hweight32((w) >> 32))
-
-#define hweight32(w) __const_hweight32(w)
-#define hweight64(w) __const_hweight64(w)
#define BIT(nr) (1UL << (nr))
--
2.30.2