Date: Wed, 2 Jun 2021 18:56:50 +0300
From: Roman Bolshakov <r.bolshakov(a)yadro.com>
To: <crash-utility(a)redhat.com>
Cc: linux(a)yadro.com
Subject: [Crash-utility] [PATCH 1/2] diskdump: Fail readmem() early if
dump is incomplete
Message-ID: <20210602155650.86202-2-r.bolshakov(a)yadro.com>
Content-Type: text/plain
Hi, Roman
Thank you for the improvement.
kdump format description [1] says:
[...] zero page has its own offset not equal 0. So when reading page
from incomplete core, only the page lost by ENOSPACE errors has 0 in its
corresponding page descriptor's member offset.
crash has special treatment for page descriptors with zero offset only if
DUMP_DH_COMPRESSED_INCOMPLETE is set in dump header. However,
makedumpfile places the flag after ENOSPC is hit and only if dump header
modification went without errors.
In case if crashkernel environment was terminated early (e.g. by BMC) or
some other reason, DUMP_DH_COMPRESSED_INCOMPLETE won't be set on the
dump header. Then cache_page() would be performed on pages with
pd.offset == 0 and due to pd.size == 0 it'll skip read into
compressed_page and then non related pre-existing contents of
compressed_page will copied into page cache for the non-present page.
Ultimately, it'll lead to a cryptic failure, like:
crash: invalid kernel virtual address: 72288cacacf427f8 [...]
The failure would be a bit cleaner if crash explicitly fails on the page
that is an outcome of incomplete dump:
crash: page incomplete: kernel virtual address: c000003fff9d17e8 [...]
Debugging level 8 would also produce exact offset from data_offset to
print descriptor value with ease:
read_diskdump/cache_page: descriptor with zero offset found at paddr/pfn/pos:
3fff9d0000/3fff9d/743dd
That helps in inspecting broken descriptor with hexdump or similar tools:
hexdump -s (data_offset + pos * 0x18) -n 0x18
1.
https://github.com/makedumpfile/makedumpfile/blob/master/IMPLEMENTATION
Signed-off-by: Roman Bolshakov <r.bolshakov(a)yadro.com>
---
defs.h | 1 +
diskdump.c | 13 ++++++++++---
memory.c | 7 +++++++
3 files changed, 18 insertions(+), 3 deletions(-)
diff --git a/defs.h b/defs.h
index 396d61a..8418da2 100644
--- a/defs.h
+++ b/defs.h
@@ -361,6 +361,7 @@ struct number_option {
#define READ_ERROR (-2)
#define WRITE_ERROR (-3)
#define PAGE_EXCLUDED (-4)
+#define PAGE_INCOMPLETE (-5)
#define RESTART() (longjmp(pc->main_loop_env, 1))
#define RESUME_FOREACH() (longjmp(pc->foreach_loop_env, 1))
diff --git a/diskdump.c b/diskdump.c
index 3effb52..c05f1ec 100644
--- a/diskdump.c
+++ b/diskdump.c
@@ -1146,7 +1146,7 @@ cache_page(physaddr_t paddr)
if (FLAT_FORMAT()) {
if (!read_flattened_format(dd->dfd, pd.offset, dd->compressed_page,
pd.size))
return READ_ERROR;
- } else if (is_incomplete_dump() && (0 == pd.offset)) {
+ } else if (0 == pd.offset) {
/*
* If the incomplete flag has been set in the header,
* first check whether zero_excluded has been set.
Does it still make sense to check the incomplete flag for the
zero_excluded case? Otherwise, the above code comment will be
outdated.
+ } else if (0 == pd.offset) {
...
- if (*diskdump_flags & ZERO_EXCLUDED) {
+ if (is_incomplete_dump() && (*diskdump_flags & ZERO_EXCLUDED))
{
Thanks.
Lianbo
@@ -1158,8 +1158,15 @@ cache_page(physaddr_t paddr)
"paddr/pfn: %llx/%lx\n",
(ulonglong)paddr, pfn);
memset(dd->compressed_page, 0, dd->block_size);
- } else
- return READ_ERROR;
+ } else {
+ if (CRASHDEBUG(8))
+ fprintf(fp,
+ "read_diskdump/cache_page: "
+ "descriptor with zero offset found at
"
+ "paddr/pfn/pos: %llx/%lx/%lx\n",
+ (ulonglong)paddr, pfn, desc_pos);
+ return PAGE_INCOMPLETE;
+ }
} else {
if (lseek(dd->dfd, pd.offset, SEEK_SET) == failed)
return SEEK_ERROR;
diff --git a/memory.c b/memory.c
index 8c6bbe4..5d7eee6 100644
--- a/memory.c
+++ b/memory.c
@@ -2211,6 +2211,7 @@ accessible(ulong kva)
#define READ_ERRMSG "read error: %s address: %llx type:
\"%s\"\n"
#define WRITE_ERRMSG "write error: %s address: %llx type:
\"%s\"\n"
#define PAGE_EXCLUDED_ERRMSG "page excluded: %s address: %llx type:
\"%s\"\n"
+#define PAGE_INCOMPLETE_ERRMSG "page incomplete: %s address: %llx type:
\"%s\"\n"
#define RETURN_ON_PARTIAL_READ() \
if ((error_handle & RETURN_PARTIAL) && (size < orig_size)) {
\
@@ -2376,6 +2377,12 @@ readmem(ulonglong addr, int memtype, void *buffer, long size,
error(INFO, PAGE_EXCLUDED_ERRMSG, memtype_string(memtype,
0), addr, type);
goto readmem_error;
+ case PAGE_INCOMPLETE:
+ RETURN_ON_PARTIAL_READ();
+ if (PRINT_ERROR_MESSAGE)
+ error(INFO, PAGE_INCOMPLETE_ERRMSG,
memtype_string(memtype, 0), addr, type);
+ goto readmem_error;
+
default:
break;
}
--
2.31.1