-----Original Message-----
Hi, Roman
Thank you for the improvement.
> kdump format description [1] says:
>
> [...] zero page has its own offset not equal 0. So when reading page
> from incomplete core, only the page lost by ENOSPACE errors has 0 in its
> corresponding page descriptor's member offset.
>
> crash has special treatment for page descriptors with zero offset only if
> DUMP_DH_COMPRESSED_INCOMPLETE is set in dump header. However,
> makedumpfile places the flag after ENOSPC is hit and only if dump header
> modification went without errors.
>
> In case if crashkernel environment was terminated early (e.g. by BMC) or
> some other reason, DUMP_DH_COMPRESSED_INCOMPLETE won't be set on the
> dump header. Then cache_page() would be performed on pages with
> pd.offset == 0 and due to pd.size == 0 it'll skip read into
> compressed_page and then non related pre-existing contents of
> compressed_page will copied into page cache for the non-present page.
>
> Ultimately, it'll lead to a cryptic failure, like:
>
> crash: invalid kernel virtual address: 72288cacacf427f8 [...]
>
> The failure would be a bit cleaner if crash explicitly fails on the page
> that is an outcome of incomplete dump:
>
> crash: page incomplete: kernel virtual address: c000003fff9d17e8 [...]
>
> Debugging level 8 would also produce exact offset from data_offset to
> print descriptor value with ease:
>
> read_diskdump/cache_page: descriptor with zero offset found at paddr/pfn/pos:
3fff9d0000/3fff9d/743dd
>
> That helps in inspecting broken descriptor with hexdump or similar tools:
>
> hexdump -s (data_offset + pos * 0x18) -n 0x18
Looks good.
>
> 1.
https://github.com/makedumpfile/makedumpfile/blob/master/IMPLEMENTATION
>
> Signed-off-by: Roman Bolshakov <r.bolshakov(a)yadro.com>
> ---
> defs.h | 1 +
> diskdump.c | 13 ++++++++++---
> memory.c | 7 +++++++
> 3 files changed, 18 insertions(+), 3 deletions(-)
>
> diff --git a/defs.h b/defs.h
> index 396d61a..8418da2 100644
> --- a/defs.h
> +++ b/defs.h
> @@ -361,6 +361,7 @@ struct number_option {
> #define READ_ERROR (-2)
> #define WRITE_ERROR (-3)
> #define PAGE_EXCLUDED (-4)
> +#define PAGE_INCOMPLETE (-5)
>
> #define RESTART() (longjmp(pc->main_loop_env, 1))
> #define RESUME_FOREACH() (longjmp(pc->foreach_loop_env, 1))
> diff --git a/diskdump.c b/diskdump.c
> index 3effb52..c05f1ec 100644
> --- a/diskdump.c
> +++ b/diskdump.c
> @@ -1146,7 +1146,7 @@ cache_page(physaddr_t paddr)
> if (FLAT_FORMAT()) {
> if (!read_flattened_format(dd->dfd, pd.offset,
dd->compressed_page, pd.size))
> return READ_ERROR;
> - } else if (is_incomplete_dump() && (0 == pd.offset)) {
> + } else if (0 == pd.offset) {
> /*
> * If the incomplete flag has been set in the header,
> * first check whether zero_excluded has been set.
Does it still make sense to check the incomplete flag for the
zero_excluded case? Otherwise, the above code comment will be
outdated.
+ } else if (0 == pd.offset) {
...
- if (*diskdump_flags & ZERO_EXCLUDED) {
+ if (is_incomplete_dump() && (*diskdump_flags & ZERO_EXCLUDED))
{
Given the fact that makedumpfile cannot mark a dump as incomplete every time
it fails, I think it might be good to be able to use zero_excluded option
also without the incomplete flag.
So I'm ok with the original change, but additionally would like to
- fix the comment Lianbo pointed out and help texts of zero_excluded
- change netdump.c as well
Any concerns?
Thanks,
Kazu
Thanks.
Lianbo
> @@ -1158,8 +1158,15 @@ cache_page(physaddr_t paddr)
> "paddr/pfn: %llx/%lx\n",
> (ulonglong)paddr, pfn);
> memset(dd->compressed_page, 0, dd->block_size);
> - } else
> - return READ_ERROR;
> + } else {
> + if (CRASHDEBUG(8))
> + fprintf(fp,
> + "read_diskdump/cache_page: "
> + "descriptor with zero offset found at
"
> + "paddr/pfn/pos: %llx/%lx/%lx\n",
> + (ulonglong)paddr, pfn, desc_pos);
> + return PAGE_INCOMPLETE;
> + }
> } else {
> if (lseek(dd->dfd, pd.offset, SEEK_SET) == failed)
> return SEEK_ERROR;
> diff --git a/memory.c b/memory.c
> index 8c6bbe4..5d7eee6 100644
> --- a/memory.c
> +++ b/memory.c
> @@ -2211,6 +2211,7 @@ accessible(ulong kva)
> #define READ_ERRMSG "read error: %s address: %llx type:
\"%s\"\n"
> #define WRITE_ERRMSG "write error: %s address: %llx type:
\"%s\"\n"
> #define PAGE_EXCLUDED_ERRMSG "page excluded: %s address: %llx type:
\"%s\"\n"
> +#define PAGE_INCOMPLETE_ERRMSG "page incomplete: %s address: %llx type:
\"%s\"\n"
>
> #define RETURN_ON_PARTIAL_READ() \
> if ((error_handle & RETURN_PARTIAL) && (size < orig_size)) {
\
> @@ -2376,6 +2377,12 @@ readmem(ulonglong addr, int memtype, void *buffer, long
size,
> error(INFO, PAGE_EXCLUDED_ERRMSG,
memtype_string(memtype, 0), addr,
type);
> goto readmem_error;
>
> + case PAGE_INCOMPLETE:
> + RETURN_ON_PARTIAL_READ();
> + if (PRINT_ERROR_MESSAGE)
> + error(INFO, PAGE_INCOMPLETE_ERRMSG,
memtype_string(memtype, 0), addr,
type);
> + goto readmem_error;
> +
> default:
> break;
> }
> --
> 2.31.1
--
Crash-utility mailing list
Crash-utility(a)redhat.com
https://listman.redhat.com/mailman/listinfo/crash-utility