Re: [PATCH] vmware_guestdump: Various format versions support
by Lianbo Jiang
On 3/1/24 07:16, devel-request(a)lists.crash-utility.osci.io wrote:
> Date: Thu, 29 Feb 2024 15:16:33 -0800
> From: Alexey Makhalov<alexey.makhalov(a)broadcom.com>
> Subject: [Crash-utility] [PATCH] vmware_guestdump: Various format
> versions support
> To:devel@lists.crash-utility.osci.io
> Cc: Alexey Makhalov<alexey.makhalov(a)broadcom.com>
> Message-ID:<20240229231633.102756-1-alexey.makhalov(a)broadcom.com>
>
> There are several versions of the debug.guest format. The current version
> of the code is able to parse only version 4.
>
> Improve the parser to support other known versions. Split the data
> structures into sub-structures and introduce helper functions to
> calculate the gap between them based on the version number. Implement
> an additional data structure (struct mainmeminfo_old) and logic
> specifically to support the original (version 1) format.
>
> Signed-off-by: Alexey Makhalov<alexey.makhalov(a)broadcom.com>
> ---
> vmware_guestdump.c | 316 ++++++++++++++++++++++++++++++++-------------
> 1 file changed, 229 insertions(+), 87 deletions(-)
>
> diff --git a/vmware_guestdump.c b/vmware_guestdump.c
> index 5be26c8..5c7ee4d 100644
> --- a/vmware_guestdump.c
> +++ b/vmware_guestdump.c
> @@ -2,6 +2,8 @@
> * vmware_guestdump.c
> *
> * Copyright (c) 2020 VMware, Inc.
> + * Copyright (c) 2024 Broadcom. All Rights Reserved. The term "Broadcom"
> + * refers to Broadcom Inc. and/or its subsidiaries.
> *
> * This program is free software; you can redistribute it and/or modify
> * it under the terms of the GNU General Public License as published by
> @@ -13,7 +15,7 @@
> * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> * GNU General Public License for more details.
> *
> - * Author: Alexey Makhalov<amakhalov(a)vmware.com>
> + * Author: Alexey Makhalov<alexey.makhalov(a)broadcom.com>
> */
>
> #include "defs.h"
> @@ -21,20 +23,31 @@
>
> #define LOGPRX "vmw: "
>
> -#define GUESTDUMP_VERSION 4
> -#define GUESTDUMP_MAGIC1 1
> -#define GUESTDUMP_MAGIC2 0
> -
> +/*
> + * debug.guest file layout
> + * 00000000: guest dump header, it includes:
> + * 1. Version (4 bytes) \
> + * 2. Number of Virtual CPUs (4 bytes) } - struct guestdumpheader
> + * 3. Reserved gap
> + * 4. Main Memory information - struct mainmeminfo{,_old}
> + * (use get_vcpus_offset() to get total size of guestdumpheader)
> + * vcpus_offset: ---------\
> + * 1. struct vcpu_state1 \
> + * 2. reserved gap } num_vcpus times
> + * 3. struct vcpu_state2 /
> + * 4. 4KB of reserved data /
> + * --------/
> + *
> + */
> struct guestdumpheader {
> uint32_t version;
> uint32_t num_vcpus;
> - uint8_t magic1;
> - uint8_t reserved1;
> - uint32_t cpu_vendor;
> - uint64_t magic2;
> +} __attribute__((packed)) hdr;
> +
> +struct mainmeminfo {
> uint64_t last_addr;
> uint64_t memsize_in_pages;
> - uint32_t reserved2;
> + uint32_t reserved1;
> uint32_t mem_holes;
> struct memhole {
> uint64_t ppn;
> @@ -42,14 +55,36 @@ struct guestdumpheader {
> } holes[2];
> } __attribute__((packed));
>
> -struct vcpu_state {
> +/* Used by version 1 only */
> +struct mainmeminfo_old {
> + uint64_t last_addr;
> + uint32_t memsize_in_pages;
> + uint32_t reserved1;
> + uint32_t mem_holes;
> + struct memhole1 {
> + uint32_t ppn;
> + uint32_t pages;
> + } holes[2];
> + /* There are additional fields, see get_vcpus_offset() calculation. */
> +} __attribute__((packed));
> +
> +/* First half of vcpu_state */
> +struct vcpu_state1 {
> uint32_t cr0;
> uint64_t cr2;
> uint64_t cr3;
> uint64_t cr4;
> uint64_t reserved1[10];
> uint64_t idt_base;
> - uint16_t reserved2[21];
> +} __attribute__((packed));
> +
> +/*
> + * Unused fields between vcpu_state1 and vcpu_state2 will be skipped.
> + * See get_vcpu_gapsize() calculation.
> + */
> +
> +/* Second half of vcpu_state */
> +struct vcpu_state2 {
> struct x86_64_pt_regs {
> uint64_t r15;
> uint64_t r14;
> @@ -76,9 +111,41 @@ struct vcpu_state {
> uint8_t reserved3[65];
> } __attribute__((packed));
>
> +/*
> + * Returns the size of the guest dump header.
> + */
> +static inline long
> +get_vcpus_offset(uint32_t version, int mem_holes)
> +{
> + switch (version) {
> + case 1: /* ESXi 6.7 and older */
> + return sizeof(struct guestdumpheader) + 13 + sizeof(struct mainmeminfo_old) +
> + (mem_holes == -1 ? 0 : 8 * mem_holes + 4);
> + case 3: /* ESXi 6.8 */
> + return sizeof(struct guestdumpheader) + 14 + sizeof(struct mainmeminfo);
> + case 4: /* ESXi 7.0 */
> + case 5: /* ESXi 8.0 */
> + return sizeof(struct guestdumpheader) + 14 + sizeof(struct mainmeminfo);
The handling for versions "ESXi 6.8", "ESXi 7.0" and "ESXi 8.0" looks
identical; can these be folded into one code block? For example:
+ case 3: /* ESXi 6.8 */
+ case 4: /* ESXi 7.0 */
+ case 5: /* ESXi 8.0 */
+ return sizeof(struct guestdumpheader) + 14 + sizeof(struct mainmeminfo);
> + case 6: /* ESXi 8.0u2 */
> + return sizeof(struct guestdumpheader) + 15 + sizeof(struct mainmeminfo);
In addition, for the above magic numbers, could you please add some code
comments explaining what they mean?
Also, there is a warning below:
vmware_guestdump.c: In function ‘is_vmware_guestdump’:
vmware_guestdump.c:290:1: warning: label ‘unrecognized’ defined but not
used [-Wunused-label]
290 | unrecognized:
| ^~~~~~~~~~~~
And other changes are fine to me.
Thanks.
Lianbo
> +
> + }
> + return 0;
> +}
> +
> +/*
> + * Returns the size of reserved (unused) fields in the middle of vcpu_state structure.
> + */
> +static inline long
> +get_vcpu_gapsize(uint32_t version)
> +{
> + if (version < 4)
> + return 45;
> + return 42;
> +}
>
> /*
> - * vmware_guestdump is extension to vmware_vmss with ability to debug
> + * vmware_guestdump is an extension to the vmware_vmss with ability to debug
> * debug.guest and debug.vmem files.
> *
> * debug.guest.gz and debug.vmem.gz can be obtained using following
> @@ -86,73 +153,136 @@ struct vcpu_state {
> * monitor.mini-suspend_on_panic = TRUE
> * monitor.suspend_on_triplefault = TRUE
> *
> - * guestdump (debug.guest) is simplified version of *.vmss which does
> - * not contain full VM state, but minimal guest state, such as memory
> + * guestdump (debug.guest) is a simplified version of the *.vmss which does
> + * not contain a full VM state, but minimal guest state, such as a memory
> * layout and CPUs state, needed for debugger. is_vmware_guestdump()
> * and vmware_guestdump_init() functions parse guestdump header and
> - * populate vmss data structure (from vmware_vmss.c). As result, all
> + * populate vmss data structure (from vmware_vmss.c). As a result, all
> * handlers (except mempry_dump) from vmware_vmss.c can be reused.
> *
> - * debug.guest does not have dedicated header magic or signature for
> - * its format. To probe debug.guest we need to perform header fields
> - * and file size validity. In addition, check for the filename
> - * extension, which must be ".guest".
> + * debug.guest does not have a dedicated header magic or file format signature.
> + * To probe debug.guest we need to perform a series of validations. In addition,
> + * we check for the filename extension, which must be ".guest".
> */
> -
> int
> is_vmware_guestdump(char *filename)
> {
> - struct guestdumpheader hdr;
> + struct mainmeminfo mmi;
> + long vcpus_offset;
> FILE *fp;
> - uint64_t filesize, holes_sum = 0;
> + uint64_t filesize, expected_filesize, holes_sum = 0;
> int i;
>
> if (strcmp(filename + strlen(filename) - 6, ".guest"))
> return FALSE;
>
> - if ((fp = fopen(filename, "r")) == NULL) {
> + if ((fp = fopen(filename, "r")) == NULL) {
> error(INFO, LOGPRX"Failed to open '%s': [Error %d] %s\n",
> - filename, errno, strerror(errno));
> + filename, errno, strerror(errno));
> return FALSE;
> - }
> + }
>
> if (fread(&hdr, sizeof(struct guestdumpheader), 1, fp) != 1) {
> error(INFO, LOGPRX"Failed to read '%s' from file '%s': [Error %d] %s\n",
> - "guestdumpheader", filename, errno, strerror(errno));
> + "guestdumpheader", filename, errno, strerror(errno));
> + fclose(fp);
> + return FALSE;
> + }
> +
> + vcpus_offset = get_vcpus_offset(hdr.version, -1 /* Unknown yet, adjust it later */);
> +
> + if (!vcpus_offset) {
> + if (CRASHDEBUG(1))
> + error(INFO, LOGPRX"Not supported version %d\n", hdr.version);
> fclose(fp);
> return FALSE;
> }
>
> + if (hdr.version == 1) {
> + struct mainmeminfo_old tmp;
> + if (fseek(fp, vcpus_offset - sizeof(struct mainmeminfo_old), SEEK_SET) == -1) {
> + if (CRASHDEBUG(1))
> + error(INFO, LOGPRX"Failed to fseek '%s': [Error %d] %s\n",
> + filename, errno, strerror(errno));
> + fclose(fp);
> + return FALSE;
> + }
> +
> + if (fread(&tmp, sizeof(struct mainmeminfo_old), 1, fp) != 1) {
> + if (CRASHDEBUG(1))
> + error(INFO, LOGPRX"Failed to read '%s' from file '%s': [Error %d] %s\n",
> + "mainmeminfo_old", filename, errno, strerror(errno));
> + fclose(fp);
> + return FALSE;
> + }
> + mmi.last_addr = tmp.last_addr;
> + mmi.memsize_in_pages = tmp.memsize_in_pages;
> + mmi.mem_holes = tmp.mem_holes;
> + mmi.holes[0].ppn = tmp.holes[0].ppn;
> + mmi.holes[0].pages = tmp.holes[0].pages;
> + mmi.holes[1].ppn = tmp.holes[1].ppn;
> + mmi.holes[1].pages = tmp.holes[1].pages;
> + /* vcpu_offset adjustment for mem_holes is required only for version 1. */
> + vcpus_offset = get_vcpus_offset(hdr.version, mmi.mem_holes);
> + } else {
> + if (fseek(fp, vcpus_offset - sizeof(struct mainmeminfo), SEEK_SET) == -1) {
> + if (CRASHDEBUG(1))
> + error(INFO, LOGPRX"Failed to fseek '%s': [Error %d] %s\n",
> + filename, errno, strerror(errno));
> + fclose(fp);
> + return FALSE;
> + }
> +
> + if (fread(&mmi, sizeof(struct mainmeminfo), 1, fp) != 1) {
> + if (CRASHDEBUG(1))
> + error(INFO, LOGPRX"Failed to read '%s' from file '%s': [Error %d] %s\n",
> + "mainmeminfo", filename, errno, strerror(errno));
> + fclose(fp);
> + return FALSE;
> + }
> + }
> if (fseek(fp, 0L, SEEK_END) == -1) {
> - error(INFO, LOGPRX"Failed to fseek '%s': [Error %d] %s\n",
> - filename, errno, strerror(errno));
> + if (CRASHDEBUG(1))
> + error(INFO, LOGPRX"Failed to fseek '%s': [Error %d] %s\n",
> + filename, errno, strerror(errno));
> fclose(fp);
> return FALSE;
> }
> filesize = ftell(fp);
> fclose(fp);
>
> - if (hdr.mem_holes > 2)
> - goto unrecognized;
> + if (mmi.mem_holes > 2) {
> + if (CRASHDEBUG(1))
> + error(INFO, LOGPRX"Unexpected mmi.mem_holes value %d\n",
> + mmi.mem_holes);
> + return FALSE;
> + }
>
> - for (i = 0; i < hdr.mem_holes; i++) {
> + for (i = 0; i < mmi.mem_holes; i++) {
> /* hole start page */
> - vmss.regions[i].startpagenum = hdr.holes[i].ppn;
> + vmss.regions[i].startpagenum = mmi.holes[i].ppn;
> /* hole end page */
> - vmss.regions[i].startppn = hdr.holes[i].ppn + hdr.holes[i].pages;
> - holes_sum += hdr.holes[i].pages;
> + vmss.regions[i].startppn = mmi.holes[i].ppn + mmi.holes[i].pages;
> + holes_sum += mmi.holes[i].pages;
> + }
> +
> + if ((mmi.last_addr + 1) != ((mmi.memsize_in_pages + holes_sum) << VMW_PAGE_SHIFT)) {
> + if (CRASHDEBUG(1))
> + error(INFO, LOGPRX"Memory size check failed\n");
> + return FALSE;
> }
>
> - if (hdr.version != GUESTDUMP_VERSION ||
> - hdr.magic1 != GUESTDUMP_MAGIC1 ||
> - hdr.magic2 != GUESTDUMP_MAGIC2 ||
> - (hdr.last_addr + 1) != ((hdr.memsize_in_pages + holes_sum) << VMW_PAGE_SHIFT) ||
> - filesize != sizeof(struct guestdumpheader) +
> - hdr.num_vcpus * (sizeof (struct vcpu_state) + VMW_PAGE_SIZE))
> - goto unrecognized;
> + expected_filesize = vcpus_offset + hdr.num_vcpus * (sizeof(struct vcpu_state1) +
> + get_vcpu_gapsize(hdr.version) + sizeof(struct vcpu_state2) + VMW_PAGE_SIZE);
> + if (filesize != expected_filesize) {
> + if (CRASHDEBUG(1))
> + error(INFO, LOGPRX"Incorrect file size: %d != %d\n",
> + filesize, expected_filesize);
> + return FALSE;
> + }
>
> - vmss.memsize = hdr.memsize_in_pages << VMW_PAGE_SHIFT;
> - vmss.regionscount = hdr.mem_holes + 1;
> + vmss.memsize = mmi.memsize_in_pages << VMW_PAGE_SHIFT;
> + vmss.regionscount = mmi.mem_holes + 1;
> vmss.memoffset = 0;
> vmss.num_vcpus = hdr.num_vcpus;
> return TRUE;
> @@ -169,7 +299,8 @@ vmware_guestdump_init(char *filename, FILE *ofp)
> FILE *fp = NULL;
> int i, result = TRUE;
> char *vmem_filename = NULL;
> - struct vcpu_state vs;
> + struct vcpu_state1 vs1;
> + struct vcpu_state2 vs2;
> char *p;
>
> if (!machine_type("X86") && !machine_type("X86_64")) {
> @@ -180,14 +311,14 @@ vmware_guestdump_init(char *filename, FILE *ofp)
> goto exit;
> }
>
> - if ((fp = fopen(filename, "r")) == NULL) {
> + if ((fp = fopen(filename, "r")) == NULL) {
> error(INFO, LOGPRX"Failed to open '%s': [Error %d] %s\n",
> filename, errno, strerror(errno));
> result = FALSE;
> goto exit;
> - }
> + }
>
> - if (fseek(fp, sizeof(struct guestdumpheader), SEEK_SET) == -1) {
> + if (fseek(fp, get_vcpus_offset(hdr.version, vmss.regionscount - 1), SEEK_SET) == -1) {
> error(INFO, LOGPRX"Failed to fseek '%s': [Error %d] %s\n",
> filename, errno, strerror(errno));
> result = FALSE;
> @@ -203,7 +334,19 @@ vmware_guestdump_init(char *filename, FILE *ofp)
> }
>
> for (i = 0; i < vmss.num_vcpus; i++) {
> - if (fread(&vs, sizeof(struct vcpu_state), 1, fp) != 1) {
> + if (fread(&vs1, sizeof(struct vcpu_state1), 1, fp) != 1) {
> + error(INFO, LOGPRX"Failed to read '%s' from file '%s': [Error %d] %s\n",
> + "vcpu_state", filename, errno, strerror(errno));
> + result = FALSE;
> + goto exit;
> + }
> + if (fseek(fp, get_vcpu_gapsize(hdr.version), SEEK_CUR) == -1) {
> + error(INFO, LOGPRX"Failed to read '%s' from file '%s': [Error %d] %s\n",
> + "vcpu_state", filename, errno, strerror(errno));
> + result = FALSE;
> + goto exit;
> + }
> + if (fread(&vs2, sizeof(struct vcpu_state2), 1, fp) != 1) {
> error(INFO, LOGPRX"Failed to read '%s' from file '%s': [Error %d] %s\n",
> "vcpu_state", filename, errno, strerror(errno));
> result = FALSE;
> @@ -217,29 +360,29 @@ vmware_guestdump_init(char *filename, FILE *ofp)
> }
> vmss.vcpu_regs[i] = 0;
>
> - vmss.regs64[i]->rax = vs.regs64.rax;
> - vmss.regs64[i]->rcx = vs.regs64.rcx;
> - vmss.regs64[i]->rdx = vs.regs64.rdx;
> - vmss.regs64[i]->rbx = vs.regs64.rbx;
> - vmss.regs64[i]->rbp = vs.regs64.rbp;
> - vmss.regs64[i]->rsp = vs.regs64.rsp;
> - vmss.regs64[i]->rsi = vs.regs64.rsi;
> - vmss.regs64[i]->rdi = vs.regs64.rdi;
> - vmss.regs64[i]->r8 = vs.regs64.r8;
> - vmss.regs64[i]->r9 = vs.regs64.r9;
> - vmss.regs64[i]->r10 = vs.regs64.r10;
> - vmss.regs64[i]->r11 = vs.regs64.r11;
> - vmss.regs64[i]->r12 = vs.regs64.r12;
> - vmss.regs64[i]->r13 = vs.regs64.r13;
> - vmss.regs64[i]->r14 = vs.regs64.r14;
> - vmss.regs64[i]->r15 = vs.regs64.r15;
> - vmss.regs64[i]->idtr = vs.idt_base;
> - vmss.regs64[i]->cr[0] = vs.cr0;
> - vmss.regs64[i]->cr[2] = vs.cr2;
> - vmss.regs64[i]->cr[3] = vs.cr3;
> - vmss.regs64[i]->cr[4] = vs.cr4;
> - vmss.regs64[i]->rip = vs.regs64.rip;
> - vmss.regs64[i]->rflags = vs.regs64.eflags;
> + vmss.regs64[i]->rax = vs2.regs64.rax;
> + vmss.regs64[i]->rcx = vs2.regs64.rcx;
> + vmss.regs64[i]->rdx = vs2.regs64.rdx;
> + vmss.regs64[i]->rbx = vs2.regs64.rbx;
> + vmss.regs64[i]->rbp = vs2.regs64.rbp;
> + vmss.regs64[i]->rsp = vs2.regs64.rsp;
> + vmss.regs64[i]->rsi = vs2.regs64.rsi;
> + vmss.regs64[i]->rdi = vs2.regs64.rdi;
> + vmss.regs64[i]->r8 = vs2.regs64.r8;
> + vmss.regs64[i]->r9 = vs2.regs64.r9;
> + vmss.regs64[i]->r10 = vs2.regs64.r10;
> + vmss.regs64[i]->r11 = vs2.regs64.r11;
> + vmss.regs64[i]->r12 = vs2.regs64.r12;
> + vmss.regs64[i]->r13 = vs2.regs64.r13;
> + vmss.regs64[i]->r14 = vs2.regs64.r14;
> + vmss.regs64[i]->r15 = vs2.regs64.r15;
> + vmss.regs64[i]->idtr = vs1.idt_base;
> + vmss.regs64[i]->cr[0] = vs1.cr0;
> + vmss.regs64[i]->cr[2] = vs1.cr2;
> + vmss.regs64[i]->cr[3] = vs1.cr3;
> + vmss.regs64[i]->cr[4] = vs1.cr4;
> + vmss.regs64[i]->rip = vs2.regs64.rip;
> + vmss.regs64[i]->rflags = vs2.regs64.eflags;
>
> vmss.vcpu_regs[i] = REGS_PRESENT_ALL;
> }
> @@ -268,9 +411,9 @@ vmware_guestdump_init(char *filename, FILE *ofp)
> fprintf(ofp, LOGPRX"vmem file: %s\n\n", vmem_filename);
>
> if (CRASHDEBUG(1)) {
> - vmware_guestdump_memory_dump(ofp);
> - dump_registers_for_vmss_dump();
> - }
> + vmware_guestdump_memory_dump(ofp);
> + dump_registers_for_vmss_dump();
> + }
>
> exit:
> if (fp)
> @@ -296,24 +439,23 @@ exit:
> int
> vmware_guestdump_memory_dump(FILE *ofp)
> {
> + uint64_t holes_sum = 0;
> + unsigned i;
> +
> fprintf(ofp, "vmware_guestdump:\n");
> fprintf(ofp, " Header: version=%d num_vcpus=%llu\n",
> - GUESTDUMP_VERSION, (ulonglong)vmss.num_vcpus);
> + hdr.version, (ulonglong)vmss.num_vcpus);
> fprintf(ofp, "Total memory: %llu\n", (ulonglong)vmss.memsize);
>
> - if (vmss.regionscount > 1) {
> - uint64_t holes_sum = 0;
> - unsigned i;
>
> - fprintf(ofp, "Memory regions[%d]:\n", vmss.regionscount);
> - fprintf(ofp, " [0x%016x-", 0);
> - for (i = 0; i < vmss.regionscount - 1; i++) {
> - fprintf(ofp, "0x%016llx]\n", (ulonglong)vmss.regions[i].startpagenum << VMW_PAGE_SHIFT);
> - fprintf(ofp, " [0x%016llx-", (ulonglong)vmss.regions[i].startppn << VMW_PAGE_SHIFT);
> - holes_sum += vmss.regions[i].startppn - vmss.regions[i].startpagenum;
> - }
> - fprintf(ofp, "0x%016llx]\n", (ulonglong)vmss.memsize + (holes_sum << VMW_PAGE_SHIFT));
> + fprintf(ofp, "Memory regions[%d]:\n", vmss.regionscount);
> + fprintf(ofp, " [0x%016x-", 0);
> + for (i = 0; i < vmss.regionscount - 1; i++) {
> + fprintf(ofp, "0x%016llx]\n", (ulonglong)vmss.regions[i].startpagenum << VMW_PAGE_SHIFT);
> + fprintf(ofp, " [0x%016llx-", (ulonglong)vmss.regions[i].startppn << VMW_PAGE_SHIFT);
> + holes_sum += vmss.regions[i].startppn - vmss.regions[i].startpagenum;
> }
> + fprintf(ofp, "0x%016llx]\n", (ulonglong)vmss.memsize + (holes_sum << VMW_PAGE_SHIFT));
>
> return TRUE;
> }
> -- 2.39.0
7 months, 4 weeks
Re: [PATCH v4 ] Adding the zram decompression algorithm "lzo-rle" to support kernel versions >= 5.1
by Tao Liu
Hi,
The v4 looks good to me, so ack.
Thanks,
Tao Liu
On Thu, Mar 28, 2024 at 08:15:53AM +0000, Yulong TANG 汤玉龙 wrote:
> In Linux 5.1, the ZRAM block driver has changed its default compressor from "lzo" to "lzo-rle" to enhance LZO compression support. However, crash does not support the improved LZO algorithm, resulting in failure when reading memory.
>
> change default compressor : ce82f19fd5809f0cf87ea9f753c5cc65ca0673d6
>
> The issue was discovered when using the extension 'gcore' to generate a process coredump, which was found to be incomplete and unable to be opened properly with gdb.
> This patch is for the crash utility; it enables crash to support decompression of the "lzo-rle" compression algorithm used in zram. The patch has been tested with vmcore files from kernel version 5.4, and successfully allows reading of memory compressed with the zram compression algorithm.
>
> Testing:
> ========
>
> Before applying this patch:
> crash> gcore -v 0 1
> gcore: WARNING: only the lzo compressor is supported
> gcore: WARNING: only the lzo compressor is supported
> gcore: WARNING: only the lzo compressor is supported
> after:
> crash> gcore -v 0
> 1 Saved core.1.init
>
> Changelog:
> ==========
> v2: keep the "if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)" related code of the copied kernel code, but change the "if defined" macro into a runtime check.
> v3: set a default value of HAVE_EFFICIENT_UNALIGNED_ACCESS depending on architecture, for kernels without ikconfig.
> v4: avoid checking CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS on every call; move "include lzorle_decompress.h" to diskdump.c from defs.h.
>
> Patch:
> ==========
>
> See attachment.
>
>
> Thanks and regards,
> Yulong
>
> From b359f434f5ba473a784756fa4711cf80df8ca51b Mon Sep 17 00:00:00 2001
> From: "yulong.tang" <yulong.tang(a)nio.com>
> Date: Tue, 20 Feb 2024 15:09:49 +0800
> Subject: [PATCH] Adding the zram decompression algorithm "lzo-rle" to support
> kernel versions >= 5.1.
>
> Port the improved decompression method for "lzo" in the kernel to support decompression of "lzo-rle". After the 5.1 version of the kernel, the default compression algorithm for zram was changed from "lzo" to "lzo-rle". The crash utility only supports decompression for "lzo", so when parsing vmcore files that utilize zram compression, such as when using the gcore command to generate process core dump files, parsing cannot be completed successfully.
>
> before:
> crash> gcore -v 0 1
> gcore: WARNING: only the lzo compressor is supported
> gcore: WARNING: only the lzo compressor is supported
> gcore: WARNING: only the lzo compressor is supported
> gcore: WARNING: only the lzo compressor is supported
> after:
> crash> gcore -v 0 1
> Saved core.1.init
>
> Signed-off-by: yulong.tang <yulong.tang(a)nio.com>
> Reviewed-by: Tao Liu <ltao(a)redhat.com>
> ---
> Makefile | 9 +-
> diskdump.c | 3 +
> lzorle_decompress.c | 325 ++++++++++++++++++++++++++++++++++++++++++++
> lzorle_decompress.h | 81 +++++++++++
> 4 files changed, 416 insertions(+), 2 deletions(-)
> create mode 100644 lzorle_decompress.c
> create mode 100644 lzorle_decompress.h
>
> diff --git a/Makefile b/Makefile
> index 9e97313..ce0c070 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -74,7 +74,8 @@ CFILES=main.c tools.c global_data.c memory.c filesys.c help.c task.c \
> xen_hyper.c xen_hyper_command.c xen_hyper_global_data.c \
> xen_hyper_dump_tables.c kvmdump.c qemu.c qemu-load.c sadump.c ipcs.c \
> ramdump.c vmware_vmss.c vmware_guestdump.c \
> - xen_dom0.c kaslr_helper.c sbitmap.c maple_tree.c
> + xen_dom0.c kaslr_helper.c sbitmap.c maple_tree.c \
> + lzorle_decompress.c
>
> SOURCE_FILES=${CFILES} ${GENERIC_HFILES} ${MCORE_HFILES} \
> ${REDHAT_CFILES} ${REDHAT_HFILES} ${UNWIND_HFILES} \
> @@ -94,7 +95,8 @@ OBJECT_FILES=main.o tools.o global_data.o memory.o filesys.o help.o task.o \
> xen_hyper.o xen_hyper_command.o xen_hyper_global_data.o \
> xen_hyper_dump_tables.o kvmdump.o qemu.o qemu-load.o sadump.o ipcs.o \
> ramdump.o vmware_vmss.o vmware_guestdump.o \
> - xen_dom0.o kaslr_helper.o sbitmap.o maple_tree.o
> + xen_dom0.o kaslr_helper.o sbitmap.o maple_tree.o \
> + lzorle_decompress.o
>
> MEMORY_DRIVER_FILES=memory_driver/Makefile memory_driver/crash.c memory_driver/README
>
> @@ -546,6 +548,9 @@ bpf.o: ${GENERIC_HFILES} bpf.c
> maple_tree.o: ${GENERIC_HFILES} ${MAPLE_TREE_HFILES} maple_tree.c
> ${CC} -c ${CRASH_CFLAGS} maple_tree.c ${WARNING_OPTIONS} ${WARNING_ERROR}
>
> +lzorle_decompress.o: lzorle_decompress.c
> + ${CC} -c ${CRASH_CFLAGS} lzorle_decompress.c ${WARNING_OPTIONS} ${WARNING_ERROR}
> +
> ${PROGRAM}: force
> @$(MAKE) all
>
> diff --git a/diskdump.c b/diskdump.c
> index 3ae7bf2..4a473e1 100644
> --- a/diskdump.c
> +++ b/diskdump.c
> @@ -28,6 +28,7 @@
> #include "xen_dom0.h"
> #include "vmcore.h"
> #include "maple_tree.h"
> +#include "lzorle_decompress.h"
>
> #define BITMAP_SECT_LEN 4096
>
> @@ -3069,6 +3070,8 @@ try_zram_decompress(ulonglong pte_val, unsigned char *buf, ulong len, ulonglong
> " with lzo library\n");
> return 0;
> #endif
> + } else if (STREQ(name, "lzo-rle")) {
> + decompressor = (void *)&lzorle_decompress_safe;
> } else { /* todo: support more compressor */
> error(WARNING, "only the lzo compressor is supported\n");
> return 0;
> diff --git a/lzorle_decompress.c b/lzorle_decompress.c
> new file mode 100644
> index 0000000..5618787
> --- /dev/null
> +++ b/lzorle_decompress.c
> @@ -0,0 +1,325 @@
> +/* lzorle_decompress.c
> + *
> + * from kernel lib/lzo/lzo1x_decompress_safe.c
> + *
> + * Copyright (C) 1996-2012 Markus F.X.J. Oberhumer <markus(a)oberhumer.com>
> + * Copyright (C) 2024 NIO
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + */
> +
> +#include "lzorle_decompress.h"
> +#include <stdint.h>
> +#include <string.h>
> +
> +#include "defs.h"
> +
> +/* This MAX_255_COUNT is the maximum number of times we can add 255 to a base
> + * count without overflowing an integer. The multiply will overflow when
> + * multiplying 255 by more than MAXINT/255. The sum will overflow earlier
> + * depending on the base count. Since the base count is taken from a u8
> + * and a few bits, it is safe to assume that it will always be lower than
> + * or equal to 2*255, thus we can always prevent any overflow by accepting
> + * two less 255 steps. See Documentation/lzo.txt for more information.
> + */
> +#define MAX_255_COUNT ((((ulong)~0) / 255) - 2)
> +
> +static uint16_t get_unaligned_le16(const void *p) {
> + uint16_t value;
> + memcpy(&value, p, sizeof(uint16_t));
> + return value;
> +}
> +
> +int lzorle_decompress_safe(const unsigned char *in, ulong in_len,
> + unsigned char *out, ulong *out_len, void *other/* NOT USED */) {
> + unsigned char *op;
> + const unsigned char *ip;
> + ulong t, next;
> + ulong state = 0;
> + const unsigned char *m_pos;
> + const unsigned char * const ip_end = in + in_len;
> + unsigned char * const op_end = out + *out_len;
> +
> + unsigned char bitstream_version;
> +
> + static int efficient_unaligned_access = -1;
> +
> + if (efficient_unaligned_access == -1) {
> +#if defined(ARM) || defined(ARM64) || defined(X86) || defined(X86_64) || defined(PPC) || defined(PPC64) || defined(S390)|| defined(S390X)
> + efficient_unaligned_access = TRUE;
> +#else
> + efficient_unaligned_access = FALSE;
> +#endif
> +
> + if ((kt->ikconfig_flags & IKCONFIG_AVAIL) && get_kernel_config("CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS", NULL) == IKCONFIG_Y)
> + efficient_unaligned_access = TRUE;
> + }
> +
> + op = out;
> + ip = in;
> +
> + if (in_len < 3)
> + goto input_overrun;
> +
> + if (in_len >= 5 && *ip == 17) {
> + bitstream_version = ip[1];
> + ip += 2;
> + } else {
> + bitstream_version = 0;
> + }
> +
> + if (*ip > 17) {
> + t = *ip++ - 17;
> + if (t < 4) {
> + next = t;
> + goto match_next;
> + }
> + goto copy_literal_run;
> + }
> +
> + for (;;) {
> + t = *ip++;
> + if (t < 16) {
> + if (state == 0) {
> + if (t == 0) {
> + ulong offset;
> + const unsigned char *ip_last = ip;
> +
> + while (*ip == 0) {
> + ip++;
> + NEED_IP(1);
> + }
> + offset = ip - ip_last;
> + if (offset > MAX_255_COUNT)
> + return LZO_E_ERROR;
> +
> + offset = (offset << 8) - offset;
> + t += offset + 15 + *ip++;
> + }
> + t += 3;
> +copy_literal_run:
> + if (efficient_unaligned_access) {
> + if (HAVE_IP(t + 15) && HAVE_OP(t + 15)) {
> + const unsigned char *ie = ip + t;
> + unsigned char *oe = op + t;
> + do {
> + COPY8(op, ip);
> + op += 8;
> + ip += 8;
> + COPY8(op, ip);
> + op += 8;
> + ip += 8;
> + } while (ip < ie);
> + ip = ie;
> + op = oe;
> + } else {
> + NEED_OP(t);
> + NEED_IP(t + 3);
> + do {
> + *op++ = *ip++;
> + } while (--t > 0);
> + }
> + } else {
> + NEED_OP(t);
> + NEED_IP(t + 3);
> + do {
> + *op++ = *ip++;
> + } while (--t > 0);
> + }
> + state = 4;
> + continue;
> + } else if (state != 4) {
> + next = t & 3;
> + m_pos = op - 1;
> + m_pos -= t >> 2;
> + m_pos -= *ip++ << 2;
> + TEST_LB(m_pos);
> + NEED_OP(2);
> + op[0] = m_pos[0];
> + op[1] = m_pos[1];
> + op += 2;
> + goto match_next;
> + } else {
> + next = t & 3;
> + m_pos = op - (1 + M2_MAX_OFFSET);
> + m_pos -= t >> 2;
> + m_pos -= *ip++ << 2;
> + t = 3;
> + }
> + } else if (t >= 64) {
> + next = t & 3;
> + m_pos = op - 1;
> + m_pos -= (t >> 2) & 7;
> + m_pos -= *ip++ << 3;
> + t = (t >> 5) - 1 + (3 - 1);
> + } else if (t >= 32) {
> + t = (t & 31) + (3 - 1);
> + if (t == 2) {
> + ulong offset;
> + const unsigned char *ip_last = ip;
> +
> + while (*ip == 0) {
> + ip++;
> + NEED_IP(1);
> + }
> + offset = ip - ip_last;
> + if (offset > MAX_255_COUNT)
> + return LZO_E_ERROR;
> +
> + offset = (offset << 8) - offset;
> + t += offset + 31 + *ip++;
> + NEED_IP(2);
> + }
> + m_pos = op - 1;
> +
> + next = get_unaligned_le16(ip);
> + ip += 2;
> + m_pos -= next >> 2;
> + next &= 3;
> + } else {
> + NEED_IP(2);
> + next = get_unaligned_le16(ip);
> + if (((next & 0xfffc) == 0xfffc) &&
> + ((t & 0xf8) == 0x18) &&
> + bitstream_version) {
> + NEED_IP(3);
> + t &= 7;
> + t |= ip[2] << 3;
> + t += MIN_ZERO_RUN_LENGTH;
> + NEED_OP(t);
> + memset(op, 0, t);
> + op += t;
> + next &= 3;
> + ip += 3;
> + goto match_next;
> + } else {
> + m_pos = op;
> + m_pos -= (t & 8) << 11;
> + t = (t & 7) + (3 - 1);
> + if (t == 2) {
> + ulong offset;
> + const unsigned char *ip_last = ip;
> +
> + while (*ip == 0) {
> + ip++;
> + NEED_IP(1);
> + }
> + offset = ip - ip_last;
> + if (offset > MAX_255_COUNT)
> + return LZO_E_ERROR;
> +
> + offset = (offset << 8) - offset;
> + t += offset + 7 + *ip++;
> + NEED_IP(2);
> + next = get_unaligned_le16(ip);
> + }
> + ip += 2;
> + m_pos -= next >> 2;
> + next &= 3;
> + if (m_pos == op)
> + goto eof_found;
> + m_pos -= 0x4000;
> + }
> + }
> + TEST_LB(m_pos);
> +
> + if (efficient_unaligned_access) {
> + if (op - m_pos >= 8) {
> + unsigned char *oe = op + t;
> + if (HAVE_OP(t + 15)) {
> + do {
> + COPY8(op, m_pos);
> + op += 8;
> + m_pos += 8;
> + COPY8(op, m_pos);
> + op += 8;
> + m_pos += 8;
> + } while (op < oe);
> + op = oe;
> + if (HAVE_IP(6)) {
> + state = next;
> + COPY4(op, ip);
> + op += next;
> + ip += next;
> + continue;
> + }
> + } else {
> + NEED_OP(t);
> + do {
> + *op++ = *m_pos++;
> + } while (op < oe);
> + }
> + } else {
> + unsigned char *oe = op + t;
> + NEED_OP(t);
> + op[0] = m_pos[0];
> + op[1] = m_pos[1];
> + op += 2;
> + m_pos += 2;
> + do {
> + *op++ = *m_pos++;
> + } while (op < oe);
> + }
> + } else {
> + unsigned char *oe = op + t;
> + NEED_OP(t);
> + op[0] = m_pos[0];
> + op[1] = m_pos[1];
> + op += 2;
> + m_pos += 2;
> + do {
> + *op++ = *m_pos++;
> + } while (op < oe);
> + }
> +match_next:
> + state = next;
> + t = next;
> + if (efficient_unaligned_access) {
> + if (HAVE_IP(6) && HAVE_OP(4)) {
> + COPY4(op, ip);
> + op += t;
> + ip += t;
> + } else {
> + NEED_IP(t + 3);
> + NEED_OP(t);
> + while (t > 0) {
> + *op++ = *ip++;
> + t--;
> + }
> + }
> + } else {
> + NEED_IP(t + 3);
> + NEED_OP(t);
> + while (t > 0) {
> + *op++ = *ip++;
> + t--;
> + }
> + }
> + }
> +
> +eof_found:
> + *out_len = op - out;
> + return (t != 3 ? LZO_E_ERROR :
> + ip == ip_end ? LZO_E_OK :
> + ip < ip_end ? LZO_E_INPUT_NOT_CONSUMED : LZO_E_INPUT_OVERRUN);
> +
> +input_overrun:
> + *out_len = op - out;
> + return LZO_E_INPUT_OVERRUN;
> +
> +output_overrun:
> + *out_len = op - out;
> + return LZO_E_OUTPUT_OVERRUN;
> +
> +lookbehind_overrun:
> + *out_len = op - out;
> + return LZO_E_LOOKBEHIND_OVERRUN;
> +}
> \ No newline at end of file
> diff --git a/lzorle_decompress.h b/lzorle_decompress.h
> new file mode 100644
> index 0000000..c7dfd70
> --- /dev/null
> +++ b/lzorle_decompress.h
> @@ -0,0 +1,81 @@
> +/* lzorle_decompress.h
> + *
> + * from kernel lib/lzo/lzodefs.h
> + *
> + * Copyright (C) 1996-2012 Markus F.X.J. Oberhumer <markus(a)oberhumer.com>
> + * Copyright (C) 2024 NIO
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + */
> +
> +#ifndef LZODEFS_H
> +#define LZODEFS_H
> +
> +#define COPY4(dst, src) memcpy((dst), (src), sizeof(uint32_t))
> +#define COPY8(dst, src) memcpy((dst), (src), sizeof(uint64_t))
> +
> +#define M1_MAX_OFFSET 0x0400
> +#define M2_MAX_OFFSET 0x0800
> +#define M3_MAX_OFFSET 0x4000
> +#define M4_MAX_OFFSET_V0 0xbfff
> +#define M4_MAX_OFFSET_V1 0xbffe
> +
> +#define M1_MIN_LEN 2
> +#define M1_MAX_LEN 2
> +#define M2_MIN_LEN 3
> +#define M2_MAX_LEN 8
> +#define M3_MIN_LEN 3
> +#define M3_MAX_LEN 33
> +#define M4_MIN_LEN 3
> +#define M4_MAX_LEN 9
> +
> +#define M1_MARKER 0
> +#define M2_MARKER 64
> +#define M3_MARKER 32
> +#define M4_MARKER 16
> +
> +#define MIN_ZERO_RUN_LENGTH 4
> +#define MAX_ZERO_RUN_LENGTH (2047 + MIN_ZERO_RUN_LENGTH)
> +
> +#define lzo_dict_t unsigned short
> +#define D_BITS 13
> +#define D_SIZE (1u << D_BITS)
> +#define D_MASK (D_SIZE - 1)
> +#define D_HIGH ((D_MASK >> 1) + 1)
> +
> +#define LZO_E_OK 0
> +#define LZO_E_ERROR (-1)
> +#define LZO_E_OUT_OF_MEMORY (-2)
> +#define LZO_E_NOT_COMPRESSIBLE (-3)
> +#define LZO_E_INPUT_OVERRUN (-4)
> +#define LZO_E_OUTPUT_OVERRUN (-5)
> +#define LZO_E_LOOKBEHIND_OVERRUN (-6)
> +#define LZO_E_EOF_NOT_FOUND (-7)
> +#define LZO_E_INPUT_NOT_CONSUMED (-8)
> +#define LZO_E_NOT_YET_IMPLEMELZO_HFILESNTED (-9)
> +#define LZO_E_INVALID_ARGUMENT (-10)
> +
> +#define HAVE_IP(x) ((unsigned long)(ip_end - ip) >= (unsigned long)(x))
> +#define HAVE_OP(x) ((unsigned long)(op_end - op) >= (unsigned long)(x))
> +#define NEED_IP(x) \
> + if (!HAVE_IP(x)) \
> + goto input_overrun
> +#define NEED_OP(x) \
> + if (!HAVE_OP(x)) \
> + goto output_overrun
> +#define TEST_LB(m_pos) \
> + if ((m_pos) < out) \
> + goto lookbehind_overrun
> +
> +int lzorle_decompress_safe(const unsigned char *in, unsigned long in_len,
> + unsigned char *out, unsigned long *out_len, void *other/* NOT USED */);
> +
> +#endif
> \ No newline at end of file
> --
> 2.25.1
>
> --
> Crash-utility mailing list -- devel(a)lists.crash-utility.osci.io
> To unsubscribe send an email to devel-leave(a)lists.crash-utility.osci.io
> https://${domain_name}/admin/lists/devel.lists.crash-utility.osci.io/
> Contribution Guidelines: https://github.com/crash-utility/crash/wiki
7 months, 4 weeks