 
                                        
                                
                         
                        
                                
                                
                                        
                                                
                                        
                                        
                                        [PATCH v3 0/4] Generalize KASLR calculation and use it for KDUMPs
                                
                                
                                
                                    
                                        by Sergio Lopez
                                    
                                
                                
                                        Commit 45b74b89530d611b3fa95a1041e158fbb865fa84 added support for
calculating phys_base and kernel offset for KASLR-enabled kernels on
SADUMPs by using a technique developed by Takao Indoh. Originally, the
patchset included support for KDUMPs, but this was dropped in v2, as it
was deemed unnecessary due to the implementation of the vmcoreinfo
device in QEMU.
Sadly, there are many reasons why the vmcoreinfo device may not be
present at the moment the memory dump is taken from a VM, ranging from
a host running older QEMU/libvirt versions to misconfigured VMs or
environments running hypervisors that don't support this device.
This patchset generalizes the KASLR-related functions from sadump.c,
moving them to kaslr_helper.c, and makes KDUMP analysis fall back to
KASLR offset calculation if vmcoreinfo data is missing.
These changes have been successfully tested with a 3.10.0-830.el7.x86_64
kernel under the following conditions:
 - kdump with KASLR and vmcoreinfo
 - kdump with KASLR but no vmcoreinfo
 - kdump without KASLR ("nokaslr" kernel command line option)
It was also tested that a "crash" patched with these changes still
builds and runs (live and kdump debugging) on an aarch64 machine.
changelog:
v3:
 - Merge *get_cr3 and *get_idtr functions and move them to
   kaslr_helper.c
 - diskdump: drop kaslr_phys_base addition and use
   sub_header_kdump->phys_base instead.
 - Unconditionally call x86_64_virt_phys_base after grabbing phys_base
v2:
 - Limit application to QEMU ELF and QEMU COMPRESSED dumps (thanks Dave)
 - Add support for QEMU COMPRESSED dumps (diskdump)
Sergio Lopez (4):
  Move kaslr related functions from sadump.c to kaslr_helper.c
  Move QEMUCPU* structs from netdump.h to defs.h
  netdump: infer kaslr offset for QEMU ELF dumps without vmcoreinfo
  diskdump: infer kaslr offset for QEMU COMPRESSED dumps without
    vmcoreinfo
 Makefile       |   7 +-
 defs.h         |  39 +++++
 diskdump.c     |  61 ++++++++
 kaslr_helper.c | 488 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 netdump.c      |  54 +++++++
 netdump.h      |  24 +--
 sadump.c       | 486 ++++----------------------------------------------------
 symbols.c      |  26 ++-
 x86_64.c       |  18 ++-
 9 files changed, 719 insertions(+), 484 deletions(-)
 create mode 100644 kaslr_helper.c
-- 
2.14.3
                                
                         
                        
                                
                                7 years, 7 months
                        
                        
                 
         
 
        
            
        
        
        
                
                        
                                
                                 
                                        
                                
                         
                        
                                
                                
                                        
                                                
                                        
                                        
                                        [PATCH v2] vmware_vmss: read vCPUs regs and show them in 'bt'
                                
                                
                                
                                    
                                        by Sergio Lopez
                                    
                                
                                
                                        VMSS dump files contain the state of each vCPU at the time of suspending
the VM. This change enables 'crash' to read some relevant registers from
each vCPU state to display them in 'bt' and adds additional output for
commands 'help -D', 'help -r' and 'help -p'.
This is also the first step towards implementing kaslr offset
calculation for VMSS dump files.
---
 defs.h        |   5 +
 help.c        |   3 +
 kernel.c      |   2 +
 main.c        |   3 +
 memory.c      |   2 +
 vmware_vmss.c | 375 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 vmware_vmss.h |  31 +++++
 x86_64.c      |  13 +-
 8 files changed, 424 insertions(+), 10 deletions(-)
diff --git a/defs.h b/defs.h
index 7998ebf..44efc8a 100644
--- a/defs.h
+++ b/defs.h
@@ -283,6 +283,7 @@ struct number_option {
 #define LKCD_KERNTYPES()    (pc->flags & KERNTYPES)
 #define KVMDUMP_DUMPFILE()  (pc->flags & KVMDUMP)
 #define SADUMP_DUMPFILE()  (pc->flags & SADUMP)
+#define VMSS_DUMPFILE()     (pc->flags & VMWARE_VMSS)
 
 #define NETDUMP_LOCAL    (0x1)  /* netdump_data flags */
 #define NETDUMP_REMOTE   (0x2)  
@@ -6388,6 +6389,10 @@ int vmware_vmss_init(char *filename, FILE *ofp);
 uint vmware_vmss_page_size(void);
 int read_vmware_vmss(int, void *, int, ulong, physaddr_t);
 int write_vmware_vmss(int, void *, int, ulong, physaddr_t);
+void vmware_vmss_display_regs(int, FILE *);
+void get_vmware_vmss_regs(struct bt_info *, ulong *, ulong *);
+int vmware_vmss_memory_dump(FILE *);
+void dump_registers_for_vmss_dump(void);
 
 /*
  *  gnu_binutils.c
diff --git a/help.c b/help.c
index 5f6d9be..06b7961 100644
--- a/help.c
+++ b/help.c
@@ -710,6 +710,9 @@ dump_registers(void)
 	} else if (NETDUMP_DUMPFILE() || KDUMP_DUMPFILE()) {
 		dump_registers_for_elf_dumpfiles();
 		return;
+	} else if (VMSS_DUMPFILE()) {
+		dump_registers_for_vmss_dump();
+		return;
 	}
 
 	error(FATAL, "-r option not supported on %s\n",
diff --git a/kernel.c b/kernel.c
index 1bf6251..7642217 100644
--- a/kernel.c
+++ b/kernel.c
@@ -2969,6 +2969,8 @@ back_trace(struct bt_info *bt)
 		get_xendump_regs(bt, &eip, &esp);
 	else if (SADUMP_DUMPFILE())
 		get_sadump_regs(bt, &eip, &esp);
+	else if (VMSS_DUMPFILE())
+		get_vmware_vmss_regs(bt, &eip, &esp);
         else if (REMOTE_PAUSED()) {
 		if (!is_task_active(bt->task) || !get_remote_regs(bt, &eip, &esp))
 			machdep->get_stack_frame(bt, &eip, &esp);
diff --git a/main.c b/main.c
index 2aae0c6..15834cb 100644
--- a/main.c
+++ b/main.c
@@ -1361,6 +1361,9 @@ dump_program_context(void)
         if (pc->flags & DISKDUMP)
                 sprintf(&buf[strlen(buf)],
                         "%sDISKDUMP", others++ ? "|" : "");
+        if (pc->flags & VMWARE_VMSS)
+                sprintf(&buf[strlen(buf)],
+                        "%sVMWARE_VMSS", others++ ? "|" : "");
         if (pc->flags & SYSMAP)
                 sprintf(&buf[strlen(buf)],
                         "%sSYSMAP", others++ ? "|" : "");
diff --git a/memory.c b/memory.c
index 0669276..9f752c2 100644
--- a/memory.c
+++ b/memory.c
@@ -16909,6 +16909,8 @@ dumpfile_memory(int cmd)
                         retval = kcore_memory_dump(fp);
 		else if (pc->flags & SADUMP)
 			retval = sadump_memory_dump(fp);
+		else if (pc->flags & VMWARE_VMSS)
+			retval = vmware_vmss_memory_dump(fp);
 		break;
 	
 	case DUMPFILE_ENVIRONMENT:
diff --git a/vmware_vmss.c b/vmware_vmss.c
index 667676a..a97a545 100644
--- a/vmware_vmss.c
+++ b/vmware_vmss.c
@@ -25,6 +25,8 @@
 #define VMW_PAGE_SIZE (4096)
 #define VMW_PAGE_SHIFT (12)
 
+#define MAX_BLOCK_DUMP (128)
+
 static vmssdata vmss = { 0 };
 
 int
@@ -128,7 +130,8 @@ vmware_vmss_init(char *filename, FILE *ofp)
 		DEBUG_PARSE_PRINT((ofp, LOGPRX"Group: %-20s offset=%#llx size=0x%#llx.\n",
 				  grps[i].name, (ulonglong)grps[i].position, (ulonglong)grps[i].size));
 
-		if (strcmp(grps[i].name, "memory") != 0) {
+		if (strcmp(grps[i].name, "memory") != 0 &&
+		    (strcmp(grps[i].name, "cpu") != 0 || !machine_type("X86_64"))) {
 			continue;
 		}
 
@@ -198,12 +201,6 @@ vmware_vmss_init(char *filename, FILE *ofp)
 				}
 				blockpos += padsize;
 
-				if (fseek(fp, blockpos + nbytes, SEEK_SET) == -1) {
-					error(INFO, LOGPRX"Cannot seek past block at %#llx.\n",
-					      (ulonglong)(blockpos + nbytes));
-					break;
-				}
-
 				if (strcmp(name, "Memory") == 0) {
 					/* The things that we really care about...*/
 					vmss.memoffset = blockpos;
@@ -217,11 +214,44 @@ vmware_vmss_init(char *filename, FILE *ofp)
 						result = FALSE;
 						goto exit;
 					}
+
+					if (fseek(fp, blockpos + nbytes, SEEK_SET) == -1) {
+						error(INFO, LOGPRX"Cannot seek past block at %#llx.\n",
+						      (ulonglong)(blockpos + nbytes));
+						break;
+					}
+				} else if (strcmp(name, "gpregs") == 0 &&
+					   nbytes == VMW_GPREGS_SIZE &&
+					   idx[0] < vmss.num_vcpus) {
+					int cpu = idx[0];
+
+					fread(vmss.regs64[cpu], nbytes, 1, fp);
+				} else if (strcmp(name, "CR64") == 0 &&
+					   nbytes == VMW_CR64_SIZE &&
+					   idx[0] < vmss.num_vcpus) {
+					int cpu = idx[0];
+
+					fread(&vmss.regs64[cpu]->cr[0], nbytes, 1, fp);
+				} else if (strcmp(name, "IDTR") == 0 &&
+					   nbytes == VMW_IDTR_SIZE &&
+					   idx[0] < vmss.num_vcpus) {
+					int cpu = idx[0];
+					char buf[10];
+
+					fread(&buf[0], nbytes, 1, fp);
+					vmss.regs64[cpu]->idtr = *((uint64_t *)(&buf[0] + 2));
+				} else {
+					if (fseek(fp, blockpos + nbytes, SEEK_SET) == -1) {
+						error(INFO, LOGPRX"Cannot seek past block at %#llx.\n",
+						      (ulonglong)(blockpos + nbytes));
+						break;
+					}
 				}
 			} else {
 				union {
 					uint8_t val[TAG_VALSIZE_MASK];
 					uint32_t val32;
+					uint64_t val64;
 				} u;
 				unsigned k;
 				unsigned valsize = TAG_VALSIZE(tag);
@@ -253,6 +283,30 @@ vmware_vmss_init(char *filename, FILE *ofp)
 					if (strcmp(name, "align_mask") == 0) {
 						vmss.alignmask = u.val32;
 					}
+				} else if (strcmp(grps[i].name, "cpu") == 0) {
+					if (strcmp(name, "cpu:numVCPUs") == 0) {
+						if (vmss.regs64 != NULL) {
+							error(INFO, LOGPRX"Duplicated cpu:numVCPUs entry.\n");
+							break;
+						}
+
+						vmss.num_vcpus = u.val32;
+						vmss.regs64 = malloc(vmss.num_vcpus * sizeof(void *));
+
+						for (k = 0; k < vmss.num_vcpus; k++) {
+							vmss.regs64[k] = malloc(sizeof(vmssregs64));
+							memset(vmss.regs64[k], 0, sizeof(vmssregs64));
+						}
+					} else if (strcmp(name, "rip") == 0) {
+						int cpu = idx[0];
+						vmss.regs64[cpu]->rip = u.val64;
+					} else if (strcmp(name, "eflags") == 0) {
+						int cpu = idx[0];
+						vmss.regs64[cpu]->rflags |= u.val32;
+					} else if (strcmp(name, "EFLAGS") == 0) {
+						int cpu = idx[0];
+						vmss.regs64[cpu]->rflags |= u.val32;
+					}
 				}
 
 				DEBUG_PARSE_PRINT((ofp, "\n"));
@@ -350,3 +404,310 @@ write_vmware_vmss(int fd, void *bufptr, int cnt, ulong addr, physaddr_t paddr)
 	return SEEK_ERROR;
 }
 
+void
+vmware_vmss_display_regs(int cpu, FILE *ofp)
+{
+	if (cpu >= vmss.num_vcpus)
+		return;
+
+	if (machine_type("X86_64")) {
+		fprintf(ofp,
+		    "	 RIP: %016llx  RSP: %016llx  RFLAGS: %08llx\n"
+		    "	 RAX: %016llx  RBX: %016llx  RCX: %016llx\n"
+		    "	 RDX: %016llx  RSI: %016llx  RDI: %016llx\n"
+		    "	 RBP: %016llx	R8: %016llx   R9: %016llx\n"
+		    "	 R10: %016llx  R11: %016llx  R12: %016llx\n"
+		    "	 R13: %016llx  R14: %016llx  R15: %016llx\n",
+		    vmss.regs64[cpu]->rip,
+		    vmss.regs64[cpu]->rsp,
+		    vmss.regs64[cpu]->rflags,
+		    vmss.regs64[cpu]->rax,
+		    vmss.regs64[cpu]->rbx,
+		    vmss.regs64[cpu]->rcx,
+		    vmss.regs64[cpu]->rdx,
+		    vmss.regs64[cpu]->rsi,
+		    vmss.regs64[cpu]->rdi,
+		    vmss.regs64[cpu]->rbp,
+		    vmss.regs64[cpu]->r8,
+		    vmss.regs64[cpu]->r9,
+		    vmss.regs64[cpu]->r10,
+		    vmss.regs64[cpu]->r11,
+		    vmss.regs64[cpu]->r12,
+		    vmss.regs64[cpu]->r13,
+		    vmss.regs64[cpu]->r14,
+		    vmss.regs64[cpu]->r15
+		);
+	}
+}
+
+void
+get_vmware_vmss_regs(struct bt_info *bt, ulong *ipp, ulong *spp)
+{
+	ulong ip, sp;
+	struct register_set *rp;
+
+	ip = sp = 0;
+
+	if (!is_task_active(bt->task)) {
+		machdep->get_stack_frame(bt, ipp, spp);
+		return;
+	}
+
+	bt->flags |= BT_DUMPFILE_SEARCH;
+	if (machine_type("X86_64"))
+		machdep->get_stack_frame(bt, ipp, spp);
+	else if (machine_type("X86"))
+		get_netdump_regs_x86(bt, ipp, spp);
+	if (bt->flags & BT_DUMPFILE_SEARCH)
+		return;
+
+	if ((vmss.regs64 == NULL) ||
+	    (bt->tc->processor >= vmss.num_vcpus))
+		return;
+
+	ip = (ulong)vmss.regs64[bt->tc->processor]->rip;
+	sp = (ulong)vmss.regs64[bt->tc->processor]->rsp;
+	if (is_kernel_text(ip) &&
+	    (((sp >= GET_STACKBASE(bt->task)) &&
+	      (sp < GET_STACKTOP(bt->task))) ||
+	     in_alternate_stack(bt->tc->processor, sp))) {
+		*ipp = ip;
+		*spp = sp;
+		bt->flags |= BT_KERNEL_SPACE;
+		return;
+	}
+
+	if (!is_kernel_text(ip) &&
+	    in_user_stack(bt->tc->task, sp))
+		bt->flags |= BT_USER_SPACE;
+}
+
+int
+vmware_vmss_memory_dump(FILE *fp)
+{
+	cptdumpheader hdr;
+	cptgroupdesc *grps = NULL;
+	unsigned grpsize;
+	unsigned i;
+	int result = TRUE;
+
+	if (fseek(vmss.dfp, 0, SEEK_SET) != 0) {
+		fprintf(fp, "Error seeking to position 0.\n");
+		return FALSE;
+	}
+
+	if (fread(&hdr, sizeof(cptdumpheader), 1, vmss.dfp) != 1) {
+		fprintf(fp, "Failed to read vmss file: %s [Error %d] %s\n",
+			errno, strerror(errno));
+		return FALSE;
+	}
+
+	fprintf(fp, "vmware_vmss:\n");
+	fprintf(fp, "    Header: id=%x version=%d numgroups=%d\n",
+		hdr.id, hdr.version, hdr.numgroups);
+
+	vmss.cpt64bit = (hdr.id != CPTDUMP_OLD_MAGIC_NUMBER);
+	fprintf(fp, "    Checkpoint is %d-bit\n", vmss.cpt64bit ? 64 : 32);
+
+	grpsize = hdr.numgroups * sizeof (cptgroupdesc);
+	grps = (cptgroupdesc *) malloc(grpsize * sizeof(cptgroupdesc));
+	if (grps == NULL) {
+		fprintf(fp, "Failed to allocate memory! [Error %d] %s\n",
+			errno, strerror(errno));
+		return FALSE;
+	}
+
+	if (fread(grps, sizeof(cptgroupdesc), grpsize, vmss.dfp) != grpsize) {
+		fprintf(fp, "Failed to read vmss file: [Error %d] %s\n",
+			errno, strerror(errno));
+		result = FALSE;
+		goto exit;
+	}
+
+	for (i = 0; i < hdr.numgroups; i++) {
+		if (fseek(vmss.dfp, grps[i].position, SEEK_SET) == -1) {
+			fprintf(fp, "Bad offset of VMSS Group['%s'] in vmss file at %#llx.\n",
+				grps[i].name, (ulonglong)grps[i].position);
+			continue;
+		}
+		fprintf(fp, "\nGroup: %s offset=%#llx size=0x%#llx\n",
+			grps[i].name, (ulonglong)grps[i].position, (ulonglong)grps[i].size);
+
+		for (;;) {
+			uint16_t tag;
+			char name[TAG_NAMELEN_MASK + 1];
+			unsigned nameLen;
+			unsigned nindx;
+			int idx[3];
+			unsigned j;
+			int nextgroup = FALSE;
+
+			if (fread(&tag, sizeof(tag), 1, vmss.dfp) != 1) {
+				fprintf(fp, "Cannot read tag.\n");
+				break;
+			}
+			if (tag == NULL_TAG)
+				break;
+
+			nameLen = TAG_NAMELEN(tag);
+			if (fread(name, nameLen, 1, vmss.dfp) != 1) {
+				fprintf(fp, "Cannot read tag name.\n");
+				break;
+			}
+			name[nameLen] = 0;
+			fprintf(fp, "    Item %20s", name);
+
+			nindx = TAG_NINDX(tag);
+			if (nindx > 3) {
+				fprintf(fp, "Too many indexes %d (> 3).\n", nindx);
+				break;
+			}
+			idx[0] = idx[1] = idx[2] = NO_INDEX;
+			for (j= 0; j < 3; j++) {
+				if (j < nindx) {
+					if (fread(&idx[j], sizeof(idx[0]), 1, vmss.dfp) != 1) {
+						fprintf(fp, "Cannot read index.\n");
+						nextgroup = TRUE;
+						break;
+					}
+					fprintf(fp, "[%d]", idx[j]);
+				} else
+					fprintf(fp, "   ");
+			}
+		       if (nextgroup)
+				break;
+
+			if (IS_BLOCK_TAG(tag)) {
+				uint64_t nbytes;
+				uint64_t blockpos;
+				uint64_t nbytesinmem;
+				int compressed = IS_BLOCK_COMPRESSED_TAG(tag);
+				uint16_t padsize;
+				unsigned k, l;
+				char byte;
+
+				if (fread(&nbytes, sizeof(nbytes), 1, vmss.dfp) != 1) {
+					fprintf(fp, "Cannot read block size.\n");
+					break;
+				}
+				if (fread(&nbytesinmem, sizeof(nbytesinmem), 1, vmss.dfp) != 1) {
+					fprintf(fp, "Cannot read block memory size.\n");
+					break;
+				}
+				if (fread(&padsize, sizeof(padsize), 1, vmss.dfp) != 1) {
+					fprintf(fp, "Cannot read block padding size.\n");
+					break;
+				}
+				if ((blockpos = ftell(vmss.dfp)) == -1) {
+					fprintf(fp, "Cannot determine location within VMSS file.\n");
+					break;
+				}
+				blockpos += padsize;
+
+				fprintf(fp, " => %sBLOCK: position=%#llx size=%#llx memsize=%#llx\n",
+					compressed ? "COMPRESSED " : "",
+					(ulonglong)blockpos, (ulonglong)nbytes, (ulonglong)nbytesinmem);
+
+				if (nbytes && nbytes <= MAX_BLOCK_DUMP && !compressed) {
+					fprintf(fp, "Hex dump: \n");
+					l = 0;
+					for (k = 0; k < nbytes; k++) {
+						if (fread(&byte, 1, 1, vmss.dfp) != 1) {
+							fprintf(fp, "Cannot read byte.\n");
+							result = FALSE;
+							goto exit;
+						}
+
+						fprintf(fp, " %02hhX", byte);
+
+						if (l++ == 15) {
+							fprintf(fp, "\n");
+							l = 0;
+						}
+					}
+					if (l)
+						fprintf(fp, "\n\n");
+					else
+						fprintf(fp, "\n");
+				} else {
+					if (fseek(vmss.dfp, blockpos + nbytes, SEEK_SET) == -1) {
+						fprintf(fp, "Cannot seek past block at %#llx.\n",
+							(ulonglong)(blockpos + nbytes));
+						result = FALSE;
+						goto exit;
+					}
+				}
+			} else {
+				union {
+					uint8_t val[TAG_VALSIZE_MASK];
+					uint32_t val32;
+					uint64_t val64;
+				} u;
+				unsigned k;
+				unsigned valsize = TAG_VALSIZE(tag);
+				uint64_t blockpos = ftell(vmss.dfp);
+
+				fprintf(fp, " => position=%#llx size=%#x: ",
+					(ulonglong)blockpos, valsize);
+
+				if (fread(u.val, sizeof(u.val[0]), valsize, vmss.dfp) != valsize) {
+					fprintf(fp, "Cannot read item.\n");
+					break;
+				}
+				for (k = 0; k < valsize; k++) {
+					/* Assume Little Endian */
+					fprintf(fp, "%02X", u.val[valsize - k - 1]);
+				}
+
+
+				fprintf(fp, "\n");
+			}
+		}
+	}
+
+exit:
+	if (grps)
+		free(grps);
+
+	return result;
+}
+
+void
+dump_registers_for_vmss_dump(void)
+{
+	int i;
+	vmssregs64 *regs;
+
+	if (!machine_type("X86_64")) {
+		fprintf(fp, "-r option not supported on this dumpfile type\n");
+		return;
+	}
+
+	for (i = 0; i < vmss.num_vcpus; i++) {
+		regs = vmss.regs64[i];
+
+		if (i)
+			fprintf(fp, "\n");
+
+		fprintf(fp, "CPU %d:\n", i);
+
+		fprintf(fp, "  RAX: %016llx  RBX: %016llx  RCX: %016llx\n",
+			regs->rax, regs->rbx, regs->rcx);
+		fprintf(fp, "  RDX: %016llx  RSI: %016llx  RDI: %016llx\n",
+			regs->rdx, regs->rsi, regs->rdi);
+		fprintf(fp, "  RSP: %016llx  RBP: %016llx  R8:  %016llx\n",
+			regs->rsp, regs->rbp, regs->r8);
+		fprintf(fp, "   R9: %016llx  R10: %016llx  R11: %016llx\n",
+			regs->r9, regs->r10, regs->r11);
+		fprintf(fp, "  R12: %016llx  R13: %016llx  R14: %016llx\n",
+			regs->r12, regs->r13, regs->r14);
+		fprintf(fp, "  R15: %016llx  RIP: %016llx  RFLAGS: %08llx\n",
+			regs->r15, regs->rip, regs->rflags);
+		fprintf(fp, "  IDT: base: %016llx\n",
+                        regs->idtr);
+                fprintf(fp, "  CR0: %016llx  CR1: %016llx  CR2: %016llx\n",
+			regs->cr[0], regs->cr[1], regs->cr[2]);
+		fprintf(fp, "  CR3: %016llx  CR4: %016llx\n",
+			regs->cr[3], regs->cr[4]);
+	}
+}
diff --git a/vmware_vmss.h b/vmware_vmss.h
index a4b8937..41d14c3 100644
--- a/vmware_vmss.h
+++ b/vmware_vmss.h
@@ -89,6 +89,35 @@ struct memregion {
 };
 typedef struct memregion	memregion;
 
+#define VMW_GPREGS_SIZE (128)
+#define VMW_CR64_SIZE (72)
+#define VMW_IDTR_SIZE (10)
+struct vmssregs64 {
+	/* read from vmss */
+	uint64_t	rax;
+	uint64_t	rcx;
+	uint64_t	rdx;
+	uint64_t	rbx;
+	uint64_t	rbp;
+	uint64_t	rsp;
+	uint64_t	rsi;
+	uint64_t	rdi;
+	uint64_t	r8;
+	uint64_t	r9;
+	uint64_t	r10;
+	uint64_t	r11;
+	uint64_t	r12;
+	uint64_t	r13;
+	uint64_t	r14;
+	uint64_t	r15;
+	/* manually managed */
+	uint64_t	idtr;
+	uint64_t	cr[VMW_CR64_SIZE / 8];
+	uint64_t	rip;
+	uint64_t	rflags;
+};
+typedef struct vmssregs64 vmssregs64;
+
 #define MAX_REGIONS	3
 struct vmssdata {
 	int32_t	cpt64bit;
@@ -99,6 +128,8 @@ struct vmssdata {
         memregion	regions[MAX_REGIONS];
 	uint64_t	memoffset;
 	uint64_t	memsize;
+	uint64_t	num_vcpus;
+	vmssregs64	**regs64;
 };
 typedef struct vmssdata vmssdata;
 
diff --git a/x86_64.c b/x86_64.c
index 0d5e150..7b02761 100644
--- a/x86_64.c
+++ b/x86_64.c
@@ -3273,6 +3273,8 @@ x86_64_low_budget_back_trace_cmd(struct bt_info *bt_in)
 			diskdump_display_regs(bt->tc->processor, ofp);
 		else if (SADUMP_DUMPFILE())
 			sadump_display_regs(bt->tc->processor, ofp);
+		else if (VMSS_DUMPFILE())
+			vmware_vmss_display_regs(bt->tc->processor, ofp);
 		return;
 	}
 
@@ -3295,13 +3297,16 @@ x86_64_low_budget_back_trace_cmd(struct bt_info *bt_in)
 			diskdump_display_regs(bt->tc->processor, ofp);
 		else if (SADUMP_DUMPFILE())
 			sadump_display_regs(bt->tc->processor, ofp);
+		else if (VMSS_DUMPFILE())
+			vmware_vmss_display_regs(bt->tc->processor, ofp);
 		else if (pc->flags2 & QEMU_MEM_DUMP_ELF)
 			display_regs_from_elf_notes(bt->tc->processor, ofp);
 		return;
 	} else if ((bt->flags & BT_KERNEL_SPACE) &&
 		   (KVMDUMP_DUMPFILE() ||
 		    (ELF_NOTES_VALID() && DISKDUMP_DUMPFILE()) ||
-		    SADUMP_DUMPFILE() || (pc->flags2 & QEMU_MEM_DUMP_ELF))) {
+		    SADUMP_DUMPFILE() || (pc->flags2 & QEMU_MEM_DUMP_ELF) ||
+		    VMSS_DUMPFILE())) {
 		fprintf(ofp, "    [exception RIP: ");
 		if ((sp = value_search(bt->instptr, &offset))) {
 			fprintf(ofp, "%s", sp->name);
@@ -3317,6 +3322,8 @@ x86_64_low_budget_back_trace_cmd(struct bt_info *bt_in)
 			diskdump_display_regs(bt->tc->processor, ofp);
 		else if (SADUMP_DUMPFILE())
 			sadump_display_regs(bt->tc->processor, ofp);
+		else if (VMSS_DUMPFILE())
+			vmware_vmss_display_regs(bt->tc->processor, ofp);
 		else if (pc->flags2 & QEMU_MEM_DUMP_ELF)
 			display_regs_from_elf_notes(bt->tc->processor, ofp);
 
@@ -4941,7 +4948,7 @@ skip_stage:
 	if (halt_rip && halt_rsp) {
         	*rip = halt_rip;
 		*rsp = halt_rsp;
-		if (KVMDUMP_DUMPFILE() || SADUMP_DUMPFILE())
+		if (KVMDUMP_DUMPFILE() || SADUMP_DUMPFILE() || VMSS_DUMPFILE())
 			bt_in->flags &= ~(ulonglong)BT_DUMPFILE_SEARCH;
 		return;
 	}
@@ -4986,7 +4993,7 @@ skip_stage:
 
         machdep->get_stack_frame(bt, rip, rsp);
 
-	if (KVMDUMP_DUMPFILE() || SADUMP_DUMPFILE())
+	if (KVMDUMP_DUMPFILE() || SADUMP_DUMPFILE() || VMSS_DUMPFILE())
 		bt_in->flags &= ~(ulonglong)BT_DUMPFILE_SEARCH;
 }
 
-- 
2.14.3
                                
                         
                        
                                
                                7 years, 7 months
                        
                        
                 
         
 
        
            
        
        
        
                
                        
                                
                                 
                                        
                                
                         
                        
                                
                                
                                        
                                                
                                        
                                        
                                        [PATCH] vmware_vmss: read vCPUs regs and show them in 'bt'
                                
                                
                                
                                    
                                        by Sergio Lopez
                                    
                                
                                
                                        VMSS dump files contain the state of each vCPU at the time of suspending
the VM. This change enables 'crash' to read some relevant registers from
each vCPU state and display them in 'bt'.
This is also the first step towards implementing kaslr offset
calculation for VMSS dump files.
---
 defs.h        |   3 ++
 kernel.c      |   2 +
 vmware_vmss.c | 148 +++++++++++++++++++++++++++++++++++++++++++++++++++++++---
 vmware_vmss.h |  28 +++++++++++
 x86_64.c      |  13 ++++--
 5 files changed, 184 insertions(+), 10 deletions(-)
diff --git a/defs.h b/defs.h
index 7998ebf..0ebd38b 100644
--- a/defs.h
+++ b/defs.h
@@ -283,6 +283,7 @@ struct number_option {
 #define LKCD_KERNTYPES()    (pc->flags & KERNTYPES)
 #define KVMDUMP_DUMPFILE()  (pc->flags & KVMDUMP)
 #define SADUMP_DUMPFILE()  (pc->flags & SADUMP)
+#define VMSS_DUMPFILE()     (pc->flags & VMWARE_VMSS)
 
 #define NETDUMP_LOCAL    (0x1)  /* netdump_data flags */
 #define NETDUMP_REMOTE   (0x2)  
@@ -6388,6 +6389,8 @@ int vmware_vmss_init(char *filename, FILE *ofp);
 uint vmware_vmss_page_size(void);
 int read_vmware_vmss(int, void *, int, ulong, physaddr_t);
 int write_vmware_vmss(int, void *, int, ulong, physaddr_t);
+void vmware_vmss_display_regs(int, FILE *);
+void get_vmware_vmss_regs(struct bt_info *, ulong *, ulong *);
 
 /*
  *  gnu_binutils.c
diff --git a/kernel.c b/kernel.c
index 1bf6251..7642217 100644
--- a/kernel.c
+++ b/kernel.c
@@ -2969,6 +2969,8 @@ back_trace(struct bt_info *bt)
 		get_xendump_regs(bt, &eip, &esp);
 	else if (SADUMP_DUMPFILE())
 		get_sadump_regs(bt, &eip, &esp);
+	else if (VMSS_DUMPFILE())
+		get_vmware_vmss_regs(bt, &eip, &esp);
         else if (REMOTE_PAUSED()) {
 		if (!is_task_active(bt->task) || !get_remote_regs(bt, &eip, &esp))
 			machdep->get_stack_frame(bt, &eip, &esp);
diff --git a/vmware_vmss.c b/vmware_vmss.c
index 667676a..10fbe9e 100644
--- a/vmware_vmss.c
+++ b/vmware_vmss.c
@@ -24,6 +24,9 @@
 /* VMware only supports X86/X86_64 virtual machines. */
 #define VMW_PAGE_SIZE (4096)
 #define VMW_PAGE_SHIFT (12)
+#define VMW_GPREGS_SIZE (128)
+#define VMW_CR64_SIZE (72)
+#define VMW_IDTR_SIZE (10)
 
 static vmssdata vmss = { 0 };
 
@@ -128,7 +131,8 @@ vmware_vmss_init(char *filename, FILE *ofp)
 		DEBUG_PARSE_PRINT((ofp, LOGPRX"Group: %-20s offset=%#llx size=0x%#llx.\n",
 				  grps[i].name, (ulonglong)grps[i].position, (ulonglong)grps[i].size));
 
-		if (strcmp(grps[i].name, "memory") != 0) {
+		if (strcmp(grps[i].name, "memory") != 0 &&
+		    (strcmp(grps[i].name, "cpu") != 0 || !machine_type("X86_64"))) {
 			continue;
 		}
 
@@ -198,12 +202,6 @@ vmware_vmss_init(char *filename, FILE *ofp)
 				}
 				blockpos += padsize;
 
-				if (fseek(fp, blockpos + nbytes, SEEK_SET) == -1) {
-					error(INFO, LOGPRX"Cannot seek past block at %#llx.\n",
-					      (ulonglong)(blockpos + nbytes));
-					break;
-				}
-
 				if (strcmp(name, "Memory") == 0) {
 					/* The things that we really care about...*/
 					vmss.memoffset = blockpos;
@@ -217,11 +215,46 @@ vmware_vmss_init(char *filename, FILE *ofp)
 						result = FALSE;
 						goto exit;
 					}
+
+					if (fseek(fp, blockpos + nbytes, SEEK_SET) == -1) {
+						error(INFO, LOGPRX"Cannot seek past block at %#llx.\n",
+						      (ulonglong)(blockpos + nbytes));
+						break;
+					}
+				} else if (strcmp(name, "gpregs") == 0 &&
+					   nbytes == VMW_GPREGS_SIZE &&
+					   idx[0] < vmss.num_vcpus) {
+					int cpu = idx[0];
+
+					fread(vmss.regs64[cpu], nbytes, 1, fp);
+				} else if (strcmp(name, "CR64") == 0 &&
+					   nbytes == VMW_CR64_SIZE &&
+					   idx[0] < vmss.num_vcpus) {
+					int cpu = idx[0];
+					uint64_t regs[9];
+
+					fread(&regs[0], nbytes, 1, fp);
+					vmss.regs64[cpu]->cr3 = regs[3];
+				} else if (strcmp(name, "IDTR") == 0 &&
+					   nbytes == VMW_IDTR_SIZE &&
+					   idx[0] < vmss.num_vcpus) {
+					int cpu = idx[0];
+					char buf[10];
+
+					fread(&buf[0], nbytes, 1, fp);
+					vmss.regs64[cpu]->cr3 = *((uint64_t *)(&buf[0] + 2));
+				} else {
+					if (fseek(fp, blockpos + nbytes, SEEK_SET) == -1) {
+						error(INFO, LOGPRX"Cannot seek past block at %#llx.\n",
+						      (ulonglong)(blockpos + nbytes));
+						break;
+					}
 				}
 			} else {
 				union {
 					uint8_t val[TAG_VALSIZE_MASK];
 					uint32_t val32;
+					uint64_t val64;
 				} u;
 				unsigned k;
 				unsigned valsize = TAG_VALSIZE(tag);
@@ -253,6 +286,30 @@ vmware_vmss_init(char *filename, FILE *ofp)
 					if (strcmp(name, "align_mask") == 0) {
 						vmss.alignmask = u.val32;
 					}
+				} else if (strcmp(grps[i].name, "cpu") == 0) {
+					if (strcmp(name, "cpu:numVCPUs") == 0) {
+						if (vmss.regs64 != NULL) {
+							error(INFO, LOGPRX"Duplicated cpu:numVCPUs entry.\n");
+							break;
+						}
+
+						vmss.num_vcpus = u.val32;
+						vmss.regs64 = malloc(vmss.num_vcpus * sizeof(void *));
+
+						for (k = 0; k < vmss.num_vcpus; k++) {
+							vmss.regs64[k] = malloc(sizeof(vmssregs64));
+							memset(vmss.regs64[k], 0, sizeof(vmssregs64));
+						}
+					} else if (strcmp(name, "rip") == 0) {
+						int cpu = idx[0];
+						vmss.regs64[cpu]->rip = u.val64;
+					} else if (strcmp(name, "eflags") == 0) {
+						int cpu = idx[0];
+						vmss.regs64[cpu]->eflags |= u.val32;
+					} else if (strcmp(name, "EFLAGS") == 0) {
+						int cpu = idx[0];
+						vmss.regs64[cpu]->eflags |= u.val32;
+					}
 				}
 
 				DEBUG_PARSE_PRINT((ofp, "\n"));
@@ -350,3 +407,80 @@ write_vmware_vmss(int fd, void *bufptr, int cnt, ulong addr, physaddr_t paddr)
 	return SEEK_ERROR;
 }
 
+void
+vmware_vmss_display_regs(int cpu, FILE *ofp)
+{
+	if (cpu >= vmss.num_vcpus)
+		return;
+
+	if (machine_type("X86_64")) {
+		fprintf(ofp,
+		    "	 RIP: %016llx  RSP: %016llx  RFLAGS: %08llx\n"
+		    "	 RAX: %016llx  RBX: %016llx  RCX: %016llx\n"
+		    "	 RDX: %016llx  RSI: %016llx  RDI: %016llx\n"
+		    "	 RBP: %016llx	R8: %016llx   R9: %016llx\n"
+		    "	 R10: %016llx  R11: %016llx  R12: %016llx\n"
+		    "	 R13: %016llx  R14: %016llx  R15: %016llx\n",
+		    vmss.regs64[cpu]->rip,
+		    vmss.regs64[cpu]->rsp,
+		    vmss.regs64[cpu]->eflags,
+		    vmss.regs64[cpu]->rax,
+		    vmss.regs64[cpu]->rbx,
+		    vmss.regs64[cpu]->rcx,
+		    vmss.regs64[cpu]->rdx,
+		    vmss.regs64[cpu]->rsi,
+		    vmss.regs64[cpu]->rdi,
+		    vmss.regs64[cpu]->rbp,
+		    vmss.regs64[cpu]->r8,
+		    vmss.regs64[cpu]->r9,
+		    vmss.regs64[cpu]->r10,
+		    vmss.regs64[cpu]->r11,
+		    vmss.regs64[cpu]->r12,
+		    vmss.regs64[cpu]->r13,
+		    vmss.regs64[cpu]->r14,
+		    vmss.regs64[cpu]->r15
+		);
+	}
+}
+
+void
+get_vmware_vmss_regs(struct bt_info *bt, ulong *ipp, ulong *spp)
+{
+	ulong ip, sp;
+	struct register_set *rp;
+
+	ip = sp = 0;
+
+	if (!is_task_active(bt->task)) {
+		machdep->get_stack_frame(bt, ipp, spp);
+		return;
+	}
+
+	bt->flags |= BT_DUMPFILE_SEARCH;
+	if (machine_type("X86_64"))
+		machdep->get_stack_frame(bt, ipp, spp);
+	else if (machine_type("X86"))
+		get_netdump_regs_x86(bt, ipp, spp);
+	if (bt->flags & BT_DUMPFILE_SEARCH)
+		return;
+
+	if ((vmss.regs64 == NULL) ||
+	    (bt->tc->processor >= vmss.num_vcpus))
+		return;
+
+	ip = (ulong)vmss.regs64[bt->tc->processor]->rip;
+	sp = (ulong)vmss.regs64[bt->tc->processor]->rsp;
+	if (is_kernel_text(ip) &&
+	    (((sp >= GET_STACKBASE(bt->task)) &&
+	      (sp < GET_STACKTOP(bt->task))) ||
+	     in_alternate_stack(bt->tc->processor, sp))) {
+		*ipp = ip;
+		*spp = sp;
+		bt->flags |= BT_KERNEL_SPACE;
+		return;
+	}
+
+	if (!is_kernel_text(ip) &&
+	    in_user_stack(bt->tc->task, sp))
+		bt->flags |= BT_USER_SPACE;
+}
diff --git a/vmware_vmss.h b/vmware_vmss.h
index a4b8937..3c69a82 100644
--- a/vmware_vmss.h
+++ b/vmware_vmss.h
@@ -90,6 +90,32 @@ struct memregion {
 typedef struct memregion	memregion;
 
 #define MAX_REGIONS	3
+struct vmssregs64 {
+	/* read from vmss */
+	uint64_t	rax;
+	uint64_t	rcx;
+	uint64_t	rdx;
+	uint64_t	rbx;
+	uint64_t	rbp;
+	uint64_t	rsp;
+	uint64_t	rsi;
+	uint64_t	rdi;
+	uint64_t	r8;
+	uint64_t	r9;
+	uint64_t	r10;
+	uint64_t	r11;
+	uint64_t	r12;
+	uint64_t	r13;
+	uint64_t	r14;
+	uint64_t	r15;
+	/* manually managed */
+	uint64_t	idtr;
+	uint64_t	cr3;
+	uint64_t	rip;
+	uint64_t	eflags;
+};
+typedef struct vmssregs64 vmssregs64;
+
 struct vmssdata {
 	int32_t	cpt64bit;
 	FILE	*dfp;
@@ -99,6 +125,8 @@ struct vmssdata {
         memregion	regions[MAX_REGIONS];
 	uint64_t	memoffset;
 	uint64_t	memsize;
+	uint64_t	num_vcpus;
+	vmssregs64	**regs64;
 };
 typedef struct vmssdata vmssdata;
 
diff --git a/x86_64.c b/x86_64.c
index 0d5e150..7b02761 100644
--- a/x86_64.c
+++ b/x86_64.c
@@ -3273,6 +3273,8 @@ x86_64_low_budget_back_trace_cmd(struct bt_info *bt_in)
 			diskdump_display_regs(bt->tc->processor, ofp);
 		else if (SADUMP_DUMPFILE())
 			sadump_display_regs(bt->tc->processor, ofp);
+		else if (VMSS_DUMPFILE())
+			vmware_vmss_display_regs(bt->tc->processor, ofp);
 		return;
 	}
 
@@ -3295,13 +3297,16 @@ x86_64_low_budget_back_trace_cmd(struct bt_info *bt_in)
 			diskdump_display_regs(bt->tc->processor, ofp);
 		else if (SADUMP_DUMPFILE())
 			sadump_display_regs(bt->tc->processor, ofp);
+		else if (VMSS_DUMPFILE())
+			vmware_vmss_display_regs(bt->tc->processor, ofp);
 		else if (pc->flags2 & QEMU_MEM_DUMP_ELF)
 			display_regs_from_elf_notes(bt->tc->processor, ofp);
 		return;
 	} else if ((bt->flags & BT_KERNEL_SPACE) &&
 		   (KVMDUMP_DUMPFILE() ||
 		    (ELF_NOTES_VALID() && DISKDUMP_DUMPFILE()) ||
-		    SADUMP_DUMPFILE() || (pc->flags2 & QEMU_MEM_DUMP_ELF))) {
+		    SADUMP_DUMPFILE() || (pc->flags2 & QEMU_MEM_DUMP_ELF) ||
+		    VMSS_DUMPFILE())) {
 		fprintf(ofp, "    [exception RIP: ");
 		if ((sp = value_search(bt->instptr, &offset))) {
 			fprintf(ofp, "%s", sp->name);
@@ -3317,6 +3322,8 @@ x86_64_low_budget_back_trace_cmd(struct bt_info *bt_in)
 			diskdump_display_regs(bt->tc->processor, ofp);
 		else if (SADUMP_DUMPFILE())
 			sadump_display_regs(bt->tc->processor, ofp);
+		else if (VMSS_DUMPFILE())
+			vmware_vmss_display_regs(bt->tc->processor, ofp);
 		else if (pc->flags2 & QEMU_MEM_DUMP_ELF)
 			display_regs_from_elf_notes(bt->tc->processor, ofp);
 
@@ -4941,7 +4948,7 @@ skip_stage:
 	if (halt_rip && halt_rsp) {
         	*rip = halt_rip;
 		*rsp = halt_rsp;
-		if (KVMDUMP_DUMPFILE() || SADUMP_DUMPFILE())
+		if (KVMDUMP_DUMPFILE() || SADUMP_DUMPFILE() || VMSS_DUMPFILE())
 			bt_in->flags &= ~(ulonglong)BT_DUMPFILE_SEARCH;
 		return;
 	}
@@ -4986,7 +4993,7 @@ skip_stage:
 
         machdep->get_stack_frame(bt, rip, rsp);
 
-	if (KVMDUMP_DUMPFILE() || SADUMP_DUMPFILE())
+	if (KVMDUMP_DUMPFILE() || SADUMP_DUMPFILE() || VMSS_DUMPFILE())
 		bt_in->flags &= ~(ulonglong)BT_DUMPFILE_SEARCH;
 }
 
-- 
2.14.3
                                
                         
                        
                                
                                7 years, 7 months
                        
                        
                 
         
 
        
            
        
        
        
                
                        
                                
                                 
                                        
                                
                         
                        
                                
                                
                                        
                                                
                                        
                                        
                                        [PATCH v2 0/4] Generalize KASLR calculation and use it for KDUMPs
                                
                                
                                
                                    
                                        by Sergio Lopez
                                    
                                
                                
                                        Commit 45b74b89530d611b3fa95a1041e158fbb865fa84 added support for
calculating phys_base and kernel offset for KASLR-enabled kernels on
SADUMPs by using a technique developed by Takao Indoh. Originally, the
patchset included support for KDUMPs, but this was dropped in v2, as it
was deemed unnecessary due to the implementation of the vmcoreinfo
device in QEMU.
Sadly, there are many reasons why the vmcoreinfo device may not be
present at the moment the memory dump is taken from a VM, ranging from
a host running older QEMU/libvirt versions to misconfigured VMs or
environments running hypervisors that don't support this device.
This patchset generalizes the KASLR-related functions from sadump.c,
moving them to kaslr_helper.c, and makes KDUMP analysis fall back to
KASLR offset calculation if vmcoreinfo data is missing.
These changes have been successfully tested with a 3.10.0-830.el7.x86_64
under the following conditions:
 - kdump with KASLR and vmcoreinfo
 - kdump with KASLR but no vmcoreinfo
 - kdump without KASLR ("nokaslr" kernel command line option)
It was also tested that a "crash" patched with these changes still
builds and runs (live and kdump debugging) on an aarch64 machine.
changelog:
v2:
 - Limit application to QEMU ELF and QEMU COMPRESSED dumps (thanks Dave)
 - Add support for QEMU COMPRESSED dumps (diskdump)
Sergio Lopez (4):
  Move kaslr related functions from sadump.c to kaslr_helper.c
  Move QEMUCPU* structs from netdump.h to defs.h
  netdump: infer kaslr offset for QEMU ELF dumps without vmcoreinfo
  diskdump: infer kaslr offset for QEMU COMPRESSED dumps without
    vmcoreinfo
 Makefile       |   7 +-
 defs.h         |  43 +++++
 diskdump.c     |  96 ++++++++++++
 kaslr_helper.c | 473 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 netdump.c      |  79 ++++++++++
 netdump.h      |  24 +--
 sadump.c       | 487 +++++----------------------------------------------------
 symbols.c      |  26 ++-
 x86_64.c       |  29 +++-
 9 files changed, 782 insertions(+), 482 deletions(-)
 create mode 100644 kaslr_helper.c
-- 
2.14.3
                                
                         
                        
                                
                                7 years, 7 months
                        
                        
                 
         
 
        
            
        
        
        
                
                        
                                
                                 
                                        
                                
                         
                        
                                
                                
                                        
                                                
                                        
                                        
                                        modules loaded from wrong directory
                                
                                
                                
                                    
                                        by Olaf Hering
                                    
                                
                                
                                        When trying to run crash 7.2 like this, the 'mod -s $mod' command loads
the modules from the running system instead of the specified directory:
d=${0%/*}
tee $t <<_EOF_
mod -s xen-kbdfront
mod -s xen_kbdfront
exit
_EOF_
strace -f -s 123 -tt -o /dev/shm/crash.trace.txt \
crash \
        -i $t \
        --mod "${d}" \
        "${d}"/boot/vmlinux-*-default.gz \
        "$1" \
        "${d}"/usr/lib/debug/boot/vmlinux-*-default.debug
In the strace log I see a search for xen_kbdfront.ko, later for
xen-kbdfront.ko. Unfortunately /lib/modules/`uname -r` is searched before $d.
Now that I read the man page once more I noticed that --mod is just for
debug info?
It seems $d/boot is considered as well, likely because the kernel is
stored in that directory.
I wonder why there is no option to tell crash to only operate below a
certain directory when looking for module related things.
"--mod" seems to have an effect, if the directory exists it is searched.
How can I tell crash to only operate below $d?
Olaf
                                
                         
                        
                                
                                7 years, 7 months
                        
                        
                 
         
 
        
            
        
        
        
                
                        
                                
                                 
                                        
                                
                         
                        
                                
                                
                                        
                                                
                                        
                                        
                                        [PATCH 0/2] Generalize KASLR calculation and use it for KDUMPs
                                
                                
                                
                                    
                                        by Sergio Lopez
                                    
                                
                                
                                        Commit 45b74b89530d611b3fa95a1041e158fbb865fa84 added support for
calculating phys_base and kernel offset for KASLR-enabled kernels on
SADUMPs by using a technique developed by Takao Indoh. Originally, the
patchset included support for KDUMPs, but this was dropped in v2, as it
was deemed unnecessary due to the implementation of the vmcoreinfo
device in QEMU.
Sadly, there are many reasons why the vmcoreinfo device may not be
present at the moment the memory dump is taken from a VM, ranging from
a host running older QEMU/libvirt versions to misconfigured VMs or
environments running hypervisors that don't support this device.
This patchset generalizes the KASLR-related functions from sadump.c,
moving them to kaslr_helper.c, and makes KDUMP analysis fall back to
KASLR offset calculation if vmcoreinfo data is missing.
These changes have been successfully tested with a 3.10.0-830.el7.x86_64
under the following conditions:
 - kdump with KASLR and vmcoreinfo
 - kdump with KASLR but no vmcoreinfo
 - kdump without KASLR ("nokaslr" kernel command line option)
It was also tested that a "crash" patched with these changes still
builds and runs (live and kdump debugging) on an aarch64 machine.
Sergio Lopez (2):
  Move kaslr related functions from sadump.c to kaslr_helper.c
  kdump: if vmcoreinfo is missing, try to infer kaslr offset
 Makefile       |   7 +-
 defs.h         |  15 ++
 kaslr_helper.c | 466 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 netdump.c      |  73 +++++++++
 netdump.h      |   1 +
 sadump.c       | 487 +++++----------------------------------------------------
 symbols.c      |  29 ++--
 x86_64.c       |  14 +-
 8 files changed, 629 insertions(+), 463 deletions(-)
 create mode 100644 kaslr_helper.c
-- 
2.14.3
                                
                         
                        
                                
                                7 years, 7 months
                        
                        
                 
         
 
        
            
        
        
        
                
                        
                        
                                
                                
                                        
                                                
                                        
                                        
                                        Re: [Crash-utility] [PATCH v3 0/2] Fix KASLR problem on sadump
                                
                                
                                
                                    
                                        by Sergio Lopez
                                    
                                
                                
                                        Hi,
I know support for guessing the KASLR offset for kdump/diskimages was
deliberately dropped from this patchset, because upstream was also working on
the vmcoreinfo device, but I think having that would be *really* useful.
The vmcoreinfo device solution requires explicit support in the virtualization
Hosts, and it'll take a while until its presence has been generalized among
products and users. Meanwhile, we already have VMs with KASLR-enabled kernels.
So I'd like to ask if a patchset extending the current state of KASLR offset
guessing for sadump to kdump/diskimages would be considered for merging.
Thanks,
Sergio.
                                
                         
                        
                                
                                7 years, 7 months
                        
                        
                 
         
 
        
            
        
        
        
                
                        
                                
                                 
                                        
                                
                         
                        
                                
                                
                                        
                                                
                                        
                                        
                                        [PATCH v3] Speed up "kmem -[sS]" by optimizing is_page_ptr() for x86_64
                                
                                
                                
                                    
                                        by Kazuhito Hagio
                                    
                                
                                
                                        changes v2 -> v3:
- move the setting point of machdep->is_page_ptr to machdep_init(SETUP_ENV)
changes v1 -> v2:
- rewrite based on the per-architecture function call Dave provided
- remove the part which used page.flags for non-VMEMMAP kernels
- add address range/position check first
- remove/optimize the calculations of mem_map and phys address
- modify the patch description
The "kmem -[sS]" commands can take several minutes to complete with
the following conditions:
  - The system has a lot of memory sections with CONFIG_SPARSEMEM, and
  - The kernel uses SLUB and it has a very long partial slab list.
  crash> kmem -s dentry
  CACHE            NAME                 OBJSIZE  ALLOCATED     TOTAL  SLABS  SSIZE
  ffff88017fc78a00 dentry                   192    9038949  10045728  239184     8k
  crash> kmem -s dentry | bash -c 'cat >/dev/null ; echo $SECONDS'
  133
  crash> kmem -S dentry | bash -c 'cat >/dev/null ; echo $SECONDS'
  656
One of the causes is that is_page_ptr() in count_partial() determines
whether a given slub page address is a page struct by searching all
mem_sections available for the one which includes it.
With CONFIG_SPARSEMEM_VMEMMAP on x86_64, we can do that by checking
its address range and whether its calculated mem_section is valid.
With this patch, the amount of computation can be significantly reduced
in that case.
  crash> kmem -s dentry | bash -c 'cat >/dev/null ; echo $SECONDS'
  1
  crash> kmem -S dentry | bash -c 'cat >/dev/null ; echo $SECONDS'
  1
Signed-off-by: Kazuhito Hagio <k-hagio(a)ab.jp.nec.com>
---
 defs.h   |  1 +
 x86_64.c | 23 +++++++++++++++++++++++
 2 files changed, 24 insertions(+)
diff --git a/defs.h b/defs.h
index 9663bd8..7998ebf 100644
--- a/defs.h
+++ b/defs.h
@@ -5133,6 +5133,7 @@ int vaddr_type(ulong, struct task_context *);
 char *format_stack_entry(struct bt_info *bt, char *, ulong, ulong);
 int in_user_stack(ulong, ulong);
 int dump_inode_page(ulong);
+ulong valid_section_nr(ulong);
 
 
 /*
diff --git a/x86_64.c b/x86_64.c
index 7449571..0d5e150 100644
--- a/x86_64.c
+++ b/x86_64.c
@@ -77,6 +77,7 @@ static void x86_64_calc_phys_base(void);
 static int x86_64_is_module_addr(ulong);
 static int x86_64_is_kvaddr(ulong);
 static int x86_64_is_uvaddr(ulong, struct task_context *);
+static int x86_64_is_page_ptr(ulong, physaddr_t *);
 static ulong *x86_64_kpgd_offset(ulong, int, int);
 static ulong x86_64_upgd_offset(struct task_context *, ulong, int, int);
 static ulong x86_64_upgd_offset_legacy(struct task_context *, ulong, int, int);
@@ -156,6 +157,7 @@ x86_64_init(int when)
 	{
 	case SETUP_ENV:
 		machdep->process_elf_notes = x86_process_elf_notes;
+		machdep->is_page_ptr = x86_64_is_page_ptr;
 		break;
 	case PRE_SYMTAB:
 		machdep->verify_symbol = x86_64_verify_symbol;
@@ -802,6 +804,7 @@ x86_64_dump_machdep_table(ulong arg)
 	fprintf(fp, "       get_smp_cpus: x86_64_get_smp_cpus()\n");
         fprintf(fp, "          is_kvaddr: x86_64_is_kvaddr()\n");
         fprintf(fp, "          is_uvaddr: x86_64_is_uvaddr()\n");
+        fprintf(fp, "        is_page_ptr: x86_64_is_page_ptr()\n");
         fprintf(fp, "       verify_paddr: x86_64_verify_paddr()\n");
         fprintf(fp, "  get_kvaddr_ranges: x86_64_get_kvaddr_ranges()\n");
         fprintf(fp, "    init_kernel_pgd: x86_64_init_kernel_pgd()\n");
@@ -1594,6 +1597,26 @@ x86_64_is_uvaddr(ulong addr, struct task_context *tc)
         return (addr < USERSPACE_TOP);
 }
 
+static int
+x86_64_is_page_ptr(ulong addr, physaddr_t *phys)
+{
+	ulong pfn, nr;
+
+	if (IS_SPARSEMEM() && (machdep->flags & VMEMMAP) &&
+	    (addr >= VMEMMAP_VADDR && addr <= VMEMMAP_END) &&
+	    !((addr - VMEMMAP_VADDR) % SIZE(page))) {
+
+		pfn = (addr - VMEMMAP_VADDR) / SIZE(page);
+		nr = pfn_to_section_nr(pfn);
+		if (valid_section_nr(nr)) {
+			if (phys)
+				*phys = PTOB(pfn);
+			return TRUE;
+		}
+	}
+	return FALSE;
+}
+
 /*
  * Find the kernel pgd entry..
  * pgd = pgd_offset_k(addr);
-- 
1.8.3.1
                                
                         
                        
                                
                                7 years, 7 months
                        
                        
                 
         
 
        
            
        
        
        
                
                        
                                
                                 
                                        
                                
                         
                        
                                
                                
                                        
                                                
                                        
                                        
                                        [PATCH v2] Speed up "kmem -[sS]" by optimizing is_page_ptr() for x86_64
                                
                                
                                
                                    
                                        by Kazuhito Hagio
                                    
                                
                                
                                        Hi Dave,
Thank you very much for merging the infrastructure.
I rewrote the patch based on it and tested this with some dumpfiles.
---
Changes from v1:
- rewrite based on the per-architecture function call Dave provided
- remove the part which used page.flags for non-VMEMMAP kernels
- add address range/position check first
- remove/optimize the calculations of mem_map and phys address
- modify the patch description
The "kmem -[sS]" commands can take several minutes to complete with
the following conditions:
  - The system has a lot of memory sections with CONFIG_SPARSEMEM, and
  - The kernel uses SLUB and it has a very long partial slab list.
  crash> kmem -s dentry
  CACHE            NAME                 OBJSIZE  ALLOCATED     TOTAL  SLABS  SSIZE
  ffff88017fc78a00 dentry                   192    9038949  10045728  239184     8k
  crash> kmem -s dentry | bash -c 'cat >/dev/null ; echo $SECONDS'
  133
  crash> kmem -S dentry | bash -c 'cat >/dev/null ; echo $SECONDS'
  656
One of the causes is that is_page_ptr() in count_partial() determines
whether a given slub page address is a page struct by searching all
mem_sections available for the one which includes it.
With CONFIG_SPARSEMEM_VMEMMAP on x86_64, we can do that by checking
its address range and whether its calculated mem_section is valid.
With this patch, the amount of computation can be significantly reduced
in that case.
  crash> kmem -s dentry | bash -c 'cat >/dev/null ; echo $SECONDS'
  1
  crash> kmem -S dentry | bash -c 'cat >/dev/null ; echo $SECONDS'
  1
Signed-off-by: Kazuhito Hagio <k-hagio(a)ab.jp.nec.com>
---
 defs.h   |  1 +
 x86_64.c | 23 +++++++++++++++++++++++
 2 files changed, 24 insertions(+)
diff --git a/defs.h b/defs.h
index 9663bd8..7998ebf 100644
--- a/defs.h
+++ b/defs.h
@@ -5133,6 +5133,7 @@ int vaddr_type(ulong, struct task_context *);
 char *format_stack_entry(struct bt_info *bt, char *, ulong, ulong);
 int in_user_stack(ulong, ulong);
 int dump_inode_page(ulong);
+ulong valid_section_nr(ulong);
 
 
 /*
diff --git a/x86_64.c b/x86_64.c
index 7449571..67cc528 100644
--- a/x86_64.c
+++ b/x86_64.c
@@ -77,6 +77,7 @@ static void x86_64_calc_phys_base(void);
 static int x86_64_is_module_addr(ulong);
 static int x86_64_is_kvaddr(ulong);
 static int x86_64_is_uvaddr(ulong, struct task_context *);
+static int x86_64_is_page_ptr(ulong, physaddr_t *);
 static ulong *x86_64_kpgd_offset(ulong, int, int);
 static ulong x86_64_upgd_offset(struct task_context *, ulong, int, int);
 static ulong x86_64_upgd_offset_legacy(struct task_context *, ulong, int, int);
@@ -624,6 +625,7 @@ x86_64_init(int when)
 						_MAX_PHYSMEM_BITS_2_6_26;
 			}
 		}
+		machdep->is_page_ptr = x86_64_is_page_ptr;
 
                 if (XEN()) {
 			if (kt->xen_flags & WRITABLE_PAGE_TABLES) {
@@ -802,6 +804,7 @@ x86_64_dump_machdep_table(ulong arg)
 	fprintf(fp, "       get_smp_cpus: x86_64_get_smp_cpus()\n");
         fprintf(fp, "          is_kvaddr: x86_64_is_kvaddr()\n");
         fprintf(fp, "          is_uvaddr: x86_64_is_uvaddr()\n");
+        fprintf(fp, "        is_page_ptr: x86_64_is_page_ptr()\n");
         fprintf(fp, "       verify_paddr: x86_64_verify_paddr()\n");
         fprintf(fp, "  get_kvaddr_ranges: x86_64_get_kvaddr_ranges()\n");
         fprintf(fp, "    init_kernel_pgd: x86_64_init_kernel_pgd()\n");
@@ -1594,6 +1597,26 @@ x86_64_is_uvaddr(ulong addr, struct task_context *tc)
         return (addr < USERSPACE_TOP);
 }
 
+static int
+x86_64_is_page_ptr(ulong addr, physaddr_t *phys)
+{
+	ulong pfn, nr;
+
+	if (IS_SPARSEMEM() && (machdep->flags & VMEMMAP) &&
+	    (addr >= VMEMMAP_VADDR && addr <= VMEMMAP_END) &&
+	    !((addr - VMEMMAP_VADDR) % SIZE(page))) {
+
+		pfn = (addr - VMEMMAP_VADDR) / SIZE(page);
+		nr = pfn_to_section_nr(pfn);
+		if (valid_section_nr(nr)) {
+			if (phys)
+				*phys = PTOB(pfn);
+			return TRUE;
+		}
+	}
+	return FALSE;
+}
+
 /*
  * Find the kernel pgd entry..
  * pgd = pgd_offset_k(addr);
-- 
1.8.3.1
                                
                         
                        
                                
                                7 years, 7 months
                        
                        
                 
         
 
        
            
        
        
        
                
                        
                                
                                 
                                        
                                
                         
                        
                                
                                
                                        
                                                
                                        
                                        
                                        [PATCH] Speed up "kmem -[sS]" by optimizing is_page_ptr()
                                
                                
                                
                                    
                                        by k-hagio@ab.jp.nec.com
                                    
                                
                                
                                        Hi,
The "kmem -[sS]" commands can take several minutes to complete with
the following conditions:
  * The system has a lot of memory sections with CONFIG_SPARSEMEM.
  * The kernel uses SLUB and it has a very long partial slab list.
  crash> kmem -s dentry | awk '{print strftime("%T"), $0}'
  10:18:34 CACHE            NAME                 OBJSIZE  ALLOCATED     TOTAL  SLABS  SSIZE
  10:19:41 ffff88017fc78a00 dentry                   192    9038949  10045728  239184     8k
  crash> kmem -S dentry | bash -c 'cat >/dev/null ; echo $SECONDS'
  334
One of the causes is that is_page_ptr() in count_partial() checks if
a given slub page address is a page struct by searching all memory
sections linearly for the one which includes it.
        nr_mem_sections = NR_MEM_SECTIONS();
        for (nr = 0; nr < nr_mem_sections ; nr++) {
                if ((sec_addr = valid_section_nr(nr))) {
                        ...
With CONFIG_SPARSEMEM{_VMEMMAP}, we can calculate the memory section
which includes a page struct from its page.flags, or from its address and
VMEMMAP_VADDR. By doing so, this patch significantly reduces the amount
of computation in that case.
  crash> kmem -s dentry | awk '{print strftime("%T"), $0}'
  10:34:55 CACHE            NAME                 OBJSIZE  ALLOCATED     TOTAL  SLABS  SSIZE
  10:34:55 ffff88017fc78a00 dentry                   192    9038949  10045728  239184     8k
  crash> kmem -S dentry | bash -c 'cat >/dev/null ; echo $SECONDS'
  2
This patch uses VMEMMAP_VADDR. It is not defined on PPC64, but it looks
like PPC64 supports the VMEMMAP flag and machdep->machspec->vmemmap_base
holds the corresponding address, so this patch also defines it for PPC64.
This might need some help from PPC folks.
Signed-off-by: Kazuhito Hagio <k-hagio(a)ab.jp.nec.com>
---
 defs.h   |  2 ++
 memory.c | 15 +++++++++++++++
 2 files changed, 17 insertions(+)
diff --git a/defs.h b/defs.h
index aa17792..84e68ca 100644
--- a/defs.h
+++ b/defs.h
@@ -3861,6 +3861,8 @@ struct efi_memory_desc_t {
 #define IS_VMALLOC_ADDR(X) machdep->machspec->is_vmaddr(X)
 #define KERNELBASE      machdep->pageoffset
 
+#define VMEMMAP_VADDR   (machdep->machspec->vmemmap_base)
+
 #define PGDIR_SHIFT     (machdep->pageshift + (machdep->pageshift -3) + (machdep->pageshift - 2))
 #define PMD_SHIFT       (machdep->pageshift + (machdep->pageshift - 3))
 
diff --git a/memory.c b/memory.c
index 0df8ecc..0696763 100644
--- a/memory.c
+++ b/memory.c
@@ -13348,10 +13348,25 @@ is_page_ptr(ulong addr, physaddr_t *phys)
 	ulong nr_mem_sections;
 	ulong coded_mem_map, mem_map, end_mem_map;
 	physaddr_t section_paddr;
+#ifdef VMEMMAP_VADDR
+	ulong flags;
+#endif
 
 	if (IS_SPARSEMEM()) {
 		nr_mem_sections = NR_MEM_SECTIONS();
+#ifdef VMEMMAP_VADDR
+		nr = nr_mem_sections;
+		if (machdep->flags & VMEMMAP)
+			nr = pfn_to_section_nr((addr - VMEMMAP_VADDR) / SIZE(page));
+		else if (readmem(addr + OFFSET(page_flags), KVADDR, &flags,
+			sizeof(ulong), "page.flags", RETURN_ON_ERROR|QUIET))
+			nr = (flags >> (SIZE(page_flags)*8 - SECTIONS_SHIFT())
+				& ((1UL << SECTIONS_SHIFT()) - 1));
+
+		if (nr < nr_mem_sections) {
+#else
 	        for (nr = 0; nr < nr_mem_sections ; nr++) {
+#endif
 	                if ((sec_addr = valid_section_nr(nr))) {
 	                        coded_mem_map = section_mem_map_addr(sec_addr);
 	                        mem_map = sparse_decode_mem_map(coded_mem_map, nr);
-- 
1.8.3.1
                                
                         
                        
                                
                                7 years, 8 months