 
                                        
                                
                         
                        
                                
                                
                                        
                                                
                                        
                                        
                                        FW: patch for slight modification to runq -g command
                                
                                
                                
                                    
                                        by Chen, Anthony
                                    
                                
                                
                                        
Hi,
We're debugging an in-house application that makes extensive use of hard limits. We ran into a lot of timing windows (all of our own making), and we use the runq -g command a lot. The current runq -g output displays only the RB_ROOT pointer, which makes it rather inconvenient to traverse the task_group hierarchy. It would be nice to have the command also display the corresponding task_group and cfs_rq pointers (at a minimum). Since we crash the system by messing up nr_running and h_nr_running, we also display those two fields at the same time. Here's an example of before and after.
CPU 4
  CURRENT: PID: 0      TASK: ffff8840668c0380  COMMAND: "swapper"
  TASK_GROUP RT_RQ: ffff8800027d3820
  RT PRIO_ARRAY: ffff8800027d3820
     [no tasks queued]
  TASK_GROUP CFS_RQ: ffff8800027d36e0
  CFS RB_ROOT: ffff8800027d3710
     GROUP CFS RB_ROOT: ffff883ff69bcc30 <TDAT>
        GROUP CFS RB_ROOT: ffff884006290e30 <User>
           GROUP CFS RB_ROOT: ffff88400641c430 <TDWMVP1>
              GROUP CFS RB_ROOT: ffff88400646b030 <ServDown1:0>
                 GROUP CFS RB_ROOT: ffff884006492630 <ServOrder1:1>
                    GROUP CFS RB_ROOT: ffff883ff058fe30 <TDWMWD57> (THROTTLED)
                       GROUP CFS RB_ROOT: ffff88047889ee30 <S:WD:3d:35fa>
                          [120] PID: 27655  TASK: ffff8805078ce2c0  COMMAND: "actmain"
                                <<< more throttled groups removed >>>
CPU 4
  CURRENT: PID: 0     CFS: ffff8800027d36e0 TASK: ffff8840668c0380  COMMAND: "swapper"
  TASK_GROUP RT_RQ: ffff8800027d3820
  RT PRIO_ARRAY: ffff8800027d3820
     [no tasks queued]
  TASK_GROUP CFS_RQ: ffff8800027d36e0
  CFS RB_ROOT: ffff8800027d3710
     GROUP: ffff88405394d000 CFS: ffff883ff69bcc00 RB_ROOT: ffff883ff69bcc30 <TDAT> (0 0)
        GROUP: ffff88405906c400 CFS: ffff884006290e00 RB_ROOT: ffff884006290e30 <User> (0 0)
           GROUP: ffff884055081000 CFS: ffff88400641c400 RB_ROOT: ffff88400641c430 <TDWMVP1> (0 0)
              GROUP: ffff884055081c00 CFS: ffff88400646b000 RB_ROOT: ffff88400646b030 <ServDown1:0> (0 0)
                 GROUP: ffff8840580fd400 CFS: ffff884006492600 RB_ROOT: ffff884006492630 <ServOrder1:1> (0 0)
                    GROUP: ffff884058f58c00 CFS: ffff883ff058fe00 RB_ROOT: ffff883ff058fe30 <TDWMWD57> (7 9)  (THROTTLED)
                       GROUP: ffff8808fb976000 CFS: ffff88047889ee00 RB_ROOT: ffff88047889ee30 <S:WD:3d:35fa> (1 1)
                          [120] PID: 27655  TASK: ffff8805078ce2c0  COMMAND: "actmain"
                                <<< more throttled groups removed >>>
I have attached the patch we use to display the additional information. Could you please take a look at my proposal and see whether this kind of display format could be included?
Thanks,
Anthony
                                
                         
                        
                                
                                11 years, 11 months
                        
                        
                 
         
 
        
            
        
        
        
                
                        
                                
                                 
                                        
                                
                         
                        
                                
                                
                                        
                                                
                                        
                                        
                                        [PATCH] fix the accounting of RSS when SPLIT_RSS_ACCOUNTING is enabled
                                
                                
                                
                                    
                                        by vinayak menon
                                    
                                
                                
                                        Hi Dave,
It was noticed that when SPLIT_RSS_ACCOUNTING is enabled in the kernel, the RSS
values shown by the ps command are huge (negative values represented as ulong)
for certain tasks. Shown below is an example of such ps output.
   1416      1   3  db506ac0  IN  36235.9     936 4194128  debuggerd
   1417      1   3  db500040  IN   0.0   16512     36  rild
   1418      1   1  db500580  IN   0.1   47160    488  surfaceflinger
   1419      1   0  db74d040  IN   1.1  458544   5148  zygote
*   1420      1   1  db5d3ac0  IN  36235.9   13868 4194280  drmserver*
   1421      1   1  db5d3580  IN   0.0   54944     20  mediaserver
   1422      1   0  db7c9580  IN   0.0     900     20  installd
 *  1423      1   0  db7c9040  IN  36235.8    1828 4194112  keystore
   1424      1   3  db5d8580  IN  36235.9    1152 4194276  akmdfs
   1426      1   3  db7bd040  IN  36235.9   10880 4194212  glgps
   1428      1   0  db61c040  IN  36235.9    2084 4194152  usb_portd
   1429      1   0  db753ac0  IN  36235.9     868 4194148  atxd_proxy
   1431      1   1  da6fbac0  UN  36235.9    6100 4194280  bkmgrd*
*
*
When SPLIT_RSS_ACCOUNTING is enabled, the rss values are stored in the
task_struct of each thread, and are synced to mm_struct.rss_stat only at
certain events. So during a crash it is likely that mm.rss_stat contains
stale values, which can even be negative.
I have prepared a patch (attached) to sync the task_struct.rss_stat with
mm.rss_stat, in get_task_mem_usage, when SPLIT_RSS_ACCOUNTING is enabled.
Please share your thoughts on this.
Thanks,
Vinayak
                                
                         
                        
                                
                                12 years
                        
                        
                 
         
 
        
            
        
        
        
                
                        
                                
                                 
                                        
                                
                         
                        
                                
                                
                                        
                                                
                                        
                                        
                                        [ANNOUNCE] crash 7.0.3 is available
                                
                                
                                
                                    
                                        by Dave Anderson
                                    
                                
                                
                                        
Download from: http://people.redhat.com/anderson
Changelog:
 - Fix for the ARM architecture if the backtrace unwind information 
   cannot be gathered during session initialization.  Without the patch, 
   the two unwind-related warning messages indicating "WARNING: UNWIND: 
   failed to gather unwind_table list" and "WARNING: UNWIND: failed to 
   initialize module unwind tables" are followed by the fatal error 
   message "crash: cannot hash task_struct entries".
   (anderson(a)redhat.com)
 - Fix for the "help -[Dn]" dumpfile information display of the GUID EFI
   table in the header of SADUMP dumpfiles.  Without the patch, only 33 
   of the 36 bytes in the table are translated.
   (d.hatayama(a)jp.fujitsu.com)
 - Fix for the determination of the kernel NR_CPUS configurable for
   Linux 3.8 and later kernels that are configured with CONFIG_SLAB. 
   Without the patch, the kernel's compiled-in NR_CPUS value was 
   incorrectly calculated to be the sum of the kernel's NR_CPUS and
   MAX_NUMNODES configurables.
   (anderson(a)redhat.com)
     
 - In the next release of makedumpfile, the status field of the 
   dumpfile header of compressed kdumps will show the compression 
   type that was utilized.  The "help -[Dn]" output has been updated
   to display that information.
   (anderson(a)redhat.com)
 - For kernels configured with CONFIG_SLAB in which an array_cache
   pointer referenced by a kmem_cache structure is invalid, the
   individual cache(s) will be marked as invalid.  During session
   initialization, the message "crash: kmem_cache: <cache-address>: 
   invalid array_cache pointer" will be displayed, and during runtime, 
   attempts to access the cache(s) will result in a message indicating 
   that the cache is "[INVALID/CORRUPTED]".  Without the patch, the 
   message "crash: unable to initialize kmem slab cache subsystem" is 
   displayed during session initialization, and run-time commands that 
   attempt to access the kmem slab cache subsystem fail with the error
   message "kmem cache slab subsystem not available".
   (anderson(a)redhat.com)
 - Fix for the "kmem -[sS] <slab-object-address>" option in Linux 3.6
   and later kernels configured with CONFIG_SLAB.  Without the patch, 
   the command fails with the message "kmem: address is not allocated in 
   slab subsystem: <slab-object-address>".  This also causes the 
   "kmem <slab-object-address>" command to (quietly) fail to determine
   that the address is a slab object.
   (anderson(a)redhat.com)
 - Fix for the "bt" command if a kernel __init text address is 
   encountered.  Without the patch, and depending upon the reallocation
   of the __init text memory, a bogus framesize may be calculated, or 
   more likely, in a compressed kdump, a warning message indicating 
   "bt: page excluded: kernel virtual address: <address>  type: 
   gdb_readmem_callback" will be displayed following the frame data.
   (anderson(a)redhat.com)
 - Update for determining whether an S390X PTE contains a swap entry
   in Linux 3.12 and later kernels.
   (holzheu(a)linux.vnet.ibm.com)
 - Resurrected the translation and display of the page.flags bits by the
   "kmem -p" command on Linux 2.6.26 and later kernels whose vmlinux 
   debuginfo data contains either the "pageflags" enumerator or the 
   "pageflag_names" array of trace_print_flags structures.  If they are
   not available, just the page.flags value is printed in hexadecimal,
   as has been done since Linux 2.4.9.
   (anderson(a)redhat.com)
 - Fix for the "bt" command when used with vmcore files that were
   created with the recently-introduced "virsh dump --memory-only", 
   which dumps KVM guests into an ELF vmcore similar to those created 
   by the kdump facility.  Without the patch, a faulty backtrace for the 
   panic task may be generated due to the use of incorrect starting 
   RSP/RIP registers; this happens because (unlike kdump) the 
   non-panicking cpus are offlined prior to the dumpfile being created,
   which in turn leads to the use of the wrong NT_PRSTATUS note. 
   (anderson(a)redhat.com)
 - Fix for the CPU number display on systems with 255 or more cpus
   during the initial banner, by the "set" command, the "ps" command, 
   and by all commands that display the per-task header consisting of 
   the task address, pid, cpu and command name.  Without the patch, for
   cpu 255, the "sys" command displays "NO_PROC_ID", and the other 
   commands would show a "-" for the cpu number; for cpu numbers greater
   than 255, garbage values would be displayed in the cpu number field.
   (anderson(a)redhat.com)
 - Implemented support for compressed kdump header version 6, in which
   makedumpfile(8) adds new fields in the kdump_sub_header to support
   large memory systems with pfn values that are larger than 32-bits.
   Without the patch, if the system contains physical memory located
   in high memory such that its maximum pfn value overflows the
   32-bit "max_mapnr" field in the header, the crash session will fail
   with the error message "crash: vmlinux and vmcore do not match!".
   (jingbai.ma(a)hp.com)
 - Fix for the "net -s" command on Linux 3.8 and later kernels.  Without
   the patch, the command fails with the message "net: invalid structure
   member offset: inet_opt_daddr".
   (anderson(a)redhat.com)
 - Fix a build failure in a native ARM64 environment due to obsolete
   LKCD dumpfile headers. 
   (anderson(a)redhat.com)
 - Implementation of a new "per-cpu object" as an argument format that
   can be passed to the "p", "struct", "union" or "*" commands.  The
   format is expressed as either <per-cpu symbol>:<cpu-specifier> or
   as <per-cpu offset>:<cpu-specifier>, where the per-cpu symbol or
   per-cpu offset must precede a colon, and where the <cpu-specifier>
   follows the colon.  The cpu-specifier may be expressed in any of
   the following manners:
           :             CPU of the currently selected task.
           :a[ll]        all CPUs.
           :#[-#][,...]  CPU list(s), e.g. "1,3,5", "1-3",
                         or "1,3,5-7,10".
   Without the patch, per-cpu symbols are only accepted by the "p" 
   command, and the data type and the resolved kernel virtual address 
   for each per-cpu instance are displayed.  With this patch, a
   colon and a cpu-specifier may be appended to the symbol name, and
   the contents of the symbol on each cpu that is specified will be
   displayed by the "p" command.  For the "struct/union/*" commands, an
   argument may be specified using either a per-cpu offset value or 
   per-cpu symbol name followed by a colon and cpu-specifier, and the
   contents of each structure/union on each specified cpu will be
   displayed.
   (ptesarik(a)suse.cz)
 - Fixed several minor flaws that were detected by a Coverity Scan: 
     tools.c:
       992:warning[invalidScanfArgType_int] – %d in format string 
           (no. 1) requires 'int *' but the argument type is 'unsigned 
           int *'. 
     memory.c:
       7461:error[uninitvar] – Uninitialized variable: page_cache_size
     filesys.c:
       731:error[resourceLeak] – Resource leak: version 
     kernel.c:
       5675:error[uninitvar] – Uninitialized variable: action 
       7799:error[memleakOnRealloc] – Common realloc mistake: 
            'ikconfig_all' nulled but not freed upon failure 
     configure.c:
       793:error[mismatchAllocDealloc] – Mismatching allocation and 
           deallocation: fp 
     remote.c:
       1120:error[resourceLeak] – Resource leak: pipe 
     va_server.c:
       316:error[memleak] – Memory leak: disk_hdr 
     va_server_v1.c:
       311:error[memleak] – Memory leak: disk_hdr
     makedumpfile.c:
       80:error[memleakOnRealloc] – Common realloc mistake: 'ptr' nulled
          but not freed upon failure
     sadump.c:
       231:error[memleakOnRealloc] – Common realloc mistake: 'sdh' 
           nulled but not freed upon failure 
     extensions/snap.c:
       550:error[uninitvar] – Uninitialized variable: prstatus_len
       541:error[uninitvar] – Uninitialized variable: l_offset 
     extensions/trace.c:
       1477:error[resourceLeak] – Resource leak: file 
   (anderson(a)redhat.com)
                                
                         
                        
                                
                                12 years
                        
                        
                 
         
 
        
            
        
        
        
                
                        
                        
                                
                                
                                        
                                                
                                        
                                        
                                        [PATCHv2 00/11] Implement percpu handling for crash
                                
                                
                                
                                    
                                        by Petr Tesarik
                                    
                                
                                
                                        Hi Dave,
I'm sorry for the last submission. It seems I forgot to refresh the
patches, so it was completely bogus. Should be fixed now. I'm also
attaching my changes as one big patch to this message.
Petr Tesarik
                                
                         
                        
                                
                                12 years
                        
                        
                 
         
 
        
            
        
        
        
                
                        
                        
                                
                                
                                        
                                                
                                        
                                        
                                        [PATCH 0/11] Implement percpu handling for crash
                                
                                
                                
                                    
                                        by Petr Tesarik
                                    
                                
                                
                                        Hi Dave et al.
thanks to the feedback, I have reworked my patch set. It is now much
larger than the first version, but I'm also much more happy with it.
I'm pretty sure that there are still some issues here and there, so
as always, your comments are most welcome.
Petr Tesarik
                                
                         
                        
                                
                                12 years
                        
                        
                 
         
 
        
            
        
        
        
                
                        
                                
                                 
                                        
                                
                         
                        
                                
                                
                                        
                                                
                                        
                                        
                                        [PATCH v4] crash utility: fix max_mapnr issue on system has over 44-bit addressing
                                
                                
                                
                                    
                                        by Jingbai Ma
                                    
                                
                                
                                        The patch will add support for new compressed dumpfile header_version 6.
This bug is posted here:
http://lists.infradead.org/pipermail/kexec/2013-September/009587.html
This patch will add 3 new fields in struct kdump_sub_header.
unsigned long long start_pfn_64;  /* header_version 6 and later */
unsigned long long end_pfn_64;    /* header_version 6 and later */
unsigned long long max_mapnr_64;  /* header_version 6 and later */
The old max_mapnr, start_pfn and end_pfn are obsolete, but are retained
for compatibility purposes.
The corresponding patch for makedumpfile can be found here:
http://lists.infradead.org/pipermail/kexec/2013-October/009779.html
Changelog:
v4:
- Fix an invalid condition branch.
- Remove a piece of obsolete code.
- Display the original dh->max_mapnr as it exists in the dumpfile
  header, regardless whether it is the obsolete version or not.
- Change notes for max_mapnr, start_pfn and end_pfn as obsolete.
v3:
- Fix a bug that failed to work with old split format kdumps.
v2:
- Rename max_mapnr in struct kdump_sub_header to max_mapnr_64.
- Change type of max_mapnr_64 from unsigned long to unsigned long long.
  In x86 PAE mode on an x86_32 kernel, the address may exceed the 44-bit limit.
- Add start_pfn_64, end_pfn_64 for struct kdump_sub_header.
- Add a 64bit max_mapnr in struct diskdump_data. The max_mapnr_64 in
  the sub-header only exists in compressed kdump file format, so can't
  be used in diskdump file format.
- Merge a patch from Dave Anderson that fixed bitmap_len issue.
v1:
- http://lists.infradead.org/pipermail/kexec/2013-September/009663.html
Signed-off-by: Jingbai Ma <jingbai.ma(a)hp.com>
Tested-by: Lisa Mitchell <lisa.mitchell(a)hp.com>
---
 diskdump.c |  123 +++++++++++++++++++++++++++++++++++++++++++++++-------------
 diskdump.h |   15 ++++++-
 2 files changed, 108 insertions(+), 30 deletions(-)
diff --git a/diskdump.c b/diskdump.c
index 0819a3f..65e0210 100644
--- a/diskdump.c
+++ b/diskdump.c
@@ -40,11 +40,13 @@ struct diskdump_data {
 	struct disk_dump_sub_header	*sub_header;
 	struct kdump_sub_header		*sub_header_kdump;
 
+	unsigned long long	max_mapnr;	/* 64bit max_mapnr */
+
 	size_t	data_offset;
 	int	block_size;
 	int	block_shift;
 	char	*bitmap;
-	int	bitmap_len;
+	off_t	bitmap_len;
 	char	*dumpable_bitmap;
 	int	byte, bit;
 	char	*compressed_page;	/* copy of compressed page data */
@@ -170,9 +172,9 @@ add_diskdump_data(char* name)
 	dd->filename = name;
 
 	if (CRASHDEBUG(1))
-		fprintf(fp, "%s: start_pfn=%lu, end_pfn=%lu\n", name,
-			dd->sub_header_kdump->start_pfn,
-			dd->sub_header_kdump->end_pfn);
+		fprintf(fp, "%s: start_pfn=%llu, end_pfn=%llu\n", name,
+			dd->sub_header_kdump->start_pfn_64,
+			dd->sub_header_kdump->end_pfn_64);
 }
 
 static void 
@@ -199,13 +201,13 @@ get_bit(char *map, int byte, int bit)
 }
 
 static inline int 
-page_is_ram(unsigned int nr)
+page_is_ram(unsigned long nr)
 {
 	return get_bit(dd->bitmap, nr >> 3, nr & 7);
 }
 
 static inline int 
-page_is_dumpable(unsigned int nr)
+page_is_dumpable(unsigned long nr)
 {
 	return dd->dumpable_bitmap[nr>>3] & (1 << (nr & 7));
 }
@@ -214,7 +216,7 @@ static inline int
 dump_is_partial(const struct disk_dump_header *header)
 {
 	return header->bitmap_blocks >=
-	    divideup(divideup(header->max_mapnr, 8), dd->block_size) * 2;
+	    divideup(divideup(dd->max_mapnr, 8), dd->block_size) * 2;
 }
 
 static int 
@@ -321,6 +323,9 @@ x86_process_elf_notes(void *note_ptr, unsigned long size_note)
  * [40]    unsigned long   size_note;          /  header_version 4 and later  /
  * [44]    off_t           offset_eraseinfo;   /  header_version 5 and later  /
  * [52]    unsigned long   size_eraseinfo;     /  header_version 5 and later  /
+ * [56]    unsigned long long   start_pfn_64;  /  header_version 6 and later  /
+ * [64]    unsigned long long   end_pfn_64;    /  header_version 6 and later  /
+ * [72]    unsigned long long   max_mapnr_64;  /  header_version 6 and later  /
  * };
  * 
  * But when compiled on an ARM processor, each 64-bit "off_t" would be pushed
@@ -337,7 +342,10 @@ x86_process_elf_notes(void *note_ptr, unsigned long size_note)
  * [40]    off_t           offset_note;        /  header_version 4 and later  /
  * [48]    unsigned long   size_note;          /  header_version 4 and later  /
  * [56]    off_t           offset_eraseinfo;   /  header_version 5 and later  /
- * [62]    unsigned long   size_eraseinfo;     /  header_version 5 and later  /
+ * [64]    unsigned long   size_eraseinfo;     /  header_version 5 and later  /
+ * [72]    unsigned long long   start_pfn_64;  /  header_version 6 and later  /
+ * [80]    unsigned long long   end_pfn_64;    /  header_version 6 and later  /
+ * [88]    unsigned long long   max_mapnr_64;  /  header_version 6 and later  /
  * };
  * 
  */
@@ -357,6 +365,10 @@ struct kdump_sub_header_ARM_target {
 	int 		pad3;	
         off_t           offset_eraseinfo;   /* header_version 5 and later */
         unsigned long   size_eraseinfo;     /* header_version 5 and later */
+	int		pad4;
+	unsigned long long start_pfn_64;    /* header_version 6 and later */
+	unsigned long long end_pfn_64;      /* header_version 6 and later */
+	unsigned long long max_mapnr_64;    /* header_version 6 and later */
 };
 
 static void
@@ -380,6 +392,15 @@ arm_kdump_header_adjust(int header_version)
 		kdsh->offset_eraseinfo = kdsh_ARM_target->offset_eraseinfo;
 		kdsh->size_eraseinfo = kdsh_ARM_target->size_eraseinfo;
 	}
+	if (header_version >= 6) {
+		kdsh->start_pfn_64 = kdsh_ARM_target->start_pfn_64;
+		kdsh->end_pfn_64 = kdsh_ARM_target->end_pfn_64;
+		kdsh->max_mapnr_64 = kdsh_ARM_target->map_mapnr_64;
+	} else {
+		kdsh->start_pfn_64 = kdsh_ARM_target->start_pfn;
+		kdsh->end_pfn_64 = kdsh_ARM_target->end_pfn;
+		kdsh->max_mapnr_64 = dd->map_mapnr;
+	}
 }
 #endif  /* __i386__ && ARM */
 
@@ -390,7 +411,10 @@ read_dump_header(char *file)
 	struct disk_dump_sub_header *sub_header = NULL;
 	struct kdump_sub_header *sub_header_kdump = NULL;
 	size_t size;
-	int bitmap_len;
+	off_t bitmap_len;
+	char *bufptr;
+	size_t len;
+	size_t bytes_read;
 	int block_size = (int)sysconf(_SC_PAGESIZE);
 	off_t offset;
 	const off_t failed = (off_t)-1;
@@ -516,6 +540,13 @@ restart:
 			}
 		}
 		dd->sub_header = sub_header;
+
+		/* the 64bit max_mapnr only exists in sub-header of compressed
+		 * kdump file, if it's not a compressed kdump file, we have to
+		 * use the old 32bit max_mapnr in dumpfile header.
+		 * max_mapnr may be truncated here.
+		 */
+		dd->max_mapnr = header->max_mapnr;
 	} else if (KDUMP_CMPRS_VALID()) {
 		if ((sub_header_kdump = malloc(block_size)) == NULL)
 			error(FATAL, "compressed kdump: cannot malloc sub_header_kdump buffer\n");
@@ -540,8 +571,20 @@ restart:
 #if defined(__i386__) && defined(ARM)
 		arm_kdump_header_adjust(header->header_version);
 #endif
+		/* use 64bit max_mapnr in compressed kdump file sub-header */
+		if (header->header_version >= 6)
+			dd->max_mapnr = dd->sub_header_kdump->max_mapnr_64;
+		else {
+			dd->sub_header_kdump->start_pfn_64
+				= dd->sub_header_kdump->start_pfn;
+			dd->sub_header_kdump->end_pfn_64
+				= dd->sub_header_kdump->end_pfn;
+		}
 	}
 
+	if (header->header_version < 6)
+		dd->max_mapnr = header->max_mapnr;
+
 	/* read memory bitmap */
 	bitmap_len = block_size * header->bitmap_blocks;
 	dd->bitmap_len = bitmap_len;
@@ -571,10 +614,18 @@ restart:
 				DISKDUMP_VALID() ? "diskdump" : "compressed kdump");
 			goto err;
 		}
-		if (read(dd->dfd, dd->bitmap, bitmap_len) < bitmap_len) {
-			error(INFO, "%s: cannot read memory bitmap\n",
-				DISKDUMP_VALID() ? "diskdump" : "compressed kdump");
-			goto err;
+		bufptr = dd->bitmap;
+		len = bitmap_len;
+		while (len) {
+			bytes_read = read(dd->dfd, bufptr, len);
+			if (bytes_read  < 0) {
+				error(INFO, "%s: cannot read memory bitmap\n",
+					DISKDUMP_VALID() ? "diskdump"
+					: "compressed kdump");
+				goto err;
+			}
+			len -= bytes_read;
+			bufptr += bytes_read;
 		}
 	}
 
@@ -679,13 +730,13 @@ restart:
 	}
 
 	if (!is_split) {
-		max_sect_len = divideup(header->max_mapnr, BITMAP_SECT_LEN);
+		max_sect_len = divideup(dd->max_mapnr, BITMAP_SECT_LEN);
 		pfn = 0;
 		dd->filename = file;
 	}
 	else {
-		ulong start = sub_header_kdump->start_pfn;
-		ulong end = sub_header_kdump->end_pfn;
+		unsigned long long start = sub_header_kdump->start_pfn_64;
+		unsigned long long end = sub_header_kdump->end_pfn_64;
 		max_sect_len = divideup(end - start + 1, BITMAP_SECT_LEN);
 		pfn = start;
 	}
@@ -727,8 +778,9 @@ pfn_to_pos(ulong pfn)
 	ulong p1, p2;
 
 	if (KDUMP_SPLIT()) {
-		p1 = pfn - dd->sub_header_kdump->start_pfn;
-		p2 = round(p1, BITMAP_SECT_LEN) + dd->sub_header_kdump->start_pfn;
+		p1 = pfn - dd->sub_header_kdump->start_pfn_64;
+		p2 = round(p1, BITMAP_SECT_LEN)
+			+ dd->sub_header_kdump->start_pfn_64;
 	}
 	else {
 		p1 = pfn; 
@@ -1034,12 +1086,12 @@ read_diskdump(int fd, void *bufptr, int cnt, ulong addr, physaddr_t paddr)
 	if (KDUMP_SPLIT()) {
 		/* Find proper dd */
 		int i;
-		unsigned long start_pfn;
-		unsigned long end_pfn;
+		unsigned long long start_pfn;
+		unsigned long long end_pfn;
 
 		for (i=0; i<num_dumpfiles; i++) {
-			start_pfn = dd_list[i]->sub_header_kdump->start_pfn;
-			end_pfn = dd_list[i]->sub_header_kdump->end_pfn;
+			start_pfn = dd_list[i]->sub_header_kdump->start_pfn_64;
+			end_pfn = dd_list[i]->sub_header_kdump->end_pfn_64;
 			if ((pfn >= start_pfn) && (pfn <= end_pfn))	{
 				dd = dd_list[i];
 				break;
@@ -1058,14 +1110,14 @@ read_diskdump(int fd, void *bufptr, int cnt, ulong addr, physaddr_t paddr)
 	curpaddr = paddr & ~((physaddr_t)(dd->block_size-1));
 	page_offset = paddr & ((physaddr_t)(dd->block_size-1));
 
-	if ((pfn >= dd->header->max_mapnr) || !page_is_ram(pfn)) {
+	if ((pfn >= dd->max_mapnr) || !page_is_ram(pfn)) {
 		if (CRASHDEBUG(8)) {
 			fprintf(fp, "read_diskdump: SEEK_ERROR: "
 			    "paddr/pfn: %llx/%lx ",
 				(ulonglong)paddr, pfn);
-			if (pfn >= dd->header->max_mapnr)
-				fprintf(fp, "max_mapnr: %x\n",
-					dd->header->max_mapnr);
+			if (pfn >= dd->max_mapnr)
+				fprintf(fp, "max_mapnr: %llx\n",
+					dd->max_mapnr);
 			else
 				fprintf(fp, "!page_is_ram\n");
 		}
@@ -1662,6 +1714,23 @@ __diskdump_memory_dump(FILE *fp)
 				dump_eraseinfo(fp);
 			}
 		}
+		if (dh->header_version >= 6) {
+			fprintf(fp, "        start_pfn_64: ");
+			if (KDUMP_SPLIT())
+				fprintf(fp, "%lld (0x%llx)\n",
+					kdsh->start_pfn_64, kdsh->start_pfn_64);
+			else
+				fprintf(fp, "(unused)\n");
+			fprintf(fp, "          end_pfn_64: ");
+			if (KDUMP_SPLIT())
+				fprintf(fp, "%lld (0x%llx)\n",
+					kdsh->end_pfn_64, kdsh->end_pfn_64);
+			else
+				fprintf(fp, "(unused)\n");
+
+			fprintf(fp, "        max_mapnr_64: %llu (0x%llx)\n",
+				kdsh->max_mapnr_64, kdsh->max_mapnr_64);
+		}
 		fprintf(fp, "\n");
 	} else
         	fprintf(fp, "(n/a)\n\n");
@@ -1670,7 +1739,7 @@ __diskdump_memory_dump(FILE *fp)
 	fprintf(fp, "        block_size: %d\n", dd->block_size);
 	fprintf(fp, "       block_shift: %d\n", dd->block_shift);
 	fprintf(fp, "            bitmap: %lx\n", (ulong)dd->bitmap);
-	fprintf(fp, "        bitmap_len: %d\n", dd->bitmap_len);
+	fprintf(fp, "        bitmap_len: %ld\n", dd->bitmap_len);
 	fprintf(fp, "   dumpable_bitmap: %lx\n", (ulong)dd->dumpable_bitmap);
 	fprintf(fp, "              byte: %d\n", dd->byte);
 	fprintf(fp, "               bit: %d\n", dd->bit);
diff --git a/diskdump.h b/diskdump.h
index 9ab10b6..88c5be9 100644
--- a/diskdump.h
+++ b/diskdump.h
@@ -42,7 +42,9 @@ struct disk_dump_header {
 						   header in blocks */
 	unsigned int		bitmap_blocks;	/* Size of Memory bitmap in
 						   block */
-	unsigned int		max_mapnr;	/* = max_mapnr */
+	unsigned int		max_mapnr;	/* = max_mapnr, OBSOLETE!
+						   32bit only, full 64bit
+						   in sub header. */
 	unsigned int		total_ram_blocks;/* Number of blocks should be
 						   written */
 	unsigned int		device_blocks;	/* Number of total blocks in
@@ -61,14 +63,21 @@ struct kdump_sub_header {
 	unsigned long	phys_base;
 	int		dump_level;         /* header_version 1 and later */
 	int		split;              /* header_version 2 and later */
-	unsigned long	start_pfn;          /* header_version 2 and later */
-	unsigned long	end_pfn;            /* header_version 2 and later */
+	unsigned long	start_pfn;          /* header_version 2 and later,
+					       OBSOLETE! 32bit only, full 64bit
+					       in start_pfn_64. */
+	unsigned long	end_pfn;            /* header_version 2 and later,
+					       OBSOLETE! 32bit only, full 64bit
+					       in end_pfn_64. */
 	off_t		offset_vmcoreinfo;  /* header_version 3 and later */
 	unsigned long	size_vmcoreinfo;    /* header_version 3 and later */
 	off_t		offset_note;        /* header_version 4 and later */
 	unsigned long	size_note;          /* header_version 4 and later */
 	off_t		offset_eraseinfo;   /* header_version 5 and later */
 	unsigned long	size_eraseinfo;     /* header_version 5 and later */
+	unsigned long long start_pfn_64;    /* header_version 6 and later */
+	unsigned long long end_pfn_64;      /* header_version 6 and later */
+	unsigned long long max_mapnr_64;    /* header_version 6 and later */
 };
 
 /* page flags */
                                
                         
                        
                                
                                12 years
                        
                        
                 
         
 
        
            
        
        
        
                
                        
                                
                                 
                                        
                                
                         
                        
                                
                                
                                        
                                                
                                        
                                        
                                        [PATCH v4] makedumpfile: fix max_mapnr issue on system has over 44-bit addressing
                                
                                
                                
                                    
                                        by Jingbai Ma
                                    
                                
                                
                                        This patch fixes a bug where makedumpfile doesn't work correctly on systems
that have over 44-bit addressing in compression dump mode.
This bug was posted here:
http://lists.infradead.org/pipermail/kexec/2013-September/009587.html
This patch will add 3 new fields in struct kdump_sub_header.
unsigned long long start_pfn_64;  /* header_version 6 and later */
unsigned long long end_pfn_64;    /* header_version 6 and later */
unsigned long long max_mapnr_64;  /* header_version 6 and later */
And the old "unsigned int max_mapnr" in struct disk_dump_header will
not be used anymore, but still be there for compatibility purpose.
The max_mapnr_64 only exists in struct kdump_sub_header, which is used only
for the compressed kdump format, so for ELF format kdump files (non-compressed),
only the max_mapnr is available, and it still may be truncated for addresses
that exceed 44 bits (above 16TB).
This patch will change the header_version to 6.
The corresponding patch for crash utility can be found here:
http://lists.infradead.org/pipermail/kexec/2013-October/009750.html
This patch doesn't change sadump_header.
Changelog:
v4:
- Do not change max_mapnr_64 in kdump_sub_header in memory for old kernel.
v3:
- Change notes for max_mapnr, start_pfn and end_pfn as obsolete.
- Remove "(32bit)" from debug messages of max_mapnr, start_pfn and end_pfn.
- Set the 32bit start_pfn and end_pfn to UINT_MAX.
- Move the bitmap writing enhancement into a separate patch.
- Change type of len_bitmap in struct DumpInfo back to unsigned long.
v2:
- Rename max_mapnr in struct kdump_sub_header to max_mapnr_64.
- Change type of max_mapnr_64 from unsigned long to unsigned long long.
  In x86 PAE mode on an x86_32 kernel, the address may exceed the 44bit limit.
- Add start_pfn_64, end_pfn_64 for struct kdump_sub_header.
- Only print 64bit start_pfn_64, end_pfn_64 and max_mapnr_64
  debug messages for disk dump header version >= 6.
- Change type of bitmap_len in struct DumpInfo, from unsigned long to
  unsigned long long.
- Enhance bitmap writing function in reassemble_kdump_header().
  Prevent bitmap writing failure if the size of bitmap is too large to
  fit a single write.
v1:
- http://lists.infradead.org/pipermail/kexec/2013-September/009662.html
Signed-off-by: Jingbai Ma <jingbai.ma(a)hp.com>
Tested-by: Lisa Mitchell <lisa.mitchell(a)hp.com>
---
 IMPLEMENTATION |   15 ++++++++++--
 diskdump_mod.h |   15 ++++++++++--
 makedumpfile.c |   68 ++++++++++++++++++++++++++++++++++++++++++++------------
 3 files changed, 78 insertions(+), 20 deletions(-)
diff --git a/IMPLEMENTATION b/IMPLEMENTATION
index f0f3135..2f4cfd6 100644
--- a/IMPLEMENTATION
+++ b/IMPLEMENTATION
@@ -48,7 +48,9 @@
                                                    header in blocks */
         unsigned int            bitmap_blocks;  /* Size of Memory bitmap in
                                                    block */
-        unsigned int            max_mapnr;      /* = max_mapnr */
+        unsigned int            max_mapnr;      /* = max_mapnr, OBSOLETE!
+						   32bit only, full 64bit
+						   in sub header. */
         unsigned int            total_ram_blocks;/* Number of blocks should be
                                                    written */
         unsigned int            device_blocks;  /* Number of total blocks in
@@ -69,14 +71,21 @@
         unsigned long   phys_base;
         int             dump_level;     /* header_version 1 and later */
 	int		split;		/* header_version 2 and later */
-	unsigned long	start_pfn;	/* header_version 2 and later */
-	unsigned long	end_pfn;	/* header_version 2 and later */
+	unsigned long	start_pfn;	/* header_version 2 and later,
+					   OBSOLETE! 32bit only, full
+					   64bit in start_pfn_64. */
+	unsigned long	end_pfn;	/* header_version 2 and later,
+					   OBSOLETE! 32bit only, full
+					   64bit in end_pfn_64. */
 	off_t		offset_vmcoreinfo;/* header_version 3 and later */
 	unsigned long	size_vmcoreinfo;  /* header_version 3 and later */
 	off_t		offset_note;      /* header_version 4 and later */
 	unsigned long	size_note;        /* header_version 4 and later */
 	off_t		offset_eraseinfo; /* header_version 5 and later */
 	unsigned long	size_eraseinfo;   /* header_version 5 and later */
+	unsigned long long start_pfn_64;  /* header_version 6 and later */
+	unsigned long long end_pfn_64;	  /* header_version 6 and later */
+	unsigned long long max_mapnr_64;  /* header_version 6 and later */
     };
 
   - 1st-bitmap
diff --git a/diskdump_mod.h b/diskdump_mod.h
index af060b6..7306867 100644
--- a/diskdump_mod.h
+++ b/diskdump_mod.h
@@ -48,7 +48,9 @@ struct disk_dump_header {
 						   header in blocks */
 	unsigned int		bitmap_blocks;	/* Size of Memory bitmap in
 						   block */
-	unsigned int		max_mapnr;	/* = max_mapnr */
+	unsigned int		max_mapnr;	/* = max_mapnr, OBSOLETE!
+						   32bit only, full 64bit
+						   in sub header. */
 	unsigned int		total_ram_blocks;/* Number of blocks should be
 						   written */
 	unsigned int		device_blocks;	/* Number of total blocks in
@@ -67,14 +69,21 @@ struct kdump_sub_header {
 	unsigned long	phys_base;
 	int		dump_level;	/* header_version 1 and later */
 	int		split;		/* header_version 2 and later */
-	unsigned long	start_pfn;	/* header_version 2 and later */
-	unsigned long	end_pfn;	/* header_version 2 and later */
+	unsigned long	start_pfn;	/* header_version 2 and later,
+					   OBSOLETE! 32bit only, full
+					   64bit in start_pfn_64. */
+	unsigned long	end_pfn;	/* header_version 2 and later,
+					   OBSOLETE! 32bit only, full
+					   64bit in end_pfn_64. */
 	off_t		offset_vmcoreinfo;/* header_version 3 and later */
 	unsigned long	size_vmcoreinfo;  /* header_version 3 and later */
 	off_t		offset_note;      /* header_version 4 and later */
 	unsigned long	size_note;        /* header_version 4 and later */
 	off_t		offset_eraseinfo; /* header_version 5 and later */
 	unsigned long	size_eraseinfo;   /* header_version 5 and later */
+	unsigned long long start_pfn_64;  /* header_version 6 and later */
+	unsigned long long end_pfn_64;	  /* header_version 6 and later */
+	unsigned long long max_mapnr_64;  /* header_version 6 and later */
 };
 
 /* page flags */
diff --git a/makedumpfile.c b/makedumpfile.c
index b42565c..b41b1ad 100644
--- a/makedumpfile.c
+++ b/makedumpfile.c
@@ -23,6 +23,7 @@
 #include <stddef.h>
 #include <ctype.h>
 #include <sys/time.h>
+#include <limits.h>
 
 struct symbol_table	symbol_table;
 struct size_table	size_table;
@@ -125,7 +126,10 @@ get_max_mapnr(void)
 	unsigned long long max_paddr;
 
 	if (info->flag_refiltering) {
-		info->max_mapnr = info->dh_memory->max_mapnr;
+		if (info->dh_memory->header_version >= 6)
+			info->max_mapnr = info->kh_memory->max_mapnr_64;
+		else
+			info->max_mapnr = info->dh_memory->max_mapnr;
 		return TRUE;
 	}
 
@@ -802,6 +806,12 @@ get_kdump_compressed_header_info(char *filename)
 	DEBUG_MSG("  split            : %d\n", kh.split);
 	DEBUG_MSG("  start_pfn        : 0x%lx\n", kh.start_pfn);
 	DEBUG_MSG("  end_pfn          : 0x%lx\n", kh.end_pfn);
+	if (dh.header_version >= 6) {
+		/* A dumpfile contains full 64bit values. */
+		DEBUG_MSG("  start_pfn_64 : 0x%llx\n", kh.start_pfn_64);
+		DEBUG_MSG("  end_pfn_64 : 0x%llx\n", kh.end_pfn_64);
+		DEBUG_MSG("  max_mapnr_64 : 0x%llx\n", kh.max_mapnr_64);
+	}
 
 	info->dh_memory = malloc(sizeof(dh));
 	if (info->dh_memory == NULL) {
@@ -2766,14 +2776,16 @@ int
 initialize_bitmap_memory(void)
 {
 	struct disk_dump_header	*dh;
+	struct kdump_sub_header *kh;
 	struct dump_bitmap *bmp;
 	off_t bitmap_offset;
-	int bitmap_len, max_sect_len;
+	off_t bitmap_len, max_sect_len;
 	unsigned long pfn;
 	int i, j;
 	long block_size;
 
 	dh = info->dh_memory;
+	kh = info->kh_memory;
 	block_size = dh->block_size;
 
 	bitmap_offset
@@ -2793,7 +2805,10 @@ initialize_bitmap_memory(void)
 	bmp->offset = bitmap_offset + bitmap_len / 2;
 	info->bitmap_memory = bmp;
 
-	max_sect_len = divideup(dh->max_mapnr, BITMAP_SECT_LEN);
+	if (dh->header_version >= 6)
+		max_sect_len = divideup(kh->max_mapnr_64, BITMAP_SECT_LEN);
+	else
+		max_sect_len = divideup(dh->max_mapnr, BITMAP_SECT_LEN);
 	info->valid_pages = calloc(sizeof(ulong), max_sect_len);
 	if (info->valid_pages == NULL) {
 		ERRMSG("Can't allocate memory for the valid_pages. %s\n",
@@ -4705,7 +4720,7 @@ create_2nd_bitmap(void)
 int
 prepare_bitmap_buffer(void)
 {
-	unsigned long tmp;
+	unsigned long long tmp;
 
 	/*
 	 * Create 2 bitmaps (1st-bitmap & 2nd-bitmap) on block_size boundary.
@@ -4737,7 +4752,7 @@ prepare_bitmap_buffer(void)
 int
 prepare_bitmap_buffer_cyclic(void)
 {
-	unsigned long tmp;
+	unsigned long long tmp;
 
 	/*
 	 * Create 2 bitmaps (1st-bitmap & 2nd-bitmap) on block_size boundary.
@@ -5153,11 +5168,12 @@ write_kdump_header(void)
 	 * Write common header
 	 */
 	strncpy(dh->signature, KDUMP_SIGNATURE, strlen(KDUMP_SIGNATURE));
-	dh->header_version = 5;
+	dh->header_version = 6;
 	dh->block_size     = info->page_size;
 	dh->sub_hdr_size   = sizeof(kh) + size_note;
 	dh->sub_hdr_size   = divideup(dh->sub_hdr_size, dh->block_size);
-	dh->max_mapnr      = info->max_mapnr;
+	/* dh->max_mapnr may be truncated, full 64bit in kh.max_mapnr_64 */
+	dh->max_mapnr      = MIN(info->max_mapnr, UINT_MAX);
 	dh->nr_cpus        = get_nr_cpus();
 	dh->bitmap_blocks  = divideup(info->len_bitmap, dh->block_size);
 	memcpy(&dh->timestamp, &info->timestamp, sizeof(dh->timestamp));
@@ -5172,12 +5188,21 @@ write_kdump_header(void)
 	 */
 	size = sizeof(struct kdump_sub_header);
 	memset(&kh, 0, size);
+	/* 64bit max_mapnr_64 */
+	kh.max_mapnr_64 = info->max_mapnr;
 	kh.phys_base  = info->phys_base;
 	kh.dump_level = info->dump_level;
 	if (info->flag_split) {
 		kh.split = 1;
-		kh.start_pfn = info->split_start_pfn;
-		kh.end_pfn   = info->split_end_pfn;
+		/* start_pfn and end_pfn may be truncated,
+		 * only for compatibility purpose
+		 */
+		kh.start_pfn = MIN(info->split_start_pfn, UINT_MAX);
+		kh.end_pfn   = MIN(info->split_end_pfn, UINT_MAX);
+
+		/* 64bit start_pfn_64 and end_pfn_64 */
+		kh.start_pfn_64 = info->split_start_pfn;
+		kh.end_pfn_64   = info->split_end_pfn;
 	}
 	if (has_pt_note()) {
 		/*
@@ -6421,7 +6446,7 @@ int
 write_kdump_bitmap(void)
 {
 	struct cache_data bm;
-	long buf_size;
+	long long buf_size;
 	off_t offset;
 
 	int ret = FALSE;
@@ -7796,10 +7821,8 @@ store_splitting_info(void)
 
 		if (i == 0) {
 			memcpy(&dh, &tmp_dh, sizeof(tmp_dh));
-			info->max_mapnr = dh.max_mapnr;
 			if (!set_page_size(dh.block_size))
 				return FALSE;
-			DEBUG_MSG("max_mapnr    : %llx\n", info->max_mapnr);
 			DEBUG_MSG("page_size    : %ld\n", info->page_size);
 		}
 
@@ -7816,11 +7839,26 @@ store_splitting_info(void)
 			return FALSE;
 
 		if (i == 0) {
+			if (dh.header_version >= 6)
+				info->max_mapnr = kh.max_mapnr_64;
+			else
+				info->max_mapnr = dh.max_mapnr;
+
+			DEBUG_MSG("max_mapnr    : %llx\n", info->max_mapnr);
+		}
+
+		if (i == 0) {
 			info->dump_level = kh.dump_level;
 			DEBUG_MSG("dump_level   : %d\n", info->dump_level);
 		}
-		SPLITTING_START_PFN(i) = kh.start_pfn;
-		SPLITTING_END_PFN(i)   = kh.end_pfn;
+
+		if (dh.header_version >= 6) {
+			SPLITTING_START_PFN(i) = kh.start_pfn_64;
+			SPLITTING_END_PFN(i)   = kh.end_pfn_64;
+		} else {
+			SPLITTING_START_PFN(i) = kh.start_pfn;
+			SPLITTING_END_PFN(i)   = kh.end_pfn;
+		}
 		SPLITTING_OFFSET_EI(i) = kh.offset_eraseinfo;
 		SPLITTING_SIZE_EI(i)   = kh.size_eraseinfo;
 	}
@@ -7981,6 +8019,8 @@ reassemble_kdump_header(void)
 	kh.split = 0;
 	kh.start_pfn = 0;
 	kh.end_pfn   = 0;
+	kh.start_pfn_64 = 0;
+	kh.end_pfn_64 = 0;
 
 	if (lseek(info->fd_dumpfile, info->page_size, SEEK_SET) < 0) {
 		ERRMSG("Can't seek a file(%s). %s\n",
                                
                         
                        
                                
                                12 years
                        
                        
                 
         
 
        
            
        
        
        
                
                        
                                
                                 
                                        
                                
                         
                        
                                
                                
                                        
                                                
                                        
                                        
                                        [PATCH v2] Add support for kASLR for offline vmcore files
                                
                                
                                
                                    
                                        by Andy Honig
                                    
                                
                                
                                        
This patch adds a --kaslr command line parameter for loading x86_64
crash dumps with kaslr enabled.  This reuses the code from 32-bit
x86 relocations with some small changes.  The ASLR offset is positive
instead of negative.  Also had to move the code to traverse the
kernel section before the symbol storing code to figure out which
symbols were outside any sections and therefore were not relocated.
Also made a very small change in search_for_switch_to, which was
searching through gdb command output that can be different under
different settings.
Tested: Tested by loading kdump files from kernels with aslr enabled
and not enabled.  Ran bt, files, and struct file 0xXXXXXX.
Signed-off-by: Andy Honig <ahonig(a)google.com>
---
 defs.h    |  2 ++
 main.c    |  8 ++++++--
 symbols.c | 66 +++++++++++++++++++++++++++++++++++++++++++++------------------
 x86_64.c  | 20 +++++++++++++------
 4 files changed, 69 insertions(+), 27 deletions(-)
diff --git a/defs.h b/defs.h
index 83a4402..8de1fa4 100755
--- a/defs.h
+++ b/defs.h
@@ -2394,6 +2394,8 @@ struct symbol_table_data {
 	ulong __per_cpu_end;
 	off_t dwarf_debug_frame_file_offset;
 	ulong dwarf_debug_frame_size;
+	ulong first_section_start;
+	ulong last_section_end;
 };
 
 /* flags for st */
diff --git a/main.c b/main.c
index 3b469e3..5a41c1a 100755
--- a/main.c
+++ b/main.c
@@ -57,6 +57,7 @@ static struct option long_options[] = {
         {"CRASHPAGER", 0, 0, 0},
         {"no_scroll", 0, 0, 0},
         {"reloc", required_argument, 0, 0},
+        {"kaslr", required_argument, 0, 0},
 	{"active", 0, 0, 0},
 	{"minimal", 0, 0, 0},
 	{"mod", required_argument, 0, 0},
@@ -216,12 +217,15 @@ main(int argc, char **argv)
 		        else if (STREQ(long_options[option_index].name, "mod"))
 				kt->module_tree = optarg;
 
-		        else if (STREQ(long_options[option_index].name, "reloc")) {
+		        else if (STREQ(long_options[option_index].name, "reloc") || 
+			         STREQ(long_options[option_index].name, "kaslr")) {
 				if (!calculate(optarg, &kt->relocate, NULL, 0)) {
 					error(INFO, "invalid --reloc argument: %s\n",
 						optarg);
 					program_usage(SHORT_FORM);
-				} 
+				} else if (STREQ(long_options[option_index].name, "kaslr")) {
+					kt->relocate *= -1;
+				}
 				kt->flags |= RELOC_SET;
 			}
 
diff --git a/symbols.c b/symbols.c
index 93d9c8c..345c0de 100755
--- a/symbols.c
+++ b/symbols.c
@@ -192,22 +192,6 @@ symtab_init(void)
 		if (!check_gnu_debuglink(st->bfd))
 			no_debugging_data(FATAL);
 	}
-	
-	symcount = bfd_read_minisymbols(st->bfd, FALSE, &minisyms, &size);
-
-	if (symcount <= 0)
-		no_debugging_data(FATAL);
-	
-	sort_x = bfd_make_empty_symbol(st->bfd);
-	sort_y = bfd_make_empty_symbol(st->bfd);
-	if (sort_x == NULL || sort_y == NULL)
-		error(FATAL, "bfd_make_empty_symbol() failed\n");
-	
-	gnu_qsort(st->bfd, minisyms, symcount, size, sort_x, sort_y);
-	
-	store_symbols(st->bfd, FALSE, minisyms, symcount, size);
-	
-	free(minisyms);
 
 	/*
 	 *  Gather references to the kernel sections.
@@ -217,6 +201,7 @@ symtab_init(void)
                 error(FATAL, "symbol table section array malloc: %s\n",
                         strerror(errno));
 	BZERO(st->sections, st->bfd->section_count * sizeof(struct sec *));
+	st->first_section_start = st->last_section_end = 0;
 
 	bfd_map_over_sections(st->bfd, section_header_info, KERNEL_SECTIONS);
 	if ((st->flags & (NO_SEC_LOAD|NO_SEC_CONTENTS)) ==
@@ -227,6 +212,23 @@ symtab_init(void)
 			error(FATAL, DEBUGINFO_ERROR_MESSAGE2);
 		}
 	}
+	
+	symcount = bfd_read_minisymbols(st->bfd, FALSE, &minisyms, &size);
+
+	if (symcount <= 0)
+		no_debugging_data(FATAL);
+	
+	sort_x = bfd_make_empty_symbol(st->bfd);
+	sort_y = bfd_make_empty_symbol(st->bfd);
+	if (sort_x == NULL || sort_y == NULL)
+		error(FATAL, "bfd_make_empty_symbol() failed\n");
+	
+	gnu_qsort(st->bfd, minisyms, symcount, size, sort_x, sort_y);
+	
+	store_symbols(st->bfd, FALSE, minisyms, symcount, size);
+	
+	free(minisyms);
+
 
 	symname_hash_init();
 	symval_hash_init();
@@ -585,7 +587,7 @@ store_symbols(bfd *abfd, int dynamic, void *minisyms, long symcount,
 	st->symcnt = 0;
 	sp = st->symtable;
 
-	if (machine_type("X86")) {
+	if (machine_type("X86") || machine_type("X86_64")) {
 		if (!(kt->flags & RELOC_SET))
 			kt->flags |= RELOC_FORCE;
 	} else
@@ -658,7 +660,7 @@ store_sysmap_symbols(void)
                 error(FATAL, "symbol table namespace malloc: %s\n",
                         strerror(errno));
 
-	if (!machine_type("X86"))
+	if (!machine_type("X86") && !machine_type("X86_64"))
 		kt->flags &= ~RELOC_SET;
 
 	first = 0;
@@ -730,7 +732,17 @@ relocate(ulong symval, char *symname, int first_symbol)
 		break;
 	}
 
-	return (symval - kt->relocate);
+	/*
+	 * There are some symbols which are outside of any section
+	 * either because they are offsets or because they are absolute
+	 * addresses.  These should not be relocated.
+	 */
+	if (symval >= st->first_section_start &&
+		symval <= st->last_section_end) {
+		return (symval - kt->relocate);
+	} else {
+		return symval;
+	}
 }
 
 /*
@@ -9506,6 +9518,7 @@ section_header_info(bfd *bfd, asection *section, void *reqptr)
 	struct load_module *lm;
 	ulong request;
         asection **sec;
+	ulong section_end_address;
 
 	request = ((ulong)reqptr);
 
@@ -9524,6 +9537,12 @@ section_header_info(bfd *bfd, asection *section, void *reqptr)
                 	kt->etext_init = kt->stext_init +
                         	(ulong)bfd_section_size(bfd, section);
 		}
+
+		if (STREQ(bfd_get_section_name(bfd, section), ".text")) {
+			st->first_section_start = (ulong)
+				bfd_get_section_vma(bfd, section);
+		}
+
                 if (STREQ(bfd_get_section_name(bfd, section), ".text") ||
                     STREQ(bfd_get_section_name(bfd, section), ".data")) {
                         if (!(bfd_get_section_flags(bfd, section) & SEC_LOAD))
@@ -9540,6 +9559,15 @@ section_header_info(bfd *bfd, asection *section, void *reqptr)
 			st->dwarf_debug_frame_file_offset = (off_t)section->filepos;
 			st->dwarf_debug_frame_size = (ulong)bfd_section_size(bfd, section);
 		}
+
+		if (st->first_section_start != 0) {
+			section_end_address =
+				(ulong) bfd_get_section_vma(bfd, section) +
+				(ulong) bfd_section_size(bfd, section);
+			if (section_end_address > st->last_section_end)
+				st->last_section_end = section_end_address;
+		}
+
 		break;
 
 	case (ulong)MODULE_SECTIONS:
diff --git a/x86_64.c b/x86_64.c
index 1d915b1..0c22ee1 100755
--- a/x86_64.c
+++ b/x86_64.c
@@ -5382,16 +5382,22 @@ search_for_switch_to(ulong start, ulong end)
 {
 	ulong max_instructions, address;
 	char buf1[BUFSIZE];
-	char buf2[BUFSIZE];
+	char search_string1[BUFSIZE];
+	char search_string2[BUFSIZE];
 	int found;
 
 	max_instructions = end - start;
 	found = FALSE;
 	sprintf(buf1, "x/%ldi 0x%lx", max_instructions, start);
-	if (symbol_exists("__switch_to"))
-		sprintf(buf2, "callq  0x%lx", symbol_value("__switch_to"));
-	else
-		buf2[0] = NULLCHAR;
+	if (symbol_exists("__switch_to")) {
+		sprintf(search_string1,
+			"call   0x%lx", symbol_value("__switch_to"));
+		sprintf(search_string2,
+			"callq  0x%lx", symbol_value("__switch_to"));
+	} else {
+		search_string1[0] = NULLCHAR;
+		search_string2[0] = NULLCHAR;
+	}
 
 	open_tmpfile();
 
@@ -5404,7 +5410,9 @@ search_for_switch_to(ulong start, ulong end)
 			break;
 		if (strstr(buf1, "<__switch_to>"))
 			found = TRUE;
-		if (strlen(buf2) && strstr(buf1, buf2))
+		if (strlen(search_string1) && strstr(buf1, search_string1))
+			found = TRUE;
+		if (strlen(search_string2) && strstr(buf1, search_string2))
 			found = TRUE;
 	}
 	close_tmpfile();
-- 
1.8.4
                                
                         
                        
                                
                                12 years
                        
                        
                 
         
 
        
            
        
        
        
                
                        
                        
                                
                                
                                        
                                                
                                        
                                        
                                        Crash support for kASLR
                                
                                
                                
                                    
                                        by Andrew Honig
                                    
                                
                                
                                        I'm trying to add crash support for kdumps from kASLR'd kernels.  I've
got it working with a few small changes and I wanted to solicit
comments before sending a patch.
1) The --reloc flag appears to specify an offset to be subtracted from
the loaded address, whereas the aslr offset is added.  It's annoying to
try to specify negative numbers on the command line, so I'd like to
add another argument --aslr which is the same as --reloc but negates
the value.
2) There are some symbols which should not be relocated.  Specifically
the per_cpu section symbols are zero based offsets which should not
have the offset apply.  Additionally there are VDSO symbols which are
fixed even with kASLR enabled.  To fix this I'd like to add code to
iterate through the sections and find the end of the last section and
only apply the relocation value to values after the start of text but
before the end of the last section.
thanks,
Andy
                                
                         
                        
                                
                                12 years
                        
                        
                 
         
 
        
            
        
        
        
                
                        
                                
                                 
                                        
                                
                         
                        
                                
                                
                                        
                                                
                                        
                                        
                                        [PATCH] Add support for kASLR for offline vmcore files
                                
                                
                                
                                    
                                        by Andy Honig
                                    
                                
                                
                                        
This patch adds a --kaslr command line parameter for loading x86_64
crash dumps with kaslr enabled.  This reuses the code from 32-bit
x86 relocations with some small changes.  The ASLR offset is positive
instead of negative.  Also had to move the code to traverse the
kernel section before the symbol storing code to figure out which
symbols were outside any sections and therefore were not relocated.
Also made a very small change in search_for_switch_to, which was
searching through gdb command output for a slightly incorrect syntax.
Still TODO is getting the relocation address from the vmcoreinfo to make
the --kaslr argument unnecessary.
Tested: Tested by loading kdump files from kernels with aslr enabled
and not enabled.  Ran bt, files, and struct file 0xXXXXXX.
Signed-off-by: Andy Honig <ahonig(a)google.com>
---
 defs.h    |  2 ++
 main.c    |  8 ++++++--
 symbols.c | 66 +++++++++++++++++++++++++++++++++++++++++++++------------------
 x86_64.c  |  2 +-
 4 files changed, 56 insertions(+), 22 deletions(-)
diff --git a/defs.h b/defs.h
index 83a4402..8de1fa4 100755
--- a/defs.h
+++ b/defs.h
@@ -2394,6 +2394,8 @@ struct symbol_table_data {
 	ulong __per_cpu_end;
 	off_t dwarf_debug_frame_file_offset;
 	ulong dwarf_debug_frame_size;
+	ulong first_section_start;
+	ulong last_section_end;
 };
 
 /* flags for st */
diff --git a/main.c b/main.c
index 3b469e3..5a41c1a 100755
--- a/main.c
+++ b/main.c
@@ -57,6 +57,7 @@ static struct option long_options[] = {
         {"CRASHPAGER", 0, 0, 0},
         {"no_scroll", 0, 0, 0},
         {"reloc", required_argument, 0, 0},
+        {"kaslr", required_argument, 0, 0},
 	{"active", 0, 0, 0},
 	{"minimal", 0, 0, 0},
 	{"mod", required_argument, 0, 0},
@@ -216,12 +217,15 @@ main(int argc, char **argv)
 		        else if (STREQ(long_options[option_index].name, "mod"))
 				kt->module_tree = optarg;
 
-		        else if (STREQ(long_options[option_index].name, "reloc")) {
+		        else if (STREQ(long_options[option_index].name, "reloc") || 
+			         STREQ(long_options[option_index].name, "kaslr")) {
 				if (!calculate(optarg, &kt->relocate, NULL, 0)) {
 					error(INFO, "invalid --reloc argument: %s\n",
 						optarg);
 					program_usage(SHORT_FORM);
-				} 
+				} else if (STREQ(long_options[option_index].name, "kaslr")) {
+					kt->relocate *= -1;
+				}
 				kt->flags |= RELOC_SET;
 			}
 
diff --git a/symbols.c b/symbols.c
index 93d9c8c..345c0de 100755
--- a/symbols.c
+++ b/symbols.c
@@ -192,22 +192,6 @@ symtab_init(void)
 		if (!check_gnu_debuglink(st->bfd))
 			no_debugging_data(FATAL);
 	}
-	
-	symcount = bfd_read_minisymbols(st->bfd, FALSE, &minisyms, &size);
-
-	if (symcount <= 0)
-		no_debugging_data(FATAL);
-	
-	sort_x = bfd_make_empty_symbol(st->bfd);
-	sort_y = bfd_make_empty_symbol(st->bfd);
-	if (sort_x == NULL || sort_y == NULL)
-		error(FATAL, "bfd_make_empty_symbol() failed\n");
-	
-	gnu_qsort(st->bfd, minisyms, symcount, size, sort_x, sort_y);
-	
-	store_symbols(st->bfd, FALSE, minisyms, symcount, size);
-	
-	free(minisyms);
 
 	/*
 	 *  Gather references to the kernel sections.
@@ -217,6 +201,7 @@ symtab_init(void)
                 error(FATAL, "symbol table section array malloc: %s\n",
                         strerror(errno));
 	BZERO(st->sections, st->bfd->section_count * sizeof(struct sec *));
+	st->first_section_start = st->last_section_end = 0;
 
 	bfd_map_over_sections(st->bfd, section_header_info, KERNEL_SECTIONS);
 	if ((st->flags & (NO_SEC_LOAD|NO_SEC_CONTENTS)) ==
@@ -227,6 +212,23 @@ symtab_init(void)
 			error(FATAL, DEBUGINFO_ERROR_MESSAGE2);
 		}
 	}
+	
+	symcount = bfd_read_minisymbols(st->bfd, FALSE, &minisyms, &size);
+
+	if (symcount <= 0)
+		no_debugging_data(FATAL);
+	
+	sort_x = bfd_make_empty_symbol(st->bfd);
+	sort_y = bfd_make_empty_symbol(st->bfd);
+	if (sort_x == NULL || sort_y == NULL)
+		error(FATAL, "bfd_make_empty_symbol() failed\n");
+	
+	gnu_qsort(st->bfd, minisyms, symcount, size, sort_x, sort_y);
+	
+	store_symbols(st->bfd, FALSE, minisyms, symcount, size);
+	
+	free(minisyms);
+
 
 	symname_hash_init();
 	symval_hash_init();
@@ -585,7 +587,7 @@ store_symbols(bfd *abfd, int dynamic, void *minisyms, long symcount,
 	st->symcnt = 0;
 	sp = st->symtable;
 
-	if (machine_type("X86")) {
+	if (machine_type("X86") || machine_type("X86_64")) {
 		if (!(kt->flags & RELOC_SET))
 			kt->flags |= RELOC_FORCE;
 	} else
@@ -658,7 +660,7 @@ store_sysmap_symbols(void)
                 error(FATAL, "symbol table namespace malloc: %s\n",
                         strerror(errno));
 
-	if (!machine_type("X86"))
+	if (!machine_type("X86") && !machine_type("X86_64"))
 		kt->flags &= ~RELOC_SET;
 
 	first = 0;
@@ -730,7 +732,17 @@ relocate(ulong symval, char *symname, int first_symbol)
 		break;
 	}
 
-	return (symval - kt->relocate);
+	/*
+	 * There are some symbols which are outside of any section
+	 * either because they are offsets or because they are absolute
+	 * addresses.  These should not be relocated.
+	 */
+	if (symval >= st->first_section_start &&
+		symval <= st->last_section_end) {
+		return (symval - kt->relocate);
+	} else {
+		return symval;
+	}
 }
 
 /*
@@ -9506,6 +9518,7 @@ section_header_info(bfd *bfd, asection *section, void *reqptr)
 	struct load_module *lm;
 	ulong request;
         asection **sec;
+	ulong section_end_address;
 
 	request = ((ulong)reqptr);
 
@@ -9524,6 +9537,12 @@ section_header_info(bfd *bfd, asection *section, void *reqptr)
                 	kt->etext_init = kt->stext_init +
                         	(ulong)bfd_section_size(bfd, section);
 		}
+
+		if (STREQ(bfd_get_section_name(bfd, section), ".text")) {
+			st->first_section_start = (ulong)
+				bfd_get_section_vma(bfd, section);
+		}
+
                 if (STREQ(bfd_get_section_name(bfd, section), ".text") ||
                     STREQ(bfd_get_section_name(bfd, section), ".data")) {
                         if (!(bfd_get_section_flags(bfd, section) & SEC_LOAD))
@@ -9540,6 +9559,15 @@ section_header_info(bfd *bfd, asection *section, void *reqptr)
 			st->dwarf_debug_frame_file_offset = (off_t)section->filepos;
 			st->dwarf_debug_frame_size = (ulong)bfd_section_size(bfd, section);
 		}
+
+		if (st->first_section_start != 0) {
+			section_end_address =
+				(ulong) bfd_get_section_vma(bfd, section) +
+				(ulong) bfd_section_size(bfd, section);
+			if (section_end_address > st->last_section_end)
+				st->last_section_end = section_end_address;
+		}
+
 		break;
 
 	case (ulong)MODULE_SECTIONS:
diff --git a/x86_64.c b/x86_64.c
index 1d915b1..f4334ac 100755
--- a/x86_64.c
+++ b/x86_64.c
@@ -5389,7 +5389,7 @@ search_for_switch_to(ulong start, ulong end)
 	found = FALSE;
 	sprintf(buf1, "x/%ldi 0x%lx", max_instructions, start);
 	if (symbol_exists("__switch_to"))
-		sprintf(buf2, "callq  0x%lx", symbol_value("__switch_to"));
+		sprintf(buf2, "call   0x%lx", symbol_value("__switch_to"));
 	else
 		buf2[0] = NULLCHAR;
 
-- 
1.8.4
                                
                         
                        
                                
                                12 years