----- Original Message -----
In kernel v4.6, Kernel ASLR (KASLR) is supported on arm64, and the start
address of the kernel image can be randomized if CONFIG_RANDOMIZE_BASE is
enabled.
Even worse, the kernel image is no longer mapped in the linear mapping, but
in the vmalloc area (i.e. below PAGE_OFFSET).
Now, according to the kernel's memory.h, converting a virtual address to
a physical address should be done like below:
#define __virt_to_phys(x) ({ \
phys_addr_t __x = (phys_addr_t)(x); \
__x & BIT(VA_BITS - 1) ? (__x & ~PAGE_OFFSET) + PHYS_OFFSET : \
(__x - kimage_voffset); })
Please note that PHYS_OFFSET is no longer equal to the start address of
the first usable memory block in SYSTEM RAM due to the fact mentioned
above.
This patch addresses this change and allows the crash utility to access
memory contents with correct addresses.
* On a live system, crash with this patch won't work, especially
with CONFIG_RANDOMIZE_BASE configured, because we currently have no way
to know kimage_voffset.
Actually it does work on a live system if CONFIG_RANDOMIZE_BASE is not
configured. And it also works on live pre-NEW_VMEMMAP kernels now that
the VA_BITS calculation has been fixed.
* For a core dump file, we can do simply:
$ crash <vmlinux> <vmcore>
as long as the file has "NUMBER(kimage_voffset)"
(RELOC_AUTO|KASLR is automatically set.)
I'm planning to add this enhancement in my next version of kexec/kdump
patch, i.e. v17.
Signed-off-by: AKASHI Takahiro <takahiro.akashi(a)linaro.org>
---
arm64.c | 218
++++++++++++++++++++++++++++++++++++++++++++++++--------------
defs.h | 24 +++++--
main.c | 7 +-
symbols.c | 12 ++--
4 files changed, 197 insertions(+), 64 deletions(-)
diff --git a/arm64.c b/arm64.c
index 34c8c59..c16ea67 100644
--- a/arm64.c
+++ b/arm64.c
@@ -72,6 +72,21 @@ static int arm64_get_crash_notes(void);
static void arm64_calc_VA_BITS(void);
static int arm64_is_uvaddr(ulong, struct task_context *);
+ulong
+arm64_VTOP(ulong addr)
+{
+ if (!(machdep->flags & NEW_VMEMMAP) ||
+ (addr >= machdep->machspec->page_offset)) {
+ return machdep->machspec->phys_offset
+ + (addr - machdep->machspec->page_offset);
+ } else {
+ if (machdep->machspec->kimage_voffset)
+ return addr - machdep->machspec->kimage_voffset;
+ else /* no randomness */
+ return machdep->machspec->phys_offset
+ + (addr - machdep->machspec->vmalloc_start_addr);
+ }
+}
The mixture of NEW_VMEMMAP and the old/new unity-mapped translation
is confusing. For the sake of simplicity, can you please make it look
something like:
if (machdep->flags & NEW_VMEMMAP) {
/* handle both mapped kernel and unity-mapped addresses here */
...
} else {
/* the old VTOP macro */
...
}
I don't care if there's duplication of effort for the unity-mapped regions.
I just want to make it more obvious what's going on.
/*
* Do all necessary machine-specific setup here. This is called several
times
@@ -81,6 +96,7 @@ void
arm64_init(int when)
{
ulong value;
+ char *string;
struct machine_specific *ms;
#if defined(__x86_64__)
@@ -102,9 +118,33 @@ arm64_init(int when)
if (machdep->cmdline_args[0])
arm64_parse_cmdline_args();
machdep->flags |= MACHDEP_BT_TEXT;
+
+ ms = machdep->machspec;
+ if (!ms->kimage_voffset &&
+ (string = pc->read_vmcoreinfo("NUMBER(kimage_voffset)"))) {
+ ms->kimage_voffset = htol(string, QUIET, NULL);
+ free(string);
+ }
+
+ if (ms->kimage_voffset) {
+ machdep->flags |= NEW_VMEMMAP;
+
+ /*
+ * Even if CONFIG_RANDOMIZE_RAM is not configured,
s/CONFIG_RANDOMIZE_RAM/CONFIG_RANDOMIZE_BASE/
+ * derive_kaslr_offset() should work and set
+ * kt->relocate to 0
+ */
+ if (!kt->relocate && !(kt->flags2 & (RELOC_AUTO|KASLR)))
+ kt->flags2 |= (RELOC_AUTO|KASLR);
+ }
+
break;
case PRE_GDB:
+ /* This check is somewhat redundant */
+ if (kernel_symbol_exists("kimage_voffset"))
+ machdep->flags |= NEW_VMEMMAP;
+
Certainly not redundant on a live system without CONFIG_RANDOMIZE_BASE.
if (!machdep->pagesize) {
/*
* Kerneldoc Documentation/arm64/booting.txt describes
@@ -160,16 +200,35 @@ arm64_init(int when)
machdep->pagemask = ~((ulonglong)machdep->pageoffset);
arm64_calc_VA_BITS();
- machdep->machspec->page_offset = ARM64_PAGE_OFFSET;
+ ms = machdep->machspec;
+ ms->page_offset = ARM64_PAGE_OFFSET;
+ /* FIXME: idmap for NEW_VMEMMAP */
What's the FIXME here?
machdep->identity_map_base = ARM64_PAGE_OFFSET;
- machdep->machspec->userspace_top = ARM64_USERSPACE_TOP;
- machdep->machspec->modules_vaddr = ARM64_MODULES_VADDR;
- machdep->machspec->modules_end = ARM64_MODULES_END;
- machdep->machspec->vmalloc_start_addr = ARM64_VMALLOC_START;
- machdep->machspec->vmalloc_end = ARM64_VMALLOC_END;
- machdep->kvbase = ARM64_VMALLOC_START;
- machdep->machspec->vmemmap_vaddr = ARM64_VMEMMAP_VADDR;
- machdep->machspec->vmemmap_end = ARM64_VMEMMAP_END;
+ machdep->kvbase = ARM64_VA_START;
+ ms->userspace_top = ARM64_USERSPACE_TOP;
+ if (machdep->flags & NEW_VMEMMAP) {
+ struct syment *sp;
+
+ sp = kernel_symbol_search("_text");
+ ms->kimage_text = (sp ? sp->value : 0);
+ sp = kernel_symbol_search("_end");
+ ms->kimage_end = (sp ? sp->value : 0);
+
+ ms->modules_vaddr = ARM64_VA_START;
+ if (kernel_symbol_exists("kasan_init"))
+ ms->modules_vaddr += ARM64_KASAN_SHADOW_SIZE;
+ ms->modules_end = ms->modules_vaddr
+ + ARM64_MODULES_VSIZE -1;
+
+ ms->vmalloc_start_addr = ms->modules_end + 1;
+ } else {
+ ms->modules_vaddr = ARM64_PAGE_OFFSET - MEGABYTES(64);
+ ms->modules_end = ARM64_PAGE_OFFSET - 1;
+ ms->vmalloc_start_addr = ARM64_VA_START;
+ }
+ ms->vmalloc_end = ARM64_VMALLOC_END;
+ ms->vmemmap_vaddr = ARM64_VMEMMAP_VADDR;
+ ms->vmemmap_end = ARM64_VMEMMAP_END;
switch (machdep->pagesize)
{
@@ -232,8 +291,6 @@ arm64_init(int when)
machdep->stacksize = ARM64_STACK_SIZE;
machdep->flags |= VMEMMAP;
- arm64_calc_phys_offset();
-
machdep->uvtop = arm64_uvtop;
machdep->kvtop = arm64_kvtop;
machdep->is_kvaddr = generic_is_kvaddr;
@@ -262,6 +319,10 @@ arm64_init(int when)
machdep->dumpfile_init = NULL;
machdep->verify_line_number = NULL;
machdep->init_kernel_pgd = arm64_init_kernel_pgd;
+
+ /* use machdep parameters */
+ arm64_calc_phys_offset();
+
We'll continue to beat a dead horse below in the arm64_calc_phys_offset() patch...
break;
case POST_GDB:
@@ -409,6 +470,8 @@ arm64_dump_machdep_table(ulong arg)
fprintf(fp, "%sIRQ_STACKS", others++ ? "|" : "");
if (machdep->flags & MACHDEP_BT_TEXT)
fprintf(fp, "%sMACHDEP_BT_TEXT", others++ ? "|" : "");
+ if (machdep->flags & NEW_VMEMMAP)
+ fprintf(fp, "%sNEW_VMEMMAP", others++ ? "|" : "");
fprintf(fp, ")\n");
fprintf(fp, " kvbase: %lx\n", machdep->kvbase);
@@ -503,6 +566,11 @@ arm64_dump_machdep_table(ulong arg)
fprintf(fp, " modules_end: %016lx\n", ms->modules_end);
fprintf(fp, " vmemmap_vaddr: %016lx\n", ms->vmemmap_vaddr);
fprintf(fp, " vmemmap_end: %016lx\n", ms->vmemmap_end);
+ if (machdep->flags & NEW_VMEMMAP) {
+ fprintf(fp, " kimage_text: %016lx\n", ms->kimage_text);
+ fprintf(fp, " kimage_end: %016lx\n", ms->kimage_end);
+ fprintf(fp, " kimage_voffset: %016lx\n", ms->kimage_voffset);
+ }
fprintf(fp, " phys_offset: %lx\n", ms->phys_offset);
fprintf(fp, "__exception_text_start: %lx\n", ms->__exception_text_start);
fprintf(fp, " __exception_text_end: %lx\n", ms->__exception_text_end);
@@ -543,6 +611,42 @@ arm64_dump_machdep_table(ulong arg)
}
}
+static int
+arm64_parse_machdep_arg_l(char *argstring, char *param, ulong *value)
+{
+ int len;
+ int megabytes = FALSE;
+ char *p;
+
+ len = strlen(param);
+ if (!STRNEQ(argstring, param) || (argstring[len] != '='))
+ return FALSE;
+
+ if ((LASTCHAR(argstring) == 'm') ||
+ (LASTCHAR(argstring) == 'M')) {
+ LASTCHAR(argstring) = NULLCHAR;
+ megabytes = TRUE;
+ }
+
+ p = argstring + len + 1;
+ if (strlen(p)) {
+ int flags = RETURN_ON_ERROR | QUIET;
+ int err = 0;
+
+ if (megabytes) {
+ *value = dtol(p, flags, &err);
+ if (!err)
+ *value = MEGABYTES(*value);
+ } else {
+ *value = htol(p, flags, &err);
+ }
+
+ if (!err)
+ return TRUE;
+ }
+
+ return FALSE;
+}
/*
* Parse machine dependent command line arguments.
@@ -554,11 +658,10 @@ arm64_dump_machdep_table(ulong arg)
static void
arm64_parse_cmdline_args(void)
{
- int index, i, c, err;
+ int index, i, c;
char *arglist[MAXARGS];
char buf[BUFSIZE];
char *p;
- ulong value = 0;
for (index = 0; index < MAX_MACHDEP_ARGS; index++) {
if (!machdep->cmdline_args[index])
@@ -580,39 +683,23 @@ arm64_parse_cmdline_args(void)
c = parse_line(buf, arglist);
for (i = 0; i < c; i++) {
- err = 0;
-
- if (STRNEQ(arglist[i], "phys_offset=")) {
- int megabytes = FALSE;
- int flags = RETURN_ON_ERROR | QUIET;
-
- if ((LASTCHAR(arglist[i]) == 'm') ||
- (LASTCHAR(arglist[i]) == 'M')) {
- LASTCHAR(arglist[i]) = NULLCHAR;
- megabytes = TRUE;
- }
-
- p = arglist[i] + strlen("phys_offset=");
- if (strlen(p)) {
- if (megabytes)
- value = dtol(p, flags, &err);
- else
- value = htol(p, flags, &err);
- }
-
- if (!err) {
- if (megabytes)
- value = MEGABYTES(value);
-
- machdep->machspec->phys_offset = value;
-
- error(NOTE,
- "setting phys_offset to: 0x%lx\n\n",
- machdep->machspec->phys_offset);
+ if (arm64_parse_machdep_arg_l(arglist[i],
+ "phys_offset",
+ &machdep->machspec->phys_offset)) {
+ error(NOTE,
+ "setting phys_offset to: 0x%lx\n\n",
+ machdep->machspec->phys_offset);
+
+ machdep->flags |= PHYS_OFFSET;
+ continue;
+ } else if (arm64_parse_machdep_arg_l(arglist[i],
+ "kimage_voffset",
+ &machdep->machspec->kimage_voffset)) {
+ error(NOTE,
+ "setting kimage_voffset to: 0x%lx\n\n",
+ machdep->machspec->kimage_voffset);
- machdep->flags |= PHYS_OFFSET;
- continue;
- }
+ continue;
}
error(WARNING, "ignoring --machdep option: %s\n",
@@ -631,6 +718,19 @@ arm64_calc_phys_offset(void)
if (machdep->flags & PHYS_OFFSET) /* --machdep override */
return;
+ if (machdep->flags & NEW_VMEMMAP) {
+ struct syment *sp;
+ ulong value;
+
+ sp = kernel_symbol_search("memstart_addr");
+ if (sp && readmem(sp->value, KVADDR, (char *)&value,
+ sizeof(value), "memstart_addr",
+ QUIET|RETURN_ON_ERROR)) {
+ ms->phys_offset = value;
+ return;
+ }
+ }
+
As we've discussed before, I cannot accept the chicken-and-egg snippet above.
If machdep->machspec->kimage_voffset has not been determined, then the arm64_VTOP()
call made by readmem() will utilize machdep->machspec->phys_offset -- which is what
you're trying to initialize here. On my live system that has a phys_offset of
0x40000000000, the readmem() will fail quietly, but that's not an acceptable usage
of the RETURN_ON_ERROR failure mode, because "memstart_addr" is a legitimate virtual
address that should never fail. Also, because the actual kernel phys_offset can be
negative, it would seem to be entirely within the realm of possibility that the
readmem() could mistakenly return successfully, but have read the wrong location.
So what it boils down to is that readmem() should NEVER be called until ALL of the
pieces required by arm64_VTOP() have all been properly initialized, however
that might be accomplished. Not to mention that calling it this early sets a
dangerous precedent.
And in the case of kdump's ELF vmcore and compressed vmcore formats, there is an
existing API between kdump and the crash utility to pass back the phys_base. In the
kexec-tool's makedumpfile.c file, there is the get_phys_base_arm64() function that
currently calculates the offset by using the PT_LOAD segments, and presumably will
have to be modified to use new VMCOREINFO data. But regardless of how it's done,
the architecture-neutral write_kdump_header() later copies that offset value into
the kdump_sub_header.phys_base field for the crash utility to access. Trying to do
a readmem() this early in time is essentially breaking that API.
/*
* Next determine suitable value for phys_offset. User can override this
* by passing valid '--machdep phys_offset=<addr>' option.
@@ -2377,6 +2477,11 @@ arm64_IS_VMALLOC_ADDR(ulong vaddr)
{
struct machine_specific *ms = machdep->machspec;
+ if ((machdep->flags & NEW_VMEMMAP) &&
+ (vaddr >= machdep->machspec->kimage_text) &&
+ (vaddr <= machdep->machspec->kimage_end))
+ return FALSE;
+
return ((vaddr >= ms->vmalloc_start_addr && vaddr <=
ms->vmalloc_end) ||
((machdep->flags & VMEMMAP) &&
(vaddr >= ms->vmemmap_vaddr && vaddr <=
ms->vmemmap_end))
||
@@ -2407,7 +2512,10 @@ arm64_calc_VA_BITS(void)
for (bitval = highest_bit_long(value); bitval; bitval--) {
if ((value & (1UL << bitval)) == 0) {
- machdep->machspec->VA_BITS = bitval + 2;
+ if (machdep->flags & NEW_VMEMMAP)
+ machdep->machspec->VA_BITS = bitval + 1;
+ else
+ machdep->machspec->VA_BITS = bitval + 2;
break;
Nice -- this was staring us in the face all along, where in the
old kernel, the symbol values were in unity-mapped space.
}
@@ -2459,10 +2567,22 @@ arm64_calc_virtual_memory_ranges(void)
break;
}
- vmemmap_size = ALIGN((1UL << (ms->VA_BITS - machdep->pageshift)) *
SIZE(page), PUD_SIZE);
+ if (machdep->flags & NEW_VMEMMAP)
+#define STRUCT_PAGE_MAX_SHIFT 6
+ vmemmap_size = 1UL << (ms->VA_BITS - machdep->pageshift - 1
+ + STRUCT_PAGE_MAX_SHIFT);
+ else
+ vmemmap_size = ALIGN((1UL << (ms->VA_BITS - machdep->pageshift)) *
SIZE(page), PUD_SIZE);
+
vmalloc_end = (ms->page_offset - PUD_SIZE - vmemmap_size - SZ_64K);
- vmemmap_start = vmalloc_end + SZ_64K;
- vmemmap_end = vmemmap_start + vmemmap_size;
+
+ if (machdep->flags & NEW_VMEMMAP) {
+ vmemmap_start = ms->page_offset - vmemmap_size;
+ vmemmap_end = ms->page_offset;
+ } else {
+ vmemmap_start = vmalloc_end + SZ_64K;
+ vmemmap_end = vmemmap_start + vmemmap_size;
+ }
ms->vmalloc_end = vmalloc_end - 1;
ms->vmemmap_vaddr = vmemmap_start;
diff --git a/defs.h b/defs.h
index 7af320a..406a865 100644
--- a/defs.h
+++ b/defs.h
@@ -2846,8 +2846,8 @@ typedef u64 pte_t;
#define PTOV(X) \
((unsigned
long)(X)-(machdep->machspec->phys_offset)+(machdep->machspec->page_offset))
-#define VTOP(X) \
- ((unsigned
long)(X)-(machdep->machspec->page_offset)+(machdep->machspec->phys_offset))
+
+#define VTOP(X) arm64_VTOP((ulong)(X))
#define USERSPACE_TOP (machdep->machspec->userspace_top)
#define PAGE_OFFSET (machdep->machspec->page_offset)
@@ -2940,18 +2940,23 @@ typedef signed int s32;
#define VM_L3_4K (0x10)
#define KDUMP_ENABLED (0x20)
#define IRQ_STACKS (0x40)
+#define NEW_VMEMMAP (0x80)
/*
* sources: Documentation/arm64/memory.txt
* arch/arm64/include/asm/memory.h
* arch/arm64/include/asm/pgtable.h
*/
-
-#define ARM64_PAGE_OFFSET ((0xffffffffffffffffUL) <<
(machdep->machspec->VA_BITS - 1))
+#define ARM64_VA_START ((0xffffffffffffffffUL) \
+ << machdep->machspec->VA_BITS)
+#define ARM64_PAGE_OFFSET ((0xffffffffffffffffUL) \
+ << (machdep->machspec->VA_BITS - 1))
#define ARM64_USERSPACE_TOP ((1UL) << machdep->machspec->VA_BITS)
-#define ARM64_MODULES_VADDR (ARM64_PAGE_OFFSET - MEGABYTES(64))
-#define ARM64_MODULES_END (ARM64_PAGE_OFFSET - 1)
-#define ARM64_VMALLOC_START ((0xffffffffffffffffUL) <<
machdep->machspec->VA_BITS)
+
+/* only used for v4.6 or later */
+#define ARM64_MODULES_VSIZE MEGABYTES(128)
+#define ARM64_KASAN_SHADOW_SIZE (1UL << (machdep->machspec->VA_BITS - 3))
+
/*
* The following 3 definitions are the original values, but are obsolete
* for 3.17 and later kernels because they are now build-time calculations.
@@ -3030,6 +3035,10 @@ struct machine_specific {
ulong kernel_flags;
ulong irq_stack_size;
ulong *irq_stacks;
+ /* only needed for v4.6 or later kernel */
+ ulong kimage_voffset;
+ ulong kimage_text;
+ ulong kimage_end;
};
struct arm64_stackframe {
@@ -5387,6 +5396,7 @@ void unwind_backtrace(struct bt_info *);
#ifdef ARM64
void arm64_init(int);
void arm64_dump_machdep_table(ulong);
+ulong arm64_VTOP(ulong);
int arm64_IS_VMALLOC_ADDR(ulong);
ulong arm64_swp_type(ulong);
ulong arm64_swp_offset(ulong);
diff --git a/main.c b/main.c
index 05787f0..4065e9a 100644
--- a/main.c
+++ b/main.c
@@ -227,9 +227,10 @@ main(int argc, char **argv)
optarg);
}
} else if (STREQ(long_options[option_index].name, "kaslr")) {
- if (!machine_type("X86_64"))
- error(INFO, "--kaslr only valid "
- "with X86_64 machine type.\n");
+ if (!machine_type("X86_64") &&
+ !machine_type("ARM64"))
+ error(INFO, "--kaslr not valid "
+ "with this machine type.\n");
else if (STREQ(optarg, "auto"))
kt->flags2 |= (RELOC_AUTO|KASLR);
else {
diff --git a/symbols.c b/symbols.c
index a8d3563..b0a6461 100644
--- a/symbols.c
+++ b/symbols.c
@@ -593,7 +593,8 @@ kaslr_init(void)
{
char *string;
- if (!machine_type("X86_64") || (kt->flags & RELOC_SET))
+ if ((!machine_type("X86_64") && !machine_type("ARM64")) ||
+ (kt->flags & RELOC_SET))
return;
/*
@@ -712,7 +713,7 @@ store_symbols(bfd *abfd, int dynamic, void *minisyms,
long symcount,
if (machine_type("X86")) {
if (!(kt->flags & RELOC_SET))
kt->flags |= RELOC_FORCE;
- } else if (machine_type("X86_64")) {
+ } else if (machine_type("X86_64") || machine_type("ARM64")) {
if ((kt->flags2 & RELOC_AUTO) && !(kt->flags & RELOC_SET))
derive_kaslr_offset(abfd, dynamic, from,
fromend, size, store);
@@ -783,7 +784,8 @@ store_sysmap_symbols(void)
error(FATAL, "symbol table namespace malloc: %s\n",
strerror(errno));
- if (!machine_type("X86") && !machine_type("X86_64"))
+ if (!machine_type("X86") && !machine_type("X86_64")
&&
+ !machine_type("ARM64"))
kt->flags &= ~RELOC_SET;
first = 0;
@@ -833,7 +835,7 @@ store_sysmap_symbols(void)
}
/*
- * Handle x86 kernels configured such that the vmlinux symbols
+ * Handle x86/arm64 kernels configured such that the vmlinux symbols
* are not as loaded into the kernel (not unity-mapped).
*/
static ulong
@@ -4681,7 +4683,7 @@ value_search(ulong value, ulong *offset)
if ((sp = machdep->value_to_symbol(value, offset)))
return sp;
- if (IS_VMALLOC_ADDR(value))
+ if (IS_VMALLOC_ADDR(value))
goto check_modules;
if ((sp = symval_hash_search(value)) == NULL)
--
2.8.1
This is looking pretty good! Except for arm64_calc_phys_offset()... ;-)
Thanks,
Dave