< 1 > The background.
The current crash code is still based at kernel v4.20, but the kernel is
v5.17-rc4(now).
The MODULE/VMALLOC/VMEMMAP ranges are not be updated since v4.20.
I list all the changes from kernel v4.20 to v5.17:
1.) The current crash code is based at kernel v4.20.
The virtual memory layout looks like this:
+--------------------------------------------------------------------+
| KASAN | MODULE | VMALLOC | .... | VMEMMAP |
+--------------------------------------------------------------------+
The macros are:
#define MODULES_VADDR (VA_START + KASAN_SHADOW_SIZE)
#define MODULES_END (MODULES_VADDR + MODULES_VSIZE)
#define VMALLOC_START (MODULES_END)
#define VMALLOC_END (PAGE_OFFSET - PUD_SIZE - VMEMMAP_SIZE - SZ_64K)
#define VMEMMAP_START (PAGE_OFFSET - VMEMMAP_SIZE)
2.) In the kernel v5.0, the patch will add a new BFP JIT region:
"91fc957c9b1d arm64/bpf: don't allocate BPF JIT programs in module
memory"
The virtual memory layout looks like this:
+--------------------------------------------------------------------+
| KASAN | BPF_JIT | MODULE | VMALLOC | .... | VMEMMAP |
+--------------------------------------------------------------------+
The macros are:
#define MODULES_VADDR (BPF_JIT_REGION_END)
#define MODULES_END (MODULES_VADDR + MODULES_VSIZE)
#define VMALLOC_START (MODULES_END)
#define VMALLOC_END (PAGE_OFFSET - PUD_SIZE - VMEMMAP_SIZE - SZ_64K)
#define VMEMMAP_START (PAGE_OFFSET - VMEMMAP_SIZE)
The layout does not changed until v5.4.
3.) In the kernel v5.4, several patches changes the layout, such as:
"ce3aaed87344 arm64: mm: Modify calculation of VMEMMAP_SIZE"
"14c127c957c1 arm64: mm: Flip kernel VA space"
and the virtual memory layout looks like this:
+--------------------------------------------------------------------+
| KASAN | BPF_JIT | MODULE | VMALLOC | .... | VMEMMAP |
+--------------------------------------------------------------------+
The macros are:
#define MODULES_VADDR (BPF_JIT_REGION_END)
#define MODULES_END (MODULES_VADDR + MODULES_VSIZE)
#define VMALLOC_START (MODULES_END)
#define VMALLOC_END (- PUD_SIZE - VMEMMAP_SIZE - SZ_64K)
#define VMEMMAP_START (-VMEMMAP_SIZE - SZ_2M)
In the v5.7, the patch:
"bbd6ec605c arm64/mm: Enable memory hot remove"
adds the VMEMMAP_END.
4.) In the kernel v5.11, several patches changes the layout, such as:
"9ad7c6d5e75b arm64: mm: tidy up top of kernel VA space"
"f4693c2716b3 arm64: mm: extend linear region for 52-bit VA
configurations"
and the virtual memory layout looks like this:
+--------------------------------------------------------------------+
| BPF_JIT | MODULE | VMALLOC | .... | VMEMMAP |
+--------------------------------------------------------------------+
The macros are:
#define MODULES_VADDR (BPF_JIT_REGION_END)
#define MODULES_END (MODULES_VADDR + MODULES_VSIZE)
#define VMALLOC_START (MODULES_END)
#define VMALLOC_END (VMEMMAP_START - SZ_256M)
#define VMEMMAP_START (-(UL(1) << (VA_BITS - VMEMMAP_SHIFT)))
#define VMEMMAP_END (VMEMMAP_START + VMEMMAP_SIZE)
5.) In the kernel v5.17-rc1, after the patch
"b89ddf4cca43 arm64/bpf: Remove 128MB limit for BPF JIT programs"
the virtual memory layout looks like this:
+--------------------------------------------------------------------+
| MODULE | VMALLOC | .... | VMEMMAP |
+--------------------------------------------------------------------+
The macros are:
#define MODULES_VADDR (_PAGE_END(VA_BITS_MIN))
#define MODULES_END (MODULES_VADDR + MODULES_VSIZE)
#define VMALLOC_START (MODULES_END)
#define VMALLOC_END (VMEMMAP_START - SZ_256M)
#define VMEMMAP_START (-(UL(1) << (VA_BITS - VMEMMAP_SHIFT)))
#define VMEMMAP_END (VMEMMAP_START + VMEMMAP_SIZE)
< 2 > What does this patch do?
1.) Use arm64_get_struct_page_size() to get the size of struct page{} in the PRE_GDB.
2.) If we can succeed in above step, we will try to call arm64_get_va_range() to
get the proper kernel virtual ranges.
In the arm64_get_va_range(), we calculate the ranges by the hooks of
different kernel versions:
get_range: arm64_get_range_v5_17,
get_range: arm64_get_range_v5_11,
get_range: arm64_get_range_v5_4,
get_range: arm64_get_range_v5_0,
3.) If we can succeed in above steps, the arm64_calc_virtual_memory_ranges()
will be ignored. If we failed in above steps, the
arm64_calc_virtual_memory_ranges()
will continue to do its work.
< 3 > Test this patch.
Tested this patch with a vmcore produced by a 5.4.119 kernel panic.
(The CONFIG_KASAN is NOT set for this kernel.)
Before this patch, we get the wrong output from "help -m":
----------------------------------------------------------
vmalloc_start_addr: ffff800048000000
vmalloc_end: fffffdffbffeffff
modules_vaddr: ffff800040000000
modules_end: ffff800047ffffff
vmemmap_vaddr: fffffdffffe00000
vmemmap_end: ffffffffffffffff
----------------------------------------------------------
After this patch, we can get the correct output from "help -m":
----------------------------------------------------------
vmalloc_start_addr: ffff800010000000
vmalloc_end: fffffdffbfff0000
modules_vaddr: ffff800008000000
modules_end: ffff800010000000
vmemmap_vaddr: fffffdffffe00000
vmemmap_end: ffffffffffffffff
----------------------------------------------------------
Signed-off-by: Huang Shijie <shijie(a)os.amperecomputing.com>
---
v2 --> v3:
Fount two bugs in arm64_get_range_v5_17/arm64_get_range_v5_11:
We should use the ms->CONFIG_ARM64_VA_BITS to calculate the
vmemmep_vaddr, not use the ms->VA_BITS.
v1 --> v2:
The Crash code is based on v4.20 not v4.9.
Changed the commit message about it.
---
arm64.c | 362 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 354 insertions(+), 8 deletions(-)
diff --git a/arm64.c b/arm64.c
index de1038a..7d0884d 100644
--- a/arm64.c
+++ b/arm64.c
@@ -92,6 +92,13 @@ static void arm64_calc_VA_BITS(void);
static int arm64_is_uvaddr(ulong, struct task_context *);
static void arm64_calc_KERNELPACMASK(void);
+struct kernel_range {
+ unsigned long modules_vaddr, modules_end;
+ unsigned long vmalloc_start_addr, vmalloc_end;
+ unsigned long vmemmap_vaddr, vmemmap_end;
+};
+static struct kernel_range *arm64_get_va_range(struct machine_specific *ms);
+static void arm64_get_struct_page_size(void);
/*
* Do all necessary machine-specific setup here. This is called several times
@@ -219,6 +226,7 @@ arm64_init(int when)
machdep->pageoffset = machdep->pagesize - 1;
machdep->pagemask = ~((ulonglong)machdep->pageoffset);
+ arm64_get_struct_page_size();
arm64_calc_VA_BITS();
arm64_calc_KERNELPACMASK();
ms = machdep->machspec;
@@ -238,35 +246,47 @@ arm64_init(int when)
}
machdep->is_kvaddr = generic_is_kvaddr;
machdep->kvtop = arm64_kvtop;
+
+ /* The defaults */
+ ms->vmalloc_end = ARM64_VMALLOC_END;
+ ms->vmemmap_vaddr = ARM64_VMEMMAP_VADDR;
+ ms->vmemmap_end = ARM64_VMEMMAP_END;
+
if (machdep->flags & NEW_VMEMMAP) {
struct syment *sp;
+ struct kernel_range *r;
sp = kernel_symbol_search("_text");
ms->kimage_text = (sp ? sp->value : 0);
sp = kernel_symbol_search("_end");
ms->kimage_end = (sp ? sp->value : 0);
- if (ms->VA_BITS_ACTUAL) {
+ if (ASSIGN_SIZE(page) && (r = arm64_get_va_range(ms))) {
+ /* We can get all the MODULES/VMALLOC/VMEMMAP ranges now.*/
+ ms->modules_vaddr = r->modules_vaddr;
+ ms->modules_end = r->modules_end;
+ ms->vmalloc_start_addr = r->vmalloc_start_addr;
+ ms->vmalloc_end = r->vmalloc_end;
+ ms->vmemmap_vaddr = r->vmemmap_vaddr;
+ ms->vmemmap_end = r->vmemmap_end;
+ } else if (ms->VA_BITS_ACTUAL) {
ms->modules_vaddr = (st->_stext_vmlinux & TEXT_OFFSET_MASK) -
ARM64_MODULES_VSIZE;
ms->modules_end = ms->modules_vaddr + ARM64_MODULES_VSIZE -1;
+ ms->vmalloc_start_addr = ms->modules_end + 1;
} else {
ms->modules_vaddr = ARM64_VA_START;
if (kernel_symbol_exists("kasan_init"))
ms->modules_vaddr += ARM64_KASAN_SHADOW_SIZE;
ms->modules_end = ms->modules_vaddr + ARM64_MODULES_VSIZE -1;
+ ms->vmalloc_start_addr = ms->modules_end + 1;
}
- ms->vmalloc_start_addr = ms->modules_end + 1;
-
arm64_calc_kimage_voffset();
} else {
ms->modules_vaddr = ARM64_PAGE_OFFSET - MEGABYTES(64);
ms->modules_end = ARM64_PAGE_OFFSET - 1;
ms->vmalloc_start_addr = ARM64_VA_START;
}
- ms->vmalloc_end = ARM64_VMALLOC_END;
- ms->vmemmap_vaddr = ARM64_VMEMMAP_VADDR;
- ms->vmemmap_end = ARM64_VMEMMAP_END;
switch (machdep->pagesize)
{
@@ -387,7 +407,9 @@ arm64_init(int when)
break;
case POST_GDB:
- arm64_calc_virtual_memory_ranges();
+ /* Can we get the size of struct page before POST_GDB */
+ if (!ASSIGN_SIZE(page))
+ arm64_calc_virtual_memory_ranges();
arm64_get_section_size_bits();
if (!machdep->max_physmem_bits) {
@@ -494,6 +516,331 @@ arm64_init(int when)
}
}
+struct kernel_va_range_handler {
+ unsigned long kernel_versions_start; /* include */
+ unsigned long kernel_versions_end; /* exclude */
+ struct kernel_range *(*get_range)(struct machine_specific *);
+};
+
+static struct kernel_range tmp_range;
+#define _PAGE_END(va) (-(1UL << ((va) - 1)))
+#define SZ_64K 0x00010000
+#define SZ_2M 0x00200000
+
+/*
+ * Get the max shift of the size of struct page.
+ * Most of the time, it is 64 bytes, but not sure.
+ */
+static int arm64_get_struct_page_max_shift(void)
+{
+ unsigned long v = ASSIGN_SIZE(page);
+
+ if (16 < v && v <= 32)
+ return 5;
+ if (32 < v && v <= 64)
+ return 6;
+ if (64 < v && v <= 128)
+ return 7;
+
+ error(FATAL, "We should not have such struct page size:%d!\n", v);
+ return 0;
+}
+
+/*
+ * The change is caused by the kernel patch since v5.17-rc1:
+ * "b89ddf4cca43 arm64/bpf: Remove 128MB limit for BPF JIT programs"
+ */
+static struct kernel_range *arm64_get_range_v5_17(struct machine_specific *ms)
+{
+ struct kernel_range *r = &tmp_range;
+ unsigned long v = ms->CONFIG_ARM64_VA_BITS;
+ unsigned long vmem_shift, vmemmap_size;
+
+ /* Not initialized yet */
+ if (v == 0)
+ return NULL;
+
+ if (v > 48)
+ v = 48;
+
+ /* Get the MODULES_VADDR ~ MODULES_END */
+ r->modules_vaddr = _PAGE_END(v);
+ r->modules_end = r->modules_vaddr + MEGABYTES(128);
+
+ /* Get the VMEMMAP_START ~ VMEMMAP_END */
+ vmem_shift = machdep->pageshift - arm64_get_struct_page_max_shift();
+ vmemmap_size = (_PAGE_END(v) - PAGE_OFFSET) >> vmem_shift;
+
+ r->vmemmap_vaddr = (-(1UL << (ms->CONFIG_ARM64_VA_BITS - vmem_shift)));
+ r->vmemmap_end = r->vmemmap_vaddr + vmemmap_size;
+
+ /* Get the VMALLOC_START ~ VMALLOC_END */
+ r->vmalloc_start_addr = r->modules_end;
+ r->vmalloc_end = r->vmemmap_vaddr - MEGABYTES(256);
+ return r;
+}
+
+/*
+ * The change is caused by the kernel patch since v5.11:
+ * "9ad7c6d5e75b arm64: mm: tidy up top of kernel VA space"
+ */
+static struct kernel_range *arm64_get_range_v5_11(struct machine_specific *ms)
+{
+ struct kernel_range *r = &tmp_range;
+ unsigned long v = ms->CONFIG_ARM64_VA_BITS;
+ unsigned long vmem_shift, vmemmap_size, bpf_jit_size = MEGABYTES(128);
+
+ /* Not initialized yet */
+ if (v == 0)
+ return NULL;
+
+ if (v > 48)
+ v = 48;
+
+ /* Get the MODULES_VADDR ~ MODULES_END */
+ r->modules_vaddr = _PAGE_END(v) + bpf_jit_size;
+ r->modules_end = r->modules_vaddr + MEGABYTES(128);
+
+ /* Get the VMEMMAP_START ~ VMEMMAP_END */
+ vmem_shift = machdep->pageshift - arm64_get_struct_page_max_shift();
+ vmemmap_size = (_PAGE_END(v) - PAGE_OFFSET) >> vmem_shift;
+
+ r->vmemmap_vaddr = (-(1UL << (ms->CONFIG_ARM64_VA_BITS - vmem_shift)));
+ r->vmemmap_end = r->vmemmap_vaddr + vmemmap_size;
+
+ /* Get the VMALLOC_START ~ VMALLOC_END */
+ r->vmalloc_start_addr = r->modules_end;
+ r->vmalloc_end = r->vmemmap_vaddr - MEGABYTES(256);
+ return r;
+}
+
+static unsigned long arm64_get_pud_size(void)
+{
+ unsigned long PUD_SIZE = 0;
+
+ switch (machdep->pagesize) {
+ case 4096:
+ if (machdep->machspec->VA_BITS > PGDIR_SHIFT_L4_4K) {
+ PUD_SIZE = PUD_SIZE_L4_4K;
+ } else {
+ PUD_SIZE = PGDIR_SIZE_L3_4K;
+ }
+ break;
+
+ case 65536:
+ PUD_SIZE = PGDIR_SIZE_L2_64K;
+ default:
+ break;
+ }
+ return PUD_SIZE;
+}
+
+/*
+ * The change is caused by the kernel patches since v5.4, such as:
+ * "ce3aaed87344 arm64: mm: Modify calculation of VMEMMAP_SIZE"
+ * "14c127c957c1 arm64: mm: Flip kernel VA space"
+ */
+static struct kernel_range *arm64_get_range_v5_4(struct machine_specific *ms)
+{
+ struct kernel_range *r = &tmp_range;
+ unsigned long v = ms->CONFIG_ARM64_VA_BITS;
+ unsigned long kasan_shadow_shift, kasan_shadow_offset, PUD_SIZE;
+ unsigned long vmem_shift, vmemmap_size, bpf_jit_size = MEGABYTES(128);
+ char *string;
+ int ret;
+
+ /* Not initialized yet */
+ if (v == 0)
+ return NULL;
+
+ if (v > 48)
+ v = 48;
+
+ /* Get the MODULES_VADDR ~ MODULES_END */
+ if (kernel_symbol_exists("kasan_init")) {
+ /* See the arch/arm64/Makefile */
+ ret = get_kernel_config("CONFIG_KASAN_SW_TAGS", NULL);
+ if (ret == IKCONFIG_N)
+ return NULL;
+ kasan_shadow_shift = (ret == IKCONFIG_Y) ? 4: 3;
+
+ /* See the arch/arm64/Kconfig*/
+ ret = get_kernel_config("CONFIG_KASAN_SHADOW_OFFSET", &string);
+ if (ret != IKCONFIG_STR)
+ return NULL;
+ kasan_shadow_offset = atol(string);
+
+ r->modules_vaddr = (1UL << (64 - kasan_shadow_shift)) + kasan_shadow_offset
+ + bpf_jit_size;
+ } else {
+ r->modules_vaddr = _PAGE_END(v) + bpf_jit_size;
+ }
+
+ r->modules_end = r->modules_vaddr + MEGABYTES(128);
+
+ /* Get the VMEMMAP_START ~ VMEMMAP_END */
+ vmem_shift = machdep->pageshift - arm64_get_struct_page_max_shift();
+ vmemmap_size = (_PAGE_END(v) - PAGE_OFFSET) >> vmem_shift;
+
+ r->vmemmap_vaddr = (-vmemmap_size - SZ_2M);
+ if (THIS_KERNEL_VERSION >= LINUX(5, 7, 0)) {
+ /*
+ * In the v5.7, the patch: "bbd6ec605c arm64/mm: Enable memory hot remove"
+ * adds the VMEMMAP_END.
+ */
+ r->vmemmap_end = r->vmemmap_vaddr + vmemmap_size;
+ } else {
+ r->vmemmap_end = 0xffffffffffffffffUL;
+ }
+
+ /* Get the VMALLOC_START ~ VMALLOC_END */
+ PUD_SIZE = arm64_get_pud_size();
+ r->vmalloc_start_addr = r->modules_end;
+ r->vmalloc_end = (-PUD_SIZE - vmemmap_size - SZ_64K);
+ return r;
+}
+
+/*
+ * The change is caused by the kernel patches since v5.0, such as:
+ * "91fc957c9b1d arm64/bpf: don't allocate BPF JIT programs in module
memory"
+ */
+static struct kernel_range *arm64_get_range_v5_0(struct machine_specific *ms)
+{
+ struct kernel_range *r = &tmp_range;
+ unsigned long v = ms->CONFIG_ARM64_VA_BITS;
+ unsigned long kasan_shadow_shift, PUD_SIZE;
+ unsigned long vmemmap_size, bpf_jit_size = MEGABYTES(128);
+ unsigned long va_start, page_offset;
+ int ret;
+
+ /* Not initialized yet */
+ if (v == 0)
+ return NULL;
+
+ va_start = (0xffffffffffffffffUL - (1UL << v) + 1);
+ page_offset = (0xffffffffffffffffUL - (1UL << (v - 1)) + 1);
+
+ /* Get the MODULES_VADDR ~ MODULES_END */
+ if (kernel_symbol_exists("kasan_init")) {
+ /* See the arch/arm64/Makefile */
+ ret = get_kernel_config("CONFIG_KASAN_SW_TAGS", NULL);
+ if (ret == IKCONFIG_N)
+ return NULL;
+ kasan_shadow_shift = (ret == IKCONFIG_Y) ? 4: 3;
+
+ r->modules_vaddr = va_start + (1UL << (v - kasan_shadow_shift)) +
bpf_jit_size;
+ } else {
+ r->modules_vaddr = va_start + bpf_jit_size;
+ }
+
+ r->modules_end = r->modules_vaddr + MEGABYTES(128);
+
+ /* Get the VMEMMAP_START ~ VMEMMAP_END */
+ vmemmap_size = (1UL << (v - machdep->pageshift - 1 +
arm64_get_struct_page_max_shift()));
+
+ r->vmemmap_vaddr = page_offset - vmemmap_size;
+ r->vmemmap_end = 0xffffffffffffffffUL; /* this kernel does not have VMEMMAP_END */
+
+ /* Get the VMALLOC_START ~ VMALLOC_END */
+ PUD_SIZE = arm64_get_pud_size();
+
+ r->vmalloc_start_addr = r->modules_end;
+ r->vmalloc_end = page_offset - PUD_SIZE - vmemmap_size - SZ_64K;
+ return r;
+}
+
+static struct kernel_va_range_handler kernel_va_range_handlers[] = {
+ {
+ LINUX(5,17,0),
+ LINUX(6,0,0), /* Just a boundary, Change it later */
+ get_range: arm64_get_range_v5_17,
+ }, {
+ LINUX(5,11,0), LINUX(5,17,0),
+ get_range: arm64_get_range_v5_11,
+ }, {
+ LINUX(5,4,0), LINUX(5,11,0),
+ get_range: arm64_get_range_v5_4,
+ }, {
+ LINUX(5,0,0), LINUX(5,4,0),
+ get_range: arm64_get_range_v5_0,
+ },
+};
+
+#define ARRAY_SIZE(a) (sizeof (a) / sizeof ((a)[0]))
+
+static unsigned long arm64_get_kernel_version(void)
+{
+ char *string;
+ char buf[BUFSIZE];
+ char *p1, *p2;
+
+ if (THIS_KERNEL_VERSION)
+ return THIS_KERNEL_VERSION;
+
+ string = pc->read_vmcoreinfo("OSRELEASE");
+ if (string) {
+ strcpy(buf, string);
+
+ p1 = p2 = buf;
+ while (*p2 != '.')
+ p2++;
+ *p2 = NULLCHAR;
+ kt->kernel_version[0] = atoi(p1);
+
+ p1 = ++p2;
+ while (*p2 != '.')
+ p2++;
+ *p2 = NULLCHAR;
+ kt->kernel_version[1] = atoi(p1);
+
+ p1 = ++p2;
+ while ((*p2 >= '0') && (*p2 <= '9'))
+ p2++;
+ *p2 = NULLCHAR;
+ kt->kernel_version[2] = atoi(p1);
+ }
+ free(string);
+ return THIS_KERNEL_VERSION;
+}
+
+/* Return NULL if we fail. */
+static struct kernel_range *arm64_get_va_range(struct machine_specific *ms)
+{
+ struct kernel_va_range_handler *h;
+ unsigned long kernel_version = THIS_KERNEL_VERSION;
+ int i;
+
+ if (!kernel_version) {
+ kernel_version = arm64_get_kernel_version();
+ if (!kernel_version)
+ return NULL;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(kernel_va_range_handlers); i++) {
+ h = kernel_va_range_handlers + i;
+
+ /* Get the right kernel version */
+ if (h->kernel_versions_start <= kernel_version &&
+ kernel_version < h->kernel_versions_end) {
+
+ /* Get the correct virtual address ranges */
+ return h->get_range(ms);
+ }
+ }
+ return NULL;
+}
+
+/* Get the size of struct page {} */
+static void arm64_get_struct_page_size()
+{
+ char *string;
+
+ string = pc->read_vmcoreinfo("SIZE(page)");
+ if (string)
+ ASSIGN_SIZE(page) = atol(string);
+ free(string);
+}
+
/*
* Accept or reject a symbol from the kernel namelist.
*/
@@ -4255,7 +4602,6 @@ arm64_calc_VA_BITS(void)
#define ALIGN(x, a) __ALIGN_KERNEL((x), (a))
#define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1)
#define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask))
-#define SZ_64K 0x00010000
static void
arm64_calc_virtual_memory_ranges(void)
--
2.30.2