On 2022/07/18 11:53, Xianting Tian wrote:
1. Add the riscv64_init() implementation, which does all necessary
machine-specific setup
and is called multiple times during initialization.
2. Add riscv64 sv39/48/57 page table macro definitions and the functions that convert
a virtual address to a physical address via a 4K page table.
3. Add the implementation of the vtop command, which is used to convert a
virtual address to a physical address(call the functions defined in 2).
4. Add the implementation to get the virtual memory layout, va_bits and phys_ram_base
from vmcoreinfo. As these configurations change from time to time, we sent
a Linux kernel patch to export them, which simplifies the
development of the crash tool.
The Linux patch(patch 3 of the series of patches):
https://lore.kernel.org/linux-riscv/20220717101323.370245-1-xianting.tian...
5. Add riscv64_get_smp_cpus() implementation, get the number of online cpus.
6. Add riscv64_get_page_size() implementation, get page size.
And so on.
With this patch, we can enter crash command line, and run "vtop",
"mod", "rd",
"*", "p", "kmem" ...
Tested on a QEMU RISCV64 env and on an SoC platform with the T-head Xuantie 910 CPU.
KERNEL: vmlinux
DUMPFILE: vmcore
CPUS: 1
DATE: Fri Jul 15 10:24:25 CST 2022
UPTIME: 00:00:33
LOAD AVERAGE: 0.05, 0.01, 0.00
TASKS: 41
NODENAME: buildroot
RELEASE: 5.18.9
VERSION: #30 SMP Fri Jul 15 09:47:03 CST 2022
MACHINE: riscv64 (unknown Mhz)
MEMORY: 1 GB
PANIC: "Kernel panic - not syncing: sysrq triggered crash"
PID: 113
COMMAND: "sh"
TASK: ff60000002269600 [THREAD_INFO: ff60000002269600]
CPU: 0
STATE: TASK_RUNNING (PANIC)
crash> p mem_map
mem_map = $1 = (struct page *) 0xff6000003effbf00
crash> p /x *(struct page *) 0xff6000003effbf00
$5 = {
flags = 0x1000,
{
{
{
lru = {
next = 0xff6000003effbf08,
prev = 0xff6000003effbf08
},
{
__filler = 0xff6000003effbf08,
mlock_count = 0x3effbf08
}
},
mapping = 0x0,
index = 0x0,
private = 0x0
},
crash> mod
MODULE NAME BASE SIZE OBJECT FILE
ffffffff0113e740 nvme_core ffffffff01133000 98304 (not loaded) [CONFIG_KALLSYMS]
ffffffff011542c0 nvme ffffffff0114c000 61440 (not loaded) [CONFIG_KALLSYMS]
crash> rd ffffffff0113e740 8
ffffffff0113e740: 0000000000000000 ffffffff810874f8 .........t......
ffffffff0113e750: ffffffff011542c8 726f635f656d766e .B......nvme_cor
ffffffff0113e760: 0000000000000065 0000000000000000 e...............
ffffffff0113e770: 0000000000000000 0000000000000000 ................
crash> vtop ffffffff0113e740
VIRTUAL PHYSICAL
ffffffff0113e740 8254d740
PGD: ffffffff810e9ff8 => 2ffff001
P4D: 0000000000000000 => 000000002fffec01
PUD: 00005605c2957470 => 0000000020949801
PMD: 00007fff7f1750c0 => 0000000020947401
PTE: 0 => 209534e7
PAGE: 000000008254d000
PTE PHYSICAL FLAGS
209534e7 8254d000 (PRESENT|READ|WRITE|GLOBAL|ACCESSED|DIRTY)
PAGE PHYSICAL MAPPING INDEX CNT FLAGS
ff6000003f0777d8 8254d000 0 0 1 0
Signed-off-by: Xianting Tian <xianting.tian(a)linux.alibaba.com>
---
defs.h | 93 +++++
diskdump.c | 10 +
riscv64.c | 983 +++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 1086 insertions(+)
diff --git a/defs.h b/defs.h
index 42ffba3..be4db94 100644
--- a/defs.h
+++ b/defs.h
@@ -3494,6 +3494,81 @@ struct arm64_stackframe {
#define _64BIT_
#define MACHINE_TYPE "RISCV64"
+typedef struct { ulong pgd; } pgd_t;
+typedef struct { ulong p4d; } p4d_t;
+typedef struct { ulong pud; } pud_t;
+typedef struct { ulong pmd; } pmd_t;
+typedef struct { ulong pte; } pte_t;
+typedef signed int s32;
+
+/* arch/riscv/include/asm/pgtable-64.h */
+#define PGD_SHIFT_L3 (30)
+#define PGD_SHIFT_L4 (39)
+#define PGD_SHIFT_L5 (48)
+
+#define P4D_SHIFT (39)
+#define PUD_SHIFT (30)
+#define PMD_SHIFT (21)
+
+#define PTRS_PER_PGD (512)
+#define PTRS_PER_P4D (512)
+#define PTRS_PER_PUD (512)
+#define PTRS_PER_PMD (512)
+#define PTRS_PER_PTE (512)
+
+/*
+ * Mask for PPN and PROT bit53~0 of PTE
+ * 63 6261 60 54 53 10 9 8 7 6 5 4 3 2 1 0
+ * N PBMT Reserved P P N RSW D A G U X W R V
+ */
+#define PTE_PFN_PROT_MASK 0x3FFFFFFFFFFFFF
+
+/*
+ * 3-levels / 4K pages
+ *
+ * sv39
+ * PGD | PMD | PTE | OFFSET |
+ * 9 | 9 | 9 | 12 |
+ */
+#define pgd_index_l3_4k(addr) (((addr) >> PGD_SHIFT_L3) & (PTRS_PER_PGD -
1))
+#define pmd_index_l3_4k(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
+#define pte_index_l3_4k(addr) (((addr) >> PAGESHIFT()) & (PTRS_PER_PTE - 1))
+
+/*
+ * 4-levels / 4K pages
+ *
+ * sv48
+ * PGD | PUD | PMD | PTE | OFFSET |
+ * 9 | 9 | 9 | 9 | 12 |
+ */
+#define pgd_index_l4_4k(addr) (((addr) >> PGD_SHIFT_L4) & (PTRS_PER_PGD -
1))
+#define pud_index_l4_4k(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
+#define pmd_index_l4_4k(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
+#define pte_index_l4_4k(addr) (((addr) >> PAGESHIFT()) & (PTRS_PER_PTE - 1))
+
+/*
+ * 5-levels / 4K pages
+ *
+ * sv57
+ * PGD | P4D | PUD | PMD | PTE | OFFSET |
+ * 9 | 9 | 9 | 9 | 9 | 12 |
+ */
+#define pgd_index_l5_4k(addr) (((addr) >> PGD_SHIFT_L5) & (PTRS_PER_PGD -
1))
+#define p4d_index_l5_4k(addr) (((addr) >> P4D_SHIFT) & (PTRS_PER_P4D - 1))
+#define pud_index_l5_4k(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
+#define pmd_index_l5_4k(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
+#define pte_index_l5_4k(addr) (((addr) >> PAGESHIFT()) & (PTRS_PER_PTE - 1))
+
+#define VM_L3_4K (0x2)
+#define VM_L3_2M (0x4)
+#define VM_L3_1G (0x8)
+#define VM_L4_4K (0x10)
+#define VM_L4_2M (0x20)
+#define VM_L4_1G (0x40)
+#define VM_L5_4K (0x80)
+#define VM_L5_2M (0x100)
+#define VM_L5_1G (0x200)
+
/*
* Direct memory mapping
*/
@@ -3545,6 +3620,14 @@ struct arm64_stackframe {
#define PHYS_MASK_SHIFT _MAX_PHYSMEM_BITS
#define PHYS_MASK (((1UL) << PHYS_MASK_SHIFT) - 1)
+#define IS_LAST_P4D_READ(p4d) ((ulong)(p4d) == machdep->machspec->last_p4d_read)
+#define FILL_P4D(P4D, TYPE, SIZE) \
+ if (!IS_LAST_P4D_READ(P4D)) { \
+ readmem((ulonglong)((ulong)(P4D)), TYPE, machdep->machspec->p4d, \
+ SIZE, "p4d page", FAULT_ON_ERROR); \
+ machdep->machspec->last_p4d_read = (ulong)(P4D); \
+ }
+
#endif /* RISCV64 */
#ifdef X86
@@ -6810,6 +6893,10 @@ struct machine_specific {
ulong _page_soft;
ulong _pfn_shift;
+ ulong va_bits;
+ char *p4d;
+ ulong last_p4d_read;
+ ulong struct_page_size;
struct riscv64_register *crash_task_regs;
};
@@ -6833,6 +6920,12 @@ struct machine_specific {
#define _PAGE_PROT_NONE _PAGE_READ
#define _PAGE_PFN_SHIFT 10
+/* from 'struct pt_regs' definitions of RISC-V arch */
+#define RISCV64_REGS_EPC 0
+#define RISCV64_REGS_RA 1
+#define RISCV64_REGS_SP 2
+#define RISCV64_REGS_FP 8
+
#endif /* RISCV64 */
/*
diff --git a/diskdump.c b/diskdump.c
index 28503bc..cf5f5d9 100644
--- a/diskdump.c
+++ b/diskdump.c
@@ -1531,6 +1531,12 @@ get_diskdump_regs_mips(struct bt_info *bt, ulong *eip, ulong
*esp)
machdep->get_stack_frame(bt, eip, esp);
}
+static void
+get_diskdump_regs_riscv64(struct bt_info *bt, ulong *eip, ulong *esp)
+{
+ machdep->get_stack_frame(bt, eip, esp);
+}
+
static void
get_diskdump_regs_sparc64(struct bt_info *bt, ulong *eip, ulong *esp)
{
@@ -1610,6 +1616,10 @@ get_diskdump_regs(struct bt_info *bt, ulong *eip, ulong *esp)
get_diskdump_regs_sparc64(bt, eip, esp);
break;
+ case EM_RISCV:
+ get_diskdump_regs_riscv64(bt, eip, esp);
+ break;
+
default:
error(FATAL, "%s: unsupported machine type: %s\n",
DISKDUMP_VALID() ? "diskdump" : "compressed kdump",
diff --git a/riscv64.c b/riscv64.c
index c7df857..9d40297 100644
--- a/riscv64.c
+++ b/riscv64.c
@@ -16,10 +16,304 @@
#include <elf.h>
#include "defs.h"
+#include <math.h>
+
+static ulong riscv64_get_page_size(void);
+static int riscv64_vtop_3level_4k(ulong *pgd, ulong vaddr,
+ physaddr_t *paddr, int verbose);
+static int riscv64_vtop_4level_4k(ulong *pgd, ulong vaddr,
+ physaddr_t *paddr, int verbose);
+static int riscv64_vtop_5level_4k(ulong *pgd, ulong vaddr,
+ physaddr_t *paddr, int verbose);
+static void riscv64_page_type_init(void);
+static int riscv64_is_kvaddr(ulong vaddr);
+static int riscv64_is_uvaddr(ulong vaddr, struct task_context *tc);
+static int riscv64_uvtop(struct task_context *tc, ulong vaddr,
+ physaddr_t *paddr, int verbose);
+static int riscv64_kvtop(struct task_context *tc, ulong kvaddr,
+ physaddr_t *paddr, int verbose);
(Overall, you can use about 100 chars per line, if you like.)
+static void riscv64_cmd_mach(void);
+static int riscv64_translate_pte(ulong, void *, ulonglong);
+static int riscv64_init_active_task_regs(void);
+static int riscv64_get_crash_notes(void);
+static int riscv64_get_elf_notes(void);
+static void riscv64_get_va_range(struct machine_specific *ms);
+static void riscv64_get_struct_page_size(struct machine_specific *ms);
+
+#define REG_FMT "%016lx"
+#define SZ_2G 0x80000000
+
+/*
+ * Holds registers during the crash.
+ */
+static struct riscv64_register *panic_task_regs;
+
+/* from arch/riscv/include/asm/stacktrace.h */
+struct stackframe {
+ ulong fp;
+ ulong ra;
+};
+
+static struct machine_specific riscv64_machine_specific = {
+ ._page_present = (1 << 0),
+ ._page_read = (1 << 1),
+ ._page_write = (1 << 2),
+ ._page_exec = (1 << 3),
+ ._page_user = (1 << 4),
+ ._page_global = (1 << 5),
+ ._page_accessed = (1 << 6),
+ ._page_dirty = (1 << 7),
+ ._page_soft = (1 << 8),
+
+ .va_bits = 0,
+ .struct_page_size = 0,
+};
+
+static void
+pt_level_alloc(char **lvl, char *name)
+{
+ size_t sz = PAGESIZE();
+ void *pointer = malloc(sz);
+
+ if (!pointer)
+ error(FATAL, name);
+ *lvl = pointer;
+}
+
+static void
+riscv64_get_phys_ram_base(struct machine_specific *ms)
+{
+ char *string;
+
+ if ((string = pc->read_vmcoreinfo("NUMBER(phys_ram_base)"))) {
+ ms->phys_base = atol(string);
+ free(string);
+ } else
+ /*
+ * It can't continue without phys_ram_base. phys_ram_base may
+ * differ between the qemu rv64 env and hardware platforms.
+ */
+ error(FATAL, "cannot read phys_ram_base\n");
+}
+
+static ulong
+riscv64_get_page_size(void)
+{
+ return memory_page_size();
+}
+
+static ulong
+riscv64_vmalloc_start(void)
+{
+ return ((ulong)VMALLOC_START);
+}
+
+/* Get the size of struct page {} */
+static void riscv64_get_struct_page_size(struct machine_specific *ms)
+{
+ char *string;
+
+ string = pc->read_vmcoreinfo("SIZE(page)");
+ if (string)
+ ms->struct_page_size = atol(string);
+ free(string);
+}
+
+/*
+ * Get the max shift of the size of struct page.
+ * Most of the time, it is 64 bytes, but not sure.
+*/
+static int riscv64_get_struct_page_max_shift(struct machine_specific *ms)
+{
+ return (int)ceil(log2(ms->struct_page_size));
+}
+
+static void
+riscv64_cmd_mach(void)
+{
+ /* TODO: */
+}
+
+static int
+riscv64_verify_symbol(const char *name, ulong value, char type)
+{
+ /* TODO: */
+ return TRUE;
+}
void
riscv64_dump_machdep_table(ulong arg)
{
+ /* TODO: */
+}
+
+static ulong
+riscv64_processor_speed(void)
+{
+ /* TODO: */
+ return 0;
+}
+
+static unsigned long riscv64_get_kernel_version(void)
+{
+ char *string;
+ char buf[BUFSIZE];
+ char *p1, *p2;
+
+ if (THIS_KERNEL_VERSION)
+ return THIS_KERNEL_VERSION;
+
+ string = pc->read_vmcoreinfo("OSRELEASE");
+ if (string) {
+ strcpy(buf, string);
+
+ p1 = p2 = buf;
+ while (*p2 != '.')
+ p2++;
+ *p2 = NULLCHAR;
+ kt->kernel_version[0] = atoi(p1);
+
+ p1 = ++p2;
+ while (*p2 != '.')
+ p2++;
+ *p2 = NULLCHAR;
+ kt->kernel_version[1] = atoi(p1);
+
+ p1 = ++p2;
+ while ((*p2 >= '0') && (*p2 <= '9'))
+ p2++;
+ *p2 = NULLCHAR;
+ kt->kernel_version[2] = atoi(p1);
+ }
+ free(string);
+ return THIS_KERNEL_VERSION;
+}
+
+static void riscv64_get_va_range(struct machine_specific *ms)
+{
+ unsigned long kernel_version = riscv64_get_kernel_version();
+ char *string;
+
+ if ((string = pc->read_vmcoreinfo("NUMBER(VA_BITS)"))) {
+ ms->va_bits = atol(string);
+ free(string);
+ } else
+ goto error;
+ if ((string = pc->read_vmcoreinfo("NUMBER(PAGE_OFFSET)"))) {
+ ms->page_offset = htol(string, QUIET, NULL);
+ free(string);
+ } else
+ goto error;
+
+ if ((string = pc->read_vmcoreinfo("NUMBER(VMALLOC_START)"))) {
+ ms->vmalloc_start_addr = htol(string, QUIET, NULL);
+ free(string);
+ } else
+ goto error;
+
+ if ((string = pc->read_vmcoreinfo("NUMBER(VMALLOC_END)"))) {
+ ms->vmalloc_end = htol(string, QUIET, NULL);
+ free(string);
+ } else
+ goto error;
+
+ if ((string = pc->read_vmcoreinfo("NUMBER(VMEMMAP_START)"))) {
+ ms->vmemmap_vaddr = htol(string, QUIET, NULL);
+ free(string);
+ } else
+ goto error;
+
+ if ((string = pc->read_vmcoreinfo("NUMBER(VMEMMAP_END)"))) {
+ ms->vmemmap_end = htol(string, QUIET, NULL);
+ free(string);
+ } else
+ goto error;
+
+ if ((string = pc->read_vmcoreinfo("NUMBER(KERNEL_LINK_ADDR)"))) {
+ ms->kernel_link_addr = htol(string, QUIET, NULL);
+ free(string);
+ } else
+ goto error;
+
+ if ((string = pc->read_vmcoreinfo("NUMBER(ADDRESS_SPACE_END)"))) {
+ ms->address_space_end = htol(string, QUIET, NULL);
+ free(string);
+ } else
+ goto error;
+
+ /*
+ * From Linux 5.13, the kernel mapping is moved to the last 2GB
+ * of the address space, modules use the 2GB memory range right
+ * before the kernel. Before Linux 5.13, modules area is embedded
+ * in vmalloc area.
+ *
+ * 5.13 = 0x5 << 16 | 0xD << 8
+ */
+ if (kernel_version >= 0x50D00) {
Please use the LINUX() macro here, e.g. LINUX(5,13,0), instead of the hard-coded 0x50D00.
+ if ((string =
pc->read_vmcoreinfo("NUMBER(MODULES_VADDR)"))) {
+ ms->modules_vaddr = htol(string, QUIET, NULL);
+ free(string);
+ } else
+ goto error;
+
+ if ((string = pc->read_vmcoreinfo("NUMBER(MODULES_END)"))) {
+ ms->modules_end = htol(string, QUIET, NULL);
+ free(string);
+ } else
+ goto error;
+ } else {
+ ms->modules_vaddr = ms->vmalloc_start_addr;
+ ms->modules_end = ms->vmalloc_end;
+ }
+
+ if (CRASHDEBUG(8)) {
+ fprintf(fp, "va_bits : %ld\n", ms->va_bits);
+ fprintf(fp, "vmemmap : 0x%lx - 0x%lx\n",
+ ms->vmemmap_vaddr, ms->vmemmap_end);
+ fprintf(fp, "vmalloc : 0x%lx - 0x%lx\n",
+ ms->vmalloc_start_addr, ms->vmalloc_end);
+ fprintf(fp, "lowmem : 0x%lx -\n", ms->page_offset);
+ fprintf(fp, "mudules : 0x%lx - 0x%lx\n",
+ ms->modules_vaddr, ms->modules_end);
+ fprintf(fp, "kernel : 0x%lx - 0x%lx\n",
+ ms->kernel_link_addr, ms->address_space_end);
This is essential information, so I think CRASHDEBUG(1) would be better
for checking it.
The other patches look good to me.
Thanks,
Kazu
> + }
> + return;
> +error:
> + error(FATAL, "cannot get vm layout\n");
> +}
> +
> +static int
> +riscv64_is_kvaddr(ulong vaddr)
> +{
> + if (IS_VMALLOC_ADDR(vaddr))
> + return TRUE;
> +
> + return (vaddr >= machdep->kvbase);
> +}
> +
> +static int
> +riscv64_is_uvaddr(ulong vaddr, struct task_context *unused)
> +{
> + if (IS_VMALLOC_ADDR(vaddr))
> + return FALSE;
> +
> + return (vaddr < machdep->kvbase);
> +}
> +
> +static int
> +riscv64_is_task_addr(ulong task)
> +{
> + if (tt->flags & THREAD_INFO)
> + return IS_KVADDR(task);
> +
> + return (IS_KVADDR(task) && ALIGNED_STACK_OFFSET(task) == 0);
> +}
> +
> +static int
> +riscv64_get_smp_cpus(void)
> +{
> + return (get_cpus_online() > 0) ? get_cpus_online() : kt->cpus;
> }
>
> /*
> @@ -33,11 +327,700 @@ riscv64_IS_VMALLOC_ADDR(ulong vaddr)
> (vaddr >= MODULES_VADDR && vaddr <= MODULES_END));
> }
>
> +/*
> + * Translate a PTE, returning TRUE if the page is present.
> + * If a physaddr pointer is passed in, don't print anything.
> + */
> +static int
> +riscv64_translate_pte(ulong pte, void *physaddr, ulonglong unused)
> +{
> + char ptebuf[BUFSIZE];
> + char physbuf[BUFSIZE];
> + char buf[BUFSIZE];
> + int page_present;
> + int len1, len2, others;
> + ulong paddr;
> +
> + paddr = PTOB(pte >> _PAGE_PFN_SHIFT);
> + page_present = !!(pte & _PAGE_PRESENT);
> +
> + if (physaddr) {
> + *(ulong *)physaddr = paddr;
> + return page_present;
> + }
> +
> + sprintf(ptebuf, "%lx", pte);
> + len1 = MAX(strlen(ptebuf), strlen("PTE"));
> + fprintf(fp, "%s ", mkstring(buf, len1, CENTER | LJUST,
"PTE"));
> +
> + if (!page_present)
> + return page_present;
> +
> + sprintf(physbuf, "%lx", paddr);
> + len2 = MAX(strlen(physbuf), strlen("PHYSICAL"));
> + fprintf(fp, "%s ", mkstring(buf, len2, CENTER | LJUST,
"PHYSICAL"));
> +
> + fprintf(fp, "FLAGS\n");
> + fprintf(fp, "%s %s ",
> + mkstring(ptebuf, len1, CENTER | RJUST, NULL),
> + mkstring(physbuf, len2, CENTER | RJUST, NULL));
> +
> + fprintf(fp, "(");
> + others = 0;
> +
> +#define CHECK_PAGE_FLAG(flag) \
> + if ((_PAGE_##flag) && (pte & _PAGE_##flag)) \
> + fprintf(fp, "%s" #flag, others++ ? "|" : "")
> + if (pte) {
> + CHECK_PAGE_FLAG(PRESENT);
> + CHECK_PAGE_FLAG(READ);
> + CHECK_PAGE_FLAG(WRITE);
> + CHECK_PAGE_FLAG(EXEC);
> + CHECK_PAGE_FLAG(USER);
> + CHECK_PAGE_FLAG(GLOBAL);
> + CHECK_PAGE_FLAG(ACCESSED);
> + CHECK_PAGE_FLAG(DIRTY);
> + CHECK_PAGE_FLAG(SOFT);
> + } else {
> + fprintf(fp, "no mapping");
> + }
> +
> + fprintf(fp, ")\n");
> +
> + return page_present;
> +}
> +
> +static void
> +riscv64_page_type_init(void)
> +{
> + ulong va_bits = machdep->machspec->va_bits;
> +
> + /*
> + * For RISCV64 arch, any level of PTE may be a leaf PTE,
> + * so in addition to 4KiB pages,
> + * Sv39 supports 2 MiB megapages, 1 GiB gigapages;
> + * Sv48 supports 2 MiB megapages, 1 GiB gigapages, 512 GiB terapages;
> + * Sv57 supports 2 MiB megapages, 1 GiB gigapages, 512 GiB terapages, and 256 TiB
petapages.
> + *
> + * refs to riscv-privileged spec.
> + *
> + * We just support 4KiB, 2MiB, 1GiB now.
> + */
> + switch (machdep->pagesize)
> + {
> + case 0x1000: // 4 KiB
> + machdep->flags |= (va_bits == 57 ? VM_L5_4K :
> + (va_bits == 48 ? VM_L4_4K : VM_L3_4K));
> + break;
> + case 0x200000: // 2 MiB
> + /* TODO: */
> + case 0x40000000: // 1 GiB
> + /* TODO: */
> + default:
> + if (machdep->pagesize)
> + error(FATAL, "invalid/unsupported page size: %d\n",
> + machdep->pagesize);
> + else
> + error(FATAL, "cannot determine page size\n");
> + }
> +}
> +
> +static int
> +riscv64_vtop_3level_4k(ulong *pgd, ulong vaddr, physaddr_t *paddr, int verbose)
> +{
> + ulong *pgd_ptr, pgd_val;
> + ulong *pmd_ptr, pmd_val;
> + ulong *pte_ptr, pte_val, pte_pfn;
> + ulong pt_phys;
> +
> + /* PGD */
> + pgd_ptr = pgd + pgd_index_l3_4k(vaddr);
> + FILL_PGD(pgd, KVADDR, PAGESIZE());
> + pgd_val = ULONG(machdep->pgd + PAGEOFFSET(pgd_ptr));
> + if (verbose)
> + fprintf(fp, " PGD: %lx => %lx\n", (ulong)pgd_ptr, pgd_val);
> + if (!pgd_val)
> + goto no_page;
> + pgd_val &= PTE_PFN_PROT_MASK;
> + pt_phys = (pgd_val >> _PAGE_PFN_SHIFT) << PAGESHIFT();
> +
> + /* PMD */
> + FILL_PMD(PAGEBASE(pt_phys), PHYSADDR, PAGESIZE());
> + pmd_val = ULONG(machdep->pmd + PAGEOFFSET(sizeof(pmd_t) *
> + pmd_index_l3_4k(vaddr)));
> + if (verbose)
> + fprintf(fp, " PMD: %016lx => %016lx\n", (ulong)pmd_ptr, pmd_val);
> + if (!pmd_val)
> + goto no_page;
> + pmd_val &= PTE_PFN_PROT_MASK;
> + pt_phys = (pmd_val >> _PAGE_PFN_SHIFT) << PAGESHIFT();
> +
> + /* PTE */
> + FILL_PTBL(PAGEBASE(pt_phys), PHYSADDR, PAGESIZE());
> + pte_val = ULONG(machdep->ptbl + PAGEOFFSET(sizeof(pte_t) *
> + pte_index_l3_4k(vaddr)));
> + if (verbose)
> + fprintf(fp, " PTE: %lx => %lx\n", (ulong)pte_ptr, pte_val);
> + if (!pte_val)
> + goto no_page;
> + pte_val &= PTE_PFN_PROT_MASK;
> + pte_pfn = pte_val >> _PAGE_PFN_SHIFT;
> +
> + if (!(pte_val & _PAGE_PRESENT)) {
> + if (verbose) {
> + fprintf(fp, "\n");
> + riscv64_translate_pte((ulong)pte_val, 0, 0);
> + }
> + fprintf(fp, " PAGE: %016lx not present\n\n", PAGEBASE(*paddr));
> + return FALSE;
> + }
> +
> + *paddr = PTOB(pte_pfn) + PAGEOFFSET(vaddr);
> +
> + if (verbose) {
> + fprintf(fp, " PAGE: %016lx\n\n", PAGEBASE(*paddr));
> + riscv64_translate_pte(pte_val, 0, 0);
> + }
> +
> + return TRUE;
> +no_page:
> + fprintf(fp, "invalid\n");
> + return FALSE;
> +}
> +
> +static int
> +riscv64_vtop_4level_4k(ulong *pgd, ulong vaddr, physaddr_t *paddr, int verbose)
> +{
> + ulong *pgd_ptr, pgd_val;
> + ulong *pud_ptr, pud_val;
> + ulong *pmd_ptr, pmd_val;
> + ulong *pte_ptr, pte_val, pte_pfn;
> + ulong pt_phys;
> +
> + /* PGD */
> + pgd_ptr = pgd + pgd_index_l4_4k(vaddr);
> + FILL_PGD(pgd, KVADDR, PAGESIZE());
> + pgd_val = ULONG(machdep->pgd + PAGEOFFSET(pgd_ptr));
> + if (verbose)
> + fprintf(fp, " PGD: %lx => %lx\n", (ulong)pgd_ptr, pgd_val);
> + if (!pgd_val)
> + goto no_page;
> + pgd_val &= PTE_PFN_PROT_MASK;
> + pt_phys = (pgd_val >> _PAGE_PFN_SHIFT) << PAGESHIFT();
> +
> + /* PUD */
> + FILL_PUD(PAGEBASE(pt_phys), PHYSADDR, PAGESIZE());
> + pud_val = ULONG(machdep->pud + PAGEOFFSET(sizeof(pud_t) *
> + pud_index_l4_4k(vaddr)));
> + if (verbose)
> + fprintf(fp, " PUD: %016lx => %016lx\n", (ulong)pud_ptr, pud_val);
> + if (!pud_val)
> + goto no_page;
> + pud_val &= PTE_PFN_PROT_MASK;
> + pt_phys = (pud_val >> _PAGE_PFN_SHIFT) << PAGESHIFT();
> +
> + /* PMD */
> + FILL_PMD(PAGEBASE(pt_phys), PHYSADDR, PAGESIZE());
> + pmd_val = ULONG(machdep->pmd + PAGEOFFSET(sizeof(pmd_t) *
> + pmd_index_l4_4k(vaddr)));
> + if (verbose)
> + fprintf(fp, " PMD: %016lx => %016lx\n", (ulong)pmd_ptr, pmd_val);
> + if (!pmd_val)
> + goto no_page;
> + pmd_val &= PTE_PFN_PROT_MASK;
> + pt_phys = (pmd_val >> _PAGE_PFN_SHIFT) << PAGESHIFT();
> +
> + /* PTE */
> + FILL_PTBL(PAGEBASE(pt_phys), PHYSADDR, PAGESIZE());
> + pte_val = ULONG(machdep->ptbl + PAGEOFFSET(sizeof(pte_t) *
> + pte_index_l4_4k(vaddr)));
> + if (verbose)
> + fprintf(fp, " PTE: %lx => %lx\n", (ulong)pte_ptr, pte_val);
> + if (!pte_val)
> + goto no_page;
> + pte_val &= PTE_PFN_PROT_MASK;
> + pte_pfn = pte_val >> _PAGE_PFN_SHIFT;
> +
> + if (!(pte_val & _PAGE_PRESENT)) {
> + if (verbose) {
> + fprintf(fp, "\n");
> + riscv64_translate_pte((ulong)pte_val, 0, 0);
> + }
> + fprintf(fp, " PAGE: %016lx not present\n\n", PAGEBASE(*paddr));
> + return FALSE;
> + }
> +
> + *paddr = PTOB(pte_pfn) + PAGEOFFSET(vaddr);
> +
> + if (verbose) {
> + fprintf(fp, " PAGE: %016lx\n\n", PAGEBASE(*paddr));
> + riscv64_translate_pte(pte_val, 0, 0);
> + }
> +
> + return TRUE;
> +no_page:
> + fprintf(fp, "invalid\n");
> + return FALSE;
> +}
> +
> +static int
> +riscv64_vtop_5level_4k(ulong *pgd, ulong vaddr, physaddr_t *paddr, int verbose)
> +{
> + ulong *pgd_ptr, pgd_val;
> + ulong *p4d_ptr, p4d_val;
> + ulong *pud_ptr, pud_val;
> + ulong *pmd_ptr, pmd_val;
> + ulong *pte_ptr, pte_val, pte_pfn;
> + ulong pt_phys;
> +
> + /* PGD */
> + pgd_ptr = pgd + pgd_index_l5_4k(vaddr);
> + FILL_PGD(pgd, KVADDR, PAGESIZE());
> + pgd_val = ULONG(machdep->pgd + PAGEOFFSET(pgd_ptr));
> + if (verbose)
> + fprintf(fp, " PGD: %lx => %lx\n", (ulong)pgd_ptr, pgd_val);
> + if (!pgd_val)
> + goto no_page;
> + pgd_val &= PTE_PFN_PROT_MASK;
> + pt_phys = (pgd_val >> _PAGE_PFN_SHIFT) << PAGESHIFT();
> +
> + /* P4D */
> + FILL_P4D(PAGEBASE(pt_phys), PHYSADDR, PAGESIZE());
> + p4d_val = ULONG(machdep->machspec->p4d + PAGEOFFSET(sizeof(p4d_t) *
> + p4d_index_l5_4k(vaddr)));
> + if (verbose)
> + fprintf(fp, " P4D: %016lx => %016lx\n", (ulong)p4d_ptr, p4d_val);
> + if (!p4d_val)
> + goto no_page;
> + p4d_val &= PTE_PFN_PROT_MASK;
> + pt_phys = (p4d_val >> _PAGE_PFN_SHIFT) << PAGESHIFT();
> +
> + /* PUD */
> + FILL_PUD(PAGEBASE(pt_phys), PHYSADDR, PAGESIZE());
> + pud_val = ULONG(machdep->pud + PAGEOFFSET(sizeof(pud_t) *
> + pud_index_l5_4k(vaddr)));
> + if (verbose)
> + fprintf(fp, " PUD: %016lx => %016lx\n", (ulong)pud_ptr, pud_val);
> + if (!pud_val)
> + goto no_page;
> + pud_val &= PTE_PFN_PROT_MASK;
> + pt_phys = (pud_val >> _PAGE_PFN_SHIFT) << PAGESHIFT();
> +
> + /* PMD */
> + FILL_PMD(PAGEBASE(pt_phys), PHYSADDR, PAGESIZE());
> + pmd_val = ULONG(machdep->pmd + PAGEOFFSET(sizeof(pmd_t) *
> + pmd_index_l4_4k(vaddr)));
> + if (verbose)
> + fprintf(fp, " PMD: %016lx => %016lx\n", (ulong)pmd_ptr, pmd_val);
> + if (!pmd_val)
> + goto no_page;
> + pmd_val &= PTE_PFN_PROT_MASK;
> + pt_phys = (pmd_val >> _PAGE_PFN_SHIFT) << PAGESHIFT();
> +
> + /* PTE */
> + FILL_PTBL(PAGEBASE(pt_phys), PHYSADDR, PAGESIZE());
> + pte_val = ULONG(machdep->ptbl + PAGEOFFSET(sizeof(pte_t) *
> + pte_index_l4_4k(vaddr)));
> + if (verbose)
> + fprintf(fp, " PTE: %lx => %lx\n", (ulong)pte_ptr, pte_val);
> + if (!pte_val)
> + goto no_page;
> + pte_val &= PTE_PFN_PROT_MASK;
> + pte_pfn = pte_val >> _PAGE_PFN_SHIFT;
> +
> + if (!(pte_val & _PAGE_PRESENT)) {
> + if (verbose) {
> + fprintf(fp, "\n");
> + riscv64_translate_pte((ulong)pte_val, 0, 0);
> + }
> + printf("!_PAGE_PRESENT\n");
> + return FALSE;
> + }
> +
> + *paddr = PTOB(pte_pfn) + PAGEOFFSET(vaddr);
> +
> + if (verbose) {
> + fprintf(fp, " PAGE: %016lx\n\n", PAGEBASE(*paddr));
> + riscv64_translate_pte(pte_val, 0, 0);
> + }
> +
> + return TRUE;
> +no_page:
> + fprintf(fp, "invalid\n");
> + return FALSE;
> +}
> +
> +static int
> +riscv64_init_active_task_regs(void)
> +{
> + int retval;
> +
> + retval = riscv64_get_crash_notes();
> + if (retval == TRUE)
> + return retval;
> +
> + return riscv64_get_elf_notes();
> +}
> +
> +/*
> + * Retrieve task registers for the time of the crash.
> + */
> +static int
> +riscv64_get_crash_notes(void)
> +{
> + struct machine_specific *ms = machdep->machspec;
> + ulong crash_notes;
> + Elf64_Nhdr *note;
> + ulong offset;
> + char *buf, *p;
> + ulong *notes_ptrs;
> + ulong i;
> +
> + /*
> + * crash_notes contains per cpu memory for storing cpu states
> + * in case of system crash.
> + */
> + if (!symbol_exists("crash_notes"))
> + return FALSE;
> +
> + crash_notes = symbol_value("crash_notes");
> +
> + notes_ptrs = (ulong *)GETBUF(kt->cpus*sizeof(notes_ptrs[0]));
> +
> + /*
> + * Read crash_notes for the first CPU. crash_notes are in standard ELF
> + * note format.
> + */
> + if (!readmem(crash_notes, KVADDR, &notes_ptrs[kt->cpus-1],
> + sizeof(notes_ptrs[kt->cpus-1]), "crash_notes",
> + RETURN_ON_ERROR)) {
> + error(WARNING, "cannot read crash_notes\n");
> + FREEBUF(notes_ptrs);
> + return FALSE;
> + }
> +
> + if (symbol_exists("__per_cpu_offset")) {
> +
> + /*
> + * Add __per_cpu_offset for each cpu to form the pointer to the notes
> + */
> + for (i = 0; i < kt->cpus; i++)
> + notes_ptrs[i] = notes_ptrs[kt->cpus-1] + kt->__per_cpu_offset[i];
> + }
> +
> + buf = GETBUF(SIZE(note_buf));
> +
> + if (!(panic_task_regs = calloc((size_t)kt->cpus, sizeof(*panic_task_regs))))
> + error(FATAL, "cannot calloc panic_task_regs space\n");
> +
> + for (i = 0; i < kt->cpus; i++) {
> +
> + if (!readmem(notes_ptrs[i], KVADDR, buf, SIZE(note_buf), "note_buf_t",
> + RETURN_ON_ERROR)) {
> + error(WARNING,
> + "cannot find NT_PRSTATUS note for cpu: %d\n", i);
> + goto fail;
> + }
> +
> + /*
> + * Do some sanity checks for this note before reading registers from it.
> + */
> + note = (Elf64_Nhdr *)buf;
> + p = buf + sizeof(Elf64_Nhdr);
> +
> + /*
> + * dumpfiles created with qemu won't have crash_notes, but there will
> + * be elf notes; dumpfiles created by kdump do not create notes for
> + * offline cpus.
> + */
> + if (note->n_namesz == 0 && (DISKDUMP_DUMPFILE() || KDUMP_DUMPFILE()))
{
> + if (DISKDUMP_DUMPFILE())
> + note = diskdump_get_prstatus_percpu(i);
> + else if (KDUMP_DUMPFILE())
> + note = netdump_get_prstatus_percpu(i);
> + if (note) {
> + /*
> + * SIZE(note_buf) accounts for a "final note", which is a
> + * trailing empty elf note header.
> + */
> + long notesz = SIZE(note_buf) - sizeof(Elf64_Nhdr);
> +
> + if (sizeof(Elf64_Nhdr) + roundup(note->n_namesz, 4) +
> + note->n_descsz == notesz)
> + BCOPY((char *)note, buf, notesz);
> + } else {
> + error(WARNING,
> + "cannot find NT_PRSTATUS note for cpu: %d\n", i);
> + continue;
> + }
> + }
> +
> + /*
> + * Check the sanity of NT_PRSTATUS note only for each online cpu.
> + */
> + if (note->n_type != NT_PRSTATUS) {
> + error(WARNING, "invalid NT_PRSTATUS note (n_type != NT_PRSTATUS)\n");
> + goto fail;
> + }
> + if (!STRNEQ(p, "CORE")) {
> + error(WARNING, "invalid NT_PRSTATUS note (name !=
\"CORE\"\n");
> + goto fail;
> + }
> +
> + /*
> + * Find correct location of note data. This contains elf_prstatus
> + * structure which has registers etc. for the crashed task.
> + */
> + offset = sizeof(Elf64_Nhdr);
> + offset = roundup(offset + note->n_namesz, 4);
> + p = buf + offset; /* start of elf_prstatus */
> +
> + BCOPY(p + OFFSET(elf_prstatus_pr_reg), &panic_task_regs[i],
> + sizeof(panic_task_regs[i]));
> + }
> +
> + /*
> + * And finally we have the registers for the crashed task. This is
> + * used later on when dumping backtrace.
> + */
> + ms->crash_task_regs = panic_task_regs;
> +
> + FREEBUF(buf);
> + FREEBUF(notes_ptrs);
> + return TRUE;
> +
> +fail:
> + FREEBUF(buf);
> + FREEBUF(notes_ptrs);
> + free(panic_task_regs);
> + return FALSE;
> +}
> +
> +static int
> +riscv64_get_elf_notes(void)
> +{
> + struct machine_specific *ms = machdep->machspec;
> + int i;
> +
> + if (!DISKDUMP_DUMPFILE() && !KDUMP_DUMPFILE())
> + return false;
> +
> + panic_task_regs = calloc(kt->cpus, sizeof(*panic_task_regs));
> + if (!panic_task_regs)
> + error(FATAL, "cannot calloc panic_task_regs space\n");
> +
> + for (i = 0; i < kt->cpus; i++) {
> + Elf64_Nhdr *note = NULL;
> + size_t len;
> +
> + if (DISKDUMP_DUMPFILE())
> + note = diskdump_get_prstatus_percpu(i);
> + else if (KDUMP_DUMPFILE())
> + note = netdump_get_prstatus_percpu(i);
> +
> + if (!note) {
> + error(WARNING,
> + "cannot find NT_PRSTATUS note for cpu: %d\n", i);
> + continue;
> + }
> +
> + len = sizeof(Elf64_Nhdr);
> + len = roundup(len + note->n_namesz, 4);
> +
> + BCOPY((char *)note + len + OFFSET(elf_prstatus_pr_reg),
> + &panic_task_regs[i], sizeof(panic_task_regs[i]));
> + }
> +
> + ms->crash_task_regs = panic_task_regs;
> +
> + return TRUE;
> +}
> +
> +/*
> + * Translates a user virtual address to its physical address.
> + */
> +static int
> +riscv64_uvtop(struct task_context *tc, ulong uvaddr, physaddr_t *paddr, int
verbose)
> +{
> + ulong mm, active_mm;
> + ulong *pgd;
> +
> + if (!tc)
> + error(FATAL, "current context invalid\n");
> +
> + *paddr = 0;
> +
> + if (is_kernel_thread(tc->task) && IS_KVADDR(uvaddr)) {
> + readmem(tc->task + OFFSET(task_struct_active_mm),
> + KVADDR, &active_mm, sizeof(void *),
> + "task active_mm contents", FAULT_ON_ERROR);
> +
> + if (!active_mm)
> + error(FATAL,
> + "no active_mm for this kernel thread\n");
> +
> + readmem(active_mm + OFFSET(mm_struct_pgd),
> + KVADDR, &pgd, sizeof(long),
> + "mm_struct pgd", FAULT_ON_ERROR);
> + } else {
> + if ((mm = task_mm(tc->task, TRUE)))
> + pgd = ULONG_PTR(tt->mm_struct + OFFSET(mm_struct_pgd));
> + else
> + readmem(tc->mm_struct + OFFSET(mm_struct_pgd),
> + KVADDR, &pgd, sizeof(long), "mm_struct pgd",
> + FAULT_ON_ERROR);
> + }
> +
> + switch (machdep->flags & (VM_L3_4K | VM_L4_4K | VM_L5_4K))
> + {
> + case VM_L3_4K:
> + return riscv64_vtop_3level_4k(pgd, uvaddr, paddr, verbose);
> + case VM_L4_4K:
> + return riscv64_vtop_4level_4k(pgd, uvaddr, paddr, verbose);
> + case VM_L5_4K:
> + return riscv64_vtop_5level_4k(pgd, uvaddr, paddr, verbose);
> + default:
> + return FALSE;
> + }
> +}
> +
> +static int
> +riscv64_kvtop(struct task_context *tc, ulong kvaddr, physaddr_t *paddr, int
verbose)
> +{
> + ulong kernel_pgd;
> +
> + if (!IS_KVADDR(kvaddr))
> + return FALSE;
> +
> + if (!vt->vmalloc_start) {
> + *paddr = VTOP(kvaddr);
> + return TRUE;
> + }
> +
> + if (!IS_VMALLOC_ADDR(kvaddr)) {
> + *paddr = VTOP(kvaddr);
> + if (!verbose)
> + return TRUE;
> + }
> +
> + kernel_pgd = vt->kernel_pgd[0];
> + *paddr = 0;
> +
> + switch (machdep->flags & (VM_L3_4K | VM_L4_4K | VM_L5_4K))
> + {
> + case VM_L3_4K:
> + return riscv64_vtop_3level_4k((ulong *)kernel_pgd, kvaddr, paddr, verbose);
> + case VM_L4_4K:
> + return riscv64_vtop_4level_4k((ulong *)kernel_pgd, kvaddr, paddr, verbose);
> + case VM_L5_4K:
> + return riscv64_vtop_5level_4k((ulong *)kernel_pgd, kvaddr, paddr, verbose);
> + default:
> + return FALSE;
> + }
> +}
> +
> void
> riscv64_init(int when)
> {
> + switch (when) {
> + case SETUP_ENV:
> + machdep->process_elf_notes = process_elf64_notes;
> + break;
> +
> + case PRE_SYMTAB:
> + machdep->verify_symbol = riscv64_verify_symbol;
> + machdep->machspec = &riscv64_machine_specific;
> + if (pc->flags & KERNEL_DEBUG_QUERY)
> + return;
> +
> + machdep->verify_paddr = generic_verify_paddr;
> + machdep->ptrs_per_pgd = PTRS_PER_PGD;
> + break;
> +
> + case PRE_GDB:
> + machdep->pagesize = riscv64_get_page_size();
> + machdep->pageshift = ffs(machdep->pagesize) - 1;
> + machdep->pageoffset = machdep->pagesize - 1;
> + machdep->pagemask = ~((ulonglong)machdep->pageoffset);
> + machdep->stacksize = machdep->pagesize << THREAD_SIZE_ORDER;
> +
> + riscv64_get_phys_ram_base(machdep->machspec);
> + riscv64_get_struct_page_size(machdep->machspec);
> + riscv64_get_va_range(machdep->machspec);
> +
> + pt_level_alloc(&machdep->pgd, "cannot malloc pgd space.");
> + pt_level_alloc(&machdep->machspec->p4d, "cannot malloc p4d
space.");
> + pt_level_alloc(&machdep->pud, "cannot malloc pud space.");
> + pt_level_alloc(&machdep->pmd, "cannot malloc pmd space.");
> + pt_level_alloc(&machdep->ptbl, "cannot malloc ptbl space.");
> +
> + machdep->last_pgd_read = 0;
> + machdep->machspec->last_p4d_read = 0;
> + machdep->last_pud_read = 0;
> + machdep->last_pmd_read = 0;
> + machdep->last_ptbl_read = 0;
> +
> + machdep->kvbase = machdep->machspec->page_offset;
> + machdep->identity_map_base = machdep->kvbase;
> + machdep->is_kvaddr = riscv64_is_kvaddr;
> + machdep->is_uvaddr = riscv64_is_uvaddr;
> + machdep->uvtop = riscv64_uvtop;
> + machdep->kvtop = riscv64_kvtop;
> + machdep->cmd_mach = riscv64_cmd_mach;
> +
> + machdep->vmalloc_start = riscv64_vmalloc_start;
> + machdep->processor_speed = riscv64_processor_speed;
> + machdep->get_stackbase = generic_get_stackbase;
> + machdep->get_stacktop = generic_get_stacktop;
> + machdep->translate_pte = riscv64_translate_pte;
> + machdep->memory_size = generic_memory_size;
> + machdep->is_task_addr = riscv64_is_task_addr;
> + machdep->get_smp_cpus = riscv64_get_smp_cpus;
> + machdep->value_to_symbol = generic_machdep_value_to_symbol;
> + machdep->show_interrupts = generic_show_interrupts;
> + machdep->get_irq_affinity = generic_get_irq_affinity;
> + machdep->init_kernel_pgd = NULL; /* pgd set by
symbol_value("swapper_pg_dir") */
> + break;
> +
> + case POST_GDB:
> + machdep->section_size_bits = _SECTION_SIZE_BITS;
> + machdep->max_physmem_bits = _MAX_PHYSMEM_BITS;
> + riscv64_page_type_init();
> +
> + if (!machdep->hz)
> + machdep->hz = 250;
> +
> + if (symbol_exists("irq_desc"))
> + ARRAY_LENGTH_INIT(machdep->nr_irqs, irq_desc,
> + "irq_desc", NULL, 0);
> + else if (kernel_symbol_exists("nr_irqs"))
> + get_symbol_data("nr_irqs", sizeof(unsigned int),
> + &machdep->nr_irqs);
> +
> + MEMBER_OFFSET_INIT(elf_prstatus_pr_reg, "elf_prstatus",
> + "pr_reg");
> +
> + STRUCT_SIZE_INIT(note_buf, "note_buf_t");
> + break;
> +
> + case POST_VM:
> + /*
> + * crash_notes contains machine specific information about the
> + * crash. In particular, it contains CPU registers at the time
> + * of the crash. We need this information to extract correct
> + * backtraces from the panic task.
> + */
> + if (!ACTIVE() && !riscv64_init_active_task_regs())
> + error(WARNING,
> + "cannot retrieve registers for active task%s\n\n",
> + kt->cpus > 1 ? "s" : "");
> + break;
> + }
> }
>
> +/*
> + * 'help -r' command output
> + */
> void
> riscv64_display_regs_from_elf_notes(int cpu, FILE *ofp)
> {