Hi Jim,
The problem looks to be the use of PAGEOFFSET() into the 512-byte pgd used
for 3-level/64K-pages:
static int
arm64_vtop_3level_64k(ulong pgd, ulong vaddr, physaddr_t *paddr, int verbose)
{
ulong *pgd_base, *pgd_ptr, pgd_val;
ulong *pmd_base, *pmd_ptr, pmd_val;
ulong *pte_base, *pte_ptr, pte_val;
if (verbose)
fprintf(fp, "PAGE DIRECTORY: %lx\n", pgd);
pgd_base = (ulong *)pgd;
FILL_PGD(pgd_base, KVADDR, PTRS_PER_PGD_L3_64K * sizeof(ulong));
pgd_ptr = pgd_base + (((vaddr) >> PGDIR_SHIFT_L3_64K) &
(PTRS_PER_PGD_L3_64K - 1));
pgd_val = ULONG(machdep->pgd + PAGEOFFSET(pgd_ptr));
^^^^^^^^^^^^^^^^^^
For kernel vmalloc addresses, the kernel's "swapper_pg_dir" is used, which has
a base address that's aligned on a 64K page boundary, so PAGEOFFSET() works as
an index.
But for user-space addresses, the mm_struct->pgd is not 64K page aligned so the
use of PAGEOFFSET() calculates an address that goes way beyond the end of the
512-byte aligned pgd:
crash> for user vm -p | grep -e PGD -e "k "
MM PGD RSS TOTAL_VM
ffff8003d61ec240 ffff8003d5b12c00 11904k 12992k
MM PGD RSS TOTAL_VM
ffff8003d60aec40 ffff8003dcb7ca00 4864k 88448k
MM PGD RSS TOTAL_VM
ffff8000dbf1dc80 ffff8003d69bca00 8704k 92928k
MM PGD RSS TOTAL_VM
ffff8000dbf1e700 ffff8003d69b4a00 8768k 17536k
MM PGD RSS TOTAL_VM
ffff8003d65c9300 ffff8000d4055600 5696k 16576k
MM PGD RSS TOTAL_VM
ffff8003d65c9300 ffff8000d4055600 5696k 16576k
MM PGD RSS TOTAL_VM
ffff8003d65c9840 ffff8000d4051800 1664k 2560k
MM PGD RSS TOTAL_VM
ffff8003d60a8340 ffff8003dcb7fa00 10944k 303552k
MM PGD RSS TOTAL_VM
ffff8003d65c8880 ffff8000d4052c00 19456k 353600k
MM PGD RSS TOTAL_VM
ffff8003d61ab7c0 ffff8003dc86b800 4864k 14016k
MM PGD RSS TOTAL_VM
ffff8000dbf18340 ffff8003d69b1400 7424k 157632k
MM PGD RSS TOTAL_VM
ffff8003d60a8340 ffff8003dcb7fa00 10944k 303552k
MM PGD RSS TOTAL_VM
ffff8003d60a8340 ffff8003dcb7fa00 10944k 303552k
MM PGD RSS TOTAL_VM
ffff8000dbf18340 ffff8003d69b1400 7424k 157632k
MM PGD RSS TOTAL_VM
ffff8000dbf18340 ffff8003d69b1400 7424k 157632k
MM PGD RSS TOTAL_VM
ffff8000dbf18340 ffff8003d69b1400 7424k 157632k
MM PGD RSS TOTAL_VM
ffff8000dbf18340 ffff8003d69b1400 7424k 157632k
MM PGD RSS TOTAL_VM
ffff8000dbf18340 ffff8003d69b1400 7424k 157632k
MM PGD RSS TOTAL_VM
ffff8003d61ab7c0 ffff8003dc86b800 4864k 14016k
MM PGD RSS TOTAL_VM
ffff8000c403b7c0 ffff8003d6597800 4928k 5888k
MM PGD RSS TOTAL_VM
ffff8003d59db7c0 ffff8003dcd18600 4352k 5248k
MM PGD RSS TOTAL_VM
ffff8003d61ead40 ffff8003d4107400 3776k 5760k
MM PGD RSS TOTAL_VM
ffff8000c403ad40 ffff8003d6592000 4224k 5120k
MM PGD RSS TOTAL_VM
ffff8003d60a78c0 ffff8003dcb72600 2496k 109888k
MM PGD RSS TOTAL_VM
ffff8003d61a8880 ffff8003dc86b200 2176k 109888k
MM PGD RSS TOTAL_VM
ffff8003d65c8880 ffff8000d4052c00 19456k 353600k
MM PGD RSS TOTAL_VM
ffff8003d65c8880 ffff8000d4052c00 19456k 353600k
...
Dave
----- Original Message -----
----- Original Message -----
> > Adds ARM64 support for 3-level page tables with 64K pages and 48 VA bits.
>
> Nicely done, Jim. Queued for crash-7.1.5:
>
>
https://github.com/crash-utility/crash/commit/ab91852f945bfecfa0bca6a4225...
>
> Thanks,
> Dave
>
Hi Jim,
I just noticed today that your 3-level 64K patch does not work for user
virtual address space.
I haven't looked too deeply into it, but for example, on a live system all
user virtual address vtop operations fail: all disk-backed user memory space
shows the "FILE:" backing, and the anonymous space shows "(not mapped)":
crash> help -m | grep VM
flags: 10400069
(KSYMS_START|VM_L3_64K|VMEMMAP|KDUMP_ENABLED|IRQ_STACKS|MACHDEP_BT_TEXT)
crash> sys | grep RELEASE
RELEASE: 4.5.0-0.38.el7.aarch64
crash> set
PID: 1212
COMMAND: "crash"
TASK: ffff8003d74f3f00 [THREAD_INFO: ffff8003d7454000]
CPU: 1
STATE: TASK_RUNNING (ACTIVE)
crash> vm -p
PID: 1212 TASK: ffff8003d74f3f00 CPU: 1 COMMAND: "crash"
MM PGD RSS TOTAL_VM
ffff8000c40363c0 ffff8003db6a9200 211904k 355264k
VMA START END FLAGS FILE
ffff8003de746d40 400000 a00000 875 /root/crash.git/crash
VIRTUAL PHYSICAL
400000 FILE: /root/crash.git/crash OFFSET: 0
410000 FILE: /root/crash.git/crash OFFSET: 10000
420000 FILE: /root/crash.git/crash OFFSET: 20000
430000 FILE: /root/crash.git/crash OFFSET: 30000
440000 FILE: /root/crash.git/crash OFFSET: 40000
450000 FILE: /root/crash.git/crash OFFSET: 50000
... [ cut ] ...
VMA START END FLAGS FILE
ffff8003de745d70 a50000 b00000 100073
VIRTUAL PHYSICAL
a50000 (not mapped)
a60000 (not mapped)
a70000 (not mapped)
a80000 (not mapped)
a90000 (not mapped)
aa0000 (not mapped)
ab0000 (not mapped)
ac0000 (not mapped)
...
In all cases, the PGD value reads as 0 and therefore fails:
crash> vtop 400000
VIRTUAL PHYSICAL
400000 (not mapped)
PAGE DIRECTORY: ffff8003db6a9200
PGD: ffff8003db6a9200 => 0
VMA START END FLAGS FILE
ffff8003de746d40 400000 a00000 875 /root/crash.git/crash
FILE: /root/crash.git/crash OFFSET: 0
crash>
That is the correct PGD address, and when read, it looks like a valid PTE:
crash> rd ffff8003db6a9200
ffff8003db6a9200: 00000043dee60003 ....C...
crash>
vmalloc() addresses translate just fine, and since they use the same function,
I'm not sure what's going on. Did you ever check user-space translations?
Thanks,
Dave
>
>
> > ---
> > arm64.c | 126
> > ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
> > defs.h | 28 +++++++++++----
> > 2 files changed, 133 insertions(+), 21 deletions(-)
> >
> > diff --git a/arm64.c b/arm64.c
> > index f6ea7a1..d1c9c3e 100644
> > --- a/arm64.c
> > +++ b/arm64.c
> > @@ -34,6 +34,7 @@ static void arm64_init_kernel_pgd(void);
> > static int arm64_kvtop(struct task_context *, ulong, physaddr_t *, int);
> > static int arm64_uvtop(struct task_context *, ulong, physaddr_t *, int);
> > static int arm64_vtop_2level_64k(ulong, ulong, physaddr_t *, int);
> > +static int arm64_vtop_3level_64k(ulong, ulong, physaddr_t *, int);
> > static int arm64_vtop_3level_4k(ulong, ulong, physaddr_t *, int);
> > static ulong arm64_get_task_pgd(ulong);
> > static void arm64_irq_stack_init(void);
> > @@ -188,15 +189,29 @@ arm64_init(int when)
> > break;
> >
> > case 65536:
> > - machdep->flags |= VM_L2_64K;
> > - machdep->ptrs_per_pgd = PTRS_PER_PGD_L2_64K;
> > - if ((machdep->pgd =
> > - (char *)malloc(PTRS_PER_PGD_L2_64K * 8)) == NULL)
> > - error(FATAL, "cannot malloc pgd space.");
> > - if ((machdep->ptbl =
> > - (char *)malloc(PTRS_PER_PTE_L2_64K * 8)) == NULL)
> > - error(FATAL, "cannot malloc ptbl space.");
> > - machdep->pmd = NULL; /* not used */
> > + if (machdep->machspec->VA_BITS > PGDIR_SHIFT_L3_64K) {
> > + machdep->flags |= VM_L3_64K;
> > + machdep->ptrs_per_pgd = PTRS_PER_PGD_L3_64K;
> > + if ((machdep->pgd =
> > + (char *)malloc(PTRS_PER_PGD_L3_64K * 8)) == NULL)
> > + error(FATAL, "cannot malloc pgd space.");
> > + if ((machdep->pmd =
> > + (char *)malloc(PTRS_PER_PMD_L3_64K * 8)) == NULL)
> > + error(FATAL, "cannot malloc pmd space.");
> > + if ((machdep->ptbl =
> > + (char *)malloc(PTRS_PER_PTE_L3_64K * 8)) == NULL)
> > + error(FATAL, "cannot malloc ptbl space.");
> > + } else {
> > + machdep->flags |= VM_L2_64K;
> > + machdep->ptrs_per_pgd = PTRS_PER_PGD_L2_64K;
> > + if ((machdep->pgd =
> > + (char *)malloc(PTRS_PER_PGD_L2_64K * 8)) == NULL)
> > + error(FATAL, "cannot malloc pgd space.");
> > + if ((machdep->ptbl =
> > + (char *)malloc(PTRS_PER_PTE_L2_64K * 8)) == NULL)
> > + error(FATAL, "cannot malloc ptbl space.");
> > + machdep->pmd = NULL; /* not used */
> > + }
> > machdep->pud = NULL; /* not used */
> > break;
> >
> > @@ -379,6 +394,8 @@ arm64_dump_machdep_table(ulong arg)
> > fprintf(fp, "%sPHYS_OFFSET", others++ ? "|" :
"");
> > if (machdep->flags & VM_L2_64K)
> > fprintf(fp, "%sVM_L2_64K", others++ ? "|" :
"");
> > + if (machdep->flags & VM_L3_64K)
> > + fprintf(fp, "%sVM_L3_64K", others++ ? "|" :
"");
> > if (machdep->flags & VM_L3_4K)
> > fprintf(fp, "%sVM_L3_4K", others++ ? "|" :
"");
> > if (machdep->flags & VMEMMAP)
> > @@ -410,10 +427,14 @@ arm64_dump_machdep_table(ulong arg)
> > fprintf(fp, " processor_speed: arm64_processor_speed()\n");
> > fprintf(fp, " uvtop: arm64_uvtop()->%s()\n",
> > machdep->flags & VM_L3_4K ?
> > - "arm64_vtop_3level_4k" : "arm64_vtop_2level_64k");
> > + "arm64_vtop_3level_4k" :
> > + machdep->flags & VM_L3_64K ?
> > + "arm64_vtop_3level_64k" : "arm64_vtop_2level_64k");
> > fprintf(fp, " kvtop: arm64_kvtop()->%s()\n",
> > machdep->flags & VM_L3_4K ?
> > - "arm64_vtop_3level_4k" : "arm64_vtop_2level_64k");
> > + "arm64_vtop_3level_4k" :
> > + machdep->flags & VM_L3_64K ?
> > + "arm64_vtop_3level_64k" : "arm64_vtop_2level_64k");
> > fprintf(fp, " get_task_pgd: arm64_get_task_pgd()\n");
> > fprintf(fp, " dump_irq: generic_dump_irq()\n");
> > fprintf(fp, " get_stack_frame: arm64_get_stack_frame()\n");
> > @@ -719,10 +740,12 @@ arm64_kvtop(struct task_context *tc, ulong kvaddr,
> > physaddr_t *paddr, int verbos
> > kernel_pgd = vt->kernel_pgd[0];
> > *paddr = 0;
> >
> > - switch (machdep->flags & (VM_L2_64K|VM_L3_4K))
> > + switch (machdep->flags & (VM_L2_64K|VM_L3_64K|VM_L3_4K))
> > {
> > case VM_L2_64K:
> > return arm64_vtop_2level_64k(kernel_pgd, kvaddr, paddr, verbose);
> > + case VM_L3_64K:
> > + return arm64_vtop_3level_64k(kernel_pgd, kvaddr, paddr, verbose);
> > case VM_L3_4K:
> > return arm64_vtop_3level_4k(kernel_pgd, kvaddr, paddr, verbose);
> > default:
> > @@ -740,10 +763,12 @@ arm64_uvtop(struct task_context *tc, ulong uvaddr,
> > physaddr_t *paddr, int verbos
> >
> > *paddr = 0;
> >
> > - switch (machdep->flags & (VM_L2_64K|VM_L3_4K))
> > + switch (machdep->flags & (VM_L2_64K|VM_L3_64K|VM_L3_4K))
> > {
> > case VM_L2_64K:
> > return arm64_vtop_2level_64k(user_pgd, uvaddr, paddr, verbose);
> > + case VM_L3_64K:
> > + return arm64_vtop_3level_64k(user_pgd, uvaddr, paddr, verbose);
> > case VM_L3_4K:
> > return arm64_vtop_3level_4k(user_pgd, uvaddr, paddr, verbose);
> > default:
> > @@ -820,6 +845,78 @@ no_page:
> > return FALSE;
> > }
> >
> > +static int
> > +arm64_vtop_3level_64k(ulong pgd, ulong vaddr, physaddr_t *paddr, int
> > verbose)
> > +{
> > + ulong *pgd_base, *pgd_ptr, pgd_val;
> > + ulong *pmd_base, *pmd_ptr, pmd_val;
> > + ulong *pte_base, *pte_ptr, pte_val;
> > +
> > + if (verbose)
> > + fprintf(fp, "PAGE DIRECTORY: %lx\n", pgd);
> > +
> > + pgd_base = (ulong *)pgd;
> > + FILL_PGD(pgd_base, KVADDR, PTRS_PER_PGD_L3_64K * sizeof(ulong));
> > + pgd_ptr = pgd_base + (((vaddr) >> PGDIR_SHIFT_L3_64K) &
> > (PTRS_PER_PGD_L3_64K - 1));
> > + pgd_val = ULONG(machdep->pgd + PAGEOFFSET(pgd_ptr));
> > + if (verbose)
> > + fprintf(fp, " PGD: %lx => %lx\n",
(ulong)pgd_ptr,
> > pgd_val);
> > + if (!pgd_val)
> > + goto no_page;
> > +
> > + /*
> > + * #define __PAGETABLE_PUD_FOLDED
> > + */
> > +
> > + pmd_base = (ulong *)PTOV(pgd_val & PHYS_MASK &
(s32)machdep->pagemask);
> > + FILL_PMD(pmd_base, KVADDR, PTRS_PER_PMD_L3_64K * sizeof(ulong));
> > + pmd_ptr = pmd_base + (((vaddr) >> PMD_SHIFT_L3_64K) &
> > (PTRS_PER_PMD_L3_64K
> > - 1));
> > + pmd_val = ULONG(machdep->pmd + PAGEOFFSET(pmd_ptr));
> > + if (verbose)
> > + fprintf(fp, " PMD: %lx => %lx\n",
(ulong)pmd_ptr,
> > pmd_val);
> > + if (!pmd_val)
> > + goto no_page;
> > +
> > + if ((pmd_val & PMD_TYPE_MASK) == PMD_TYPE_SECT) {
> > + ulong sectionbase = (pmd_val & SECTION_PAGE_MASK_512MB) &
PHYS_MASK;
> > + if (verbose) {
> > + fprintf(fp, " PAGE: %lx (512MB)\n\n", sectionbase);
> > + arm64_translate_pte(pmd_val, 0, 0);
> > + }
> > + *paddr = sectionbase + (vaddr & ~SECTION_PAGE_MASK_512MB);
> > + return TRUE;
> > + }
> > +
> > + pte_base = (ulong *)PTOV(pmd_val & PHYS_MASK &
(s32)machdep->pagemask);
> > + FILL_PTBL(pte_base, KVADDR, PTRS_PER_PTE_L3_64K * sizeof(ulong));
> > + pte_ptr = pte_base + (((vaddr) >> machdep->pageshift) &
> > (PTRS_PER_PTE_L3_64K - 1));
> > + pte_val = ULONG(machdep->ptbl + PAGEOFFSET(pte_ptr));
> > + if (verbose)
> > + fprintf(fp, " PTE: %lx => %lx\n",
(ulong)pte_ptr,
> > pte_val);
> > + if (!pte_val)
> > + goto no_page;
> > +
> > + if (pte_val & PTE_VALID) {
> > + *paddr = (PAGEBASE(pte_val) & PHYS_MASK) + PAGEOFFSET(vaddr);
> > + if (verbose) {
> > + fprintf(fp, " PAGE: %lx\n\n", PAGEBASE(*paddr));
> > + arm64_translate_pte(pte_val, 0, 0);
> > + }
> > + } else {
> > + if (IS_UVADDR(vaddr, NULL))
> > + *paddr = pte_val;
> > + if (verbose) {
> > + fprintf(fp, "\n");
> > + arm64_translate_pte(pte_val, 0, 0);
> > + }
> > + goto no_page;
> > + }
> > +
> > + return TRUE;
> > +no_page:
> > + return FALSE;
> > +}
> > +
> > static int
> > arm64_vtop_3level_4k(ulong pgd, ulong vaddr, physaddr_t *paddr, int
> > verbose)
> > {
> > @@ -2348,9 +2445,10 @@ arm64_calc_virtual_memory_ranges(void)
> >
> > STRUCT_SIZE_INIT(page, "page");
> >
> > - switch (machdep->flags & (VM_L2_64K|VM_L3_4K))
> > + switch (machdep->flags & (VM_L2_64K|VM_L3_64K|VM_L3_4K))
> > {
> > case VM_L2_64K:
> > + case VM_L3_64K:
> > PUD_SIZE = PGDIR_SIZE_L2_64K;
> > break;
> > case VM_L3_4K:
> > diff --git a/defs.h b/defs.h
> > index 56ae06c..d1b49d0 100644
> > --- a/defs.h
> > +++ b/defs.h
> > @@ -2815,7 +2815,7 @@ typedef u64 pte_t;
> >
> > typedef signed int s32;
> >
> > -/*
> > +/*
> > * 3-levels / 4K pages
> > */
> > #define PTRS_PER_PGD_L3_4K (512)
> > @@ -2823,10 +2823,23 @@ typedef signed int s32;
> > #define PTRS_PER_PTE_L3_4K (512)
> > #define PGDIR_SHIFT_L3_4K (30)
> > #define PGDIR_SIZE_L3_4K ((1UL) << PGDIR_SHIFT_L3_4K)
> > -#define PGDIR_MASK_L3 4K (~(PGDIR_SIZE_L3_4K-1))
> > +#define PGDIR_MASK_L3_4K (~(PGDIR_SIZE_L3_4K-1))
> > #define PMD_SHIFT_L3_4K (21)
> > -#define PMD_SIZE_L3_4K (1UL << PMD_SHIFT_4K)
> > -#define PMD_MASK_L3 4K (~(PMD_SIZE_4K-1))
> > +#define PMD_SIZE_L3_4K (1UL << PMD_SHIFT_L3_4K)
> > +#define PMD_MASK_L3_4K (~(PMD_SIZE_L3_4K-1))
> > +
> > +/*
> > + * 3-levels / 64K pages
> > + */
> > +#define PTRS_PER_PGD_L3_64K (64)
> > +#define PTRS_PER_PMD_L3_64K (8192)
> > +#define PTRS_PER_PTE_L3_64K (8192)
> > +#define PGDIR_SHIFT_L3_64K (42)
> > +#define PGDIR_SIZE_L3_64K ((1UL) << PGDIR_SHIFT_L3_64K)
> > +#define PGDIR_MASK_L3_64K (~(PGDIR_SIZE_L3_64K-1))
> > +#define PMD_SHIFT_L3_64K (29)
> > +#define PMD_SIZE_L3_64K (1UL << PMD_SHIFT_L3_64K)
> > +#define PMD_MASK_L3_64K (~(PMD_SIZE_L3_64K-1))
> >
> > /*
> > * 2-levels / 64K pages
> > @@ -2868,9 +2881,10 @@ typedef signed int s32;
> > #define KSYMS_START (0x1)
> > #define PHYS_OFFSET (0x2)
> > #define VM_L2_64K (0x4)
> > -#define VM_L3_4K (0x8)
> > -#define KDUMP_ENABLED (0x10)
> > -#define IRQ_STACKS (0x20)
> > +#define VM_L3_64K (0x8)
> > +#define VM_L3_4K (0x10)
> > +#define KDUMP_ENABLED (0x20)
> > +#define IRQ_STACKS (0x40)
> >
> > /*
> > * sources: Documentation/arm64/memory.txt
> > --
> > 2.1.4
> >
> > --
> > Crash-utility mailing list
> > Crash-utility redhat com
> >
https://www.redhat.com/mailman/listinfo/crash-utility
> >