Hi Kuan-Ying,
On Thu, Jul 25, 2024 at 2:53 PM Tao Liu <ltao(a)redhat.com> wrote:
Hi Kuan-Ying,
On Thu, Jul 25, 2024 at 2:44 PM <kuan-ying.lee(a)canonical.com> wrote:
>
> Hi crash maintainers,
>
> Could you please help review this patch that supports 16K page size with a 3-level
> page table and 47 VA bits?
>
I don't have many comments on this patch; it looks good so far.
However, I'd like to have the patch tested. I'm wondering whether there
are any Linux distros that have 16K pages enabled, or whether I can just
compile the kernel source with the 16K page config enabled on any distro
such as Fedora. I guess there might be problems for makedumpfile when
creating a vmcore with 16K pages. Any suggestions?
Thanks,
Tao Liu
Yes, it is on my to-do list. Since it is a large patch, I need to take
some time for testing and reviewing.
Thanks,
Tao Liu
> Thank you,
> Kuan-Ying Lee
>
> > Introduction of ARM64 support for 16K page size with 3-level page
> > table and 47 VA bits.
> >
> > Signed-off-by: Kuan-Ying Lee <kuan-ying.lee(a)canonical.com>
> > ---
> > arm64.c | 114 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--
> > defs.h | 16 ++++++++
> > 2 files changed, 126 insertions(+), 4 deletions(-)
> >
> > diff --git a/arm64.c b/arm64.c
> > index b3040d757946..5356ae1f5cce 100644
> > --- a/arm64.c
> > +++ b/arm64.c
> > @@ -42,6 +42,7 @@ static int arm64_kvtop(struct task_context *, ulong,
physaddr_t *,
> > int);
> > static int arm64_uvtop(struct task_context *, ulong, physaddr_t *, int);
> > static int arm64_vtop_2level_64k(ulong, ulong, physaddr_t *, int);
> > static int arm64_vtop_3level_64k(ulong, ulong, physaddr_t *, int);
> > +static int arm64_vtop_3level_16k(ulong, ulong, physaddr_t *, int);
> > static int arm64_vtop_3level_4k(ulong, ulong, physaddr_t *, int);
> > static int arm64_vtop_4level_4k(ulong, ulong, physaddr_t *, int);
> > static ulong arm64_get_task_pgd(ulong);
> > @@ -261,8 +262,7 @@ arm64_init(int when)
> > machdep->pagesize = 4096;
> > break;
> > case 2:
> > - /* TODO: machdep->pagesize = 16384; */
> > - error(FATAL, "16K pages not
supported.");
> > + machdep->pagesize = 16384;
> > break;
> > case 3:
> > machdep->pagesize = 65536;
> > @@ -392,6 +392,26 @@ arm64_init(int when)
> > error(FATAL, "cannot malloc ptbl
space.");
> > break;
> >
> > + case 16384:
> > + if (machdep->machspec->VA_BITS >
PGDIR_SHIFT_L3_16K) {
> > + machdep->flags |= VM_L3_16K;
> > + if (!machdep->ptrs_per_pgd)
> > + machdep->ptrs_per_pgd =
PTRS_PER_PGD_L3_16K;
> > + if ((machdep->pgd =
> > + (char *)malloc(machdep->ptrs_per_pgd * 8))
== NULL)
> > + error(FATAL, "cannot malloc pgd
space.");
> > + if ((machdep->pmd =
> > + (char *)malloc(PTRS_PER_PMD_L3_16K * 8)) ==
NULL)
> > + error(FATAL, "cannot malloc pmd
space.");
> > + if ((machdep->ptbl =
> > + (char *)malloc(PTRS_PER_PTE_L3_16K * 8)) ==
NULL)
> > + error(FATAL, "cannot malloc ptbl
space.");
> > + } else {
> > + error(FATAL, "we only support 47 bits, 3
level for 16K page now.");
> > + }
> > + machdep->pud = NULL; /* not used */
> > + break;
> > +
> > case 65536:
> > if (kernel_symbol_exists("idmap_ptrs_per_pgd")
&&
> > readmem(symbol_value("idmap_ptrs_per_pgd"),
KVADDR,
> > @@ -1018,6 +1038,8 @@ arm64_dump_machdep_table(ulong arg)
> > fprintf(fp, "%sVM_L2_64K", others++ ? "|" :
"");
> > if (machdep->flags & VM_L3_64K)
> > fprintf(fp, "%sVM_L3_64K", others++ ? "|" :
"");
> > + if (machdep->flags & VM_L3_16K)
> > + fprintf(fp, "%sVM_L3_16K", others++ ? "|" :
"");
> > if (machdep->flags & VM_L3_4K)
> > fprintf(fp, "%sVM_L3_4K", others++ ? "|" :
"");
> > if (machdep->flags & VM_L4_4K)
> > @@ -1065,6 +1087,8 @@ arm64_dump_machdep_table(ulong arg)
> > "arm64_vtop_3level_4k" :
> > machdep->flags & VM_L4_4K ?
> > "arm64_vtop_4level_4k" :
> > + machdep->flags & VM_L3_16K ?
> > + "arm64_vtop_3level_16k" :
> > machdep->flags & VM_L3_64K ?
> > "arm64_vtop_3level_64k" :
"arm64_vtop_2level_64k");
> > fprintf(fp, " kvtop: arm64_kvtop()->%s()\n",
> > @@ -1072,6 +1096,8 @@ arm64_dump_machdep_table(ulong arg)
> > "arm64_vtop_3level_4k" :
> > machdep->flags & VM_L4_4K ?
> > "arm64_vtop_4level_4k" :
> > + machdep->flags & VM_L3_16K ?
> > + "arm64_vtop_3level_16k" :
> > machdep->flags & VM_L3_64K ?
> > "arm64_vtop_3level_64k" :
"arm64_vtop_2level_64k");
> > fprintf(fp, " get_task_pgd: arm64_get_task_pgd()\n");
> > @@ -1107,6 +1133,7 @@ arm64_dump_machdep_table(ulong arg)
> > fprintf(fp, " last_pgd_read: %lx\n",
machdep->last_pgd_read);
> > fprintf(fp, " last_pud_read: ");
> > if ((PAGESIZE() == 65536) ||
> > + (PAGESIZE() == 16384) ||
> > ((PAGESIZE() == 4096) && !(machdep->flags &
VM_L4_4K)))
> > fprintf(fp, "(not used)\n");
> > else
> > @@ -1761,7 +1788,7 @@ arm64_kvtop(struct task_context *tc, ulong kvaddr,
physaddr_t
> > *paddr, int verbos
> > kernel_pgd = vt->kernel_pgd[0];
> > *paddr = 0;
> >
> > - switch (machdep->flags & (VM_L2_64K|VM_L3_64K|VM_L3_4K|VM_L4_4K))
> > + switch (machdep->flags &
(VM_L2_64K|VM_L3_64K|VM_L3_4K|VM_L4_4K|VM_L3_16K))
> > {
> > case VM_L2_64K:
> > return arm64_vtop_2level_64k(kernel_pgd, kvaddr, paddr,
verbose);
> > @@ -1771,6 +1798,8 @@ arm64_kvtop(struct task_context *tc, ulong kvaddr,
physaddr_t
> > *paddr, int verbos
> > return arm64_vtop_3level_4k(kernel_pgd, kvaddr, paddr, verbose);
> > case VM_L4_4K:
> > return arm64_vtop_4level_4k(kernel_pgd, kvaddr, paddr, verbose);
> > + case VM_L3_16K:
> > + return arm64_vtop_3level_16k(kernel_pgd, kvaddr, paddr,
verbose);
> > default:
> > return FALSE;
> > }
> > @@ -1786,7 +1815,7 @@ arm64_uvtop(struct task_context *tc, ulong uvaddr,
physaddr_t
> > *paddr, int verbos
> >
> > *paddr = 0;
> >
> > - switch (machdep->flags & (VM_L2_64K|VM_L3_64K|VM_L3_4K|VM_L4_4K))
> > + switch (machdep->flags &
(VM_L2_64K|VM_L3_64K|VM_L3_4K|VM_L4_4K|VM_L3_16K))
> > {
> > case VM_L2_64K:
> > return arm64_vtop_2level_64k(user_pgd, uvaddr, paddr, verbose);
> > @@ -1796,6 +1825,8 @@ arm64_uvtop(struct task_context *tc, ulong uvaddr,
physaddr_t
> > *paddr, int verbos
> > return arm64_vtop_3level_4k(user_pgd, uvaddr, paddr, verbose);
> > case VM_L4_4K:
> > return arm64_vtop_4level_4k(user_pgd, uvaddr, paddr, verbose);
> > + case VM_L3_16K:
> > + return arm64_vtop_3level_16k(user_pgd, uvaddr, paddr, verbose);
> > default:
> > return FALSE;
> > }
> > @@ -1812,6 +1843,7 @@ arm64_uvtop(struct task_context *tc, ulong uvaddr,
physaddr_t
> > *paddr, int verbos
> > #define PMD_TYPE_SECT 1
> > #define PMD_TYPE_TABLE 2
> > #define SECTION_PAGE_MASK_2MB ((long)(~((MEGABYTES(2))-1)))
> > +#define SECTION_PAGE_MASK_32MB ((long)(~((MEGABYTES(32))-1)))
> > #define SECTION_PAGE_MASK_512MB ((long)(~((MEGABYTES(512))-1)))
> > #define SECTION_PAGE_MASK_1GB ((long)(~((GIGABYTES(1))-1)))
> >
> > @@ -1954,6 +1986,80 @@ no_page:
> > return FALSE;
> > }
> >
> > +static int
> > +arm64_vtop_3level_16k(ulong pgd, ulong vaddr, physaddr_t *paddr, int verbose)
> > +{
> > + ulong *pgd_base, *pgd_ptr, pgd_val;
> > + ulong *pmd_base, *pmd_ptr, pmd_val;
> > + ulong *pte_base, *pte_ptr, pte_val;
> > +
> > + if (verbose)
> > + fprintf(fp, "PAGE DIRECTORY: %lx\n", pgd);
> > +
> > + pgd_base = (ulong *)pgd;
> > + FILL_PGD(pgd_base, KVADDR, machdep->ptrs_per_pgd * sizeof(ulong));
> > + pgd_ptr = pgd_base + (((vaddr) >> PGDIR_SHIFT_L3_16K) &
> > (machdep->ptrs_per_pgd - 1));
> > + pgd_val = ULONG(machdep->pgd + PGDIR_OFFSET_L3_16K(pgd_ptr));
> > + if (verbose)
> > + fprintf(fp, " PGD: %lx => %lx\n",
(ulong)pgd_ptr,
> > pgd_val);
> > + if (!pgd_val)
> > + goto no_page;
> > +
> > + /*
> > + * #define __PAGETABLE_PUD_FOLDED
> > + */
> > +
> > + pmd_base = (ulong *)PTOV(PTE_TO_PHYS(pgd_val));
> > + FILL_PMD(pmd_base, KVADDR, PTRS_PER_PMD_L3_16K * sizeof(ulong));
> > + pmd_ptr = pmd_base + (((vaddr) >> PMD_SHIFT_L3_16K) &
(PTRS_PER_PMD_L3_16K -
> > 1));
> > + pmd_val = ULONG(machdep->pmd + PAGEOFFSET(pmd_ptr));
> > + if (verbose)
> > + fprintf(fp, " PMD: %lx => %lx\n",
(ulong)pmd_ptr,
> > pmd_val);
> > + if (!pmd_val)
> > + goto no_page;
> > +
> > + if ((pmd_val & PMD_TYPE_MASK) == PMD_TYPE_SECT) {
> > + ulong sectionbase = PTE_TO_PHYS(pmd_val) &
SECTION_PAGE_MASK_32MB;
> > + if (verbose) {
> > + fprintf(fp, " PAGE: %lx (32MB%s)\n\n",
sectionbase,
> > + IS_ZEROPAGE(sectionbase) ? ", ZERO
PAGE" : "");
> > + arm64_translate_pte(pmd_val, 0, 0);
> > + }
> > + *paddr = sectionbase + (vaddr & ~SECTION_PAGE_MASK_32MB);
> > + return TRUE;
> > + }
> > +
> > + pte_base = (ulong *)PTOV(PTE_TO_PHYS(pmd_val));
> > + FILL_PTBL(pte_base, KVADDR, PTRS_PER_PTE_L3_16K * sizeof(ulong));
> > + pte_ptr = pte_base + (((vaddr) >> machdep->pageshift) &
> > (PTRS_PER_PTE_L3_16K - 1));
> > + pte_val = ULONG(machdep->ptbl + PAGEOFFSET(pte_ptr));
> > + if (verbose)
> > + fprintf(fp, " PTE: %lx => %lx\n",
(ulong)pte_ptr,
> > pte_val);
> > + if (!pte_val)
> > + goto no_page;
> > +
> > + if (pte_val & PTE_VALID) {
> > + *paddr = PTE_TO_PHYS(pte_val) + PAGEOFFSET(vaddr);
> > + if (verbose) {
> > + fprintf(fp, " PAGE: %lx %s\n\n",
PAGEBASE(*paddr),
> > + IS_ZEROPAGE(PAGEBASE(*paddr)) ? "(ZERO
PAGE)" : "");
> > + arm64_translate_pte(pte_val, 0, 0);
> > + }
> > + } else {
> > + if (IS_UVADDR(vaddr, NULL))
> > + *paddr = pte_val;
> > + if (verbose) {
> > + fprintf(fp, "\n");
> > + arm64_translate_pte(pte_val, 0, 0);
> > + }
> > + goto no_page;
> > + }
> > +
> > + return TRUE;
> > +no_page:
> > + return FALSE;
> > +}
> > +
> > static int
> > arm64_vtop_3level_4k(ulong pgd, ulong vaddr, physaddr_t *paddr, int verbose)
> > {
> > diff --git a/defs.h b/defs.h
> > index 49e6923ede54..1b7649d9f05c 100644
> > --- a/defs.h
> > +++ b/defs.h
> > @@ -3302,6 +3302,21 @@ typedef signed int s32;
> > #define PGDIR_MASK_48VA (~(PGDIR_SIZE_48VA - 1))
> > #define PGDIR_OFFSET_48VA(X) (((ulong)(X)) & (PGDIR_SIZE_48VA - 1))
> >
> > +/*
> > + * 3-levels / 16K pages
> > + * 47-bit VA
> > + */
> > +#define PTRS_PER_PGD_L3_16K ((1UL) << (47 - 36))
> > +#define PTRS_PER_PMD_L3_16K (2048)
> > +#define PTRS_PER_PTE_L3_16K (2048)
> > +#define PGDIR_SHIFT_L3_16K (36)
> > +#define PGDIR_SIZE_L3_16K ((1UL) << PGDIR_SHIFT_L3_16K)
> > +#define PGDIR_MASK_L3_16K (~(PGDIR_SIZE_L3_16K-1))
> > +#define PMD_SHIFT_L3_16K (25)
> > +#define PMD_SIZE_L3_16K (1UL << PMD_SHIFT_L3_16K)
> > +#define PMD_MASK_L3_16K (~(PMD_SIZE_L3_16K-1))
> > +#define PGDIR_OFFSET_L3_16K(X) (((ulong)(X)) & ((machdep->ptrs_per_pgd
* 8) - 1))
> > +
> > /*
> > * 3-levels / 64K pages
> > */
> > @@ -3367,6 +3382,7 @@ typedef signed int s32;
> > #define HAS_PHYSVIRT_OFFSET (0x800)
> > #define OVERFLOW_STACKS (0x1000)
> > #define ARM64_MTE (0x2000)
> > +#define VM_L3_16K (0x4000)
> >
> > /*
> > * Get kimage_voffset from /dev/crash
> --
> Crash-utility mailing list -- devel(a)lists.crash-utility.osci.io
> To unsubscribe send an email to devel-leave(a)lists.crash-utility.osci.io
> https://${domain_name}/admin/lists/devel.lists.crash-utility.osci.io/
> Contribution Guidelines:
https://github.com/crash-utility/crash/wiki