Starting with kernel v4.6, page tables store physical addresses in the
upper levels of the page table tree for book3s. Also, in book3s, the
page table for 64K page size is switched to 4 levels (pud is not folded
anymore) to support both hash and radix page tables in a single kernel.
In addition, the pmd masked bits value has changed since kernel v4.5 for
book3e. This patch makes the corresponding updates here.
Signed-off-by: Hari Bathini <hbathini@linux.vnet.ibm.com>
---
defs.h | 34 +++++++++++++++++---
ppc64.c | 108 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
2 files changed, 129 insertions(+), 13 deletions(-)
diff --git a/defs.h b/defs.h
index ce49f18..a56fa65 100644
--- a/defs.h
+++ b/defs.h
@@ -3812,6 +3812,8 @@ struct efi_memory_desc_t {
#define PUD_INDEX_SIZE_L4_4K_3_7 9
#define PTE_RPN_SHIFT_L4_4K 17
#define PTE_RPN_SHIFT_L4_4K_4_5 18
+#define PGD_MASKED_BITS_4K 0
+#define PUD_MASKED_BITS_4K 0
#define PMD_MASKED_BITS_4K 0
/* 64K pagesize */
@@ -3822,15 +3824,30 @@ struct efi_memory_desc_t {
#define PTE_INDEX_SIZE_L4_64K_3_10 8
#define PMD_INDEX_SIZE_L4_64K_3_10 10
#define PGD_INDEX_SIZE_L4_64K_3_10 12
+#define PMD_INDEX_SIZE_L4_64K_4_6 5
+#define PUD_INDEX_SIZE_L4_64K_4_6 5
#define PTE_RPN_SHIFT_L4_64K_V1 32
#define PTE_RPN_SHIFT_L4_64K_V2 30
#define PTE_RPN_SHIFT_L4_BOOK3E_64K 28
#define PTE_RPN_SHIFT_L4_BOOK3E_4K 24
+#define PGD_MASKED_BITS_64K 0
+#define PUD_MASKED_BITS_64K 0x1ff
#define PMD_MASKED_BITS_64K 0x1ff
+#define PMD_MASKED_BITS_64K_3_11 0xfff
+#define PMD_MASKED_BITS_BOOK3E_64K_4_5 0x7ff
+#define PGD_MASKED_BITS_64K_4_6 0xc0000000000000ffUL
+#define PUD_MASKED_BITS_64K_4_6 0xc0000000000000ffUL
+#define PMD_MASKED_BITS_64K_4_6 0xc0000000000000ffUL
+
+#define PTE_RPN_MASK_DEFAULT 0xffffffffffffffffUL
+#define PTE_RPN_SIZE_L4_4_6 (PAGESIZE() == PPC64_64K_PAGE_SIZE ? 41 : 45)
+#define PTE_RPN_MASK_L4_4_6 (((1UL << PTE_RPN_SIZE_L4_4_6) - 1) << PAGESHIFT())
+#define PTE_RPN_SHIFT_L4_4_6 PAGESHIFT()
#define PD_HUGE 0x8000000000000000
#define HUGE_PTE_MASK 0x03
#define HUGEPD_SHIFT_MASK 0x3f
+#define HUGEPD_ADDR_MASK (0x0fffffffffffffffUL & ~HUGEPD_SHIFT_MASK)
#define PGD_MASK_L4 \
(THIS_KERNEL_VERSION >= LINUX(3,10,0) ? (machdep->ptrs_per_pgd - 1) : 0x1ff)
@@ -3855,6 +3872,9 @@ struct efi_memory_desc_t {
#define _PAGE_DIRTY (machdep->machspec->_page_dirty) /* C: page changed */
#define _PAGE_ACCESSED (machdep->machspec->_page_accessed) /* R: page referenced */
+#define PTE_RPN_MASK (machdep->machspec->pte_rpn_mask)
+#define PTE_RPN_SHIFT (machdep->machspec->pte_rpn_shift)
+
#define TIF_SIGPENDING (2)
#define SWP_TYPE(entry) (((entry) >> 1) & 0x7f)
@@ -5690,7 +5710,10 @@ struct machine_specific {
uint l1_shift;
uint pte_rpn_shift;
- uint l2_masked_bits;
+ ulong pte_rpn_mask;
+ ulong pgd_masked_bits;
+ ulong pud_masked_bits;
+ ulong pmd_masked_bits;
int vmemmap_cnt;
int vmemmap_psize;
@@ -5714,10 +5737,11 @@ void ppc64_init(int);
void ppc64_dump_machdep_table(ulong);
#define display_idt_table() \
error(FATAL, "-d option is not applicable to PowerPC architecture\n")
-#define KSYMS_START (0x1)
-#define VM_ORIG (0x2)
-#define VMEMMAP_AWARE (0x4)
-#define BOOK3E (0x8)
+#define KSYMS_START (0x1)
+#define VM_ORIG (0x2)
+#define VMEMMAP_AWARE (0x4)
+#define BOOK3E (0x8)
+#define PHYS_ENTRY_L4 (0x10)
#define REGION_SHIFT (60UL)
#define REGION_ID(addr) (((unsigned long)(addr)) >> REGION_SHIFT)
diff --git a/ppc64.c b/ppc64.c
index 8733d64..4e18513 100644
--- a/ppc64.c
+++ b/ppc64.c
@@ -59,6 +59,9 @@ static uint get_ptetype(ulong pte);
static int is_hugepage(ulong pte);
static int is_hugepd(ulong pte);
static ulong hugepage_dir(ulong pte);
+static ulong pgd_page_vaddr_l4(ulong pgd);
+static ulong pud_page_vaddr_l4(ulong pud);
+static ulong pmd_page_vaddr_l4(ulong pmd);
static inline uint get_ptetype(ulong pte)
{
@@ -127,10 +130,57 @@ static inline ulong hugepage_dir(ulong pte)
if ((machdep->flags & BOOK3E) ||
(THIS_KERNEL_VERSION < LINUX(3,10,0)))
return (ulong)((pte & ~HUGEPD_SHIFT_MASK) | PD_HUGE);
- else
+ else if (machdep->flags & PHYS_ENTRY_L4)
+ return PTOV(pte & ~HUGEPD_ADDR_MASK);
+ else /* BOOK3S, kernel v3.10 - v4.4 */
return (ulong)(pte & ~HUGEPD_SHIFT_MASK);
}
+static inline ulong pgd_page_vaddr_l4(ulong pgd)
+{
+ ulong pgd_val;
+
+ pgd_val = (pgd & ~machdep->machspec->pgd_masked_bits);
+ if (machdep->flags & PHYS_ENTRY_L4) {
+ /*
+ * physical address is stored starting from kernel v4.6
+ */
+ pgd_val = PTOV(pgd_val);
+ }
+
+ return pgd_val;
+}
+
+static inline ulong pud_page_vaddr_l4(ulong pud)
+{
+ ulong pud_val;
+
+ pud_val = (pud & ~machdep->machspec->pud_masked_bits);
+ if (machdep->flags & PHYS_ENTRY_L4) {
+ /*
+ * physical address is stored starting from kernel v4.6
+ */
+ pud_val = PTOV(pud_val);
+ }
+
+ return pud_val;
+}
+
+static inline ulong pmd_page_vaddr_l4(ulong pmd)
+{
+ ulong pmd_val;
+
+ pmd_val = (pmd & ~machdep->machspec->pmd_masked_bits);
+ if (machdep->flags & PHYS_ENTRY_L4) {
+ /*
+ * physical address is stored starting from kernel v4.6
+ */
+ pmd_val = PTOV(pmd_val);
+ }
+
+ return pmd_val;
+}
+
static int book3e_is_kvaddr(ulong addr)
{
return (addr >= BOOK3E_VMBASE);
@@ -322,10 +372,12 @@ ppc64_init(int when)
* Starting with kernel v4.6, to accommodate both
* radix and hash MMU modes in a single kernel,
* _PAGE_PTE & _PAGE_PRESENT page flags are changed.
+ * Also, page table entries store physical addresses.
*/
if (THIS_KERNEL_VERSION >= LINUX(4,6,0)) {
m->_page_pte = 0x1UL << 62;
m->_page_present = 0x1UL << 63;
+ machdep->flags |= PHYS_ENTRY_L4;
}
}
@@ -346,7 +398,14 @@ ppc64_init(int when)
struct machine_specific *m = machdep->machspec;
if (machdep->pagesize == 65536) {
/* 64K pagesize */
- if (THIS_KERNEL_VERSION >= LINUX(3,10,0)) {
+ if (!(machdep->flags & BOOK3E) &&
+ (THIS_KERNEL_VERSION >= LINUX(4,6,0))) {
+ m->l1_index_size = PTE_INDEX_SIZE_L4_64K_3_10;
+ m->l2_index_size = PMD_INDEX_SIZE_L4_64K_4_6;
+ m->l3_index_size = PUD_INDEX_SIZE_L4_64K_4_6;
+ m->l4_index_size = PGD_INDEX_SIZE_L4_64K_3_10;
+
+ } else if (THIS_KERNEL_VERSION >= LINUX(3,10,0)) {
m->l1_index_size = PTE_INDEX_SIZE_L4_64K_3_10;
m->l2_index_size = PMD_INDEX_SIZE_L4_64K_3_10;
m->l3_index_size = PUD_INDEX_SIZE_L4_64K;
@@ -358,10 +417,27 @@ ppc64_init(int when)
m->l3_index_size = PUD_INDEX_SIZE_L4_64K;
m->l4_index_size = PGD_INDEX_SIZE_L4_64K;
}
+
if (!(machdep->flags & BOOK3E))
m->pte_rpn_shift = symbol_exists("demote_segment_4k") ?
PTE_RPN_SHIFT_L4_64K_V2 : PTE_RPN_SHIFT_L4_64K_V1;
- m->l2_masked_bits = PMD_MASKED_BITS_64K;
+
+ if (!(machdep->flags & BOOK3E) &&
+ (THIS_KERNEL_VERSION >= LINUX(4,6,0))) {
+ m->pgd_masked_bits = PGD_MASKED_BITS_64K_4_6;
+ m->pud_masked_bits = PUD_MASKED_BITS_64K_4_6;
+ m->pmd_masked_bits = PMD_MASKED_BITS_64K_4_6;
+ } else {
+ m->pgd_masked_bits = PGD_MASKED_BITS_64K;
+ m->pud_masked_bits = PUD_MASKED_BITS_64K;
+ if ((machdep->flags & BOOK3E) &&
+ (THIS_KERNEL_VERSION >= LINUX(4,5,0)))
+ m->pmd_masked_bits = PMD_MASKED_BITS_BOOK3E_64K_4_5;
+ else if (THIS_KERNEL_VERSION >= LINUX(3,11,0))
+ m->pmd_masked_bits = PMD_MASKED_BITS_64K_3_11;
+ else
+ m->pmd_masked_bits = PMD_MASKED_BITS_64K;
+ }
} else {
/* 4K pagesize */
m->l1_index_size = PTE_INDEX_SIZE_L4_4K;
@@ -371,12 +447,22 @@ ppc64_init(int when)
else
m->l3_index_size = PUD_INDEX_SIZE_L4_4K;
m->l4_index_size = PGD_INDEX_SIZE_L4_4K;
+
if (machdep->flags & BOOK3E)
m->pte_rpn_shift = PTE_RPN_SHIFT_L4_BOOK3E_4K;
else
m->pte_rpn_shift = THIS_KERNEL_VERSION >= LINUX(4,5,0) ?
PTE_RPN_SHIFT_L4_4K_4_5 : PTE_RPN_SHIFT_L4_4K;
- m->l2_masked_bits = PMD_MASKED_BITS_4K;
+ m->pgd_masked_bits = PGD_MASKED_BITS_4K;
+ m->pud_masked_bits = PUD_MASKED_BITS_4K;
+ m->pmd_masked_bits = PMD_MASKED_BITS_4K;
+ }
+
+ m->pte_rpn_mask = PTE_RPN_MASK_DEFAULT;
+ if (!(machdep->flags & BOOK3E) &&
+ (THIS_KERNEL_VERSION >= LINUX(4,6,0))) {
+ m->pte_rpn_mask = PTE_RPN_MASK_L4_4_6;
+ m->pte_rpn_shift = PTE_RPN_SHIFT_L4_4_6;
}
/* Compute ptrs per each level */
@@ -645,8 +731,11 @@ ppc64_dump_machdep_table(ulong arg)
fprintf(fp, " l3_shift: %d\n", machdep->machspec->l3_shift);
fprintf(fp, " l2_shift: %d\n", machdep->machspec->l2_shift);
fprintf(fp, " l1_shift: %d\n", machdep->machspec->l1_shift);
+ fprintf(fp, " pte_rpn_mask: %lx\n", machdep->machspec->pte_rpn_mask);
fprintf(fp, " pte_rpn_shift: %d\n", machdep->machspec->pte_rpn_shift);
- fprintf(fp, " l2_masked_bits: %x\n", machdep->machspec->l2_masked_bits);
+ fprintf(fp, " pgd_masked_bits: %lx\n", machdep->machspec->pgd_masked_bits);
+ fprintf(fp, " pud_masked_bits: %lx\n", machdep->machspec->pud_masked_bits);
+ fprintf(fp, " pmd_masked_bits: %lx\n", machdep->machspec->pmd_masked_bits);
fprintf(fp, " vmemmap_base: ");
if (machdep->machspec->vmemmap_base)
fprintf(fp, "%lx\n", machdep->machspec->vmemmap_base);
@@ -786,6 +875,7 @@ ppc64_vtop_level4(ulong vaddr, ulong *level4, physaddr_t *paddr, int verbose)
/* Sometimes we don't have level3 pagetable entries */
if (machdep->machspec->l3_index_size != 0) {
+ pgd_pte = pgd_page_vaddr_l4(pgd_pte);
page_upper = (ulong *)((ulong *)pgd_pte + PUD_OFFSET_L4(vaddr));
FILL_PUD(PAGEBASE(pgd_pte), KVADDR, PAGESIZE());
pud_pte = ULONG(machdep->pud + PAGEOFFSET(page_upper));
@@ -805,6 +895,7 @@ ppc64_vtop_level4(ulong vaddr, ulong *level4, physaddr_t *paddr, int verbose)
pud_pte = pgd_pte;
}
+ pud_pte = pud_page_vaddr_l4(pud_pte);
page_middle = (ulong *)((ulong *)pud_pte + PMD_OFFSET_L4(vaddr));
FILL_PMD(PAGEBASE(pud_pte), KVADDR, PAGESIZE());
pmd_pte = ULONG(machdep->pmd + PAGEOFFSET(page_middle));
@@ -822,7 +913,8 @@ ppc64_vtop_level4(ulong vaddr, ulong *level4, physaddr_t *paddr, int verbose)
goto out;
}
- page_table = (ulong *)(pmd_pte & ~(machdep->machspec->l2_masked_bits))
+ pmd_pte = pmd_page_vaddr_l4(pmd_pte);
+ page_table = (ulong *)(pmd_pte)
+ (BTOP(vaddr) & (machdep->machspec->ptrs_per_l1 - 1));
if (verbose)
fprintf(fp, " PMD: %lx => %lx\n",(ulong)page_middle,
@@ -867,10 +959,10 @@ out:
return FALSE;
}
- *paddr = PAGEBASE(PTOB(pte >> machdep->machspec->pte_rpn_shift))
+ *paddr = PAGEBASE(PTOB((pte & PTE_RPN_MASK) >> PTE_RPN_SHIFT))
+ (vaddr & ((1UL << pdshift) - 1));
} else {
- *paddr = PAGEBASE(PTOB(pte >> machdep->machspec->pte_rpn_shift))
+ *paddr = PAGEBASE(PTOB((pte & PTE_RPN_MASK) >> PTE_RPN_SHIFT))
+ PAGEOFFSET(vaddr);
}