Starting with kernel v3.14, the _PAGE_COHERENT page flag is no longer used.
In kernel v4.5, a new page flag, _PAGE_PTE, is introduced to distinguish
between PTEs and pointers. Also, the pte RPN shift value for 4K page size
is increased by one to accommodate page soft dirty tracking. This patch
handles these changes. While we are here, address a TODO for huge
pages and also replace all instances of pte_shift with pte_rpn_shift to
be in sync with how it is referred to in the kernel.
Signed-off-by: Hari Bathini <hbathini(a)linux.vnet.ibm.com>
---
defs.h | 17 +++++---
ppc64.c | 137 +++++++++++++++++++++++++++++++++++++++++++++++++--------------
2 files changed, 116 insertions(+), 38 deletions(-)
diff --git a/defs.h b/defs.h
index 6e537b9..ce49f18 100644
--- a/defs.h
+++ b/defs.h
@@ -3787,7 +3787,7 @@ struct efi_memory_desc_t {
#define PMD_MASK (~((1UL << PMD_SHIFT) - 1))
/* shift to put page number into pte */
-#define PTE_SHIFT 16
+#define PTE_RPN_SHIFT_DEFAULT 16
#define PMD_TO_PTEPAGE_SHIFT 2 /* Used for 2.6 or later */
#define PTE_INDEX_SIZE 9
@@ -3810,7 +3810,8 @@ struct efi_memory_desc_t {
#define PUD_INDEX_SIZE_L4_4K 7
#define PGD_INDEX_SIZE_L4_4K 9
#define PUD_INDEX_SIZE_L4_4K_3_7 9
-#define PTE_SHIFT_L4_4K 17
+#define PTE_RPN_SHIFT_L4_4K 17
+#define PTE_RPN_SHIFT_L4_4K_4_5 18
#define PMD_MASKED_BITS_4K 0
/* 64K pagesize */
@@ -3821,10 +3822,10 @@ struct efi_memory_desc_t {
#define PTE_INDEX_SIZE_L4_64K_3_10 8
#define PMD_INDEX_SIZE_L4_64K_3_10 10
#define PGD_INDEX_SIZE_L4_64K_3_10 12
-#define PTE_SHIFT_L4_64K_V1 32
-#define PTE_SHIFT_L4_64K_V2 30
-#define PTE_SHIFT_L4_BOOK3E_64K 28
-#define PTE_SHIFT_L4_BOOK3E_4K 24
+#define PTE_RPN_SHIFT_L4_64K_V1 32
+#define PTE_RPN_SHIFT_L4_64K_V2 30
+#define PTE_RPN_SHIFT_L4_BOOK3E_64K 28
+#define PTE_RPN_SHIFT_L4_BOOK3E_4K 24
#define PMD_MASKED_BITS_64K 0x1ff
#define PD_HUGE 0x8000000000000000
@@ -3843,6 +3844,7 @@ struct efi_memory_desc_t {
#define PMD_OFFSET_L4(vaddr) \
((vaddr >> (machdep->machspec->l2_shift)) &
(machdep->machspec->ptrs_per_l2 - 1))
+#define _PAGE_PTE (machdep->machspec->_page_pte) /* distinguishes
PTEs from pointers */
#define _PAGE_PRESENT (machdep->machspec->_page_present) /* software: pte
contains a translation */
#define _PAGE_USER (machdep->machspec->_page_user) /* matches one of
the PP bits */
#define _PAGE_RW (machdep->machspec->_page_rw) /* software: user
write access allowed */
@@ -5687,13 +5689,14 @@ struct machine_specific {
uint l2_shift;
uint l1_shift;
- uint pte_shift;
+ uint pte_rpn_shift;
uint l2_masked_bits;
int vmemmap_cnt;
int vmemmap_psize;
ulong vmemmap_base;
struct ppc64_vmemmap *vmemmap_list;
+ ulong _page_pte;
ulong _page_present;
ulong _page_user;
ulong _page_rw;
diff --git a/ppc64.c b/ppc64.c
index 511c189..6b89eaa 100644
--- a/ppc64.c
+++ b/ppc64.c
@@ -72,33 +72,63 @@ static inline uint get_ptetype(ulong pte)
return pte_type;
}
-static int is_hugepage(ulong pte)
+static inline int is_hugepage(ulong pte)
{
- /*
- * leaf pte for huge page, bottom two bits != 00
- */
- return ((pte & HUGE_PTE_MASK) != 0x0);
+ if ((machdep->flags & BOOK3E) ||
+ (THIS_KERNEL_VERSION < LINUX(3,10,0))) {
+ /*
+ * hugepage support via hugepd for book3e and
+ * also kernel v3.9 & below.
+ */
+ return 0;
+
+ } else if (THIS_KERNEL_VERSION >= LINUX(4,5,0)) {
+ /*
+ * leaf pte for huge page, if _PAGE_PTE is set.
+ */
+ return !!(pte & _PAGE_PTE);
+
+ } else { /* BOOK3S, kernel v3.10 - v4.4 */
+
+ /*
+ * leaf pte for huge page, bottom two bits != 00
+ */
+ return ((pte & HUGE_PTE_MASK) != 0x0);
+ }
}
static inline int is_hugepd(ulong pte)
{
- if (THIS_KERNEL_VERSION >= LINUX(3,10,0)) {
+ if ((machdep->flags & BOOK3E) ||
+ (THIS_KERNEL_VERSION < LINUX(3,10,0)))
+ return ((pte & PD_HUGE) == 0x0);
+
+ else if (THIS_KERNEL_VERSION >= LINUX(4,5,0)) {
+ /*
+ * hugepd pointer, if _PAGE_PTE is not set and
+ * hugepd shift mask is set.
+ */
+ return (!(pte & _PAGE_PTE) &&
+ ((pte & HUGEPD_SHIFT_MASK) != 0));
+
+ } else { /* BOOK3S, kernel v3.10 - v4.4 */
+
/*
* hugepd pointer, bottom two bits == 00 and next 4 bits
* indicate size of table
- */
+ */
return (((pte & HUGE_PTE_MASK) == 0x0) &&
((pte & HUGEPD_SHIFT_MASK) != 0));
- } else
- return ((pte & PD_HUGE) == 0x0);
+ }
}
static inline ulong hugepage_dir(ulong pte)
{
- if (THIS_KERNEL_VERSION >= LINUX(3,10,0))
- return (ulong)(pte & ~HUGEPD_SHIFT_MASK);
- else
+ if ((machdep->flags & BOOK3E) ||
+ (THIS_KERNEL_VERSION < LINUX(3,10,0)))
return (ulong)((pte & ~HUGEPD_SHIFT_MASK) | PD_HUGE);
+ else
+ return (ulong)(pte & ~HUGEPD_SHIFT_MASK);
}
static int book3e_is_kvaddr(ulong addr)
@@ -122,7 +152,8 @@ struct machine_specific ppc64_machine_specific = {
.hwintrstack = { 0 },
.hwstackbuf = 0,
.hwstacksize = 0,
- .pte_shift = PTE_SHIFT,
+ .pte_rpn_shift = PTE_RPN_SHIFT_DEFAULT,
+ ._page_pte = 0x0UL,
._page_present = 0x1UL,
._page_user = 0x2UL,
._page_rw = 0x4UL,
@@ -140,7 +171,8 @@ struct machine_specific book3e_machine_specific = {
.hwintrstack = { 0 },
.hwstackbuf = 0,
.hwstacksize = 0,
- .pte_shift = PTE_SHIFT_L4_BOOK3E_64K,
+ .pte_rpn_shift = PTE_RPN_SHIFT_L4_BOOK3E_64K,
+ ._page_pte = 0x0UL,
._page_present = 0x1UL,
._page_user = 0xCUL,
._page_rw = 0x30UL,
@@ -262,6 +294,31 @@ ppc64_init(int when)
break;
case POST_GDB:
+ if (!(machdep->flags & BOOK3E)) {
+ struct machine_specific *m = machdep->machspec;
+
+ /*
+ * Starting with v3.14 we no longer use _PAGE_COHERENT
+ * bit as it is always set on hash64 and on platforms
+ * that cannot always set it, _PAGE_NO_CACHE and
+ * _PAGE_WRITETHRU can be used to infer it.
+ */
+ if (THIS_KERNEL_VERSION >= LINUX(3,14,0))
+ m->_page_coherent = 0x0UL;
+
+ /*
+ * In kernel v4.5, _PAGE_PTE bit is introduced to
+ * distinguish PTEs from pointers.
+ */
+ if (THIS_KERNEL_VERSION >= LINUX(4,5,0)) {
+ m->_page_pte = 0x1UL;
+ m->_page_present = 0x2UL;
+ m->_page_user = 0x4UL;
+ m->_page_rw = 0x8UL;
+ m->_page_guarded = 0x10UL;
+ }
+ }
+
if (!(machdep->flags & (VM_ORIG|VM_4_LEVEL))) {
if (THIS_KERNEL_VERSION >= LINUX(2,6,14)) {
machdep->flags |= VM_4_LEVEL;
@@ -292,8 +349,8 @@ ppc64_init(int when)
m->l4_index_size = PGD_INDEX_SIZE_L4_64K;
}
if (!(machdep->flags & BOOK3E))
- m->pte_shift = symbol_exists("demote_segment_4k") ?
- PTE_SHIFT_L4_64K_V2 : PTE_SHIFT_L4_64K_V1;
+ m->pte_rpn_shift = symbol_exists("demote_segment_4k") ?
+ PTE_RPN_SHIFT_L4_64K_V2 : PTE_RPN_SHIFT_L4_64K_V1;
m->l2_masked_bits = PMD_MASKED_BITS_64K;
} else {
/* 4K pagesize */
@@ -304,8 +361,11 @@ ppc64_init(int when)
else
m->l3_index_size = PUD_INDEX_SIZE_L4_4K;
m->l4_index_size = PGD_INDEX_SIZE_L4_4K;
- m->pte_shift = (machdep->flags & BOOK3E) ?
- PTE_SHIFT_L4_BOOK3E_4K : PTE_SHIFT_L4_4K;
+ if (machdep->flags & BOOK3E)
+ m->pte_rpn_shift = PTE_RPN_SHIFT_L4_BOOK3E_4K;
+ else
+ m->pte_rpn_shift = THIS_KERNEL_VERSION >= LINUX(4,5,0) ?
+ PTE_RPN_SHIFT_L4_4K_4_5 : PTE_RPN_SHIFT_L4_4K;
m->l2_masked_bits = PMD_MASKED_BITS_4K;
}
@@ -575,7 +635,7 @@ ppc64_dump_machdep_table(ulong arg)
fprintf(fp, " l3_shift: %d\n",
machdep->machspec->l3_shift);
fprintf(fp, " l2_shift: %d\n",
machdep->machspec->l2_shift);
fprintf(fp, " l1_shift: %d\n",
machdep->machspec->l1_shift);
- fprintf(fp, " pte_shift: %d\n",
machdep->machspec->pte_shift);
+ fprintf(fp, " pte_rpn_shift: %d\n",
machdep->machspec->pte_rpn_shift);
fprintf(fp, " l2_masked_bits: %x\n",
machdep->machspec->l2_masked_bits);
fprintf(fp, " vmemmap_base: ");
if (machdep->machspec->vmemmap_base)
@@ -662,7 +722,7 @@ ppc64_vtop(ulong vaddr, ulong *pgd, physaddr_t *paddr, int verbose)
if (!(pte & _PAGE_PRESENT)) {
if (pte && verbose) {
fprintf(fp, "\n");
- ppc64_translate_pte(pte, 0, PTE_SHIFT);
+ ppc64_translate_pte(pte, 0, PTE_RPN_SHIFT_DEFAULT);
}
return FALSE;
}
@@ -670,11 +730,11 @@ ppc64_vtop(ulong vaddr, ulong *pgd, physaddr_t *paddr, int verbose)
if (!pte)
return FALSE;
- *paddr = PAGEBASE(PTOB(pte >> PTE_SHIFT)) + PAGEOFFSET(vaddr);
+ *paddr = PAGEBASE(PTOB(pte >> PTE_RPN_SHIFT_DEFAULT)) + PAGEOFFSET(vaddr);
if (verbose) {
fprintf(fp, " PAGE: %lx\n\n", PAGEBASE(*paddr));
- ppc64_translate_pte(pte, 0, PTE_SHIFT);
+ ppc64_translate_pte(pte, 0, PTE_RPN_SHIFT_DEFAULT);
}
return TRUE;
@@ -693,6 +753,7 @@ ppc64_vtop_level4(ulong vaddr, ulong *level4, physaddr_t *paddr, int
verbose)
ulong *page_table;
ulong pgd_pte, pud_pte, pmd_pte;
ulong pte;
+ uint pdshift;
uint hugepage_type = 0; /* 0: regular entry; 1: huge pte; 2: huge pd */
if (verbose)
@@ -709,6 +770,7 @@ ppc64_vtop_level4(ulong vaddr, ulong *level4, physaddr_t *paddr, int
verbose)
hugepage_type = get_ptetype(pgd_pte);
if (hugepage_type) {
pte = pgd_pte;
+ pdshift = machdep->machspec->l4_shift;
goto out;
}
@@ -726,6 +788,7 @@ ppc64_vtop_level4(ulong vaddr, ulong *level4, physaddr_t *paddr, int
verbose)
hugepage_type = get_ptetype(pud_pte);
if (hugepage_type) {
pte = pud_pte;
+ pdshift = machdep->machspec->l3_shift;
goto out;
}
} else {
@@ -745,6 +808,7 @@ ppc64_vtop_level4(ulong vaddr, ulong *level4, physaddr_t *paddr, int
verbose)
hugepage_type = get_ptetype(pmd_pte);
if (hugepage_type) {
pte = pmd_pte;
+ pdshift = machdep->machspec->l2_shift;
goto out;
}
@@ -763,7 +827,7 @@ ppc64_vtop_level4(ulong vaddr, ulong *level4, physaddr_t *paddr, int
verbose)
if (!(pte & _PAGE_PRESENT)) {
if (pte && verbose) {
fprintf(fp, "\n");
- ppc64_translate_pte(pte, 0, machdep->machspec->pte_shift);
+ ppc64_translate_pte(pte, 0, machdep->machspec->pte_rpn_shift);
}
return FALSE;
}
@@ -781,13 +845,22 @@ out:
* in this directory for all the huge pages
* in this huge page directory.
*/
- readmem(hugepage_dir(pte), KVADDR, &pte, sizeof(pte),
- "hugepd_entry", RETURN_ON_ERROR);
+ ulong hugepd = hugepage_dir(pte);
+
+ readmem(hugepd, KVADDR, &pte, sizeof(pte),
+ "hugepd_entry", RETURN_ON_ERROR);
+
+ if (verbose)
+ fprintf(fp, " HUGE PD: %lx => %lx\n", hugepd, pte);
+
+ if (!pte)
+ return FALSE;
}
- /* TODO: get page offset for huge pages based on page size */
- *paddr = PAGEBASE(PTOB(pte >> machdep->machspec->pte_shift));
+
+ *paddr = PAGEBASE(PTOB(pte >> machdep->machspec->pte_rpn_shift))
+ + (vaddr & ((1UL << pdshift) - 1));
} else {
- *paddr = PAGEBASE(PTOB(pte >> machdep->machspec->pte_shift))
+ *paddr = PAGEBASE(PTOB(pte >> machdep->machspec->pte_rpn_shift))
+ PAGEOFFSET(vaddr);
}
@@ -796,7 +869,7 @@ out:
fprintf(fp, " HUGE PAGE: %lx\n\n", PAGEBASE(*paddr));
else
fprintf(fp, " PAGE: %lx\n\n", PAGEBASE(*paddr));
- ppc64_translate_pte(pte, 0, machdep->machspec->pte_shift);
+ ppc64_translate_pte(pte, 0, machdep->machspec->pte_rpn_shift);
}
return TRUE;
@@ -1251,7 +1324,7 @@ ppc64_get_task_pgd(ulong task)
* If a physaddr pointer is passed in, don't print anything.
*/
static int
-ppc64_translate_pte(ulong pte, void *physaddr, ulonglong pte_shift)
+ppc64_translate_pte(ulong pte, void *physaddr, ulonglong pte_rpn_shift)
{
int c, len1, len2, len3, others, page_present;
char buf[BUFSIZE];
@@ -1262,7 +1335,7 @@ ppc64_translate_pte(ulong pte, void *physaddr, ulonglong pte_shift)
char *arglist[MAXARGS];
ulong paddr;
- paddr = PTOB(pte >> pte_shift);
+ paddr = PTOB(pte >> pte_rpn_shift);
page_present = (pte & _PAGE_PRESENT);
if (physaddr) {
@@ -1309,6 +1382,8 @@ ppc64_translate_pte(ulong pte, void *physaddr, ulonglong pte_shift)
others = 0;
if (pte) {
+ if (pte & _PAGE_PTE)
+ fprintf(fp, "%sPTE", others++ ? "|" :
"");
if (pte & _PAGE_PRESENT)
fprintf(fp, "%sPRESENT", others++ ? "|" :
"");
if (pte & _PAGE_USER)