As both radix and hash MMUs are supported in a single kernel on
Power ISA 3.0 based server processors, identify the current MMU
type and set the page table index values accordingly. Also, start
using the vmemmap kernel symbol to get the vmemmap base value.
Signed-off-by: Hari Bathini <hbathini(a)linux.vnet.ibm.com>
---
defs.h | 13 ++++++++++
ppc64.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++------------
2 files changed, 78 insertions(+), 16 deletions(-)
diff --git a/defs.h b/defs.h
index 142149e..0b6c72d 100644
--- a/defs.h
+++ b/defs.h
@@ -3814,6 +3814,10 @@ struct efi_memory_desc_t {
#define PUD_INDEX_SIZE_L4_4K 7
#define PGD_INDEX_SIZE_L4_4K 9
#define PUD_INDEX_SIZE_L4_4K_3_7 9
+#define PTE_INDEX_SIZE_RADIX_4K 9
+#define PMD_INDEX_SIZE_RADIX_4K 9
+#define PUD_INDEX_SIZE_RADIX_4K 9
+#define PGD_INDEX_SIZE_RADIX_4K 13
#define PTE_RPN_SHIFT_L4_4K 17
#define PTE_RPN_SHIFT_L4_4K_4_5 18
#define PGD_MASKED_BITS_4K 0
@@ -3830,6 +3834,10 @@ struct efi_memory_desc_t {
#define PGD_INDEX_SIZE_L4_64K_3_10 12
#define PMD_INDEX_SIZE_L4_64K_4_6 5
#define PUD_INDEX_SIZE_L4_64K_4_6 5
+#define PTE_INDEX_SIZE_RADIX_64K 5
+#define PMD_INDEX_SIZE_RADIX_64K 9
+#define PUD_INDEX_SIZE_RADIX_64K 9
+#define PGD_INDEX_SIZE_RADIX_64K 13
#define PTE_RPN_SHIFT_L4_64K_V1 32
#define PTE_RPN_SHIFT_L4_64K_V2 30
#define PTE_RPN_SHIFT_L4_BOOK3E_64K 28
@@ -5753,6 +5761,11 @@ void ppc64_dump_machdep_table(ulong);
#define BOOK3E (0x8)
#define PHYS_ENTRY_L4 (0x10)
#define SWAP_ENTRY_L4 (0x20)
+/*
+ * The flag bit for radix MMU in cpu_spec.mmu_features
+ * in the kernel is also 0x40.
+ */
+#define RADIX_MMU (0x40)
#define REGION_SHIFT (60UL)
#define REGION_ID(addr) (((unsigned long)(addr)) >> REGION_SHIFT)
diff --git a/ppc64.c b/ppc64.c
index 17c9a19..8e93979 100644
--- a/ppc64.c
+++ b/ppc64.c
@@ -71,7 +71,8 @@ static inline uint get_ptetype(ulong pte)
if (is_hugepage(pte))
pte_type = 1;
- else if (is_hugepd(pte))
+ else if (!(machdep->flags & RADIX_MMU) &&
+ (PAGESIZE() != PPC64_64K_PAGE_SIZE) && is_hugepd(pte))
pte_type = 2;
return pte_type;
@@ -298,6 +299,7 @@ ppc64_init(int when)
machdep->kvbase = BOOK3E_VMBASE;
} else
machdep->kvbase = symbol_value("_stext");
+
if (symbol_exists("__hash_page_64K"))
machdep->pagesize = PPC64_64K_PAGE_SIZE;
else
@@ -336,11 +338,18 @@ ppc64_init(int when)
machdep->value_to_symbol = generic_machdep_value_to_symbol;
machdep->get_kvaddr_ranges = ppc64_get_kvaddr_ranges;
machdep->init_kernel_pgd = NULL;
+
if (symbol_exists("vmemmap_populate")) {
+ if (symbol_exists("vmemmap")) {
+ get_symbol_data("vmemmap", sizeof(void *),
+ &machdep->machspec->vmemmap_base);
+ } else
+ machdep->machspec->vmemmap_base =
+ VMEMMAP_REGION_ID << REGION_SHIFT;
+
machdep->flags |= VMEMMAP;
- machdep->machspec->vmemmap_base =
- VMEMMAP_REGION_ID << REGION_SHIFT;
}
+
machdep->get_irq_affinity = generic_get_irq_affinity;
machdep->show_interrupts = generic_show_interrupts;
break;
@@ -350,6 +359,23 @@ ppc64_init(int when)
struct machine_specific *m = machdep->machspec;
/*
+ * On Power ISA 3.0 based server processors, a kernel can
+ * run with either the radix MMU or the hash MMU. Set the
+ * RADIX_MMU flag if the radix MMU is in use.
+ */
+ if (symbol_exists("cur_cpu_spec") &&
+ MEMBER_EXISTS("cpu_spec", "mmu_features")) {
+ ulong cur_cpu_spec;
+ uint mmu_features, offset;
+
+ get_symbol_data("cur_cpu_spec", sizeof(void *), &cur_cpu_spec);
+ offset = MEMBER_OFFSET("cpu_spec", "mmu_features");
+ readmem(cur_cpu_spec + offset, KVADDR, &mmu_features,
+ sizeof(uint), "cpu mmu features", FAULT_ON_ERROR);
+ machdep->flags |= (mmu_features & RADIX_MMU);
+ }
+
+ /*
* Starting with v3.14 we no longer use _PAGE_COHERENT
* bit as it is always set on hash64 and on platforms
* that cannot always set it, _PAGE_NO_CACHE and
@@ -410,7 +436,13 @@ ppc64_init(int when)
struct machine_specific *m = machdep->machspec;
if (machdep->pagesize == 65536) {
/* 64K pagesize */
- if (!(machdep->flags & BOOK3E) &&
+ if (machdep->flags & RADIX_MMU) {
+ m->l1_index_size = PTE_INDEX_SIZE_RADIX_64K;
+ m->l2_index_size = PMD_INDEX_SIZE_RADIX_64K;
+ m->l3_index_size = PUD_INDEX_SIZE_RADIX_64K;
+ m->l4_index_size = PGD_INDEX_SIZE_RADIX_64K;
+
+ } else if (!(machdep->flags & BOOK3E) &&
(THIS_KERNEL_VERSION >= LINUX(4,6,0))) {
m->l1_index_size = PTE_INDEX_SIZE_L4_64K_3_10;
m->l2_index_size = PMD_INDEX_SIZE_L4_64K_4_6;
@@ -452,19 +484,28 @@ ppc64_init(int when)
}
} else {
/* 4K pagesize */
- m->l1_index_size = PTE_INDEX_SIZE_L4_4K;
- m->l2_index_size = PMD_INDEX_SIZE_L4_4K;
- if (THIS_KERNEL_VERSION >= LINUX(3,7,0))
- m->l3_index_size = PUD_INDEX_SIZE_L4_4K_3_7;
- else
- m->l3_index_size = PUD_INDEX_SIZE_L4_4K;
- m->l4_index_size = PGD_INDEX_SIZE_L4_4K;
+ if (machdep->flags & RADIX_MMU) {
+ m->l1_index_size = PTE_INDEX_SIZE_RADIX_4K;
+ m->l2_index_size = PMD_INDEX_SIZE_RADIX_4K;
+ m->l3_index_size = PUD_INDEX_SIZE_RADIX_4K;
+ m->l4_index_size = PGD_INDEX_SIZE_RADIX_4K;
+
+ } else {
+ m->l1_index_size = PTE_INDEX_SIZE_L4_4K;
+ m->l2_index_size = PMD_INDEX_SIZE_L4_4K;
+ if (THIS_KERNEL_VERSION >= LINUX(3,7,0))
+ m->l3_index_size = PUD_INDEX_SIZE_L4_4K_3_7;
+ else
+ m->l3_index_size = PUD_INDEX_SIZE_L4_4K;
+ m->l4_index_size = PGD_INDEX_SIZE_L4_4K;
+
+ if (machdep->flags & BOOK3E)
+ m->pte_rpn_shift = PTE_RPN_SHIFT_L4_BOOK3E_4K;
+ else
+ m->pte_rpn_shift = THIS_KERNEL_VERSION >= LINUX(4,5,0) ?
+ PTE_RPN_SHIFT_L4_4K_4_5 : PTE_RPN_SHIFT_L4_4K;
+ }
- if (machdep->flags & BOOK3E)
- m->pte_rpn_shift = PTE_RPN_SHIFT_L4_BOOK3E_4K;
- else
- m->pte_rpn_shift = THIS_KERNEL_VERSION >= LINUX(4,5,0) ?
- PTE_RPN_SHIFT_L4_4K_4_5 : PTE_RPN_SHIFT_L4_4K;
m->pgd_masked_bits = PGD_MASKED_BITS_4K;
m->pud_masked_bits = PUD_MASKED_BITS_4K;
m->pmd_masked_bits = PMD_MASKED_BITS_4K;
@@ -648,6 +689,14 @@ ppc64_dump_machdep_table(ulong arg)
fprintf(fp, "%sVMEMMAP", others++ ? "|" : "");
if (machdep->flags & VMEMMAP_AWARE)
fprintf(fp, "%sVMEMMAP_AWARE", others++ ? "|" : "");
+ if (machdep->flags & BOOK3E)
+ fprintf(fp, "%sBOOK3E", others++ ? "|" : "");
+ if (machdep->flags & PHYS_ENTRY_L4)
+ fprintf(fp, "%sPHYS_ENTRY_L4", others++ ? "|" : "");
+ if (machdep->flags & SWAP_ENTRY_L4)
+ fprintf(fp, "%sSWAP_ENTRY_L4", others++ ? "|" : "");
+ if (machdep->flags & RADIX_MMU)
+ fprintf(fp, "%sRADIX_MMU", others++ ? "|" : "");
fprintf(fp, ")\n");
fprintf(fp, " kvbase: %lx\n", machdep->kvbase);