Michael Holzheu wrote:
Hi Dave,
For s390(x) kernels, the page table allocation method will be changed.
Instead of 3 levels, it will now be possible to allocate 4 levels.
The current implementation of the page table walk functions in crash
makes assumptions about how the page tables are allocated by the kernel.
For example, three levels are hard-coded.
This patch changes that: the page table walk is now done only according
to the s390 architecture, without assumptions about the implementation in
the kernel.
As a result, both old and new kernels are supported.
Hi Michael,
I have complete faith in you... ;-)
And this certainly simplifies things considerably, which
is always good.
But -- can you give me a warm-and-fuzzy feeling by confirming
that you tested this on a RHEL kernel? And that "make warn"
compiles cleanly?
Thanks,
Dave
---
s390.c | 144 +++++++++++++++++++++++-------------------------
s390x.c | 191 ++++++++++++++++++++++++++--------------------------------------
2 files changed, 151 insertions(+), 184 deletions(-)
diff -Naur crash-4.0-4.8/s390.c crash-4.0-4.8-page-table-walk/s390.c
--- crash-4.0-4.8/s390.c 2007-10-30 16:51:54.000000000 +0100
+++ crash-4.0-4.8-page-table-walk/s390.c 2007-11-15 15:44:07.000000000 +0100
@@ -21,17 +21,6 @@
#define S390_WORD_SIZE 4
#define S390_ADDR_MASK 0x7fffffff
-#define S390_PAGE_SHIFT 12
-#define S390_PAGE_SIZE (1UL << S390_PAGE_SHIFT)
-#define S390_PAGE_MASK (~(S390_PAGE_SIZE-1))
-
-#define S390_PGDIR_SHIFT 20
-#define S390_PGDIR_SIZE (1UL << S390_PGDIR_SHIFT)
-#define S390_PGDIR_MASK (~(S390_PGDIR_SIZE-1))
-
-#define S390_PTRS_PER_PGD 2048
-#define S390_PTRS_PER_PTE 256
-
#define S390_PMD_BASE_MASK (~((1UL<<6)-1))
#define S390_PT_BASE_MASK S390_PMD_BASE_MASK
#define S390_PAGE_BASE_MASK (~((1UL<<12)-1))
@@ -44,26 +33,10 @@
#define S390_PAGE_INVALID 0x400 /* HW invalid */
#define S390_PAGE_INVALID_MASK 0x601ULL /* for linux 2.6 */
#define S390_PAGE_INVALID_NONE 0x401ULL /* for linux 2.6 */
-#define S390_PAGE_TABLE_LEN 0xf /* only full page-tables */
-#define S390_PAGE_TABLE_INV 0x20 /* invalid page-table */
#define S390_PTE_INVALID_MASK 0x80000900
#define S390_PTE_INVALID(x) ((x) & S390_PTE_INVALID_MASK)
-#define S390_PMD_INVALID_MASK 0x80000000
-#define S390_PMD_INVALID(x) ((x) & S390_PMD_INVALID_MASK)
-
-/* pgd/pmd/pte query macros */
-#define s390_pmd_none(x) ((x) & S390_PAGE_TABLE_INV)
-#define s390_pmd_bad(x) (((x) & (~S390_PMD_BASE_MASK & \
- ~S390_PAGE_TABLE_INV)) != \
- S390_PAGE_TABLE_LEN)
-
-#define s390_pte_none(x) (((x) & (S390_PAGE_INVALID | S390_RO_S390 | \
- S390_PAGE_PRESENT)) == \
- S390_PAGE_INVALID)
-
-
#define ASYNC_STACK_SIZE STACKSIZE() // can be 4096 or 8192
#define KERNEL_STACK_SIZE STACKSIZE() // can be 4096 or 8192
@@ -73,8 +46,6 @@
* declarations of static functions
*/
static void s390_print_lowcore(char*, struct bt_info*,int);
-static unsigned long s390_pgd_offset(unsigned long, unsigned long);
-static unsigned long s390_pte_offset(unsigned long, unsigned long);
static int s390_kvtop(struct task_context *, ulong, physaddr_t *, int);
static int s390_uvtop(struct task_context *, ulong, physaddr_t *, int);
static int s390_vtop(unsigned long, ulong, physaddr_t*, int);
@@ -292,60 +263,87 @@
/*
* page table traversal functions
*/
-static unsigned long
-s390_pgd_offset(unsigned long pgd_base, unsigned long vaddr)
-{
- unsigned long pgd_off, pmd_base;
- pgd_off = ((vaddr >> S390_PGDIR_SHIFT) & (S390_PTRS_PER_PGD - 1))
- * S390_WORD_SIZE;
- readmem(pgd_base + pgd_off, PHYSADDR, &pmd_base,sizeof(long),
- "pgd_base",FAULT_ON_ERROR);
- return pmd_base;
-}
-
-unsigned long s390_pte_offset(unsigned long pte_base, unsigned long vaddr)
+/* Segment table traversal function */
+static ulong _kl_sg_table_deref_s390(ulong vaddr, ulong table, int len)
{
- unsigned pte_off, pte_val;
+ ulong offset, entry;
+
+ offset = ((vaddr >> 20) & 0x7ffUL) * 4;
+ if (offset >= (len + 1)*64)
+ /* Offset is over the table limit. */
+ return 0;
+ readmem(table + offset, KVADDR, &entry, sizeof(entry), "entry",
+ FAULT_ON_ERROR);
- pte_off = ((vaddr >> S390_PAGE_SHIFT) & (S390_PTRS_PER_PTE - 1))
- * S390_WORD_SIZE;
- readmem(pte_base + pte_off, PHYSADDR, &pte_val, sizeof(long),
- "pte_val",FAULT_ON_ERROR);
- return pte_val;
+ /*
+ * Check if the segment table entry could be read and doesn't have
+ * any of the reserved bits set.
+ */
+ if (entry & 0x80000000UL)
+ return 0;
+ /* Check if the segment table entry has the invalid bit set. */
+ if (entry & 0x40UL)
+ return 0;
+ /* Segment table entry is valid and well formed. */
+ return entry;
+}
+
+/* Page table traversal function */
+static ulong _kl_pg_table_deref_s390(ulong vaddr, ulong table, int len)
+{
+ ulong offset, entry;
+
+ offset = ((vaddr >> 12) & 0xffUL) * 4;
+ if (offset >= (len + 1)*64)
+ /* Offset is over the table limit. */
+ return 0;
+ readmem(table + offset, KVADDR, &entry, sizeof(entry), "entry",
+ FAULT_ON_ERROR);
+ /*
+ * Check if the page table entry could be read and doesn't have
+ * any of the reserved bits set.
+ */
+ if (entry & 0x80000900UL)
+ return 0;
+ /* Check if the page table entry has the invalid bit set. */
+ if (entry & 0x400UL)
+ return 0;
+ /* Page table entry is valid and well formed. */
+ return entry;
}
-/*
- * Generic vtop function for user and kernel addresses
- */
+/* lookup virtual address in page tables */
static int
-s390_vtop(unsigned long pgd_base, ulong kvaddr, physaddr_t *paddr, int verbose)
+s390_vtop(unsigned long table, ulong vaddr, physaddr_t *phys_addr, int verbose)
{
- unsigned pte_base, pte_val;
+ ulong entry, paddr;
+ int len;
- /* get the pgd entry */
- pte_base = s390_pgd_offset(pgd_base,kvaddr);
- if(S390_PMD_INVALID(pte_base) ||
- s390_pmd_bad(pte_base) ||
- s390_pmd_none(pte_base)) {
- *paddr = 0;
- return FALSE;
- }
- /* get the pte */
- pte_base = pte_base & S390_PT_BASE_MASK;
- pte_val = s390_pte_offset(pte_base,kvaddr);
- if(S390_PTE_INVALID(pte_val) ||
- s390_pte_none(pte_val)){
- *paddr = 0;
+ /*
+ * Get the segment table entry.
+ * We assume that the segment table length field in the asce
+ * is set to the maximum value of 127 (which translates to
+ * a segment table with 2048 entries) and that the addressing
+ * mode is 31 bit.
+ */
+ entry = _kl_sg_table_deref_s390(vaddr, table, 127);
+ if (!entry)
return FALSE;
- }
- if(!s390_pte_present(pte_val)){
- /* swapped out */
- *paddr = pte_val;
+ table = entry & 0x7ffffc00UL;
+ len = entry & 0xfUL;
+
+ /* Get the page table entry */
+ entry = _kl_pg_table_deref_s390(vaddr, table, len);
+ if (!entry)
return FALSE;
- }
- *paddr = (pte_val & S390_PAGE_BASE_MASK) |
- (kvaddr & (~(S390_PAGE_MASK)));
+
+ /* Isolate the page origin from the page table entry. */
+ paddr = entry & 0x7ffff000UL;
+
+ /* Add the page offset and return the final value. */
+ *phys_addr = paddr + (vaddr & 0xfffUL);
+
return TRUE;
}
diff -Naur crash-4.0-4.8/s390x.c crash-4.0-4.8-page-table-walk/s390x.c
--- crash-4.0-4.8/s390x.c 2007-10-30 16:51:54.000000000 +0100
+++ crash-4.0-4.8-page-table-walk/s390x.c 2007-11-15 15:44:33.000000000 +0100
@@ -20,24 +20,6 @@
#define S390X_WORD_SIZE 8
-#define S390X_PAGE_SHIFT 12
-#define S390X_PAGE_SIZE (1ULL << S390X_PAGE_SHIFT)
-#define S390X_PAGE_MASK (~(S390X_PAGE_SIZE-1))
-
-#define S390X_PGDIR_SHIFT 31
-#define S390X_PGDIR_SIZE (1ULL << S390X_PGDIR_SHIFT)
-#define S390X_PGDIR_MASK (~(S390X_PGDIR_SIZE-1))
-
-#define S390X_PMD_SHIFT 20
-#define S390X_PMD_SIZE (1ULL << S390X_PMD_SHIFT)
-#define S390X_PMD_MASK (~(S390X_PMD_SIZE-1))
-
-#define S390X_PTRS_PER_PGD 2048
-#define S390X_PTRS_PER_PMD 2048
-#define S390X_PTRS_PER_PTE 256
-
-#define S390X_PMD_BASE_MASK (~((1ULL<<12)-1))
-#define S390X_PT_BASE_MASK (~((1ULL<<11)-1))
#define S390X_PAGE_BASE_MASK (~((1ULL<<12)-1))
/* Flags used in entries of page dirs and page tables.
@@ -48,37 +30,11 @@
#define S390X_PAGE_INVALID 0x400ULL /* HW invalid */
#define S390X_PAGE_INVALID_MASK 0x601ULL /* for linux 2.6 */
#define S390X_PAGE_INVALID_NONE 0x401ULL /* for linux 2.6 */
-#define S390X_PMD_ENTRY_INV 0x20ULL /* invalid segment table entry */
-#define S390X_PGD_ENTRY_INV 0x20ULL /* invalid region table entry */
-#define S390X_PMD_ENTRY 0x00
-#define S390X_PGD_ENTRY_FIRST 0x05 /* first part of pmd is valid */
-#define S390X_PGD_ENTRY_SECOND 0xc7 /* second part of pmd is valid */
-#define S390X_PGD_ENTRY_FULL 0x07 /* complete pmd is valid */
/* bits 52, 55 must contain zeroes in a pte */
#define S390X_PTE_INVALID_MASK 0x900ULL
#define S390X_PTE_INVALID(x) ((x) & S390X_PTE_INVALID_MASK)
-/* pgd/pmd/pte query macros */
-#define s390x_pgd_none(x) ((x) & S390X_PGD_ENTRY_INV)
-#define s390x_pgd_bad(x) !( (((x) & S390X_PGD_ENTRY_FIRST) == \
- S390X_PGD_ENTRY_FIRST) || \
- (((x) & S390X_PGD_ENTRY_SECOND) == \
- S390X_PGD_ENTRY_SECOND) || \
- (((x) & S390X_PGD_ENTRY_FULL) == \
- S390X_PGD_ENTRY_FULL))
-
-#define s390x_pmd_none(x) ((x) & S390X_PMD_ENTRY_INV)
-#define s390x_pmd_bad(x) (((x) & (~S390X_PT_BASE_MASK & \
- ~S390X_PMD_ENTRY_INV)) != \
- S390X_PMD_ENTRY)
-
-#define s390x_pte_none(x) (((x) & (S390X_PAGE_INVALID | \
- S390X_PAGE_RO | \
- S390X_PAGE_PRESENT)) == \
- S390X_PAGE_INVALID)
-
-
#define ASYNC_STACK_SIZE STACKSIZE() // can be 8192 or 16384
#define KERNEL_STACK_SIZE STACKSIZE() // can be 8192 or 16384
@@ -88,9 +44,6 @@
* declarations of static functions
*/
static void s390x_print_lowcore(char*, struct bt_info*,int);
-static unsigned long s390x_pgd_offset(unsigned long, unsigned long);
-static unsigned long s390x_pmd_offset(unsigned long, unsigned long);
-static unsigned long s390x_pte_offset(unsigned long, unsigned long);
static int s390x_kvtop(struct task_context *, ulong, physaddr_t *, int);
static int s390x_uvtop(struct task_context *, ulong, physaddr_t *, int);
static int s390x_vtop(unsigned long, ulong, physaddr_t*, int);
@@ -304,81 +257,97 @@
}
}
-/*
+/*
* page table traversal functions
*/
-unsigned long s390x_pgd_offset(unsigned long pgd_base, unsigned long vaddr)
-{
- unsigned long pgd_off, pmd_base;
-
- pgd_off = ((vaddr >> S390X_PGDIR_SHIFT) &
- (S390X_PTRS_PER_PGD - 1)) * 8;
- readmem(pgd_base + pgd_off, PHYSADDR, &pmd_base, sizeof(long),
- "pmd_base",FAULT_ON_ERROR);
-
- return pmd_base;
-}
-unsigned long s390x_pmd_offset(unsigned long pmd_base, unsigned long vaddr)
-{
- unsigned long pmd_off, pte_base;
-
- pmd_off = ((vaddr >> S390X_PMD_SHIFT) & (S390X_PTRS_PER_PMD - 1))
- * 8;
- readmem(pmd_base + pmd_off, PHYSADDR, &pte_base, sizeof(long),
- "pte_base",FAULT_ON_ERROR);
- return pte_base;
-}
-
-unsigned long s390x_pte_offset(unsigned long pte_base, unsigned long vaddr)
-{
- unsigned long pte_off, pte_val;
-
- pte_off = ((vaddr >> S390X_PAGE_SHIFT) & (S390X_PTRS_PER_PTE - 1))
- * 8;
- readmem(pte_base + pte_off, PHYSADDR, &pte_val, sizeof(long),
- "pte_val",FAULT_ON_ERROR);
- return pte_val;
+/* Region or segment table traversal function */
+static ulong _kl_rsg_table_deref_s390x(ulong vaddr, ulong table,
+ int len, int level)
+{
+ ulong offset, entry;
+
+ offset = ((vaddr >> (11*level + 20)) & 0x7ffULL) * 8;
+ if (offset >= (len + 1)*4096)
+ /* Offset is over the table limit. */
+ return 0;
+ readmem(table + offset, KVADDR, &entry, sizeof(entry), "entry",
+ FAULT_ON_ERROR);
+ /*
+ * Check if the segment table entry could be read and doesn't have
+ * any of the reserved bits set.
+ */
+ if ((entry & 0xcULL) != (level << 2))
+ return 0;
+ /* Check if the region table entry has the invalid bit set. */
+ if (entry & 0x40ULL)
+ return 0;
+ /* Region table entry is valid and well formed. */
+ return entry;
}
-/*
- * Generic vtop function for user and kernel addresses
- */
-static int
-s390x_vtop(unsigned long pgd_base, ulong kvaddr, physaddr_t *paddr, int verbose)
+/* Page table traversal function */
+static ulong _kl_pg_table_deref_s390x(ulong vaddr, ulong table)
{
- unsigned long pmd_base, pte_base, pte_val;
+ ulong offset, entry;
- /* get the pgd entry */
- pmd_base = s390x_pgd_offset(pgd_base,kvaddr);
- if(s390x_pgd_bad(pmd_base) ||
- s390x_pgd_none(pmd_base)){
- *paddr = 0;
+ offset = ((vaddr >> 12) & 0xffULL) * 8;
+ readmem(table + offset, KVADDR, &entry, sizeof(entry), "entry",
+ FAULT_ON_ERROR);
+ /*
+ * Check if the page table entry could be read and doesn't have
+ * any of the reserved bits set.
+ */
+ if (entry & 0x900ULL)
+ return 0;
+ /* Check if the page table entry has the invalid bit set. */
+ if (entry & 0x400ULL)
+ return 0;
+ /* Page table entry is valid and well formed. */
+ return entry;
+}
+
+/* lookup virtual address in page tables */
+int s390x_vtop(ulong table, ulong vaddr, physaddr_t *phys_addr, int verbose)
+{
+ ulong entry, paddr;
+ int level, len;
+
+ /*
+ * Walk the region and segment tables.
+ * We assume that the table length field in the asce is set to the
+ * maximum value of 3 (which translates to a region first, region
+ * second, region third or segment table with 2048 entries) and that
+ * the addressing mode is 64 bit.
+ */
+ len = 3;
+ /* Read the first entry to find the number of page table levels. */
+ readmem(table, KVADDR, &entry, sizeof(entry), "entry", FAULT_ON_ERROR);
+ level = (entry & 0xcULL) >> 2;
+ if ((vaddr >> (31 + 11*level)) != 0ULL) {
+ /* Address too big for the number of page table levels. */
return FALSE;
}
- /* get the pmd */
- pmd_base = pmd_base & S390X_PMD_BASE_MASK;
- pte_base = s390x_pmd_offset(pmd_base,kvaddr);
- if(s390x_pmd_bad(pte_base) ||
- s390x_pmd_none(pte_base)) {
- *paddr = 0;
- return FALSE;
+ while (level >= 0) {
+ entry = _kl_rsg_table_deref_s390x(vaddr, table, len, level);
+ if (!entry)
+ return 0;
+ table = entry & ~0xfffULL;
+ len = entry & 0x3ULL;
+ level--;
}
- /* get the pte */
- pte_base = pte_base & S390X_PT_BASE_MASK;
- pte_val = s390x_pte_offset(pte_base,kvaddr);
- if (S390X_PTE_INVALID(pte_val) ||
- s390x_pte_none(pte_val)){
- *paddr = 0;
- return FALSE;
- }
- if(!s390x_pte_present(pte_val)){
- /* swapped out */
- *paddr = pte_val;
+
+ /* Get the page table entry */
+ entry = _kl_pg_table_deref_s390x(vaddr, entry & ~0x7ffULL);
+ if (!entry)
return FALSE;
- }
- *paddr = (pte_val & S390X_PAGE_BASE_MASK) |
- (kvaddr & (~(S390X_PAGE_MASK)));
+
+ /* Isolate the page origin from the page table entry. */
+ paddr = entry & ~0xfffULL;
+
+ /* Add the page offset and return the final value. */
+ *phys_addr = paddr + (vaddr & 0xfffULL);
+
return TRUE;
}