Fwd: [PATCH] s390x : Display-hardware-flags-for-RTE-STE-PTE
by Dave Anderson
----- Forwarded Message -----
From: "Zaslonko Mikhail" <zaslonko(a)linux.vnet.ibm.com>
To: "Dave Anderson" <anderson(a)redhat.com>
Cc: "Michael Holzheu" <holzheu(a)linux.vnet.ibm.com>
Sent: Tuesday, August 15, 2017 1:41:25 PM
Subject: [PATCH] s390x : Display-hardware-flags-for-RTE-STE-PTE
Hello Dave,
Please find the enclosed patch below.
This patch enhances the S390X "vtop" command to display the binary values of
the hardware flags for region, segment and page table entries along with
the page table walk information.
For example:
crash> vtop -u 0x60000000000000
VIRTUAL PHYSICAL
60000000000000 5b50a000
PAGE DIRECTORY: 000000005cea0000
RFTE: 000000005cea0018 => 000000006612400f (flags = 00f)
flags in binary : P=0; TF=00; I=0; TT=11; TL=11
RSTE: 0000000066124000 => 000000005d91800b (flags = 00b)
flags in binary : P=0; TF=00; I=0; TT=10; TL=11
RTTE: 000000005d918000 => 000000006615c007 (flags = 007)
flags in binary : FC=0; P=0; TF=00; I=0; CR=0; TT=01; TL=11
STE: 000000006615c000 => 000000005ce48800 (flags = 000)
flags in binary : FC=0; P=0; I=0; CS=0; TT=00
PTE: 000000005ce48800 => 000000005b50a03f (flags = 03f)
flags in binary : I=0; P=0
PAGE: 000000005b50a000
or, for large pages:
crash> vtop -k 0x3d100000000
VIRTUAL PHYSICAL
3d100000000 77c00000
PAGE DIRECTORY: 0000000001210000
RTTE: 0000000001213d10 => 0000000077dc4007 (flags = 007)
flags in binary : FC=0; P=0; TF=00; I=0; CR=0; TT=01; TL=11
STE: 0000000077dc4000 => 0000000077c03403 (flags = 03403)
flags in binary : AV=0; ACC=0011; F=0; FC=1; P=0; I=0; CS=0; TT=00
Reviewed-by: Michael Holzheu <holzheu(a)linux.vnet.ibm.com>
Signed-off-by: Mikhail Zaslonko <zaslonko(a)linux.vnet.ibm.com>
---
s390x.c | 183 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 175 insertions(+), 8 deletions(-)
diff --git a/s390x.c b/s390x.c
index 9246f75..fa202bc 100644
--- a/s390x.c
+++ b/s390x.c
@@ -26,7 +26,8 @@
/* Flags used in entries of page dirs and page tables.
*/
-#define S390X_PAGE_PRESENT 0x001ULL /* set: loaded in physical memory
+#define S390X_PTE_FLAG_BITS 0xfffULL /* Page table entry flag bits */
+#define S390X_PAGE_PRESENT 0x001ULL /* set: loaded in physical memory
* clear: not loaded in
physical mem */
#define S390X_PAGE_RO 0x200ULL /* HW read-only */
#define S390X_PAGE_INVALID 0x400ULL /* HW invalid */
@@ -46,6 +47,49 @@
#define S390X_PSW_MASK_PSTATE 0x0001000000000000UL
/*
+ * Flags for Region and Segment table entries.
+ */
+#define S390X_RTE_FLAG_BITS_FC0 0xfffULL
+#define S390X_RTE_FLAG_BITS_FC1 0x7fffffffULL
+#define S390X_RTE_TL 0x3ULL
+#define S390X_RTE_TL_10 0x2ULL
+#define S390X_RTE_TL_01 0x1ULL
+#define S390X_RTE_TT 0xcULL
+#define S390X_RTE_TT_10 0x8ULL
+#define S390X_RTE_TT_01 0x4ULL
+#define S390X_RTE_CR 0x10ULL
+#define S390X_RTE_I 0x20ULL
+#define S390X_RTE_TF 0xc0ULL
+#define S390X_RTE_TF_10 0x80ULL
+#define S390X_RTE_TF_01 0x40ULL
+#define S390X_RTE_P 0x200ULL
+#define S390X_RTE_FC 0x400ULL
+#define S390X_RTE_F 0x800ULL
+#define S390X_RTE_ACC 0xf000ULL
+#define S390X_RTE_ACC_1000 0x8000ULL
+#define S390X_RTE_ACC_0100 0x4000ULL
+#define S390X_RTE_ACC_0010 0x2000ULL
+#define S390X_RTE_ACC_0001 0x1000ULL
+#define S390X_RTE_AV 0x10000ULL
+
+#define S390X_STE_FLAG_BITS_FC0 0x7ffULL
+#define S390X_STE_FLAG_BITS_FC1 0xfffffULL
+#define S390X_STE_TT 0xcULL
+#define S390X_STE_TT_10 0x8ULL
+#define S390X_STE_TT_01 0x4ULL
+#define S390X_STE_CS 0x10ULL
+#define S390X_STE_I 0x20ULL
+#define S390X_STE_P 0x200ULL
+#define S390X_STE_FC 0x400ULL
+#define S390X_STE_F 0x800ULL
+#define S390X_STE_ACC 0xf000ULL
+#define S390X_STE_ACC_1000 0x8000ULL
+#define S390X_STE_ACC_0100 0x4000ULL
+#define S390X_STE_ACC_0010 0x2000ULL
+#define S390X_STE_ACC_0001 0x1000ULL
+#define S390X_STE_AV 0x10000ULL
+
+/*
* S390x prstatus ELF Note
*/
struct s390x_nt_prstatus {
@@ -611,12 +655,115 @@ static inline int s390x_pte_present(unsigned long x){
* page table traversal functions
*/
+/* Print flags of Segment-Table entry with format control = 1 */
+static void print_segment_entry_fc1(ulong val)
+{
+ fprintf(fp, "AV=%u; ACC=%u%u%u%u; F=%u; FC=%u; P=%u; I=%u; CS=%u;
TT=%u%u\n",
+ !!(val & S390X_STE_AV),
+ !!(val & S390X_STE_ACC_1000),
+ !!(val & S390X_STE_ACC_0100),
+ !!(val & S390X_STE_ACC_0010),
+ !!(val & S390X_STE_ACC_0001),
+ !!(val & S390X_STE_F),
+ !!(val & S390X_STE_FC),
+ !!(val & S390X_STE_P),
+ !!(val & S390X_STE_I),
+ !!(val & S390X_STE_CS),
+ !!(val & S390X_STE_TT_10),
+ !!(val & S390X_STE_TT_01));
+}
+
+/* Print flags of Segment-Table entry with format control = 0 */
+static void print_segment_entry_fc0(ulong val)
+{
+ fprintf(fp, "FC=%u; P=%u; I=%u; CS=%u; TT=%u%u\n",
+ !!(val & S390X_STE_FC),
+ !!(val & S390X_STE_P),
+ !!(val & S390X_STE_I),
+ !!(val & S390X_STE_CS),
+ !!(val & S390X_STE_TT_10),
+ !!(val & S390X_STE_TT_01));
+}
+
+/* Print flags of Region-Third-Table entry with format control = 1 */
+static void print_region_third_entry_fc1(ulong val)
+{
+ fprintf(fp, "AV=%u; ACC=%u%u%u%u; F=%u; FC=%u; P=%u; I=%u; CR=%u;
TT=%u%u\n",
+ !!(val & S390X_RTE_AV),
+ !!(val & S390X_RTE_ACC_1000),
+ !!(val & S390X_RTE_ACC_0100),
+ !!(val & S390X_RTE_ACC_0010),
+ !!(val & S390X_RTE_ACC_0001),
+ !!(val & S390X_RTE_F),
+ !!(val & S390X_RTE_FC),
+ !!(val & S390X_RTE_P),
+ !!(val & S390X_RTE_I),
+ !!(val & S390X_RTE_CR),
+ !!(val & S390X_RTE_TT_10),
+ !!(val & S390X_RTE_TT_01));
+}
+
+/* Print flags of Region-Third-Table entry with format control = 0 */
+static void print_region_third_entry_fc0(ulong val)
+{
+ fprintf(fp, "FC=%u; P=%u; TF=%u%u; I=%u; CR=%u; TT=%u%u; TL=%u%u\n",
+ !!(val & S390X_RTE_FC),
+ !!(val & S390X_RTE_P),
+ !!(val & S390X_RTE_TF_10),
+ !!(val & S390X_RTE_TF_01),
+ !!(val & S390X_RTE_I),
+ !!(val & S390X_RTE_CR),
+ !!(val & S390X_RTE_TT_10),
+ !!(val & S390X_RTE_TT_01),
+ !!(val & S390X_RTE_TL_10),
+ !!(val & S390X_RTE_TL_01));
+}
+
+/* Print flags of Region-First/Second-Table entry */
+static void print_region_first_second_entry(ulong val)
+{
+ fprintf(fp, "P=%u; TF=%u%u; I=%u; TT=%u%u; TL=%u%u\n",
+ !!(val & S390X_RTE_P),
+ !!(val & S390X_RTE_TF_10),
+ !!(val & S390X_RTE_TF_01),
+ !!(val & S390X_RTE_I),
+ !!(val & S390X_RTE_TT_10),
+ !!(val & S390X_RTE_TT_01),
+ !!(val & S390X_RTE_TL_10),
+ !!(val & S390X_RTE_TL_01));
+}
+
+/* Print the binary flags for Region or Segment table entry */
+static void s390x_print_te_binary_flags(ulong val, int level)
+{
+ fprintf(fp, " flags in binary : ");
+ switch (level) {
+ case 0:
+ if (val & S390X_STE_FC)
+ print_segment_entry_fc1(val);
+ else
+ print_segment_entry_fc0(val);
+ break;
+ case 1:
+ if (val & S390X_RTE_FC)
+ print_region_third_entry_fc1(val);
+ else
+ print_region_third_entry_fc0(val);
+ break;
+ case 2:
+ case 3:
+ print_region_first_second_entry(val);
+ break;
+ }
+}
+
/* Region or segment table traversal function */
static ulong _kl_rsg_table_deref_s390x(ulong vaddr, ulong table,
int len, int level, int verbose)
{
const char *name_vec[] = {"STE", "RTTE", "RSTE", "RFTE"};
- ulong offset, entry, addr;
+ ulong offset, entry, flags, addr;
+ int flags_prt_len;
offset = ((vaddr >> (11*level + 20)) & 0x7ffULL) * 8;
if (offset >= (len + 1)*4096)
@@ -624,16 +771,33 @@ static ulong _kl_rsg_table_deref_s390x(ulong
vaddr, ulong table,
return 0;
addr = table + offset;
readmem(addr, KVADDR, &entry, sizeof(entry), "entry", FAULT_ON_ERROR);
- if (verbose)
- fprintf(fp, "%5s: %016lx => %016lx\n", name_vec[level], addr,
entry);
+ if (verbose) {
+ flags_prt_len = 3;
+ if (entry & S390X_RTE_FC)
+ if (level) {
+ flags = entry & S390X_RTE_FLAG_BITS_FC1;
+ flags_prt_len = 8;
+ } else {
+ flags = entry & S390X_STE_FLAG_BITS_FC1;
+ flags_prt_len = 5;
+ }
+ else
+ if (level)
+ flags = entry & S390X_RTE_FLAG_BITS_FC0;
+ else
+ flags = entry & S390X_STE_FLAG_BITS_FC0;
+ fprintf(fp, "%5s: %016lx => %016lx (flags = %0*lx)\n",
+ name_vec[level], addr, entry, flags_prt_len, flags);
+ s390x_print_te_binary_flags(entry, level);
+ }
/*
* Check if the segment table entry could be read and doesn't have
* any of the reserved bits set.
*/
- if ((entry & 0xcULL) != (level << 2))
+ if ((entry & S390X_RTE_TT) != (level << 2))
return 0;
/* Check if the region table entry has the invalid bit set. */
- if (entry & 0x20ULL)
+ if (entry & S390X_RTE_I)
return 0;
/* Region table entry is valid and well formed. */
return entry;
@@ -664,8 +828,11 @@ static ulong _kl_pg_table_deref_s390x(ulong vaddr,
ulong table, int verbose)
addr = table + offset;
readmem(addr, KVADDR, &entry, sizeof(entry), "entry", FAULT_ON_ERROR);
if (verbose) {
- fprintf(fp, "%5s: %016lx => %016lx\n", "PTE", addr, entry);
- fprintf(fp, "%5s: %016llx\n", "PAGE", entry & ~0xfffULL);
+ fprintf(fp, "%5s: %016lx => %016lx (flags = %03llx)\n",
+ "PTE", addr, entry, entry & S390X_PTE_FLAG_BITS);
+ fprintf(fp, " flags in binary : I=%u; P=%u\n",
+ !!(entry & S390X_PAGE_INVALID), !!(entry & S390X_PAGE_RO));
+ fprintf(fp, "%5s: %016llx\n", "PAGE", entry &
~S390X_PTE_FLAG_BITS);
}
/*
* Return zero if the page table entry has the reserved (0x800) or
--
2.11.2
7 years, 4 months
[PATCH] s390x: Fix s390dbf time stamps for kernels 4.11 and 4.14
by Michael Holzheu
Hi Dave,
With kernel commit ea417aa8a38bc7db ("s390/debug: make debug event time
stamps relative to the boot TOD clock"), the s390dbf time stamps are stored
relative to the kernel boot time.
In order to still show absolute time since 1970 we have to detect
those kernels and re-add the boot time before printing the records.
We can use the tod_to_timeval() symbol to check for those kernels
because that kernel commit removed the symbol.
With commit 6e2ef5e4f6cc5734 ("s390/time: add support for the TOD clock
epoch extension") the symbol for storing the boot time has changed from
"sched_clock_base_cc" to "tod_clock_base".
This commit is currently on the s390 features branch and will be integrated
into Linux 4.14:
https://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git/commit/?h=...
Cc: Martin Schwidefsky <schwidefsky(a)de.ibm.com>
Signed-off-by: Michael Holzheu <holzheu(a)linux.vnet.ibm.com>
Reviewed-by: Mikhail Zaslonko <zaslonko(a)linux.vnet.ibm.com>
---
s390dbf.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 46 insertions(+), 5 deletions(-)
diff --git a/s390dbf.c b/s390dbf.c
index d3581c2..f21cd3a 100644
--- a/s390dbf.c
+++ b/s390dbf.c
@@ -57,6 +57,9 @@
#define KL_PTRSZ 4
#endif
+/* Start TOD time of kernel in usecs for relative time stamps */
+static uint64_t tod_clock_base_us;
+
typedef unsigned long uaddr_t;
typedef unsigned long kaddr_t;
@@ -162,13 +165,23 @@ static inline int set_cmd_flags(command_t *cmd, int flags, char *extraops)
return 0;
}
+/* Time of day clock value for 1970/01/01 */
+#define TOD_UNIX_EPOCH (0x8126d60e46000000LL - (0x3c26700LL * 1000000 * 4096))
+/* Time of day clock value for 1970/01/01 in usecs */
+#define TOD_UNIX_EPOCH_US (TOD_UNIX_EPOCH >> 12)
+
static inline void kl_s390tod_to_timeval(uint64_t todval, struct timeval *xtime)
{
- todval -= 0x8126d60e46000000LL - (0x3c26700LL * 1000000 * 4096);
-
- todval >>= 12;
- xtime->tv_sec = todval / 1000000;
- xtime->tv_usec = todval % 1000000;
+ uint64_t todval_us;
+
+ /* Convert TOD to usec (51st bit of TOD is us) */
+ todval_us = todval >> 12;
+ /* Add base if we have relative time stamps */
+ todval_us += tod_clock_base_us;
+ /* Subtract EPOCH so that we get time in usec since 1970 */
+ todval_us -= TOD_UNIX_EPOCH_US;
+ xtime->tv_sec = todval_us / 1000000;
+ xtime->tv_usec = todval_us % 1000000;
}
static inline int kl_struct_len(char* struct_name)
@@ -846,10 +859,38 @@ find_debug_area(const char *area_name)
return NULL;
}
+static void tod_clock_base_init(void)
+{
+ if (kernel_symbol_exists("tod_clock_base")) {
+ /*
+ * Kernels >= 4.14 that contain 6e2ef5e4f6cc5734 ("s390/time:
+ * add support for the TOD clock epoch extension")
+ */
+ get_symbol_data("tod_clock_base", sizeof(tod_clock_base_us),
+ &tod_clock_base_us);
+ /* Bit for usecs is at position 59 - therefore shift 4 */
+ tod_clock_base_us >>= 4;
+ } else if (kernel_symbol_exists("sched_clock_base_cc") &&
+ !kernel_symbol_exists("tod_to_timeval")) {
+ /*
+ * Kernels >= 4.11 that contain ea417aa8a38bc7db ("s390/debug:
+ * make debug event time stamps relative to the boot TOD clock")
+ */
+ get_symbol_data("sched_clock_base_cc",
+ sizeof(tod_clock_base_us), &tod_clock_base_us);
+ /* Bit for usecs is at position 51 - therefore shift 12 */
+ tod_clock_base_us >>= 12;
+ } else {
+ /* All older kernels use absolute time stamps */
+ tod_clock_base_us = 0;
+ }
+}
+
static void
dbf_init(void)
{
if (!initialized) {
+ tod_clock_base_init();
if(dbf_version >= DBF_VERSION_V2)
add_lcrash_debug_view(&pages_view);
add_lcrash_debug_view(&ascii_view);
--
2.11.2
7 years, 4 months
[PATCH] s390x: Add page table walk information to vtop command
by Michael Holzheu
Hello Dave,
This patch adds some output to the vtop command on s390x. As on x86, we now
print information for the page table walk:
* "Region-First-Table Entry" (RFTE)
* "Region-Second-Table Entry" (RSTE)
* "Region-Third-Table Entry" (RTTE)
* "Segment Table Entry" (STE)
* "Page Table Entry" (PTE)
* "Real address of page" (PAGE)
Depending on the size of the address space the page tables can start at
different levels.
Example for three level page tables:
crash> vtop 3ff8000c000
VIRTUAL PHYSICAL
3ff8000c000 2e3832000
PAGE DIRECTORY: 0000000000aaa000
RTTE: 0000000000aadff8 => 00000002e3c00007
STE: 00000002e3c00000 => 00000002e3df7000
PTE: 00000002e3df7060 => 00000002e383203d
PAGE: 00000002e3832000
PAGE PHYSICAL MAPPING INDEX CNT FLAGS
3d10b8e0c80 2e3832000 0 0 1 7fffc0000000000
The first value, e.g. "PTE: 00000002e3df7060", is the physical address
of the entry in the table. The second, e.g. "=> 00000002e383203d", is the
content of the entry itself (address and flags).
Reviewed-by: Heiko Carstens <heiko.carstens(a)de.ibm.com>
Tested-by: Mikhail Zaslonko <zaslonko(a)linux.vnet.ibm.com>
Signed-off-by: Michael Holzheu <holzheu(a)linux.vnet.ibm.com>
---
s390x.c | 31 +++++++++++++++++++++----------
1 file changed, 21 insertions(+), 10 deletions(-)
diff --git a/s390x.c b/s390x.c
index 96ce3dc..9246f75 100644
--- a/s390x.c
+++ b/s390x.c
@@ -613,16 +613,19 @@ static inline int s390x_pte_present(unsigned long x){
/* Region or segment table traversal function */
static ulong _kl_rsg_table_deref_s390x(ulong vaddr, ulong table,
- int len, int level)
+ int len, int level, int verbose)
{
- ulong offset, entry;
+ const char *name_vec[] = {"STE", "RTTE", "RSTE", "RFTE"};
+ ulong offset, entry, addr;
offset = ((vaddr >> (11*level + 20)) & 0x7ffULL) * 8;
if (offset >= (len + 1)*4096)
/* Offset is over the table limit. */
return 0;
- readmem(table + offset, KVADDR, &entry, sizeof(entry), "entry",
- FAULT_ON_ERROR);
+ addr = table + offset;
+ readmem(addr, KVADDR, &entry, sizeof(entry), "entry", FAULT_ON_ERROR);
+ if (verbose)
+ fprintf(fp, "%5s: %016lx => %016lx\n", name_vec[level], addr, entry);
/*
* Check if the segment table entry could be read and doesn't have
* any of the reserved bits set.
@@ -653,13 +656,17 @@ static int swap_entry(ulong entry)
}
/* Page table traversal function */
-static ulong _kl_pg_table_deref_s390x(ulong vaddr, ulong table)
+static ulong _kl_pg_table_deref_s390x(ulong vaddr, ulong table, int verbose)
{
- ulong offset, entry;
+ ulong offset, entry, addr;
offset = ((vaddr >> 12) & 0xffULL) * 8;
- readmem(table + offset, KVADDR, &entry, sizeof(entry), "entry",
- FAULT_ON_ERROR);
+ addr = table + offset;
+ readmem(addr, KVADDR, &entry, sizeof(entry), "entry", FAULT_ON_ERROR);
+ if (verbose) {
+ fprintf(fp, "%5s: %016lx => %016lx\n", "PTE", addr, entry);
+ fprintf(fp, "%5s: %016llx\n", "PAGE", entry & ~0xfffULL);
+ }
/*
* Return zero if the page table entry has the reserved (0x800) or
* the invalid (0x400) bit set and it is not a swap entry.
@@ -676,6 +683,9 @@ int s390x_vtop(ulong table, ulong vaddr, physaddr_t *phys_addr, int verbose)
ulong entry, paddr;
int level, len;
+ if (verbose)
+ fprintf(fp, "PAGE DIRECTORY: %016lx\n", table);
+
*phys_addr = 0;
/*
* Walk the region and segment tables.
@@ -693,7 +703,8 @@ int s390x_vtop(ulong table, ulong vaddr, physaddr_t *phys_addr, int verbose)
return FALSE;
}
while (level >= 0) {
- entry = _kl_rsg_table_deref_s390x(vaddr, table, len, level);
+ entry = _kl_rsg_table_deref_s390x(vaddr, table, len, level,
+ verbose);
if (!entry)
return FALSE;
table = entry & ~0xfffULL;
@@ -717,7 +728,7 @@ int s390x_vtop(ulong table, ulong vaddr, physaddr_t *phys_addr, int verbose)
}
/* Get the page table entry */
- entry = _kl_pg_table_deref_s390x(vaddr, entry & ~0x7ffULL);
+ entry = _kl_pg_table_deref_s390x(vaddr, entry & ~0x7ffULL, verbose);
if (!entry)
return FALSE;
--
2.11.2
7 years, 4 months