[PATCH] sadump: Fix a problem of PTI enabled kernel
by Takao Indoh
This patch fixes a problem where a sadump dumpfile cannot be opened
by crash when Page Table Isolation (PTI) is enabled.
When PTI is enabled, bit 12 of the CR3 register is used to split user
space and kernel space. Also, bits 11:0 are used for Process Context
IDentifiers (PCID). To open a sadump dump file, the value of CR3 is
used to calculate the KASLR offset and phys_base; therefore this patch
masks the CR3 register value correctly for PTI-enabled kernels.
This patch also includes code cleanup.
Signed-off-by: Takao Indoh <indou.takao(a)jp.fujitsu.com>
---
defs.h | 2 ++
sadump.c | 33 +++++++++++++++++++++------------
symbols.c | 9 +++++++++
3 files changed, 32 insertions(+), 12 deletions(-)
diff --git a/defs.h b/defs.h
index dcd6c26..a19f280 100644
--- a/defs.h
+++ b/defs.h
@@ -2605,6 +2605,8 @@ struct symbol_table_data {
ulong divide_error_vmlinux;
ulong idt_table_vmlinux;
ulong saved_command_line_vmlinux;
+ ulong pti_init_vmlinux;
+ ulong kaiser_init_vmlinux;
};
/* flags for st */
diff --git a/sadump.c b/sadump.c
index 6b912d4..25cefe9 100644
--- a/sadump.c
+++ b/sadump.c
@@ -1749,7 +1749,7 @@ static ulong memparse(char *ptr, char **retptr)
* of elfcorehdr.
*/
static ulong
-get_elfcorehdr(ulong cr3, ulong kaslr_offset)
+get_elfcorehdr(ulong kaslr_offset)
{
char cmdline[BUFSIZE], *ptr;
ulong cmdline_vaddr;
@@ -1906,7 +1906,7 @@ get_vmcoreinfo(ulong elfcorehdr, ulong *addr, int *len)
* using "elfcorehdr=" and retrieve kaslr_offset/phys_base from vmcoreinfo.
*/
static int
-get_kaslr_offset_from_vmcoreinfo(ulong cr3, ulong orig_kaslr_offset,
+get_kaslr_offset_from_vmcoreinfo(ulong orig_kaslr_offset,
ulong *kaslr_offset, ulong *phys_base)
{
ulong elfcorehdr_addr = 0;
@@ -1916,7 +1916,7 @@ get_kaslr_offset_from_vmcoreinfo(ulong cr3, ulong orig_kaslr_offset,
int ret = FALSE;
/* Find "elfcorehdr=" in the kernel boot parameter */
- elfcorehdr_addr = get_elfcorehdr(cr3, orig_kaslr_offset);
+ elfcorehdr_addr = get_elfcorehdr(orig_kaslr_offset);
if (!elfcorehdr_addr)
return FALSE;
@@ -1973,8 +1973,8 @@ quit:
* 1) Get IDTR and CR3 value from the dump header.
* 2) Get a virtual address of IDT from IDTR value
* --- (A)
- * 3) Translate (A) to physical address using CR3, which points a top of
- * page table.
+ * 3) Translate (A) to physical address using CR3, the upper 52 bits
+ * of which points a top of page table.
* --- (B)
* 4) Get an address of vector0 (Devide Error) interrupt handler from
* IDT, which are pointed by (B).
@@ -2023,12 +2023,15 @@ quit:
* kernel. Retrieve vmcoreinfo from address of "elfcorehdr=" and
* get kaslr_offset and phys_base from vmcoreinfo.
*/
+#define PTI_USER_PGTABLE_BIT PAGE_SHIFT
+#define PTI_USER_PGTABLE_MASK (1 << PTI_USER_PGTABLE_BIT)
+#define CR3_PCID_MASK 0xFFFull
int
sadump_calc_kaslr_offset(ulong *kaslr_offset)
{
ulong phys_base = 0;
struct sadump_smram_cpu_state scs;
- uint64_t idtr = 0, cr3 = 0, idtr_paddr;
+ uint64_t idtr = 0, pgd = 0, idtr_paddr;
ulong divide_error_vmcore;
ulong kaslr_offset_kdump, phys_base_kdump;
int ret = FALSE;
@@ -2039,7 +2042,10 @@ sadump_calc_kaslr_offset(ulong *kaslr_offset)
memset(&scs, 0, sizeof(scs));
get_sadump_smram_cpu_state_any(&scs);
- cr3 = scs.Cr3;
+ if (st->pti_init_vmlinux || st->kaiser_init_vmlinux)
+ pgd = scs.Cr3 & ~(CR3_PCID_MASK|PTI_USER_PGTABLE_MASK);
+ else
+ pgd = scs.Cr3 & ~CR3_PCID_MASK;
idtr = ((uint64_t)scs.IdtUpper)<<32 | (uint64_t)scs.IdtLower;
/*
@@ -2050,12 +2056,12 @@ sadump_calc_kaslr_offset(ulong *kaslr_offset)
*
* TODO: XEN and 5-level is not supported
*/
- vt->kernel_pgd[0] = cr3;
+ vt->kernel_pgd[0] = pgd;
machdep->machspec->last_pml4_read = vt->kernel_pgd[0];
machdep->machspec->physical_mask_shift = __PHYSICAL_MASK_SHIFT_2_6;
machdep->machspec->pgdir_shift = PGDIR_SHIFT;
- if (!readmem(cr3, PHYSADDR, machdep->machspec->pml4, PAGESIZE(),
- "cr3", RETURN_ON_ERROR))
+ if (!readmem(pgd, PHYSADDR, machdep->machspec->pml4, PAGESIZE(),
+ "pgd", RETURN_ON_ERROR))
goto quit;
/* Convert virtual address of IDT table to physical address */
@@ -2070,7 +2076,7 @@ sadump_calc_kaslr_offset(ulong *kaslr_offset)
if (CRASHDEBUG(1)) {
fprintf(fp, "calc_kaslr_offset: idtr=%lx\n", idtr);
- fprintf(fp, "calc_kaslr_offset: cr3=%lx\n", cr3);
+ fprintf(fp, "calc_kaslr_offset: pgd=%lx\n", pgd);
fprintf(fp, "calc_kaslr_offset: idtr(phys)=%lx\n", idtr_paddr);
fprintf(fp, "calc_kaslr_offset: divide_error(vmlinux): %lx\n",
st->divide_error_vmlinux);
@@ -2084,9 +2090,12 @@ sadump_calc_kaslr_offset(ulong *kaslr_offset)
* from vmcoreinfo
*/
if (get_kaslr_offset_from_vmcoreinfo(
- cr3, *kaslr_offset, &kaslr_offset_kdump, &phys_base_kdump)) {
+ *kaslr_offset, &kaslr_offset_kdump, &phys_base_kdump)) {
*kaslr_offset = kaslr_offset_kdump;
phys_base = phys_base_kdump;
+ } else if (CRASHDEBUG(1)) {
+ fprintf(fp, "sadump: failed to determine which kernel was running at crash,\n");
+ fprintf(fp, "sadump: asssuming the kdump 1st kernel.\n");
}
if (CRASHDEBUG(1)) {
diff --git a/symbols.c b/symbols.c
index 2372887..26b319a 100644
--- a/symbols.c
+++ b/symbols.c
@@ -3072,10 +3072,14 @@ dump_symbol_table(void)
fprintf(fp, "divide_error_vmlinux: %lx\n", st->divide_error_vmlinux);
fprintf(fp, " idt_table_vmlinux: %lx\n", st->idt_table_vmlinux);
fprintf(fp, "saved_command_line_vmlinux: %lx\n", st->saved_command_line_vmlinux);
+ fprintf(fp, " pti_init_vmlinux: %lx\n", st->pti_init_vmlinux);
+ fprintf(fp, " kaiser_init_vmlinux: %lx\n", st->kaiser_init_vmlinux);
} else {
fprintf(fp, "divide_error_vmlinux: (unused)\n");
fprintf(fp, " idt_table_vmlinux: (unused)\n");
fprintf(fp, "saved_command_line_vmlinux: (unused)\n");
+ fprintf(fp, " pti_init_vmlinux: (unused)\n");
+ fprintf(fp, " kaiser_init_vmlinux: (unused)\n");
}
fprintf(fp, " symval_hash[%d]: %lx\n", SYMVAL_HASH,
@@ -12306,6 +12310,11 @@ numeric_forward(const void *P_x, const void *P_y)
st->saved_command_line_vmlinux = valueof(x);
else if (STREQ(y->name, "saved_command_line"))
st->saved_command_line_vmlinux = valueof(y);
+
+ if (STREQ(x->name, "pti_init"))
+ st->pti_init_vmlinux = valueof(x);
+ else if (STREQ(y->name, "kaiser_init"))
+ st->kaiser_init_vmlinux = valueof(y);
}
xs = bfd_get_section(x);
--
1.8.3.1
6 years, 10 months
Using crash with structure layout randomized kernel
by Cao jin
Hi Dave,
Recently I tried the crash tool with a kdump dumpfile from a kernel with
randomized structure layout[*], and, unsurprisingly, it fails. After
looking into the different errors crash reports, I can confirm they are a
result of the randomized structure layout.
So my question is: have you ever considered supporting this feature[*] in
crash?
If yes, do you have any plan & technique evaluation about it?
If no, what's the reason?
[*]https://lwn.net/Articles/722293/
--
Sincerely,
Cao jin
6 years, 10 months
Crash debugging vmcore, start error problem, crash seekerror: kernel virtual address: fffffffffffffffa0 type: "kmem_cache buffer"
by 于宸xpp
hi:
There is a problem that has been bothering me for several days. On the x86_64 platform, I used crash to debug a vmcore file, but crash fails to start normally, with the following errors:
# crash vmlinux vmcore
please wait...(gathering kmem slab cache data)
crash seek error: kernel virtual address: fffffffffffffffa0 type: "kmem_cache buffer"
crash: unable to initialize kmem slab cache subsystem
crash: cannot determine idle task addresses from init_tasks[] or runqueues[]
crash: cannot resolve "init_task_union"
At the beginning, I used crash version 7.0, which reported the error "vmlinux is not SMP -- vmcore is SMP", so I updated to crash version 7.2, and that error was solved. But there are still other startup errors: crash: cannot determine idle task addresses from init_tasks[] or runqueues[] ------- crash: cannot resolve "init_task_union"
--My hardware and software information:
The arch: x86_64
CPU: Intel, 8 cores
Memory: 8 GB
The kernel version: 4.4.39
Kexec version: 2.0.14
Crash version: 7.2.0
kernel config:
CONFIG_CRASH_DUMP=y
CONFIG_KEXEC=y
CONFIG_SYSFS=y
CONFIG_DEBUG_INFO=y
CONFIG_PROC_VMCORE=y
CONFIG_RELOCATABLE=y
Ask for help:
1.Why did crash throw these errors? Is there a problem with my kernel configuration, or is there something else?
2.Can you give me some help according to my log?
-- eager for your response
【网易自营|30天无忧退货】无印良品制造商直供便携拖鞋等好物,限时29元起>>
6 years, 10 months
[PATCH 0/5] Second phase of future support for x86_64 5-level page tables
by Dou Liyang
I found Dave had already done the first phase of future support for x86_64
5-level page tables (commit 307e7f35f510). When I asked him about the
state of this work, he gave me a more detailed answer and suggestion.
I followed his advice and did the following work.
1. Refine the original logic:
1) Create some new common functions for getting the offset of a page table
2) Replace the PML4 and UPML with the common PGD:
machdep->machspec->pml4/upml ==> machdep->pgd
3) Using the PUD in x86_64
2. Add 5-level page tables support for x86_64_k/uvtop()
This patchset is the second phase of the work. As Dave said, we need
a way of determining very early on whether the kernel page tables are
using 5-level and whether each user-space task is using 4- or 5-level page
tables. That will be done after this phase.
About testing:
I have tested this patchset with 4-level and 5-level page tables.
sadump / Xen / old Linux / RHEL4 have not been tested.
Dou Liyang (5):
defs.h: Fix the PHYSICAL_PAGE_MASK macro
x86_64: Extract public page table mapping code
x86_64: Unify the page table parsing for 4-level mode
x86_64.c: Add the VMEMMAP support for 5 level page table
x86_64: Add 5-level paging support for x86_64_k/uvtop()
defs.h | 57 ++----
sadump.c | 9 +-
x86_64.c | 702 +++++++++++++++++++++++++++++++--------------------------------
3 files changed, 367 insertions(+), 401 deletions(-)
--
2.14.3
6 years, 10 months
Question about extension modules
by Fei, Jie/费 杰
Hello Dave,
Several weeks ago there was a bugzilla report about crash-trace-command
which was caused by load_module structure change:
https://bugzilla.redhat.com/show_bug.cgi?id=1520825
We can solve the problem by updating crash-trace-cmd with the latest
crash-devel package.
And I'm wondering whether trace.c needs an error-check function for this
kind of data structure change, or whether crash should have some
"extensions" test process when a new version is released, so that
these problems can be found earlier.
Thanks,
Fei
6 years, 10 months
[PATCH] defs.h: Refine the FILL_PML4() macro
by Dou Liyang
FILL_PML4() updates machdep->machspec->last_pml4_read every time.
But last_pml4_read only needs to be updated when the IS_LAST_PML4_READ()
macro returns false.
Refine FILL_PML4() to keep it simple and do some cleanup.
Signed-off-by: Dou Liyang <douly.fnst(a)cn.fujitsu.com>
---
defs.h | 38 ++++++++++++++++++--------------------
1 file changed, 18 insertions(+), 20 deletions(-)
diff --git a/defs.h b/defs.h
index 97738a6..9a33b41 100644
--- a/defs.h
+++ b/defs.h
@@ -3344,7 +3344,7 @@ struct arm64_stackframe {
#define PTRS_PER_P4D 512
#define __PGDIR_SHIFT (machdep->machspec->pgdir_shift)
-
+
#define pml4_index(address) (((address) >> PML4_SHIFT) & (PTRS_PER_PML4-1))
#define p4d_index(address) (((address) >> P4D_SHIFT) & (PTRS_PER_P4D - 1))
#define pgd_index(address) (((address) >> __PGDIR_SHIFT) & (PTRS_PER_PGD-1))
@@ -3353,26 +3353,24 @@ struct arm64_stackframe {
#define IS_LAST_PML4_READ(pml4) ((ulong)(pml4) == machdep->machspec->last_pml4_read)
-#define FILL_PML4() { \
- if (!(pc->flags & RUNTIME) || ACTIVE()) { \
- if (!IS_LAST_PML4_READ(vt->kernel_pgd[0])) \
- readmem(vt->kernel_pgd[0], KVADDR, machdep->machspec->pml4, \
- PAGESIZE(), "init_level4_pgt", FAULT_ON_ERROR); \
- machdep->machspec->last_pml4_read = (ulong)(vt->kernel_pgd[0]); \
- } \
-}
+#define FILL_PML4() \
+ if (!(pc->flags & RUNTIME) || ACTIVE()) { \
+ if (!IS_LAST_PML4_READ(vt->kernel_pgd[0])) { \
+ readmem(vt->kernel_pgd[0], KVADDR, machdep->machspec->pml4, \
+ PAGESIZE(), "init_level4_pgt", FAULT_ON_ERROR); \
+ machdep->machspec->last_pml4_read = (ulong)(vt->kernel_pgd[0]); \
+ } \
+ }
-#define FILL_PML4_HYPER() { \
- if (!machdep->machspec->last_pml4_read) { \
- unsigned long idle_pg_table = \
- symbol_exists("idle_pg_table_4") ? symbol_value("idle_pg_table_4") : \
- symbol_value("idle_pg_table"); \
- readmem(idle_pg_table, KVADDR, \
- machdep->machspec->pml4, PAGESIZE(), "idle_pg_table", \
- FAULT_ON_ERROR); \
- machdep->machspec->last_pml4_read = idle_pg_table; \
- }\
-}
+#define FILL_PML4_HYPER() \
+ if (!machdep->machspec->last_pml4_read) { \
+ unsigned long idle_pg_table = symbol_exists("idle_pg_table_4") ? \
+ symbol_value("idle_pg_table_4") : \
+ symbol_value("idle_pg_table"); \
+ readmem(idle_pg_table, KVADDR, machdep->machspec->pml4, PAGESIZE(), \
+ "idle_pg_table", FAULT_ON_ERROR); \
+ machdep->machspec->last_pml4_read = idle_pg_table; \
+ }
#define IS_LAST_UPML_READ(pml) ((ulong)(pml) == machdep->machspec->last_upml_read)
--
2.14.3
6 years, 10 months
Why does crash check the program context flags in FILL_PML4() macro
by Dou Liyang
Hi Dave,
One question:
#define FILL_PML4() { \
if (!(pc->flags & RUNTIME) || ACTIVE()) { \
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
I am confused as to why crash checks the flags in the FILL_PML4() macro, but
does not check them in other macros, such as FILL_PGD().
Could you tell me why we need to do this check?
if (!IS_LAST_PML4_READ(vt->kernel_pgd[0])) \
readmem(vt->kernel_pgd[0], KVADDR,
machdep->machspec->pml4, \
PAGESIZE(), "init_level4_pgt", FAULT_ON_ERROR); \
machdep->machspec->last_pml4_read =
(ulong)(vt->kernel_pgd[0]); \
} \
}
Thanks,
dou
6 years, 10 months
[RFC PATCH] defs.h: Update the range of legacy vsyscall to 4kB
by Dou Liyang
There used to be 8 MB reserved for future vsyscalls, but that is
long gone in the kernel; the legacy vsyscall range is now 4 kB.
So update it to [ffffffffff600000 - ffffffffff600fff].
Signed-off-by: Dou Liyang <douly.fnst(a)cn.fujitsu.com>
---
defs.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/defs.h b/defs.h
index 4cd07b8..97738a6 100644
--- a/defs.h
+++ b/defs.h
@@ -3324,7 +3324,7 @@ struct arm64_stackframe {
#define VMEMMAP_END_5LEVEL 0xffd5ffffffffffff
#define VSYSCALL_START 0xffffffffff600000
-#define VSYSCALL_END 0xffffffffffe00000
+#define VSYSCALL_END 0xffffffffff601000
#define PTOV(X) ((unsigned long)(X)+(machdep->kvbase))
#define VTOP(X) x86_64_VTOP((ulong)(X))
--
2.14.3
6 years, 10 months
[PATCH] defs.h: Update the vmalloc start address of 5-Level
by Dou Liyang
Due to PTI support in the kernel, the range
[ff90000000000000 - ff9fffffffffffff] of the virtual memory map with
5-level page tables is now assigned to the LDT remap for PTI, and the
vmalloc/ioremap space has been moved accordingly.
So, update the vmalloc start address.
Signed-off-by: Dou Liyang <douly.fnst(a)cn.fujitsu.com>
---
defs.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/defs.h b/defs.h
index ba9abad..4cd07b8 100644
--- a/defs.h
+++ b/defs.h
@@ -3316,7 +3316,7 @@ struct arm64_stackframe {
#define USERSPACE_TOP_5LEVEL 0x0100000000000000
#define PAGE_OFFSET_5LEVEL 0xff10000000000000
-#define VMALLOC_START_ADDR_5LEVEL 0xff92000000000000
+#define VMALLOC_START_ADDR_5LEVEL 0xffa0000000000000
#define VMALLOC_END_5LEVEL 0xffd1ffffffffffff
#define MODULES_VADDR_5LEVEL 0xffffffffa0000000
#define MODULES_END_5LEVEL 0xffffffffff5fffff
--
2.14.3
6 years, 10 months