Re: [PATCH] x86_64: Fix the bug of getting incorrect framesize
by lijiang
On Mon, Sep 16, 2024 at 3:46 PM <devel-request(a)lists.crash-utility.osci.io>
wrote:
> Date: Mon, 16 Sep 2024 19:44:58 +1200
> From: Tao Liu <ltao(a)redhat.com>
> Subject: [Crash-utility] [PATCH] x86_64: Fix the bug of getting
> incorrect framesize
> To: devel(a)lists.crash-utility.osci.io
> Cc: mycomplexlove(a)gmail.com
> Message-ID: <20240916074458.105832-1-ltao(a)redhat.com>
> Content-Type: text/plain; charset="US-ASCII"; x-default=true
>
> Previously, "retq" is used to determine the end of a function, so the end
> of framesize calculation. However "ret" might be outputted by gdb rather
> than "retq", as a result, the framesize is returned incorrectly, and bogus
> stack trace will be outputted.
>
> Without the patch:
>
> $ crash -d 3 vmcore vmlinux
> crash> bt
> 0xffffffff92da7545 <copy_process+5>: push %rbp [framesize: 8]
> ...
> 0xffffffff92da7561 <copy_process+33>: sub $0x238,%rsp
> [framesize: 624]
> ...
> 0xffffffff92da776a <copy_process+554>: pop %r15
> [framesize: 8]
> 0xffffffff92da776c <copy_process+556>: pop %rbp
> [framesize: 0]
> 0xffffffff92da776d <copy_process+557>: ret
>
> crash> bt -D dump
> framesize_cache_entries:
> ...
> [ 3]: ffffffff92dadcbd 0 CF (copy_process+26493)
>
> crash> bt
> ...
> #9 [ffff888263157bc0] copy_process at ffffffff92dadcbd
> #10 [ffff888263157d20] __mutex_init at ffffffff92ed8dd5
> #11 [ffff888263157d38] __alloc_file at ffffffff93458397
> #12 [ffff888263157d60] alloc_empty_file at ffffffff934585d2
> #13 [ffff888263157da8] __alloc_fd at ffffffff934b5ead
> #14 [ffff888263157e38] _do_fork at ffffffff92dae7a1
> #15 [ffff888263157f28] do_syscall_64 at ffffffff92c085f4
>
> Stack #10 ~ #13 are bogus and misleading.
>
> With the patch:
> ...
> 0xffffffff92da776d <copy_process+557>: ret [framesize
> restored to: 624]
>
> crash> bt -D dump
> ...
> [ 3]: ffffffff92dadcbd 624 CF (copy_process+26493)
>
> crash> bt
> ...
> #9 [ffff888263157bc0] copy_process at ffffffff92dadcbd
> #10 [ffff888263157e38] _do_fork at ffffffff92dae7a1
> #11 [ffff888263157f28] do_syscall_64 at ffffffff92c085f4
>
> Signed-off-by: Tao Liu <ltao(a)redhat.com>
> ---
> x86_64.c | 3 ++-
> 1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/x86_64.c b/x86_64.c
> index 469d26b..7aa9430 100644
> --- a/x86_64.c
> +++ b/x86_64.c
> @@ -8781,7 +8781,8 @@ x86_64_get_framesize(struct bt_info *bt, ulong
> textaddr, ulong rsp, char *stack_
> if (CRASHDEBUG(2) || (bt->flags &
> BT_FRAMESIZE_DEBUG))
> fprintf(fp, "%s\t[framesize: %d]\n",
> strip_linefeeds(buf2), framesize);
> - } else if (STRNEQ(arglist[instr], "retq")) {
> + } else if (STRNEQ(arglist[instr], "retq") ||
> + STRNEQ(arglist[instr], "ret")) {
>
Thank you for the fix, Tao.
This looks good. Applied:
https://github.com/crash-utility/crash/commit/0d2ad774532db3c4dad6cda05d5...
Lianbo
if (!exception) {
> framesize = max;
> if (CRASHDEBUG(2) || (bt->flags &
> BT_FRAMESIZE_DEBUG))
> --
> 2.40.1
>
15 hours, 42 minutes
Re: [PATCH v2] kmem: fix the determination for slab page
by lijiang
Thank you for the update, Qiwu.
On Wed, Sep 11, 2024 at 10:27 AM <devel-request(a)lists.crash-utility.osci.io>
wrote:
> Date: Wed, 11 Sep 2024 02:25:40 -0000
> From: qiwu.chen(a)transsion.com
> Subject: [Crash-utility] [PATCH v2] kmem: fix the determination for
> slab page
> To: devel(a)lists.crash-utility.osci.io
> Message-ID: <20240911022540.15869.84683(a)lists.crash-utility.osci.io>
> Content-Type: text/plain; charset="utf-8"
>
> The determination for a slab page has changed due to changing
> PG_slab from a page flag to a page type since kernel commit
> 46df8e73a4a3.
>
> Before apply this patch:
> crash> kmem -s ffff000002aa4100
> kmem: address is not allocated in slab subsystem: ffff000002aa4100
>
> After apply this patch:
> crash> kmem -s ffff000002aa4100
> CACHE OBJSIZE ALLOCATED TOTAL SLABS SSIZE NAME
> ffff00000140f900 4096 94 126 18 32k task_struct
> SLAB MEMORY NODE TOTAL ALLOCATED FREE
> fffffdffc00aa800 ffff000002aa0000 0 7 5 2
> FREE / [ALLOCATED]
> [ffff000002aa4100]
>
> Signed-off-by: qiwu.chen <qiwu.chen(a)transsion.com>
> ---
> defs.h | 7 ++++++
> memory.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++---------
> 2 files changed, 62 insertions(+), 10 deletions(-)
>
> diff --git a/defs.h b/defs.h
> index 2231cb6..e2a9278 100644
> --- a/defs.h
> +++ b/defs.h
> @@ -2243,6 +2243,7 @@ struct offset_table { /* stash of
> commonly-used offsets */
> long vmap_node_busy;
> long rb_list_head;
> long file_f_inode;
> + long page_page_type;
> };
>
> struct size_table { /* stash of commonly-used sizes */
> @@ -2651,6 +2652,7 @@ struct vm_table { /* kernel
> VM-related data */
> ulong max_mem_section_nr;
> ulong zero_paddr;
> ulong huge_zero_paddr;
> + uint page_type_base;
> };
>
>
Can you also dump the values of the two new fields above in
dump_vm_table() and dump_offset_table()? Then we can display their values
with the "help -v" and "help -o" options.
> #define NODES (0x1)
> @@ -2684,6 +2686,11 @@ struct vm_table { /* kernel
> VM-related data */
> #define SLAB_CPU_CACHE (0x10000000)
> #define SLAB_ROOT_CACHES (0x20000000)
> #define USE_VMAP_NODES (0x40000000)
> +/*
> + * The SLAB_PAGEFLAGS flag is introduced to detect the change of
> + * PG_slab's type from a page flag to a page type.
> + */
> +#define SLAB_PAGEFLAGS (0x80000000)
>
> #define IS_FLATMEM() (vt->flags & FLATMEM)
> #define IS_DISCONTIGMEM() (vt->flags & DISCONTIGMEM)
> diff --git a/memory.c b/memory.c
> index 967a9cf..48ac627 100644
> --- a/memory.c
> +++ b/memory.c
> @@ -351,6 +351,43 @@ static ulong handle_each_vm_area(struct
> handle_each_vm_area_args *);
>
> static ulong DISPLAY_DEFAULT;
>
> +/*
> + * Before kernel commit ff202303c398e, the value is defined as a macro,
> so copy it here;
> + * After this commit, the value is defined as an enum, which can be
> evaluated at runtime.
> + */
> +#define PAGE_TYPE_BASE 0xf0000000
> +#define PageType(page_type, flag)
> \
> + ((page_type & (vt->page_type_base | flag)) == vt->page_type_base)
> +
> +static void page_type_init(void)
> +{
> + if (!enumerator_value("PAGE_TYPE_BASE", (long
> *)&vt->page_type_base))
> + vt->page_type_base = PAGE_TYPE_BASE;
> +}
> +
> +/*
> + * The PG_slab's type has changed from a page flag to a page type
> + * since kernel commit 46df8e73a4a3.
> + */
> +static bool page_slab(ulong page, ulong flags)
> +{
> + if (vt->flags & SLAB_PAGEFLAGS) {
> + if ((flags >> vt->PG_slab) & 1)
> + return TRUE;
> + }
> +
> + if (VALID_MEMBER(page_page_type)) {
> + uint page_type;
> +
> + readmem(page+OFFSET(page_page_type), KVADDR, &page_type,
> + sizeof(page_type), "page_type", FAULT_ON_ERROR);
> + if (PageType(page_type, (uint)vt->PG_slab))
> + return TRUE;
> + }
> +
> + return FALSE;
> +}
> +
> /*
> * Verify that the sizeof the primitive types are reasonable.
> */
> @@ -504,6 +541,10 @@ vm_init(void)
> ANON_MEMBER_OFFSET_INIT(page_compound_head, "page",
> "compound_head");
> MEMBER_OFFSET_INIT(page_private, "page", "private");
> MEMBER_OFFSET_INIT(page_freelist, "page", "freelist");
> + if (MEMBER_EXISTS("page", "page_type")) {
>
When initializing the page_page_type, the above "if" check seems to be
redundant.
> + MEMBER_OFFSET_INIT(page_page_type, "page", "page_type");
> + page_type_init();
>
Can this page_type_init() be moved to the page_flags_init()?
Other changes are fine to me.
Thanks
Lianbo
+ }
>
>
MEMBER_OFFSET_INIT(mm_struct_pgd, "mm_struct", "pgd");
>
> @@ -5931,7 +5972,7 @@ dump_mem_map_SPARSEMEM(struct meminfo *mi)
> if ((flags >> v22_PG_Slab) & 1)
> slabs++;
> } else if (vt->PG_slab) {
> - if ((flags >> vt->PG_slab) & 1)
> + if (page_slab(pp, flags))
> slabs++;
> } else {
> if ((flags >> v24_PG_slab) & 1)
> @@ -6381,7 +6422,7 @@ dump_mem_map(struct meminfo *mi)
> if ((flags >> v22_PG_Slab) & 1)
> slabs++;
> } else if (vt->PG_slab) {
> - if ((flags >> vt->PG_slab) & 1)
> + if (page_slab(pp, flags))
> slabs++;
> } else {
> if ((flags >> v24_PG_slab) & 1)
> @@ -6775,6 +6816,9 @@ page_flags_init_from_pageflag_names(void)
> vt->pageflags_data[i].name = nameptr;
> vt->pageflags_data[i].mask = mask;
>
> + if (!strncmp(nameptr, "slab", 4))
> + vt->flags |= SLAB_PAGEFLAGS;
> +
> if (CRASHDEBUG(1)) {
> fprintf(fp, " %08lx %s\n",
> vt->pageflags_data[i].mask,
> @@ -6836,7 +6880,8 @@ page_flags_init_from_pageflags_enum(void)
> strcpy(nameptr, arglist[0] + strlen("PG_"));
> vt->pageflags_data[p].name = nameptr;
> vt->pageflags_data[p].mask = 1 <<
> atoi(arglist[2]);
> -
> + if (!strncmp(nameptr, "slab", 4))
> + vt->flags |= SLAB_PAGEFLAGS;
> p++;
> }
> } else
> @@ -9736,14 +9781,14 @@ vaddr_to_kmem_cache(ulong vaddr, char *buf, int
> verbose)
> readmem(page+OFFSET(page_flags), KVADDR,
> &page_flags, sizeof(ulong), "page.flags",
> FAULT_ON_ERROR);
> - if (!(page_flags & (1 << vt->PG_slab))) {
> + if (!page_slab(page, page_flags)) {
> if (((vt->flags & KMALLOC_SLUB) ||
> VALID_MEMBER(page_compound_head)) ||
> ((vt->flags & KMALLOC_COMMON) &&
> VALID_MEMBER(page_slab) &&
> VALID_MEMBER(page_first_page))) {
>
> readmem(compound_head(page)+OFFSET(page_flags), KVADDR,
> &page_flags, sizeof(ulong),
> "page.flags",
> FAULT_ON_ERROR);
> - if (!(page_flags & (1 << vt->PG_slab)))
> + if (!page_slab(compound_head(page),
> page_flags))
> return NULL;
> } else
> return NULL;
> @@ -20195,7 +20240,7 @@ char *
> is_slab_page(struct meminfo *si, char *buf)
> {
> int i, cnt;
> - ulong page_slab, page_flags, name;
> + ulong pg_slab, page_flags, name;
> ulong *cache_list;
> char *retval;
>
> @@ -20210,11 +20255,11 @@ is_slab_page(struct meminfo *si, char *buf)
> RETURN_ON_ERROR|QUIET))
> return NULL;
>
> - if (!(page_flags & (1 << vt->PG_slab)))
> + if (!page_slab(si->spec_addr, page_flags))
> return NULL;
>
> - if (!readmem(si->spec_addr + OFFSET(page_slab), KVADDR,
> - &page_slab, sizeof(ulong), "page.slab",
> + if (!readmem(si->spec_addr + OFFSET(page_slab), KVADDR,
> + &pg_slab, sizeof(ulong), "page.slab",
> RETURN_ON_ERROR|QUIET))
> return NULL;
>
> @@ -20222,7 +20267,7 @@ is_slab_page(struct meminfo *si, char *buf)
> cnt = get_kmem_cache_list(&cache_list);
>
> for (i = 0; i < cnt; i++) {
> - if (page_slab == cache_list[i]) {
> + if (pg_slab == cache_list[i]) {
> if (!readmem(cache_list[i] +
> OFFSET(kmem_cache_name),
> KVADDR, &name, sizeof(char *),
> "kmem_cache.name", QUIET|RETURN_ON_ERROR))
> --
> 2.25.1
>
17 hours, 47 minutes
[PATCH] ppc64: Fix bt printing error stack trace
by Tao Liu
An incorrect stack trace from the bt command was observed:
crash> bt 1
PID: 1 TASK: c000000003714b80 CPU: 2 COMMAND: "systemd"
#0 [c0000000037735c0] _end at c0000000037154b0 (unreliable)
#1 [c000000003773770] __switch_to at c00000000001fa9c
#2 [c0000000037737d0] __schedule at c00000000112e4ec
#3 [c0000000037738b0] schedule at c00000000112ea80
...
The #0 stack frame is incorrect: a function address should never exceed _end.
The reason is that for kernels >= v6.2, the offset from sp to pt_regs changed from
STACK_FRAME_OVERHEAD, i.e. 112, to STACK_SWITCH_FRAME_REGS. For
CONFIG_PPC64_ELF_ABI_V1 it is 112, but for ABI_V2 it is 48. So nip is read from
the wrong stack location when ABI_V2 is enabled.
Determining whether ABI_V2 is enabled is tricky. This patch does it by checking the
following:
In arch/powerpc/include/asm/ppc_asm.h:
#ifdef CONFIG_PPC64_ELF_ABI_V2
#define STK_GOT 24
#else
#define STK_GOT 40
In arch/powerpc/kernel/tm.S:
_GLOBAL(tm_reclaim)
mfcr r5
mflr r0
stw r5, 8(r1)
std r0, 16(r1)
std r2, STK_GOT(r1)
...
So the approach used is to disassemble tm_reclaim and extract the STK_GOT value
from the std instruction.
After the patch:
crash> bt 1
PID: 1 TASK: c000000003714b80 CPU: 2 COMMAND: "systemd"
#0 [c0000000037737d0] __schedule at c00000000112e4ec
#1 [c0000000037738b0] schedule at c00000000112ea80
...
Signed-off-by: Tao Liu <ltao(a)redhat.com>
---
This patch is a follow-up to the gdb stack unwinding support v7 discussion[1],
where a "gdb bt" failure was observed. After applying this patch, "gdb bt"
also works normally with gdb stack unwinding support v7.
[1]: https://www.mail-archive.com/devel@lists.crash-utility.osci.io/msg01120.html
---
defs.h | 1 +
ppc64.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++++---
2 files changed, 57 insertions(+), 3 deletions(-)
diff --git a/defs.h b/defs.h
index 2231cb6..d5cb8cc 100644
--- a/defs.h
+++ b/defs.h
@@ -4643,6 +4643,7 @@ struct efi_memory_desc_t {
#define MSR_PR_LG 14 /* Problem State / Privilege Level */
/* Used to find the user or kernel-mode frame*/
+#define STACK_SWITCH_FRAME_REGS 48
#define STACK_FRAME_OVERHEAD 112
#define EXCP_FRAME_MARKER 0x7265677368657265
diff --git a/ppc64.c b/ppc64.c
index e8930a1..5e2595e 100644
--- a/ppc64.c
+++ b/ppc64.c
@@ -2813,6 +2813,51 @@ ppc64_get_sp(ulong task)
return sp;
}
+static bool
+is_ppc64_elf_abi_v2(void)
+{
+ char buf1[BUFSIZE];
+ char *pos1, *pos2;
+ int errflag = 0;
+ ulong stk_got = 0;
+ static bool ret = false;
+ static bool checked = false;
+
+ if (checked == true || !symbol_exists("tm_reclaim"))
+ return ret;
+
+ sprintf(buf1, "x/16i tm_reclaim");
+ open_tmpfile();
+ if (!gdb_pass_through(buf1, pc->tmpfile, GNU_RETURN_ON_ERROR))
+ goto out;
+ checked = true;
+ rewind(pc->tmpfile);
+ while (fgets(buf1, BUFSIZE, pc->tmpfile)) {
+ // "std r2, STK_GOT(r1)" is expected
+ if (strstr(buf1, "std") &&
+ strstr(buf1, "(r1)") &&
+ (pos1 = strstr(buf1, "r2,"))) {
+ pos1 += strlen("r2,");
+ for (pos2 = pos1; *pos2 != '\0' && *pos2 != '('; pos2++);
+ *pos2 = '\0';
+ stk_got = stol(pos1, RETURN_ON_ERROR|QUIET, &errflag);
+ break;
+ }
+ }
+
+ if (!errflag) {
+ switch (stk_got) {
+ case 24:
+ ret = true;
+ case 40:
+ goto out;
+ }
+ }
+ error(WARNING, "Unstable elf_abi v1/v2 detection.\n");
+out:
+ close_tmpfile();
+ return ret;
+}
/*
* get the SP and PC values for idle tasks.
@@ -2834,9 +2879,17 @@ get_ppc64_frame(struct bt_info *bt, ulong *getpc, ulong *getsp)
sp = ppc64_get_sp(task);
if (!INSTACK(sp, bt))
goto out;
- readmem(sp+STACK_FRAME_OVERHEAD, KVADDR, &regs,
- sizeof(struct ppc64_pt_regs),
- "PPC64 pt_regs", FAULT_ON_ERROR);
+
+ if (THIS_KERNEL_VERSION >= LINUX(6,2,0) && is_ppc64_elf_abi_v2()) {
+ readmem(sp+STACK_SWITCH_FRAME_REGS, KVADDR, &regs,
+ sizeof(struct ppc64_pt_regs),
+ "PPC64 pt_regs", FAULT_ON_ERROR);
+ } else {
+ readmem(sp+STACK_FRAME_OVERHEAD, KVADDR, &regs,
+ sizeof(struct ppc64_pt_regs),
+ "PPC64 pt_regs", FAULT_ON_ERROR);
+ }
+
ip = regs.nip;
closest = closest_symbol(ip);
if (STREQ(closest, ".__switch_to") || STREQ(closest, "__switch_to")) {
--
2.40.1
1 day, 1 hour
[Question] There are differences when gdb7.6 and gdb10.2 parse the stack
by mycomplexlove@gmail.com
Hello, crash maintainers.
I found a problem. With crash built against gdb10.2, I have a vmcore for which
bt prints frames that shouldn't appear when parsing the process stack.
I have had some discussions with liutgnu. I rebuilt crash from https://github.com/liutgnu/crash-preview and tried again.
Unfortunately, the crash version based on gdb13.2 still seems to have this problem.
Here is the output of my test:
-------------------
crash 8.0.4++
Copyright (C) 2002-2022 Red Hat, Inc.
Copyright (C) 2004, 2005, 2006, 2010 IBM Corporation
Copyright (C) 1999-2006 Hewlett-Packard Co
Copyright (C) 2005, 2006, 2011, 2012 Fujitsu Limited
Copyright (C) 2006, 2007 VA Linux Systems Japan K.K.
Copyright (C) 2005, 2011, 2020-2022 NEC Corporation
Copyright (C) 1999, 2002, 2007 Silicon Graphics, Inc.
Copyright (C) 1999, 2000, 2001, 2002 Mission Critical Linux, Inc.
Copyright (C) 2015, 2021 VMware, Inc.
This program is free software, covered by the GNU General Public License,
and you are welcome to change it and/or distribute copies of it under
certain conditions. Enter "help copying" to see the conditions.
This program has absolutely no warranty. Enter "help warranty" for details.
GNU gdb (GDB) 13.2
Copyright (C) 2023 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.
Type "show copying" and "show warranty" for details.
This GDB was configured as "x86_64-pc-linux-gnu".
Type "show configuration" for configuration details.
Find the GDB manual and other documentation resources online at:
<http://www.gnu.org/software/gdb/documentation/>.
For help, type "help".
Type "apropos word" to search for commands related to "word"...
KERNEL: /root/hungtask/vmlinux [TAINTED]
DUMPFILE: /root/hungtask/2024_09_06_05_02_15.kernel_core [PARTIAL DUMP]
CPUS: 64
DATE: Fri Sep 6 05:01:47 CST 2024
UPTIME: 12:27:05
LOAD AVERAGE: 56.87, 25.40, 18.24
TASKS: 4319
NODENAME: host-047bcb37834d
RELEASE: 4.19.90-89.11.v2401.osc.sfc.6.11.0.0070.ky10.x86_64+debug
VERSION: #1 SMP Fri Aug 30 08:21:33 UTC 2024
MACHINE: x86_64 (2499 Mhz)
MEMORY: 255.9 GB
PANIC: "Kernel panic - not syncing: softlockup: hung tasks"
PID: 112450
COMMAND: "vtpstatd"
TASK: ffff88816ae80000 [THREAD_INFO: ffff88816ae80000]
CPU: 41
STATE: TASK_RUNNING (PANIC)
crash> bt
PID: 112450 TASK: ffff88816ae80000 CPU: 41 COMMAND: "vtpstatd"
#0 [ffff889e3fa87af8] machine_kexec at ffffffff92d059ab
#1 [ffff889e3fa87c18] __crash_kexec at ffffffff92fb9a99
#2 [ffff889e3fa87d30] panic at ffffffff9483ed43
#3 [ffff889e3fa87df8] watchdog_timer_fn at ffffffff93052cf6
#4 [ffff889e3fa87e30] __hrtimer_run_queues at ffffffff92f5e96e
#5 [ffff889e3fa87f28] hrtimer_interrupt at ffffffff92f5ffe7
#6 [ffff889e3fa87fc8] smp_apic_timer_interrupt at ffffffff94a03176
#7 [ffff889e3fa87ff0] apic_timer_interrupt at ffffffff94a0192f
--- <IRQ stack> ---
#8 [ffff888263157938] apic_timer_interrupt at ffffffff94a0192f
[exception RIP: copy_page_range+3681]
RIP: ffffffff9331d461 RSP: ffff8882631579e8 RFLAGS: 00000246
RAX: 1ffffd4018a95ad1 RBX: 8000003152b5a805 RCX: ffffea00c54ad688
RDX: ffffea00c54aee88 RSI: 00007f80d117f000 RDI: ffffffff956468e0
RBP: ffff8881c5bc2bf8 R8: fffff94018a2e22f R9: fffff94018a2e22f
R10: 0000000000000001 R11: fffff94018a2e22e R12: 0000000000000018
R13: dffffc0000000000 R14: ffffea00c54ad680 R15: 00007f80d117f000
ORIG_RAX: ffffffffffffff13 CS: 0010 SS: 0018
#9 [ffff888263157bc0] copy_process at ffffffff92dadcbd
#10 [ffff888263157d20] __mutex_init at ffffffff92ed8dd5
#11 [ffff888263157d38] __alloc_file at ffffffff93458397
#12 [ffff888263157d60] alloc_empty_file at ffffffff934585d2
#13 [ffff888263157da8] __alloc_fd at ffffffff934b5ead
#14 [ffff888263157e38] _do_fork at ffffffff92dae7a1
#15 [ffff888263157f28] do_syscall_64 at ffffffff92c085f4
#16 [ffff888263157f50] entry_SYSCALL_64_after_hwframe at ffffffff94a000a4
RIP: 00007f80ec93641a RSP: 00007ffcb38bbd50 RFLAGS: 00000246
RAX: ffffffffffffffda RBX: 00007ffcb38bbd50 RCX: 00007f80ec93641a
RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000001200011
RBP: 00007ffcb38bbde0 R8: 000000000001b742 R9: 00007f80ee1a0f80
R10: 00007f80ee1a1250 R11: 0000000000000246 R12: 000000000001b742
R13: 00007ffcb38bbd70 R14: 0000000000000000 R15: 00007ffcb38bbf00
ORIG_RAX: 0000000000000038 CS: 0033 SS: 002b
-------------------
Frames #10 through #13 look spurious.
The following is the analysis output based on gdb7.6 and the latest crash code:
-------------------
crash_805_gdb76 8.0.5++
Copyright (C) 2002-2024 Red Hat, Inc.
Copyright (C) 2004, 2005, 2006, 2010 IBM Corporation
Copyright (C) 1999-2006 Hewlett-Packard Co
Copyright (C) 2005, 2006, 2011, 2012 Fujitsu Limited
Copyright (C) 2006, 2007 VA Linux Systems Japan K.K.
Copyright (C) 2005, 2011, 2020-2024 NEC Corporation
Copyright (C) 1999, 2002, 2007 Silicon Graphics, Inc.
Copyright (C) 1999, 2000, 2001, 2002 Mission Critical Linux, Inc.
This program is free software, covered by the GNU General Public License,
and you are welcome to change it and/or distribute copies of it under
certain conditions. Enter "help copying" to see the conditions.
This program has absolutely no warranty. Enter "help warranty" for details.
GNU gdb (GDB) 7.6
Copyright (C) 2013 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law. Type "show copying"
and "show warranty" for details.
This GDB was configured as "x86_64-unknown-linux-gnu"...
WARNING: kernel relocated [284MB]: patching 99408 gdb minimal_symbol values
crash_805_gdb76: gdb cannot find text block for address: dd_init_queue
KERNEL: vmlinux [TAINTED]
DUMPFILE: 2024_09_06_05_02_15.kernel_core [PARTIAL DUMP]
CPUS: 64
DATE: Fri Sep 6 05:01:47 CST 2024
UPTIME: 12:27:05
LOAD AVERAGE: 56.87, 25.40, 18.24
TASKS: 4319
NODENAME: host-047bcb37834d
RELEASE: 4.19.90-89.11.v2401.osc.sfc.6.11.0.0070.ky10.x86_64+debug
VERSION: #1 SMP Fri Aug 30 08:21:33 UTC 2024
MACHINE: x86_64 (2499 Mhz)
MEMORY: 255.9 GB
PANIC: "Kernel panic - not syncing: softlockup: hung tasks"
PID: 112450
COMMAND: "vtpstatd"
TASK: ffff88816ae80000 [THREAD_INFO: ffff88816ae80000]
CPU: 41
STATE: TASK_RUNNING (PANIC)
crash_805_gdb76> bt
PID: 112450 TASK: ffff88816ae80000 CPU: 41 COMMAND: "vtpstatd"
#0 [ffff889e3fa87af8] machine_kexec at ffffffff92d059ab
#1 [ffff889e3fa87c18] __crash_kexec at ffffffff92fb9a99
#2 [ffff889e3fa87d30] panic at ffffffff9483ed43
#3 [ffff889e3fa87df8] watchdog_timer_fn at ffffffff93052cf6
#4 [ffff889e3fa87e30] __hrtimer_run_queues at ffffffff92f5e96e
#5 [ffff889e3fa87f28] hrtimer_interrupt at ffffffff92f5ffe7
#6 [ffff889e3fa87fc8] smp_apic_timer_interrupt at ffffffff94a03176
#7 [ffff889e3fa87ff0] apic_timer_interrupt at ffffffff94a0192f
--- <IRQ stack> ---
#8 [ffff888263157938] apic_timer_interrupt at ffffffff94a0192f
[exception RIP: copy_page_range+3681]
RIP: ffffffff9331d461 RSP: ffff8882631579e8 RFLAGS: 00000246
RAX: 1ffffd4018a95ad1 RBX: 8000003152b5a805 RCX: ffffea00c54ad688
RDX: ffffea00c54aee88 RSI: 00007f80d117f000 RDI: ffffffff956468e0
RBP: ffff8881c5bc2bf8 R8: fffff94018a2e22f R9: fffff94018a2e22f
R10: 0000000000000001 R11: fffff94018a2e22e R12: 0000000000000018
R13: dffffc0000000000 R14: ffffea00c54ad680 R15: 00007f80d117f000
ORIG_RAX: ffffffffffffff13 CS: 0010 SS: 0018
#9 [ffff888263157bc0] copy_process at ffffffff92dadcbd
#10 [ffff888263157e38] _do_fork at ffffffff92dae7a1
#11 [ffff888263157f28] do_syscall_64 at ffffffff92c085f4
#12 [ffff888263157f50] entry_SYSCALL_64_after_hwframe at ffffffff94a000a4
RIP: 00007f80ec93641a RSP: 00007ffcb38bbd50 RFLAGS: 00000246
RAX: ffffffffffffffda RBX: 00007ffcb38bbd50 RCX: 00007f80ec93641a
RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000001200011
RBP: 00007ffcb38bbde0 R8: 000000000001b742 R9: 00007f80ee1a0f80
R10: 00007f80ee1a1250 R11: 0000000000000246 R12: 000000000001b742
R13: 00007ffcb38bbd70 R14: 0000000000000000 R15: 00007ffcb38bbf00
ORIG_RAX: 0000000000000038 CS: 0033 SS: 002b
-------------------
The gdb7.6 output seems more convincing. This version was built by reverting the commit that updated gdb
(github url: https://github.com/crash-utility/crash/commit/9fab193).
I also tried the release versions of crash 7.3.2 and 8.0.1 (I had problems compiling 8.0.0),
and the results are consistent with the above: 7.3.2 parses the stack correctly, and 8.0.1 has the problem.
In crash_805_gdb76 x86_64_framesize_cache[3].framesize=624 :
(gdb) p x86_64_framesize_cache[0]
$136 = {textaddr = 18446744071880546969, framesize = 272, exception = 0}
(gdb) p x86_64_framesize_cache[1]
$137 = {textaddr = 18446744071906258243, framesize = 192, exception = 0}
(gdb) p x86_64_framesize_cache[2]
$138 = {textaddr = 18446744071908104495, framesize = 8, exception = 0}
(gdb) p x86_64_framesize_cache[3]
$139 = {textaddr = 18446744071878401213, framesize = 624, exception = 0}
But in crash_805_gdb102, x86_64_framesize_cache[3].framesize=0 :
(gdb) p x86_64_framesize_cache[0]
$86 = {textaddr = 18446744071880546969, framesize = 272, exception = 0}
(gdb) p x86_64_framesize_cache[1]
$87 = {textaddr = 18446744071906258243, framesize = 192, exception = 0}
(gdb) p x86_64_framesize_cache[2]
$88 = {textaddr = 18446744071908104495, framesize = 8, exception = 0}
(gdb) p x86_64_framesize_cache[3]
$89 = {textaddr = 18446744071878401213, framesize = 0, exception = 0}
---------------------------------------------
After the "Walk the process stack." step of x86_64_low_budget_back_trace_cmd, the values of *up are as follows:
x86_64.c:4059 switch (x86_64_print_stack_entry(bt, ofp, level, i,*up))
The address returned by crash_805_gdb76:
0xffffffff92d059ab
0xffffffff92fb9a99
0xffffffff9483ed43
0xffffffff93052cf6
0xffffffff92f5e96e
0xffffffff92f5ffe7
0xffffffff94a03176
0xffffffff94a0192f
0xffffffff92dadcbd <-copy_page_range
0xffffffff92dae7a1
0xffffffff92c085f4
0xffffffff94a000a4
The address returned by crash_805_gdb102:
0xffffffff92d059ab
0xffffffff92fb9a99
0xffffffff9483ed43
0xffffffff93052cf6
0xffffffff92f5e96e
0xffffffff92f5ffe7
0xffffffff94a03176
0xffffffff94a0192f
0xffffffff92dadcbd <-copy_page_range
0xffffffff92ed8dd5 -------Parts that shouldn't appear
0xffffffff93458397
0xffffffff934585d2
0xffffffff934b5ead --------Parts that shouldn't appear
0xffffffff92dae7a1
0xffffffff92c085f4
0xffffffff94a000a4
Analyze its symbols:
0xffffffff92ed8dd5 __mutex_init+181
0xffffffff93458397 __alloc_file+407
0xffffffff934585d2 alloc_empty_file+146
0xffffffff934b5ead __alloc_fd+141
Generate vmcore parameters:
makedumpfile -l -d 31 /proc/vmcore [date].kernel_core
Unfortunately, I am not using a regular distribution; it is a deeply customized one.
vmcore google drive url:
https://drive.google.com/file/d/1pDICRP6zQafe00c4LWRV-SklkM75971P/view
2 days, 14 hours
[PATCH] x86_64: Fix the bug of getting incorrect framesize
by Tao Liu
Previously, "retq" was used to determine the end of a function, and thus the end
of the framesize calculation. However, gdb might output "ret" rather
than "retq"; as a result, the framesize is calculated incorrectly, and a bogus
stack trace is output.
Without the patch:
$ crash -d 3 vmcore vmlinux
crash> bt
0xffffffff92da7545 <copy_process+5>: push %rbp [framesize: 8]
...
0xffffffff92da7561 <copy_process+33>: sub $0x238,%rsp [framesize: 624]
...
0xffffffff92da776a <copy_process+554>: pop %r15 [framesize: 8]
0xffffffff92da776c <copy_process+556>: pop %rbp [framesize: 0]
0xffffffff92da776d <copy_process+557>: ret
crash> bt -D dump
framesize_cache_entries:
...
[ 3]: ffffffff92dadcbd 0 CF (copy_process+26493)
crash> bt
...
#9 [ffff888263157bc0] copy_process at ffffffff92dadcbd
#10 [ffff888263157d20] __mutex_init at ffffffff92ed8dd5
#11 [ffff888263157d38] __alloc_file at ffffffff93458397
#12 [ffff888263157d60] alloc_empty_file at ffffffff934585d2
#13 [ffff888263157da8] __alloc_fd at ffffffff934b5ead
#14 [ffff888263157e38] _do_fork at ffffffff92dae7a1
#15 [ffff888263157f28] do_syscall_64 at ffffffff92c085f4
Stack #10 ~ #13 are bogus and misleading.
With the patch:
...
0xffffffff92da776d <copy_process+557>: ret [framesize restored to: 624]
crash> bt -D dump
...
[ 3]: ffffffff92dadcbd 624 CF (copy_process+26493)
crash> bt
...
#9 [ffff888263157bc0] copy_process at ffffffff92dadcbd
#10 [ffff888263157e38] _do_fork at ffffffff92dae7a1
#11 [ffff888263157f28] do_syscall_64 at ffffffff92c085f4
Signed-off-by: Tao Liu <ltao(a)redhat.com>
---
x86_64.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/x86_64.c b/x86_64.c
index 469d26b..7aa9430 100644
--- a/x86_64.c
+++ b/x86_64.c
@@ -8781,7 +8781,8 @@ x86_64_get_framesize(struct bt_info *bt, ulong textaddr, ulong rsp, char *stack_
if (CRASHDEBUG(2) || (bt->flags & BT_FRAMESIZE_DEBUG))
fprintf(fp, "%s\t[framesize: %d]\n",
strip_linefeeds(buf2), framesize);
- } else if (STRNEQ(arglist[instr], "retq")) {
+ } else if (STRNEQ(arglist[instr], "retq") ||
+ STRNEQ(arglist[instr], "ret")) {
if (!exception) {
framesize = max;
if (CRASHDEBUG(2) || (bt->flags & BT_FRAMESIZE_DEBUG))
--
2.40.1
2 days, 17 hours
Re: [PATCH v7 00/15] gdb stack unwinding support for crash utility
by lijiang
Hi, Tao
Thank you for the update.
The following patch fixes a regression, so I would prefer to discuss it as a
separate patch.
[PATCH v7 01/15] Fix the regression of cpumask_t for xen hyper
In addition, I found another issue in my tests (on ppc64le): gdb bt can
display the back trace for the panic task, but when I switch to another
task, gdb bt cannot display the back trace:
crash> gdb bt
#0 0xc0000000002bde04 in crash_setup_regs (newregs=0xc00000003264b858,
oldregs=0x0) at ./arch/powerpc/include/asm/kexec.h:133
#1 0xc0000000002be4f8 in __crash_kexec (regs=0x0) at
kernel/crash_core.c:122
#2 0xc00000000016c254 in panic (fmt=0xc0000000015eef20 "sysrq triggered
crash\n") at kernel/panic.c:373
#3 0xc000000000a708b8 in sysrq_handle_crash (key=<optimized out>) at
drivers/tty/sysrq.c:154
#4 0xc000000000a713d4 in __handle_sysrq (key=key@entry=99 'c',
check_mask=check_mask@entry=false) at drivers/tty/sysrq.c:612
#5 0xc000000000a71e94 in write_sysrq_trigger (file=<optimized out>,
buf=<optimized out>, count=2, ppos=<optimized out>) at
drivers/tty/sysrq.c:1181
#6 0xc00000000073260c in pde_write (pde=0xc00000000af9cc00,
file=<optimized out>, buf=<optimized out>, count=<optimized out>,
ppos=<optimized out>) at fs/proc/inode.c:334
#7 proc_reg_write (file=<optimized out>, buf=<optimized out>,
count=<optimized out>, ppos=<optimized out>) at fs/proc/inode.c:346
#8 0xc00000000063c0e0 in vfs_write (file=0xc0000000092d2900,
buf=0x10012536f60 <error: Cannot access memory at address 0x10012536f60>,
count=2, pos=0xc00000003264bd30) at fs/read_write.c:588
#9 vfs_write (file=0xc0000000092d2900, buf=0x10012536f60 <error: Cannot
access memory at address 0x10012536f60>, count=<optimized out>,
pos=0xc00000003264bd30) at fs/read_write.c:570
#10 0xc00000000063c690 in ksys_write (fd=<optimized out>, buf=0x10012536f60
<error: Cannot access memory at address 0x10012536f60>, count=2) at
fs/read_write.c:643
#11 0xc000000000031a28 in system_call_exception (regs=0xc00000003264be80,
r0=<optimized out>) at arch/powerpc/kernel/syscall.c:153
#12 0xc00000000000d05c in system_call_vectored_common () at
arch/powerpc/kernel/interrupt_64.S:198
crash> ps
PID PPID CPU TASK ST %MEM VSZ RSS COMM
0 0 0 c000000002bda980 RU 0.0 0 0
[swapper/0]
> 0 0 1 c000000003864c80 RU 0.0 0 0
[swapper/1]
...
8017 923 0 c000000043a20000 IN 0.2 22528 16256
sshd-session
8025 8017 6 c000000032271880 IN 0.1 22784 11840
sshd-session
> 8026 8025 0 c000000043a26600 RU 0.1 9664 6208 bash
...
11645 2 3 c000000032264c80 ID 0.0 0 0
[kworker/u32:2]
11738 6188 2 c00000003811b180 IN 0.1 43520 9408 pickup
12326 2 0 c00000003226b280 ID 0.0 0 0
[kworker/0:1]
13112 6089 2 c00000000c809900 IN 0.0 7232 3456 sleep
Let's take the "pickup" task as an example:
crash> set 11738
PID: 11738
COMMAND: "pickup"
TASK: c00000003811b180 [THREAD_INFO: c00000003811b180]
CPU: 2
STATE: TASK_INTERRUPTIBLE
crash> gdb bt
#0 0xc0000000a7f876a0 in ?? ()
gdb: gdb request failed: bt
crash> set gdb on
gdb: on
gdb> bt
#0 0xc0000000a7f876a0 in ?? ()
gdb>
Anyway, I did the same test on x86_64 and aarch64, and it works as
expected. Can you help to double-check on the ppc64 architecture?
X86 64:
crash> set 14599
PID: 14599
COMMAND: "pickup"
TASK: ffff8f57a0d7c180 [THREAD_INFO: ffff8f57a0d7c180]
CPU: 41
STATE: TASK_INTERRUPTIBLE
crash> gdb bt
#0 0xffffffff8b3efe29 in context_switch (rq=0xffff8f6f1f835900,
prev=0xffff8f57a0d7c180, next=0xffff8f5786720000, rf=0xffff9df22fea7b80) at
kernel/sched/core.c:5208
#1 __schedule (sched_mode=sched_mode@entry=0) at kernel/sched/core.c:6549
#2 0xffffffff8b3f0217 in __schedule_loop (sched_mode=<optimized out>) at
kernel/sched/core.c:6626
#3 schedule () at kernel/sched/core.c:6641
#4 0xffffffff8b3f6eef in schedule_hrtimeout_range_clock
(expires=expires@entry=0xffff9df22fea7cb0, delta=<optimized out>,
delta@entry=99999999, mode=mode@entry=HRTIMER_MODE_ABS,
clock_id=clock_id@entry=1) at kernel/time/hrtimer.c:2293
#5 0xffffffff8b3f7003 in schedule_hrtimeout_range
(expires=expires@entry=0xffff9df22fea7cb0,
delta=delta@entry=99999999, mode=mode@entry=HRTIMER_MODE_ABS) at
kernel/time/hrtimer.c:2340
#6 0xffffffff8aae301c in ep_poll (ep=0xffff8f5790d15d40,
events=events@entry=0x7ffea91b6b90, maxevents=maxevents@entry=100,
timeout=timeout@entry=0xffff9df22fea7d58) at fs/eventpoll.c:2062
#7 0xffffffff8aae3138 in do_epoll_wait (epfd=epfd@entry=8,
events=events@entry=0x7ffea91b6b90, maxevents=maxevents@entry=100,
to=0xffff9df22fea7d58) at fs/eventpoll.c:2464
#8 0xffffffff8aae44a1 in __do_sys_epoll_wait (epfd=<optimized out>,
events=0x7ffea91b6b90, maxevents=<optimized out>, timeout=<optimized out>)
at fs/eventpoll.c:2476
#9 __se_sys_epoll_wait (epfd=<optimized out>, events=<optimized out>,
maxevents=<optimized out>, timeout=<optimized out>) at fs/eventpoll.c:2471
#10 __x64_sys_epoll_wait (regs=<optimized out>) at fs/eventpoll.c:2471
#11 0xffffffff8b3e293d in do_syscall_x64 (regs=0xffff9df22fea7f48, nr=232)
at arch/x86/entry/common.c:52
#12 do_syscall_64 (regs=0xffff9df22fea7f48, nr=232) at
arch/x86/entry/common.c:83
#13 0xffffffff8b40012f in entry_SYSCALL_64 () at
arch/x86/entry/entry_64.S:121
crash>
aarch64:
crash> set 9338
PID: 9338
COMMAND: "pickup"
TASK: ffff0000c7b05400 [THREAD_INFO: ffff0000c7b05400]
CPU: 3
STATE: TASK_INTERRUPTIBLE
crash> gdb bt
#0 __switch_to (prev=<unavailable>, prev@entry=0xffff0000c7b05400,
next=next@entry=<unavailable>) at arch/arm64/kernel/process.c:555
#1 0xffffafc5b5ebd744 in context_switch (rq=0xffff00077bbd0ec0,
prev=0xffff0000c7b05400, next=<unavailable>, rf=0xffff80008ac63a60) at
kernel/sched/core.c:5208
#2 __schedule (sched_mode=sched_mode@entry=0) at kernel/sched/core.c:6549
#3 0xffffafc5b5ebdc2c in __schedule_loop (sched_mode=<optimized out>) at
kernel/sched/core.c:6626
#4 schedule () at kernel/sched/core.c:6641
#5 0xffffafc5b5ec6030 in schedule_hrtimeout_range_clock
(expires=expires@entry=0xffff80008ac63be8, delta=delta@entry=99999999,
mode=mode@entry=HRTIMER_MODE_ABS, clock_id=clock_id@entry=1) at
kernel/time/hrtimer.c:2293
#6 0xffffafc5b5ec618c in schedule_hrtimeout_range
(expires=expires@entry=0xffff80008ac63be8,
delta=delta@entry=99999999, mode=mode@entry=HRTIMER_MODE_ABS) at
kernel/time/hrtimer.c:2340
#7 0xffffafc5b545d33c in ep_poll (ep=<unavailable>,
events=events@entry=0xffffde5c3f68,
maxevents=maxevents@entry=100, timeout=timeout@entry=0xffff80008ac63ce0) at
fs/eventpoll.c:2062
#8 0xffffafc5b545d4e4 in do_epoll_wait (epfd=epfd@entry=8,
events=events@entry=0xffffde5c3f68, maxevents=maxevents@entry=100,
to=to@entry=0xffff80008ac63ce0) at fs/eventpoll.c:2464
#9 0xffffafc5b545d534 in do_epoll_pwait (epfd=epfd@entry=8,
events=events@entry=0xffffde5c3f68, maxevents=maxevents@entry=100,
to=to@entry=0xffff80008ac63ce0, sigsetsize=<optimized out>,
sigmask=<optimized out>) at fs/eventpoll.c:2498
#10 0xffffafc5b545e7c8 in do_epoll_pwait (epfd=8, events=0xffffde5c3f68,
maxevents=100, to=0xffff80008ac63ce0, sigmask=<optimized out>,
sigsetsize=<optimized out>) at fs/eventpoll.c:2495
#11 __do_sys_epoll_pwait (epfd=8, events=0xffffde5c3f68, maxevents=100,
timeout=<optimized out>, sigmask=<optimized out>, sigsetsize=<optimized
out>) at fs/eventpoll.c:2511
#12 __se_sys_epoll_pwait (epfd=8, events=281474412330856, maxevents=100,
timeout=<optimized out>, sigmask=<optimized out>, sigsetsize=<optimized
out>) at fs/eventpoll.c:2505
#13 __arm64_sys_epoll_pwait (regs=<optimized out>) at fs/eventpoll.c:2505
#14 0xffffafc5b4fa99bc in __invoke_syscall (regs=0xffff80008ac63eb0,
syscall_fn=<optimized out>) at arch/arm64/kernel/syscall.c:35
#15 invoke_syscall (regs=regs@entry=0xffff80008ac63eb0, scno=<optimized
out>, sc_nr=sc_nr@entry=463, syscall_table=<optimized out>) at
arch/arm64/kernel/syscall.c:49
#16 0xffffafc5b4fa9ac8 in el0_svc_common (sc_nr=463,
syscall_table=<optimized out>, regs=0xffff80008ac63eb0, scno=<optimized
out>) at arch/arm64/kernel/syscall.c:132
#17 do_el0_svc (regs=regs@entry=0xffff80008ac63eb0) at
arch/arm64/kernel/syscall.c:151
#18 0xffffafc5b5eb6fa4 in el0_svc (regs=0xffff80008ac63eb0) at
arch/arm64/kernel/entry-common.c:712
#19 0xffffafc5b5eb74c0 in el0t_64_sync_handler (regs=<optimized out>) at
arch/arm64/kernel/entry-common.c:730
#20 0xffffafc5b4f91634 in el0t_64_sync () at arch/arm64/kernel/entry.S:598
crash>
BTW: the other changes look fine to me.
Thanks
Lianbo
On Wed, Sep 4, 2024 at 3:54 PM <devel-request(a)lists.crash-utility.osci.io>
wrote:
> Date: Wed, 4 Sep 2024 19:49:25 +1200
> From: Tao Liu <ltao(a)redhat.com>
> Subject: [Crash-utility] [PATCH v7 00/15] gdb stack unwinding support
> for crash utility
> To: devel(a)lists.crash-utility.osci.io
> Cc: Tao Liu <ltao(a)redhat.com>
> Message-ID: <20240904074940.21331-1-ltao(a)redhat.com>
> Content-Type: text/plain; charset=UTF-8
>
> This patchset is a rebase/merged version of the following 3 patchsets:
>
> 1): [PATCH v10 0/5] Improve stack unwind on ppc64 [1]
> 2): [PATCH 0/5] x86_64 gdb stack unwinding support [2]
> 3): Clean up on top of one-thread-v2 [3]
>
> A complete description of gdb stack unwinding support for crash can be
> found in [1].
>
> This patchset can be divided into the following 3 parts:
>
> 1) part1: preparations before stack unwinding support, some
> bugs/regressions found when drafting this patchset.
> 2) part2: common part for all CPU archs, mainly dealing with
> crash_target.c/gdb_interface.c files, in order to
> support different archs.
> 3) part3: arch specific, for each ppc64/x86_64/arm64/vmware
> stack unwinding support.
>
> === part 3
> arm64: Add gdb stack unwinding support
> vmware_guestdump: Various format versions support
> x86_64: Add gdb stack unwinding support
> ppc64: correct gdb passthroughs by implementing
> machdep->get_current_task_reg
>
> === part 2
> Conditionally output gdb stack unwinding stop reasons
> Stop stack unwinding at non-kernel address
> Print task pid/command instead of CPU index
> Rename get_cpu_reg to get_current_task_reg
> Let crash change gdb context
> Leave only one gdb thread for crash
> Remove 'frame' from prohibited commands list
>
> === part 1
> Fix gdb_interface: restore gdb's output streams at end of gdb_interface
> x86_64: Fix invalid input "=>" for bt command
> Fix cpumask_t recursive dependence issue
> Fix the regression of cpumask_t for xen hyper
> ===
>
> v7 -> v6:
> 1) Reorganise the patchset, re-divided them into 3 part against the
> previous 2 parts.
> 2) Re-dealed with the cpumask_t part, which solved the comment No.4
> pointed out by lianbo in [4].
> 3) Add conditional output for the failing message of gdb stack unwinding.
> see [PATCH 11/15] Conditionally output gdb stack unwinding stop reasons
> 4) Redraft the commit messages, updated some outdated info.
> 5) Merged "Let crash change gdb context" and "set_context(): check if
> context is already current" into one.
>
> [4]:
> https://www.mail-archive.com/devel@lists.crash-utility.osci.io/msg01067.html
>
> v6 -> v5:
> 1) Refactor patch 4 & 9, which changed the function signature of struct
> get_cpu_reg/get_current_task_reg, and let each patch compile with no
> error when added on.
> 2) Rebased the patchset on top of latest upstream:
> ("79b93ecb2e72ec Fix a "Bus error" issue caused by 'crash --osrelease'
> or
> crash loading")
>
> v5 -> v4:
> 1) Plenty of code refactoring based on Lianbo's comments on v4.
> 2) Removed the magic number when dealing with regs bitmap, see [6].
> 3) Rebased the patchset on top of latest upstream:
> ("1c6da3eaff8207 arm64: Fix bt command show wrong stacktrace on ramdump
> source")
>
> v4 -> v3:
> Fixed the author issue in [PATCH v3 06/16] Fix gdb_interface: restore gdb's
> output streams at end of gdb_interface.
>
> v3 -> v2:
> 1) Updated CC list as pointed out in [4]
> 2) Compiling issues as in [5]
>
> v2 -> v1:
> 1) Added the patch: x86_64: Fix invalid input "=>" for bt command,
> thanks for Kazu's testing.
> 2) Modify the patch: x86_64: Add gdb stack unwinding support, added the
> pcp_save, spp_save and sp, for restoring the value in match of the
> original
> code logic.
>
> [1]:
> https://www.mail-archive.com/devel@lists.crash-utility.osci.io/msg00469.html
> [2]:
> https://www.mail-archive.com/devel@lists.crash-utility.osci.io/msg00488.html
> [3]:
> https://www.mail-archive.com/devel@lists.crash-utility.osci.io/msg00554.html
> [4]:
> https://www.mail-archive.com/devel@lists.crash-utility.osci.io/msg00681.html
> [5]:
> https://www.mail-archive.com/devel@lists.crash-utility.osci.io/msg00715.html
> [6]:
> https://www.mail-archive.com/devel@lists.crash-utility.osci.io/msg00819.html
>
> Aditya Gupta (3):
> Fix gdb_interface: restore gdb's output streams at end of
> gdb_interface
> Remove 'frame' from prohibited commands list
> ppc64: correct gdb passthroughs by implementing
> machdep->get_current_task_reg
>
> Alexey Makhalov (1):
> vmware_guestdump: Various format versions support
>
> Tao Liu (11):
> Fix the regression of cpumask_t for xen hyper
> Fix cpumask_t recursive dependence issue
> x86_64: Fix invalid input "=>" for bt command
> Leave only one gdb thread for crash
> Let crash change gdb context
> Rename get_cpu_reg to get_current_task_reg
> Print task pid/command instead of CPU index
> Stop stack unwinding at non-kernel address
> Conditionally output gdb stack unwinding stop reasons
> x86_64: Add gdb stack unwinding support
> arm64: Add gdb stack unwinding support
>
> arm64.c | 120 +++++++++++++++--
> crash_target.c | 71 ++++++----
> defs.h | 194 ++++++++++++++++++++++++++-
> gdb-10.2.patch | 96 ++++++++++++++
> gdb_interface.c | 39 ++----
> kernel.c | 63 +++++++--
> ppc64.c | 174 +++++++++++++++++++++++-
> symbols.c | 15 +++
> task.c | 34 +++--
> tools.c | 16 ++-
> unwind_x86_64.h | 4 -
> vmware_guestdump.c | 321 +++++++++++++++++++++++++++++++-------------
> x86_64.c | 323 ++++++++++++++++++++++++++++++++++++++++-----
> 13 files changed, 1247 insertions(+), 223 deletions(-)
>
> --
> 2.40.1
>
5 days, 18 hours
[PATCH v2 2/2] kmem: introduce -t flag to get slab debug trace
by qiwu.chen@transsion.com
Introduce -t flag for kmem command to get slab debug trace.
Here is the user help manual:
1. Dump the slab debug trace when "-st" is used with an allocated slab object address:
crash> kmem -st ffff000007e79d00
CACHE OBJSIZE ALLOCATED TOTAL SLABS SSIZE NAME
ffff000001c0ed00 3392 93 104 13 32k task_struct
SLAB MEMORY NODE TOTAL ALLOCATED FREE
fffffc00001f9e00 ffff000007e78000 0 8 6 2
FREE / [ALLOCATED]
[ffff000007e79d00]
object ffff000007e79d00 allocated in alloc_task_struct_node+36 when=4294915270 cpu=2 pid=415
__slab_alloc+60
kmem_cache_alloc_node+528
alloc_task_struct_node+36
dup_task_struct+56
copy_process+724
kernel_clone+276
__do_sys_clone+152
__se_sys_clone+60
__arm64_sys_clone+88
__invoke_syscall+36
invoke_syscall+284
el0_svc_common+248
do_el0_svc+56
el0_svc+248
el0t_64_sync_handler+92
el0t_64_sync+344
object ffff000007e79d00 freed in free_task_struct+32 when=4294911569 cpu=1 pid=0
kmem_cache_free+780
free_task_struct+32
free_task+164
__put_task_struct+328
put_task_struct+44
delayed_put_task_struct+64
rcu_do_batch+972
rcu_core+592
rcu_core_si+24
__softirqentry_text_start+388
do_softirq_own_stack+12
invoke_softirq+216
__irq_exit_rcu+164
irq_exit+20
handle_domain_irq+120
gic_handle_irq+312
2. Dump the slab debug trace for each allocated object belonging to this slab
when "-st" is used with a slab page address:
crash> kmem -st fffffc00001f9e00
CACHE OBJSIZE ALLOCATED TOTAL SLABS SSIZE NAME
ffff000001c0ed00 3392 93 104 13 32k task_struct
SLAB MEMORY NODE TOTAL ALLOCATED FREE
fffffc00001f9e00 ffff000007e78000 0 8 6 2
FREE / [ALLOCATED]
[ffff000007e78000]
object ffff000007e78000 allocated in alloc_task_struct_node+36 when=4294911106 cpu=3 pid=1
__slab_alloc+60
kmem_cache_alloc_node+528
alloc_task_struct_node+36
dup_task_struct+56
copy_process+724
kernel_clone+276
__do_sys_clone+152
__se_sys_clone+60
__arm64_sys_clone+88
__invoke_syscall+36
invoke_syscall+284
el0_svc_common+248
do_el0_svc+56
el0_svc+248
el0t_64_sync_handler+92
el0t_64_sync+344
object ffff000007e78000 freed in free_task_struct+32 when=4294911104 cpu=1 pid=0
kmem_cache_free+780
free_task_struct+32
free_task+164
__put_task_struct+328
put_task_struct+44
delayed_put_task_struct+64
rcu_do_batch+972
rcu_core+592
rcu_core_si+24
__softirqentry_text_start+388
do_softirq_own_stack+12
invoke_softirq+216
__irq_exit_rcu+164
irq_exit+20
handle_domain_irq+120
gic_handle_irq+312
3. Dump the slab debug trace for each allocated object belonging to the slab cache
when "-S -t" is used with a slab cache address:
crash> kmem -S -t ffff000001c0ed00
CACHE OBJSIZE ALLOCATED TOTAL SLABS SSIZE NAME
ffff000001c0ed00 3392 93 104 13 32k task_struct
CPU 0 KMEM_CACHE_CPU:
ffff00003fd6b7a0
CPU 0 SLAB:
(empty)
CPU 0 PARTIAL:
(empty)
CPU 1 KMEM_CACHE_CPU:
ffff00003fd8a7a0
CPU 1 SLAB:
(empty)
CPU 1 PARTIAL:
(empty)
CPU 2 KMEM_CACHE_CPU:
ffff00003fda97a0
CPU 2 SLAB:
(empty)
CPU 2 PARTIAL:
(empty)
CPU 3 KMEM_CACHE_CPU:
ffff00003fdc87a0
CPU 3 SLAB:
(empty)
CPU 3 PARTIAL:
(empty)
KMEM_CACHE_NODE NODE SLABS PARTIAL PER-CPU
ffff000001eeb200 0 13 5 0
NODE 0 PARTIAL:
SLAB MEMORY NODE TOTAL ALLOCATED FREE
fffffc00000e5e00 ffff000003978000 0 8 5 3
fffffc00000e5e00 ffff000003978000 0 8 5 3
FREE / [ALLOCATED]
[ffff000003978000]
object ffff000003978000 allocated in alloc_task_struct_node+36 when=4294914449 cpu=1 pid=1
__slab_alloc+60
kmem_cache_alloc_node+528
alloc_task_struct_node+36
dup_task_struct+56
copy_process+724
kernel_clone+276
__do_sys_clone+152
__se_sys_clone+60
__arm64_sys_clone+88
__invoke_syscall+36
invoke_syscall+284
el0_svc_common+248
do_el0_svc+56
el0_svc+248
el0t_64_sync_handler+92
el0t_64_sync+344
With this patch, the slab allocation/free times can be sorted by a script,
which is helpful when investigating slab memory leaks.
Signed-off-by: qiwu.chen <qiwu.chen(a)transsion.com>
---
defs.h | 7 ++++
help.c | 4 ++-
memory.c | 101 +++++++++++++++++++++++++++++++++++++++++++++++++++++--
3 files changed, 108 insertions(+), 4 deletions(-)
diff --git a/defs.h b/defs.h
index 3d729c8..a46c702 100644
--- a/defs.h
+++ b/defs.h
@@ -2283,6 +2283,12 @@ struct offset_table { /* stash of commonly-used offsets */
long page_owner_handle;
long page_owner_free_handle;
long mem_section_page_ext;
+ long track_addr;
+ long track_addrs;
+ long track_pid;
+ long track_cpu;
+ long track_when;
+ long track_handle;
};
struct size_table { /* stash of commonly-used sizes */
@@ -2462,6 +2468,7 @@ struct size_table { /* stash of commonly-used sizes */
long page_ext;
long page_owner;
long stack_record;
+ long track;
};
struct array_table {
diff --git a/help.c b/help.c
index f8ec62f..81c70af 100644
--- a/help.c
+++ b/help.c
@@ -6816,7 +6816,7 @@ char *help_kmem[] = {
"kmem",
"kernel memory",
"[-f|-F|-c|-C|-i|-v|-V|-n|-z|-o|-h] [-p|-t | -m member[,member]]\n"
-" [[-s|-S|-S=cpu[s]|-r] [slab] [-I slab[,slab]]] [-g [flags]] [[-P] address]]",
+" [[-s|-S|-S=cpu[s]|-r|-t] [slab] [-I slab[,slab]]] [-g [flags]] [[-P] address]]",
" This command displays information about the use of kernel memory.\n",
" -f displays the contents of the system free memory headers.",
" also verifies that the page count equals nr_free_pages.",
@@ -6894,6 +6894,8 @@ char *help_kmem[] = {
" address when used with -s or -S, searches the kmalloc() slab subsystem",
" for the slab containing of this virtual address, showing whether",
" it is in use or free.",
+" when added extra -t, displays the slab debug trace for the allocated",
+" object belongs to this slab",
" address when used with -f, the address can be either a page pointer,",
" a physical address, or a kernel virtual address; the free_area",
" header containing the page (if any) is displayed.",
diff --git a/memory.c b/memory.c
index 6c69b6a..3c4766b 100644
--- a/memory.c
+++ b/memory.c
@@ -865,6 +865,15 @@ vm_init(void)
"kmem_cache_node", "partial");
MEMBER_OFFSET_INIT(kmem_cache_node_full,
"kmem_cache_node", "full");
+ STRUCT_SIZE_INIT(track, "track");
+ MEMBER_OFFSET_INIT(track_addr, "track", "addr");
+ if (MEMBER_EXISTS("track", "addrs"))
+ MEMBER_OFFSET_INIT(track_addrs, "track", "addrs");
+ if (MEMBER_EXISTS("track", "handle"))
+ MEMBER_OFFSET_INIT(track_handle, "track", "handle");
+ MEMBER_OFFSET_INIT(track_when, "track", "when");
+ MEMBER_OFFSET_INIT(track_cpu, "track", "cpu");
+ MEMBER_OFFSET_INIT(track_pid, "track", "pid");
} else {
MEMBER_OFFSET_INIT(kmem_cache_s_c_nextp,
"kmem_cache_s", "c_nextp");
@@ -5047,6 +5056,7 @@ get_task_mem_usage(ulong task, struct task_mem_usage *tm)
#define SLAB_GATHER_FAILURE (ADDRESS_SPECIFIED << 26)
#define GET_SLAB_ROOT_CACHES (ADDRESS_SPECIFIED << 27)
#define GET_PAGE_OWNER (ADDRESS_SPECIFIED << 28)
+#define GET_SLAB_DEBUG_TRACE (ADDRESS_SPECIFIED << 29)
#define GET_ALL \
(GET_SHARED_PAGES|GET_TOTALRAM_PAGES|GET_BUFFERS_PAGES|GET_SLAB_PAGES)
@@ -5309,6 +5319,8 @@ cmd_kmem(void)
meminfo.reqname = p1;
meminfo.cache = value[i];
meminfo.flags |= CACHE_SET;
+ if (tflag)
+ meminfo.flags |= GET_SLAB_DEBUG_TRACE;
if ((i+1) == spec_addr) { /* done? */
if (meminfo.calls++)
fprintf(fp, "\n");
@@ -5318,6 +5330,8 @@ cmd_kmem(void)
} else {
meminfo.spec_addr = value[i];
meminfo.flags = ADDRESS_SPECIFIED;
+ if (tflag)
+ meminfo.flags |= GET_SLAB_DEBUG_TRACE;
if (Sflag && (vt->flags & KMALLOC_SLUB))
meminfo.flags |= VERBOSE;
if (meminfo.calls++)
@@ -20015,6 +20029,85 @@ do_kmem_cache_slub(struct meminfo *si)
FREEBUF(per_cpu);
}
+/*
+ * Return offset of the end of info block which is inuse + free pointer if
+ * not overlapping with object.
+ */
+static inline uint get_info_end(struct meminfo *si)
+{
+ uint inuse = UINT(si->cache_buf + OFFSET(kmem_cache_inuse));
+ uint offset = UINT(si->cache_buf + OFFSET(kmem_cache_offset));
+
+ if (offset >= inuse)
+ return inuse + sizeof(void *);
+ else
+ return inuse;
+}
+
+#define TRACK_ADDRS_COUNT 16
+void print_track(struct meminfo *si, char *track, ulong object, enum track_item alloc)
+{
+ ulong track_addr, addr, addrs, when, entries, nr_entries;
+ uint i, cpu, pid, handle;
+ ulonglong jiffies;
+ char buf[BUFSIZE];
+
+ track_addr = object + get_info_end(si) + alloc * STRUCT_SIZE("track");
+ if (!readmem(track_addr, KVADDR, track, SIZE(track), "track", FAULT_ON_ERROR))
+ return;
+
+ addr = ULONG(track + OFFSET(track_addr));
+ if (addr) {
+ when = ULONG(track + OFFSET(track_when));
+ cpu = UINT(track + OFFSET(track_cpu));
+ pid = UINT(track + OFFSET(track_pid));
+ get_uptime(NULL, &jiffies);
+ fprintf(fp, "object %lx %s in %s when=%lu cpu=%u pid=%d\n",
+ object, alloc ? "freed" : "allocated",
+ value_to_symstr(addr, buf, 0),
+ when, cpu, pid);
+ if (VALID_MEMBER(track_addrs)) {
+ addrs = track_addr + OFFSET(track_addrs);
+ stack_trace_print(addrs, TRACK_ADDRS_COUNT);
+ } else if (VALID_MEMBER(track_handle)) {
+ handle = UINT(track + OFFSET(track_handle));
+ nr_entries = stack_depot_fetch(handle, &entries);
+ stack_trace_print(entries, nr_entries);
+ } else {
+ fprintf(fp, "stack trace missing\n");
+ handle = track_addr + OFFSET(track_handle);
+ nr_entries = stack_depot_fetch(handle, &entries);
+ stack_trace_print(entries, nr_entries);
+ }
+ }
+}
+
+#define SLAB_STORE_USER (0x00010000UL)
+static ulong get_slab_store_user_flag(void)
+{
+ ulong slab_store_user_flag;
+
+ if (enumerator_value("_SLAB_STORE_USER", &slab_store_user_flag))
+ return (1 << slab_store_user_flag);
+ else
+ return SLAB_STORE_USER;
+}
+
+static void slab_debug_trace_show(struct meminfo *si, ulong object)
+{
+ ulong flags;
+ char *track;
+
+ flags = ULONG(si->cache_buf + OFFSET(kmem_cache_flags));
+ if (!(flags & get_slab_store_user_flag()))
+ return;
+
+ track = (char *)GETBUF(SIZE(track));
+ print_track(si, track, object, TRACK_ALLOC);
+ print_track(si, track, object, TRACK_FREE);
+ FREEBUF(track);
+}
+
#define DUMP_SLAB_INFO_SLUB() \
{ \
char b1[BUFSIZE], b2[BUFSIZE]; \
@@ -20070,7 +20163,8 @@ do_slab_slub(struct meminfo *si, int verbose)
if (!verbose) {
DUMP_SLAB_INFO_SLUB();
- return TRUE;
+ if (!(si->flags & GET_SLAB_DEBUG_TRACE))
+ return TRUE;
}
cpu_freelist = 0;
@@ -20173,6 +20267,8 @@ do_slab_slub(struct meminfo *si, int verbose)
if (is_free && (cpu_slab >= 0))
fprintf(fp, "(cpu %d cache)", cpu_slab);
fprintf(fp, "\n");
+ if (!is_free && (si->flags & GET_SLAB_DEBUG_TRACE))
+ slab_debug_trace_show(si, p + red_left_pad);
}
return TRUE;
@@ -20283,11 +20379,10 @@ do_node_lists_slub(struct meminfo *si, ulong node_ptr, int node)
}
-#define SLAB_STORE_USER (0x00010000UL)
flags = ULONG(si->cache_buf + OFFSET(kmem_cache_flags));
if (INVALID_MEMBER(kmem_cache_node_full) ||
- !(flags & SLAB_STORE_USER)) {
+ !(flags & get_slab_store_user_flag())) {
fprintf(fp, "NODE %d FULL:\n (not tracked)\n", node);
return;
}
--
2.25.1
6 days, 21 hours
[PATCH v2 1/2] kmem: introduce -t flag to get page owner
by qiwu.chen@transsion.com
Introduce -t flag for kmem command to get page owner.
Here is the user help manual:
1. Dump the page_owner allocation stack trace for each allocated page in
the buddy system when "kmem -pt" is used:
crash> kmem -pt
Page allocated via order 0, mask 0x1112c4a, pid 1, ts 16155269152 ns
PFN 0x40000, type Movable, Flags 0xffff00000020836
set_page_owner+84
post_alloc_hook+308
prep_new_page+48
get_page_from_freelist+736
__alloc_pages+348
alloc_pages+280
__page_cache_alloc+120
page_cache_ra_unbounded+272
do_page_cache_ra+172
do_sync_mmap_readahead+492
filemap_fault+340
__do_fault+64
__handle_mm_fault+528
handle_mm_fault+208
__do_page_fault+232
do_page_fault+1264
......
2. Dump the page_owner allocation/free traces for an allocated page when
"kmem -pt" is used with a page address:
crash> kmem -pt fffffc00001f9e40
PAGE PHYSICAL MAPPING INDEX CNT FLAGS
fffffc00001f9e40 47e79000 dead000000000008 0 0 ffff00000000000
page_owner tracks the page 0xfffffc00001f9e40 as allocated
Page allocated via order 3, mask 0xd20c0, pid 163, ts 39197221904 ns
PFN 0x47e79, type Unmovable, Flags 0xffff00000000000
set_page_owner+84
post_alloc_hook+308
prep_new_page+48
get_page_from_freelist+736
__alloc_pages+348
alloc_pages+280
alloc_slab_page+60
allocate_slab+212
new_slab+200
___slab_alloc+1432
__slab_alloc+60
kmem_cache_alloc_node+528
alloc_task_struct_node+36
dup_task_struct+56
copy_process+724
kernel_clone+276
page last free ts 38730338480 ns, stack trace:
set_page_owner+84
post_alloc_hook+308
prep_new_page+48
get_page_from_freelist+736
__alloc_pages+348
alloc_pages+280
alloc_slab_page+60
allocate_slab+212
new_slab+200
___slab_alloc+1432
__slab_alloc+60
kmem_cache_alloc_node+528
alloc_task_struct_node+36
dup_task_struct+56
copy_process+724
kernel_clone+276
With this patch, the page allocation times can easily be sorted by the
page_owner_sort tool.
Signed-off-by: qiwu.chen <qiwu.chen(a)transsion.com>
---
defs.h | 43 ++++++
help.c | 4 +-
memory.c | 434 ++++++++++++++++++++++++++++++++++++++++++++++++++++---
3 files changed, 461 insertions(+), 20 deletions(-)
diff --git a/defs.h b/defs.h
index 2231cb6..3d729c8 100644
--- a/defs.h
+++ b/defs.h
@@ -206,6 +206,34 @@ static inline int string_exists(char *s) { return (s ? TRUE : FALSE); }
#undef roundup
#endif
#define roundup(x, y) ((((x)+((y)-1))/(y))*(y))
+#define __round_mask(x, y) ((__typeof__(x))((y)-1))
+#define rounddown(x, y) ((x) & ~__round_mask(x, y))
+#define IS_ALIGNED(x, y) (((x) & ((typeof(x))(y) - 1)) == 0)
+
+/* stack depot macros before kernel commit 8151c7a35d8bd */
+#define STACK_ALLOC_ALIGN 4
+#define STACK_ALLOC_NULL_PROTECTION_BITS 1
+#define STACK_ALLOC_ORDER 2 /* 'Slab' size order for stack depot, 4 pages */
+#define STACK_ALLOC_OFFSET_BITS (STACK_ALLOC_ORDER + PAGESHIFT() - STACK_ALLOC_ALIGN)
+#define DEPOT_STACK_BITS (sizeof(uint) * 8)
+#define STACK_ALLOC_INDEX_BITS (DEPOT_STACK_BITS - \
+ STACK_ALLOC_NULL_PROTECTION_BITS - STACK_ALLOC_OFFSET_BITS)
+
+/* stack depot macros since kernel commit 8151c7a35d8bd */
+#define STACK_DEPOT_EXTRA_BITS 5
+#define DEPOT_HANDLE_BITS (sizeof(uint) * 8)
+#define DEPOT_POOL_ORDER 2 /* Pool size order, 4 pages */
+#define DEPOT_POOL_SIZE (1LL << (PAGESHIFT() + DEPOT_POOL_ORDER))
+#define DEPOT_STACK_ALIGN 4
+#define DEPOT_OFFSET_BITS (DEPOT_POOL_ORDER + PAGESHIFT() - DEPOT_STACK_ALIGN)
+#define DEPOT_POOL_INDEX_BITS (DEPOT_HANDLE_BITS - DEPOT_OFFSET_BITS - \
+ STACK_DEPOT_EXTRA_BITS)
+
+/* GFP flags */
+#define __GFP_RECLAIMABLE 0x10u
+#define __GFP_MOVABLE 0x08u
+#define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE)
+#define GFP_MOVABLE_SHIFT 3
typedef uint64_t physaddr_t;
@@ -2243,6 +2271,18 @@ struct offset_table { /* stash of commonly-used offsets */
long vmap_node_busy;
long rb_list_head;
long file_f_inode;
+ long pglist_data_node_page_ext;
+ long stack_record_size;
+ long stack_record_entries;
+ long stack_record_count;
+ long page_owner_order;
+ long page_owner_gfp_mask;
+ long page_owner_ts_nsec;
+ long page_owner_free_ts_nsec;
+ long page_owner_pid;
+ long page_owner_handle;
+ long page_owner_free_handle;
+ long mem_section_page_ext;
};
struct size_table { /* stash of commonly-used sizes */
@@ -2419,6 +2459,9 @@ struct size_table { /* stash of commonly-used sizes */
long module_memory;
long fred_frame;
long vmap_node;
+ long page_ext;
+ long page_owner;
+ long stack_record;
};
struct array_table {
diff --git a/help.c b/help.c
index e95ac1d..f8ec62f 100644
--- a/help.c
+++ b/help.c
@@ -6815,7 +6815,7 @@ NULL
char *help_kmem[] = {
"kmem",
"kernel memory",
-"[-f|-F|-c|-C|-i|-v|-V|-n|-z|-o|-h] [-p | -m member[,member]]\n"
+"[-f|-F|-c|-C|-i|-v|-V|-n|-z|-o|-h] [-p|-t | -m member[,member]]\n"
" [[-s|-S|-S=cpu[s]|-r] [slab] [-I slab[,slab]]] [-g [flags]] [[-P] address]]",
" This command displays information about the use of kernel memory.\n",
" -f displays the contents of the system free memory headers.",
@@ -6845,6 +6845,7 @@ char *help_kmem[] = {
" mem_map[] array, made up of the page struct address, its associated",
" physical address, the page.mapping, page.index, page._count and",
" page.flags fields.",
+" -t displays page_owner allocated stack trace for each allocated page.",
" -m member similar to -p, but displays page structure contents specified by",
" a comma-separated list of one or more struct page members. The",
" \"flags\" member will always be expressed in hexadecimal format, and",
@@ -6899,6 +6900,7 @@ char *help_kmem[] = {
" address when used with -p, the address can be either a page pointer, a",
" physical address, or a kernel virtual address; its basic mem_map",
" page information is displayed.",
+" when added extra -t, display the page_owner traces for this page",
" address when used with -m, the address can be either a page pointer, a",
" physical address, or a kernel virtual address; the specified",
" members of the associated page struct are displayed.",
diff --git a/memory.c b/memory.c
index 967a9cf..6c69b6a 100644
--- a/memory.c
+++ b/memory.c
@@ -323,6 +323,11 @@ static ulong kmem_cache_nodelists(ulong);
static void dump_hstates(void);
static ulong freelist_ptr(struct meminfo *, ulong, ulong);
static ulong handle_each_vm_area(struct handle_each_vm_area_args *);
+static void page_owner_init(void);
+static int page_owner_enabled(void);
+static void stack_depot_init(void);
+static void dump_page_owner(struct meminfo *, ulong, physaddr_t);
+enum track_item { TRACK_ALLOC, TRACK_FREE, TRACK_ALL };
/*
* Memory display modes specific to this file.
@@ -983,6 +988,8 @@ vm_init(void)
vt->flags |= DISCONTIGMEM;
sparse_mem_init();
+ page_owner_init();
+ stack_depot_init();
vt->vmalloc_start = machdep->vmalloc_start();
if (IS_VMALLOC_ADDR(vt->mem_map))
@@ -1099,6 +1106,8 @@ vm_init(void)
MEMBER_OFFSET_INIT(pglist_data_bdata, "pglist_data", "bdata");
MEMBER_OFFSET_INIT(pglist_data_nr_zones, "pglist_data",
"nr_zones");
+ MEMBER_OFFSET_INIT(pglist_data_node_page_ext, "pglist_data",
+ "node_page_ext");
MEMBER_OFFSET_INIT(pglist_data_node_start_pfn, "pglist_data",
"node_start_pfn");
MEMBER_OFFSET_INIT(pglist_data_pgdat_next, "pglist_data",
@@ -5037,6 +5046,7 @@ get_task_mem_usage(ulong task, struct task_mem_usage *tm)
#define SLAB_BITFIELD (ADDRESS_SPECIFIED << 25)
#define SLAB_GATHER_FAILURE (ADDRESS_SPECIFIED << 26)
#define GET_SLAB_ROOT_CACHES (ADDRESS_SPECIFIED << 27)
+#define GET_PAGE_OWNER (ADDRESS_SPECIFIED << 28)
#define GET_ALL \
(GET_SHARED_PAGES|GET_TOTALRAM_PAGES|GET_BUFFERS_PAGES|GET_SLAB_PAGES)
@@ -5048,7 +5058,7 @@ cmd_kmem(void)
int c;
int sflag, Sflag, pflag, fflag, Fflag, vflag, zflag, oflag, gflag;
int nflag, cflag, Cflag, iflag, lflag, Lflag, Pflag, Vflag, hflag;
- int rflag;
+ int rflag, tflag;
struct meminfo meminfo;
ulonglong value[MAXARGS];
char buf[BUFSIZE];
@@ -5061,13 +5071,13 @@ cmd_kmem(void)
spec_addr = choose_cpu = 0;
sflag = Sflag = pflag = fflag = Fflag = Pflag = zflag = oflag = 0;
vflag = Cflag = cflag = iflag = nflag = lflag = Lflag = Vflag = 0;
- gflag = hflag = rflag = 0;
+ gflag = hflag = rflag = tflag = 0;
escape = FALSE;
BZERO(&meminfo, sizeof(struct meminfo));
BZERO(&value[0], sizeof(ulonglong)*MAXARGS);
pc->curcmd_flags &= ~HEADER_PRINTED;
- while ((c = getopt(argcnt, args, "gI:sS::rFfm:pvczCinl:L:PVoh")) != EOF) {
+ while ((c = getopt(argcnt, args, "gI:sS::rFfm:pvczCinl:L:PVoht")) != EOF) {
switch(c)
{
case 'V':
@@ -5204,6 +5214,10 @@ cmd_kmem(void)
gflag = 1;
break;
+ case 't':
+ tflag = 1;
+ break;
+
default:
argerrs++;
break;
@@ -5213,7 +5227,7 @@ cmd_kmem(void)
if (argerrs)
cmd_usage(pc->curcmd, SYNOPSIS);
- if ((sflag + Sflag + pflag + fflag + Fflag + Vflag + oflag +
+ if ((fflag + Fflag + Vflag + oflag +
vflag + Cflag + cflag + iflag + lflag + Lflag + gflag +
hflag + rflag) > 1) {
error(INFO, "only one flag allowed!\n");
@@ -5264,10 +5278,13 @@ cmd_kmem(void)
if (pflag) {
meminfo.spec_addr = value[i];
meminfo.flags = ADDRESS_SPECIFIED;
+ if (tflag)
+ meminfo.flags |= GET_PAGE_OWNER;
dump_mem_map(&meminfo);
pflag++;
}
+
if (sflag || Sflag) {
if (vt->flags & KMEM_CACHE_UNAVAIL)
error(FATAL,
@@ -5346,25 +5363,25 @@ cmd_kmem(void)
gflag++;
}
- /*
- * no value arguments allowed!
- */
- if (zflag || nflag || iflag || Fflag || Cflag || Lflag ||
+ /*
+ * no value arguments allowed!
+ */
+ if (zflag || nflag || iflag || Fflag || Cflag || Lflag ||
Vflag || oflag || hflag || rflag) {
error(INFO,
"no address arguments allowed with this option\n");
cmd_usage(pc->curcmd, SYNOPSIS);
}
- if (!(sflag + Sflag + pflag + fflag + vflag + cflag +
- lflag + Lflag + gflag)) {
+ if (!(sflag + Sflag + pflag + fflag + vflag + cflag +
+ lflag + Lflag + gflag + tflag)) {
meminfo.spec_addr = value[i];
- meminfo.flags = ADDRESS_SPECIFIED;
- if (meminfo.calls++)
- fprintf(fp, "\n");
+ meminfo.flags = ADDRESS_SPECIFIED;
+ if (meminfo.calls++)
+ fprintf(fp, "\n");
else
kmem_cache_init();
- kmem_search(&meminfo);
+ kmem_search(&meminfo);
}
}
@@ -5372,8 +5389,11 @@ cmd_kmem(void)
if (iflag == 1)
dump_kmeminfo();
- if (pflag == 1)
+ if (pflag == 1) {
+ if (tflag)
+ meminfo.flags = GET_PAGE_OWNER;
dump_mem_map(&meminfo);
+ }
if (fflag == 1)
vt->dump_free_pages(&meminfo);
@@ -5457,7 +5477,7 @@ cmd_kmem(void)
if (!(sflag + Sflag + pflag + fflag + Fflag + vflag +
Vflag + zflag + oflag + cflag + Cflag + iflag +
nflag + lflag + Lflag + gflag + hflag + rflag +
- meminfo.calls))
+ tflag + meminfo.calls))
cmd_usage(pc->curcmd, SYNOPSIS);
}
@@ -5749,7 +5769,8 @@ dump_mem_map_SPARSEMEM(struct meminfo *mi)
switch (mi->flags)
{
- case ADDRESS_SPECIFIED:
+ case ADDRESS_SPECIFIED:
+ case ADDRESS_SPECIFIED|GET_PAGE_OWNER:
switch (mi->memtype)
{
case KVADDR:
@@ -5774,6 +5795,10 @@ dump_mem_map_SPARSEMEM(struct meminfo *mi)
print_hdr = TRUE;
break;
+ case GET_PAGE_OWNER:
+ print_hdr = FALSE;
+ break;
+
case GET_ALL:
shared = 0;
reserved = 0;
@@ -5959,6 +5984,10 @@ dump_mem_map_SPARSEMEM(struct meminfo *mi)
shared++;
}
continue;
+
+ case GET_PAGE_OWNER:
+ dump_page_owner(mi, pp, phys);
+ continue;
}
page_mapping = VALID_MEMBER(page_mapping);
@@ -6083,6 +6112,7 @@ display_members:
if (done)
break;
+
}
if (done)
@@ -6119,7 +6149,10 @@ display_members:
break;
case ADDRESS_SPECIFIED:
+ case ADDRESS_SPECIFIED|GET_PAGE_OWNER:
mi->retval = done;
+ if (mi->flags & GET_PAGE_OWNER)
+ dump_page_owner(mi, pp, phys);
break;
}
@@ -6129,6 +6162,345 @@ display_members:
FREEBUF(page_cache);
}
+/*
+ * Report whether the kernel's stack depot can be used.
+ *
+ * When the kernel exports "stack_depot_disable" or "stack_depot_disabled",
+ * that flag is read from the dump and its negation is returned.  If neither
+ * exists but "stack_slabs" does, the depot is treated as enabled, because
+ * stack_depot_fetch() knows how to walk stack_slabs.
+ *
+ * Returns TRUE when enabled; FALSE when disabled, when the flag cannot be
+ * read, or when none of the symbols is present.
+ */
+static int stack_depot_enabled(void)
+{
+	struct syment *sp;
+	int disable = TRUE;	/* stays TRUE if a RETURN_ON_ERROR read fails */
+
+	/* NOTE(review): if the kernel flag is a 1-byte bool, reading sizeof(int)
+	 * also picks up neighbouring bytes -- confirm the width. */
+	if ((sp = symbol_search("stack_depot_disable")))
+		readmem(sp->value, KVADDR, &disable, sizeof(int),
+			"stack_depot_disable", RETURN_ON_ERROR);
+	else if ((sp = symbol_search("stack_depot_disabled")))
+		readmem(sp->value, KVADDR, &disable, sizeof(int),
+			"stack_depot_disabled", RETURN_ON_ERROR);
+	else if ((sp = symbol_search("stack_slabs")))
+		/* A resolved stack_slabs symbol means the depot is present. */
+		return sp->value ? TRUE : FALSE;
+
+	return !disable;
+}
+
+/*
+ * Cache the size and member offsets of struct stack_record.
+ * Nothing is initialized when stack_depot_enabled() reports the depot as off.
+ */
+static void stack_depot_init(void)
+{
+	if (!stack_depot_enabled())
+		return;
+
+	STRUCT_SIZE_INIT(stack_record, "stack_record");
+	MEMBER_OFFSET_INIT(stack_record_size, "stack_record", "size");
+	MEMBER_OFFSET_INIT(stack_record_entries, "stack_record", "entries");
+
+	/* The "count" offset is only looked up when the member exists. */
+	if (!MEMBER_EXISTS("stack_record", "count"))
+		return;
+
+	MEMBER_OFFSET_INIT(stack_record_count, "stack_record", "count");
+}
+
+/*
+ * Fetch the stack trace stored in the stack depot for a handle.
+ *
+ * handle:  depot handle read from the dump; 0 means "no stack recorded".
+ * entries: set to the kernel virtual address of the record's entries[]
+ *          array once a record has been located; untouched otherwise.
+ *
+ * Two layouts are handled: the "stack_slabs" array and the "stack_pools"
+ * array bounded by "pools_num".
+ *
+ * Returns the record's "size" member (number of entries), or 0 when the
+ * handle is empty or invalid, or when no record can be located.
+ */
+static unsigned int stack_depot_fetch(uint handle, ulong *entries)
+{
+	struct syment *sp;
+	uint valid, offset, slabindex, poolindex, pools_num, refcount;
+	uint stack_size = 0;
+	ulong stack_record_addr = 0, sym_value;
+
+	if (!handle)
+		return 0;
+
+	if ((sp = symbol_search("stack_slabs"))) {
+		valid = (handle >> (STACK_ALLOC_INDEX_BITS + STACK_ALLOC_OFFSET_BITS))
+			& STACK_ALLOC_NULL_PROTECTION_BITS;
+		if (!valid)
+			return 0;
+
+		slabindex = handle & ((1 << STACK_ALLOC_INDEX_BITS) - 1);
+		readmem(sp->value + slabindex * sizeof(void *), KVADDR, &stack_record_addr,
+			sizeof(void *), "stack_record_addr", FAULT_ON_ERROR);
+		/* Same guard as the stack_pools path: an empty slot has no record. */
+		if (!stack_record_addr)
+			return 0;
+
+		offset = (handle >> STACK_ALLOC_INDEX_BITS) &
+			((1 << STACK_ALLOC_OFFSET_BITS) - 1);
+		stack_record_addr += (offset << STACK_ALLOC_ALIGN);
+		*entries = stack_record_addr + OFFSET(stack_record_entries);
+		readmem(stack_record_addr + OFFSET(stack_record_size), KVADDR, &stack_size,
+			sizeof(stack_size), "stack_record_size", FAULT_ON_ERROR);
+	} else if ((sp = symbol_search("stack_pools")) &&
+		   (sym_value = symbol_value("pools_num"))) {
+		/*
+		 * The slot that is read is (index field - 1), so convert the
+		 * field to a real array index BEFORE the bounds check.  A zero
+		 * field wraps to UINT_MAX and is rejected below instead of
+		 * producing a wild address.
+		 */
+		poolindex = (handle & ((1 << DEPOT_POOL_INDEX_BITS) - 1)) - 1;
+		if (!readmem(sym_value, KVADDR, &pools_num, sizeof(int),
+			     "pools_num", RETURN_ON_ERROR))
+			return 0;
+		if (poolindex >= pools_num) {
+			error(INFO, "pool index %d out of bounds (%d) for stack id %08x\n",
+			      poolindex, pools_num, handle);
+			return 0;
+		}
+
+		readmem(sp->value + poolindex * sizeof(void *), KVADDR, &stack_record_addr,
+			sizeof(void *), "stack_record_addr", FAULT_ON_ERROR);
+		if (!stack_record_addr)
+			return 0;
+
+		offset = (handle >> DEPOT_POOL_INDEX_BITS) & ((1 << DEPOT_OFFSET_BITS) - 1);
+		stack_record_addr += (offset << DEPOT_STACK_ALIGN);
+
+		/*
+		 * stack_depot_init() only sets up the "count" offset when the
+		 * member exists, so only consult it when it is valid.
+		 */
+		if (VALID_MEMBER(stack_record_count)) {
+			readmem(stack_record_addr + OFFSET(stack_record_count), KVADDR,
+				&refcount, sizeof(refcount), "stack_record_count",
+				FAULT_ON_ERROR);
+			if (!refcount)
+				return 0;
+		}
+
+		*entries = stack_record_addr + OFFSET(stack_record_entries);
+		readmem(stack_record_addr + OFFSET(stack_record_size), KVADDR, &stack_size,
+			sizeof(stack_size), "stack_record_size", FAULT_ON_ERROR);
+	}
+
+	return stack_size;
+}
+
+/*
+ * Print one symbolized line per saved return address.
+ *
+ * entries:    kernel virtual address of an array of ulong text addresses.
+ * nr_entries: number of array elements; nothing at all is printed for 0.
+ *
+ * Printing stops at the first address that does not resolve to a symbol.
+ * A blank line is written after the trace.
+ */
+static void stack_trace_print(ulong entries, uint nr_entries)
+{
+	uint remaining;
+	ulong addr, sym_off;
+	struct syment *sym;
+	char symbuf[BUFSIZE];
+
+	if (nr_entries == 0)
+		return;
+
+	for (remaining = nr_entries; remaining > 0; remaining--) {
+		if (!readmem(entries, KVADDR, &addr, sizeof(addr),
+			     "stack_trace", FAULT_ON_ERROR))
+			break;
+		entries += sizeof(ulong);
+
+		sym = value_search(addr, &sym_off);
+		if (sym == NULL)
+			break;
+
+		fprintf(fp, "%s\n", value_to_symstr(sym->value + sym_off, symbuf, 0));
+	}
+
+	fprintf(fp, "\n");
+}
+
+/*
+ * Derive the migrate type of an allocation from its gfp flags.
+ *
+ * When the kernel's page_group_by_mobility_disabled is set, the value of the
+ * MIGRATE_UNMOVABLE enumerator is returned.  Otherwise the mobility bits are
+ * extracted from gfp_flags with GFP_MOVABLE_MASK / GFP_MOVABLE_SHIFT.
+ */
+static ulong gfp_migratetype(ulong gfp_flags)
+{
+	struct syment *sp;
+	int page_group_by_mobility_disabled = 0;
+
+	if ((sp = symbol_search("page_group_by_mobility_disabled")) &&
+	    readmem(sp->value, KVADDR, &page_group_by_mobility_disabled, sizeof(int),
+		    "page_group_by_mobility_disabled", RETURN_ON_ERROR) &&
+	    page_group_by_mobility_disabled) {
+		/*
+		 * enumerator_value() is given a long * elsewhere in this file.
+		 * 0 is the fallback if the lookup fails -- presumably
+		 * MIGRATE_UNMOVABLE is the first enumerator; TODO confirm.
+		 */
+		long migrate_unmovable = 0;
+
+		enumerator_value("MIGRATE_UNMOVABLE", &migrate_unmovable);
+		return (ulong)migrate_unmovable;
+	}
+
+	return (gfp_flags & GFP_MOVABLE_MASK) >> GFP_MOVABLE_SHIFT;
+}
+
+/*
+ * Copy the name of a migrate type into buf.
+ *
+ * migratetype: index into the kernel's migratetype_names[] pointer array.
+ * buf:         caller buffer of at least BUFSIZE bytes.
+ *
+ * buf is always left as a valid string: it is empty when the symbol is
+ * missing or the pointer cannot be read, so callers can print it with %s.
+ */
+static void migratetype_name(ulong migratetype, char *buf)
+{
+	struct syment *sp;
+	ulong migratetype_name_addr = 0;
+
+	buf[0] = '\0';
+
+	sp = symbol_search("migratetype_names");
+	if (!sp)
+		return;
+
+	/* NOTE(review): migratetype is not range-checked against the array size. */
+	if (!readmem(sp->value + migratetype * sizeof(ulong), KVADDR, &migratetype_name_addr,
+		     sizeof(ulong), "migratetype_name", RETURN_ON_ERROR) ||
+	    !migratetype_name_addr)
+		return;
+
+	read_string(migratetype_name_addr, buf, BUFSIZE-1);
+}
+
+/*
+ * Print the allocation and/or free record held in a page_owner structure.
+ *
+ * pfn:        page frame number, printed in the allocation header.
+ * page:       kernel virtual address of the struct page (its flags are read).
+ * page_owner: local copy of the kernel's struct page_owner.
+ * alloc:      selects the output.  The allocation record is printed unless
+ *             alloc is TRACK_FREE; the free record is printed unless alloc
+ *             is TRACK_ALLOC.  Either is skipped when its handle is 0.
+ */
+static void print_page_owner(ulong pfn, ulong page, char *page_owner, enum track_item alloc)
+{
+	int pid;
+	ushort order;
+	uint handle, free_handle, gfp_mask, nr_entries;
+	u64 ts_nsec, free_ts_nsec;
+	ulong entries = 0, page_flags;
+	char buf[BUFSIZE];
+
+	order = USHORT(page_owner + OFFSET(page_owner_order));
+	gfp_mask = UINT(page_owner + OFFSET(page_owner_gfp_mask));
+	handle = UINT(page_owner + OFFSET(page_owner_handle));
+	free_handle = UINT(page_owner + OFFSET(page_owner_free_handle));
+	ts_nsec = ULONGLONG(page_owner + OFFSET(page_owner_ts_nsec));
+	free_ts_nsec = ULONGLONG(page_owner + OFFSET(page_owner_free_ts_nsec));
+	pid = INT(page_owner + OFFSET(page_owner_pid));
+
+	if (handle && (alloc != TRACK_FREE)) {
+		fprintf(fp, "Page allocated via order %u, mask %#x, pid %d, ts %llu ns\n",
+			order, gfp_mask, pid, ts_nsec);
+		migratetype_name(gfp_migratetype(gfp_mask), buf);
+		readmem(page+OFFSET(page_flags), KVADDR, &page_flags, sizeof(ulong),
+			"page.flags", FAULT_ON_ERROR);
+		fprintf(fp, "PFN %#lx, type %s, Flags %#lx\n", pfn, buf, page_flags);
+		nr_entries = stack_depot_fetch(handle, &entries);
+		stack_trace_print(entries, nr_entries);
+	}
+
+	if (alloc != TRACK_ALLOC && free_handle) {
+		/* The free trace is stored under free_handle, not the allocation handle. */
+		nr_entries = stack_depot_fetch(free_handle, &entries);
+		fprintf(fp, "page last free ts %llu ns, stack trace:\n", free_ts_nsec);
+		stack_trace_print(entries, nr_entries);
+	}
+}
+
+/*
+ * Maximum order of the zoned buddy allocator.
+ *
+ * Taken from CONFIG_FORCE_MAX_ZONEORDER when the embedded kernel config is
+ * available and holds it as a string value; 11 otherwise.
+ */
+static inline ulong get_max_order(void)
+{
+	char *cfg_value;
+
+	if (!(kt->ikconfig_flags & IKCONFIG_AVAIL))
+		return 11;
+
+	if (get_kernel_config("CONFIG_FORCE_MAX_ZONEORDER", &cfg_value) != IKCONFIG_STR)
+		return 11;
+
+	return atol(cfg_value);
+}
+
+/* Both macros call get_max_order() every time they are expanded. */
+#define MAX_ORDER get_max_order()
+#define MAX_ORDER_NR_PAGES (1 << (MAX_ORDER - 1))
+
+/*
+ * Locate the struct page_ext that belongs to a page.
+ *
+ * pfn:      page frame number of the page.
+ * pp:       kernel virtual address of the struct page (used to find its
+ *           node when the kernel is not SPARSEMEM).
+ * page_ext: on success, receives the kernel virtual address of the page_ext.
+ *
+ * SPARSEMEM kernels index mem_section.page_ext directly by pfn.  Otherwise
+ * pglist_data.node_page_ext is indexed by the pfn's distance from the node
+ * start pfn rounded down to MAX_ORDER_NR_PAGES.
+ *
+ * Returns TRUE on success; FALSE when the required symbol or members are
+ * missing or when the page has no page_ext table.
+ */
+static int lookup_page_ext(ulong pfn, ulong pp, ulong *page_ext)
+{
+	int node;
+	ulong page_ext_size, section, section_nr, pgdat;
+	ulong node_page_ext, node_start_pfn, page_ext_idx;
+
+	if (!kernel_symbol_exists("page_ext_size"))
+		return FALSE;
+
+	readmem(symbol_value("page_ext_size"), KVADDR, &page_ext_size,
+		sizeof(page_ext_size), "page_ext_size", FAULT_ON_ERROR);
+
+	if (IS_SPARSEMEM()) {
+		section_nr = pfn_to_section_nr(pfn);
+		if (!(section = valid_section_nr(section_nr)))
+			return FALSE;
+
+		readmem(section + OFFSET(mem_section_page_ext), KVADDR, &node_page_ext,
+			sizeof(ulong), "mem_section page_ext", FAULT_ON_ERROR);
+		if (!node_page_ext)
+			return FALSE;
+
+		*page_ext = node_page_ext + pfn * page_ext_size;
+		return TRUE;
+	}
+
+	/* Parenthesized so that node holds the node id, not the comparison result. */
+	if ((node = page_to_nid(pp)) < 0)
+		return FALSE;
+
+	if (!VALID_MEMBER(pglist_data_node_page_ext) ||
+	    !VALID_MEMBER(pglist_data_node_start_pfn))
+		return FALSE;
+
+	pgdat = vt->node_table[node].pgdat;
+
+	readmem(pgdat + OFFSET(pglist_data_node_page_ext), KVADDR, &node_page_ext,
+		sizeof(ulong), "pglist node_page_ext", FAULT_ON_ERROR);
+	if (!node_page_ext)
+		return FALSE;
+
+	/* A node may start at pfn 0, so a zero start pfn is not an error. */
+	readmem(pgdat + OFFSET(pglist_data_node_start_pfn), KVADDR, &node_start_pfn,
+		sizeof(ulong), "pglist node_start_pfn", FAULT_ON_ERROR);
+
+	/* The per-node table is indexed relative to the aligned node start. */
+	page_ext_idx = pfn - rounddown(node_start_pfn, MAX_ORDER_NR_PAGES);
+	*page_ext = node_page_ext + page_ext_idx * page_ext_size;
+	return TRUE;
+}
+
+/*
+ * Compute the address of the struct page_owner embedded in a page_ext.
+ *
+ * The first ulong of the kernel's page_owner_ops object is read and added
+ * to page_ext as a byte offset.
+ *
+ * Returns the page_owner address, or 0 when the page_owner_ops symbol is
+ * missing or the offset cannot be read.
+ */
+static ulong get_page_owner(ulong page_ext)
+{
+	struct syment *sp;
+	ulong page_owner_ops_offset = 0;
+
+	sp = symbol_search("page_owner_ops");
+	if (!sp)
+		return 0;
+
+	/* Do not build an address from an offset that was never read. */
+	if (!readmem(sp->value, KVADDR, &page_owner_ops_offset, sizeof(ulong),
+		     "page_owner_ops_offset", RETURN_ON_ERROR))
+		return 0;
+
+	return page_ext + page_owner_ops_offset;
+}
+
+/*
+ * Report whether page_owner tracking is active in the dumped kernel.
+ *
+ * "page_owner_enabled" is tried first, then "page_owner_inited".  The value
+ * of the first symbol that exists and can be read is returned; FALSE is
+ * returned when neither can be read.
+ *
+ * NOTE(review): both are read as sizeof(int) -- confirm the kernel-side
+ * width of these variables.
+ */
+static int page_owner_enabled(void)
+{
+	char *candidates[] = { "page_owner_enabled", "page_owner_inited" };
+	struct syment *sym;
+	int state;
+	int idx;
+
+	for (idx = 0; idx < 2; idx++) {
+		sym = symbol_search(candidates[idx]);
+		if (!sym)
+			continue;
+		if (readmem(sym->value, KVADDR, &state, sizeof(int),
+			    candidates[idx], RETURN_ON_ERROR))
+			return state;
+	}
+
+	return FALSE;
+}
+
+/*
+ * Cache the structure sizes and member offsets needed to decode page_owner
+ * data.  Nothing is initialized when page_owner_enabled() returns FALSE.
+ */
+static void page_owner_init(void)
+{
+	if (page_owner_enabled()) {
+		STRUCT_SIZE_INIT(page_ext, "page_ext");
+		STRUCT_SIZE_INIT(page_owner, "page_owner");
+		MEMBER_OFFSET_INIT(mem_section_page_ext, "mem_section", "page_ext");
+		/*
+		 * lookup_page_ext() refuses the non-SPARSEMEM path unless this
+		 * offset is valid, so set it up next to its SPARSEMEM
+		 * counterpart.
+		 * NOTE(review): harmless if it is also initialized elsewhere.
+		 */
+		MEMBER_OFFSET_INIT(pglist_data_node_page_ext, "pglist_data", "node_page_ext");
+		MEMBER_OFFSET_INIT(page_owner_handle, "page_owner", "handle");
+		MEMBER_OFFSET_INIT(page_owner_free_handle, "page_owner", "free_handle");
+		MEMBER_OFFSET_INIT(page_owner_order, "page_owner", "order");
+		MEMBER_OFFSET_INIT(page_owner_gfp_mask, "page_owner", "gfp_mask");
+		MEMBER_OFFSET_INIT(page_owner_ts_nsec, "page_owner", "ts_nsec");
+		MEMBER_OFFSET_INIT(page_owner_free_ts_nsec, "page_owner", "free_ts_nsec");
+		MEMBER_OFFSET_INIT(page_owner_pid, "page_owner", "pid");
+	}
+}
+
+/*
+ * Print page_owner information for one page.
+ *
+ * mi:   request descriptor; only mi->flags is examined.
+ * pp:   kernel virtual address of the struct page.
+ * phys: physical address of the page.
+ *
+ * When mi->flags is exactly GET_PAGE_OWNER (a walk over all pages), only
+ * pages currently marked allocated are reported, and only for a pfn aligned
+ * to the recorded allocation order.  For any other flags value the
+ * allocated/freed state is printed, followed by both records.
+ *
+ * Returns silently when the page has no page_ext or page_owner, when the
+ * PAGE_EXT_OWNER bit is clear, or when the flag enumerators are unknown.
+ */
+static void dump_page_owner(struct meminfo *mi, ulong pp, physaddr_t phys)
+{
+	ulong pfn, page_ext_addr, page_owner_addr, page_ext;
+	long page_ext_owner = 0, page_ext_owner_allocated = 0;
+	char *page_owner;
+
+	/* Without both bit numbers the flag tests below would be meaningless. */
+	if (!enumerator_value("PAGE_EXT_OWNER", &page_ext_owner) ||
+	    !enumerator_value("PAGE_EXT_OWNER_ALLOCATED", &page_ext_owner_allocated))
+		return;
+
+	pfn = BTOP(phys);
+	if (!lookup_page_ext(pfn, pp, &page_ext_addr))
+		return;
+
+	page_owner_addr = get_page_owner(page_ext_addr);
+	if (!page_owner_addr)
+		return;
+
+	page_owner = (char *)GETBUF(SIZE(page_owner));
+	readmem(page_owner_addr, KVADDR, page_owner, SIZE(page_owner),
+		"page_owner", FAULT_ON_ERROR);
+
+	/* The first ulong of the page_ext is treated as its flags word. */
+	readmem(page_ext_addr, KVADDR, &page_ext, sizeof(ulong),
+		"page_ext", FAULT_ON_ERROR);
+	if (!(page_ext & (1UL << page_ext_owner)))
+		goto exit;
+
+	if (mi->flags == GET_PAGE_OWNER) {
+		if (!(page_ext & (1UL << page_ext_owner_allocated)) ||
+		    !IS_ALIGNED(pfn, 1UL << USHORT(page_owner + OFFSET(page_owner_order))))
+			goto exit;
+
+		/* dump allocated page owner for current memory usage */
+		print_page_owner(pfn, pp, page_owner, TRACK_ALLOC);
+	} else {
+		if (page_ext & (1UL << page_ext_owner_allocated))
+			fprintf(fp, "page_owner tracks the page 0x%lx as allocated\n", pp);
+		else
+			fprintf(fp, "page_owner tracks the page 0x%lx as freed\n", pp);
+		print_page_owner(pfn, pp, page_owner, TRACK_ALL);
+	}
+
+exit:
+	FREEBUF(page_owner);
+}
+
static void
dump_mem_map(struct meminfo *mi)
{
@@ -6161,6 +6533,19 @@ dump_mem_map(struct meminfo *mi)
char style3[100];
char style4[100];
+ if (mi->flags & GET_PAGE_OWNER) {
+ if (!page_owner_enabled()) {
+ error(INFO, "page_owner is disabled\n");
+ return;
+ }
+
+ /* TODO: support page owner for early kernels without stack depot */
+ if (!stack_depot_enabled()) {
+ error(INFO, "stack_depot is disabled\n");
+ return;
+ }
+ }
+
if (IS_SPARSEMEM()) {
dump_mem_map_SPARSEMEM(mi);
return;
@@ -6238,7 +6623,8 @@ dump_mem_map(struct meminfo *mi)
switch (mi->flags)
{
- case ADDRESS_SPECIFIED:
+ case ADDRESS_SPECIFIED:
+ case ADDRESS_SPECIFIED|GET_PAGE_OWNER:
switch (mi->memtype)
{
case KVADDR:
@@ -6263,6 +6649,10 @@ dump_mem_map(struct meminfo *mi)
print_hdr = TRUE;
break;
+ case GET_PAGE_OWNER:
+ print_hdr = FALSE;
+ break;
+
case GET_ALL:
shared = 0;
reserved = 0;
@@ -6376,6 +6766,10 @@ dump_mem_map(struct meminfo *mi)
/* FALLTHROUGH */
+ case GET_PAGE_OWNER:
+ dump_page_owner(mi, pp, phys);
+ break;
+
case GET_SLAB_PAGES:
if (v22) {
if ((flags >> v22_PG_Slab) & 1)
@@ -6570,7 +6964,10 @@ display_members:
break;
case ADDRESS_SPECIFIED:
+ case ADDRESS_SPECIFIED|GET_PAGE_OWNER:
mi->retval = done;
+ if (mi->flags & GET_PAGE_OWNER)
+ dump_page_owner(mi, pp, phys);
break;
}
@@ -19776,7 +20173,6 @@ do_slab_slub(struct meminfo *si, int verbose)
if (is_free && (cpu_slab >= 0))
fprintf(fp, "(cpu %d cache)", cpu_slab);
fprintf(fp, "\n");
-
}
return TRUE;
--
2.25.1
6 days, 21 hours
Revert "arm64: section_size_bits compatible with macro definitions"
by Guanyou Chen
Hi Tao,
When crash loads a vmcore from a kernel older than 5.12, it parses
section_size_bits as 27, but the correct value is 30. This happens because
"CONFIG_ARM64_4K_PAGES" is defined. Commit "568c6f04" was added for
compatibility with Android GKI, so we should clean it up.
Example:
Before:
crash vmcore vmlinux -d1
...
xtime timespec.tv_sec: 603549d0: Wed Feb 24 02:30:40 CST 2021
utsname:
sysname: Linux
nodename: localhost
release: 4.14.180-perf-g4483caa8ae80-dirty
version: #1 SMP PREEMPT Wed Feb 24 03:16:01 CST 2021
machine: aarch64
domainname: localdomain
...
SECTION_SIZE_BITS: 27
...
After:
crash vmcore vmlinux -d1
...
xtime timespec.tv_sec: 603549d0: Wed Feb 24 02:30:40 CST 2021
utsname:
sysname: Linux
nodename: localhost
release: 4.14.180-perf-g4483caa8ae80-dirty
version: #1 SMP PREEMPT Wed Feb 24 03:16:01 CST 2021
machine: aarch64
domainname: localdomain
...
SECTION_SIZE_BITS: 30
...
diff --git a/arm64.c b/arm64.c
index 06e7451..05ffdb3 100644
--- a/arm64.c
+++ b/arm64.c
@@ -1698,14 +1698,7 @@ arm64_get_section_size_bits(void)
if ((ret = get_kernel_config("CONFIG_MEMORY_HOTPLUG",
NULL)) == IKCONFIG_Y) {
if ((ret =
get_kernel_config("CONFIG_HOTPLUG_SIZE_BITS", &string)) == IKCONFIG_STR)
machdep->section_size_bits = atol(string);
- }
-
- /* arm64: reduce section size for sparsemem */
- if ((ret = get_kernel_config("CONFIG_ARM64_4K_PAGES",
NULL)) == IKCONFIG_Y
- || (ret =
get_kernel_config("CONFIG_ARM64_16K_PAGES", NULL)) == IKCONFIG_Y)
- machdep->section_size_bits =
_SECTION_SIZE_BITS_5_12;
- else if ((ret = get_kernel_config("CONFIG_ARM64_64K_PAGES",
NULL)) == IKCONFIG_Y)
- machdep->section_size_bits =
_SECTION_SIZE_BITS_5_12_64K;
+ }
}
if (CRASHDEBUG(1))
Thanks,
Guanyou.
1 week