[PATCH] vmcoreinfo: read vmcoreinfo using 'vmcoreinfo_data' when unavailable in elf note
by Aditya Gupta
Some vmcores, such as those created using virsh-dump, don't have a
vmcoreinfo ELF note.
On architectures such as PowerPC64, vmcoreinfo is mandatory for fetching
first_vmalloc_address from vmcores of upstream Linux, since crash-utility
commit 5b24e363a898 ("get vmalloc start address from vmcoreinfo").
If the vmcoreinfo that crash tries to read from a diskdump/netdump vmcore
is empty or missing, try reading it from the 'vmcoreinfo_data' symbol instead.
Reading 'vmcoreinfo_data' from memory was already done for a live kernel.
The same approach can be reused when the vmcoreinfo note is missing, since
the 'vmcoreinfo_data' symbol is available in a vmcore too.
Hence rename 'vmcoreinfo_read_string' in kernel.c to
'vmcoreinfo_read_from_memory', and use it in netdump.c and diskdump.c
too.
Reported-by: Anushree Mathur <anushree.mathur(a)linux.ibm.com>
Tested-by: Anushree Mathur <anushree.mathur(a)linux.ibm.com>
Signed-off-by: Aditya Gupta <adityag(a)linux.ibm.com>
---
defs.h | 1 +
diskdump.c | 18 ++++++++++++++++++
kernel.c | 9 ++++-----
netdump.c | 19 +++++++++++++++++++
4 files changed, 42 insertions(+), 5 deletions(-)
diff --git a/defs.h b/defs.h
index 2231cb68b804..910264e12314 100644
--- a/defs.h
+++ b/defs.h
@@ -6166,6 +6166,7 @@ void dump_kernel_table(int);
void dump_bt_info(struct bt_info *, char *where);
void dump_log(int);
void parse_kernel_version(char *);
+char *vmcoreinfo_read_from_memory(const char *);
#define LOG_LEVEL(v) ((v) & 0x07)
#define SHOW_LOG_LEVEL (0x1)
diff --git a/diskdump.c b/diskdump.c
index ce3cbb7b12dd..30d0c87f84c1 100644
--- a/diskdump.c
+++ b/diskdump.c
@@ -1041,6 +1041,13 @@ pfn_to_pos(ulong pfn)
return desc_pos;
}
+/**
+ * Check if vmcoreinfo in vmcore is missing/empty
+ */
+static bool is_vmcoreinfo_empty(void)
+{
+ return (dd->sub_header_kdump->size_vmcoreinfo == 0);
+}
/*
* Determine whether a file is a diskdump creation, and if TRUE,
@@ -1088,6 +1095,17 @@ is_diskdump(char *file)
pc->read_vmcoreinfo = vmcoreinfo_read_string;
+ /*
+ * vmcoreinfo can be empty in case of dump collected via virsh-dump
+ *
+ * check if vmcoreinfo is not available in vmcore, and try to read
+ * the vmcoreinfo from memory, using "vmcoreinfo_data" symbol
+ */
+ if (is_vmcoreinfo_empty()) {
+ error(WARNING, "vmcoreinfo is empty, will read from symbols\n");
+ pc->read_vmcoreinfo = vmcoreinfo_read_from_memory;
+ }
+
if ((pc->flags2 & GET_LOG) && KDUMP_CMPRS_VALID()) {
pc->dfd = dd->dfd;
pc->readmem = read_diskdump;
diff --git a/kernel.c b/kernel.c
index adb19ad8725d..7d26a5c5a0a1 100644
--- a/kernel.c
+++ b/kernel.c
@@ -99,7 +99,6 @@ static ulong dump_audit_skb_queue(ulong);
static ulong __dump_audit(char *);
static void dump_audit(void);
static void dump_printk_safe_seq_buf(int);
-static char *vmcoreinfo_read_string(const char *);
static void check_vmcoreinfo(void);
static int is_pvops_xen(void);
static int get_linux_banner_from_vmlinux(char *, size_t);
@@ -11852,8 +11851,8 @@ dump_printk_safe_seq_buf(int msg_flags)
* Returns a string (that has to be freed by the caller) that contains the
* value for key or NULL if the key has not been found.
*/
-static char *
-vmcoreinfo_read_string(const char *key)
+char *
+vmcoreinfo_read_from_memory(const char *key)
{
char *buf, *value_string, *p1, *p2;
size_t value_length;
@@ -11918,10 +11917,10 @@ check_vmcoreinfo(void)
switch (get_symbol_type("vmcoreinfo_data", NULL, NULL))
{
case TYPE_CODE_PTR:
- pc->read_vmcoreinfo = vmcoreinfo_read_string;
+ pc->read_vmcoreinfo = vmcoreinfo_read_from_memory;
break;
case TYPE_CODE_ARRAY:
- pc->read_vmcoreinfo = vmcoreinfo_read_string;
+ pc->read_vmcoreinfo = vmcoreinfo_read_from_memory;
break;
}
}
diff --git a/netdump.c b/netdump.c
index b4e2a5cb2037..c69c7a1e80db 100644
--- a/netdump.c
+++ b/netdump.c
@@ -111,6 +111,14 @@ map_cpus_to_prstatus(void)
FREEBUF(nt_ptr);
}
+/**
+ * Check if vmcoreinfo in vmcore is missing/empty
+ */
+static bool is_vmcoreinfo_empty(void)
+{
+ return (nd->size_vmcoreinfo == 0);
+}
+
/*
* Determine whether a file is a netdump/diskdump/kdump creation,
* and if TRUE, initialize the vmcore_data structure.
@@ -464,6 +472,17 @@ is_netdump(char *file, ulong source_query)
pc->read_vmcoreinfo = vmcoreinfo_read_string;
+ /*
+ * vmcoreinfo can be empty in case of dump collected via virsh-dump
+ *
+ * check if vmcoreinfo is not available in vmcore, and try to read
+ * the vmcoreinfo from memory, using "vmcoreinfo_data" symbol
+ */
+ if (is_vmcoreinfo_empty()) {
+ error(WARNING, "vmcoreinfo is empty, will read from symbols\n");
+ pc->read_vmcoreinfo = vmcoreinfo_read_from_memory;
+ }
+
if ((source_query == KDUMP_LOCAL) &&
(pc->flags2 & GET_OSRELEASE))
kdump_get_osrelease();
--
2.46.2
18 hours, 58 minutes
Re: [PATCH v7 00/15] gdb stack unwinding support for crash utility
by lijiang
Hi, Tao
Thank you for the update.
The following patch fixes a regression, so I would prefer to discuss it as a
separate patch.
[PATCH v7 01/15] Fix the regression of cpumask_t for xen hyper
In addition, I found another issue in my tests (on ppc64le): "gdb bt" can
display the backtrace for the panic task, but after I switch to another
task, "gdb bt" cannot display the backtrace:
crash> gdb bt
#0 0xc0000000002bde04 in crash_setup_regs (newregs=0xc00000003264b858,
oldregs=0x0) at ./arch/powerpc/include/asm/kexec.h:133
#1 0xc0000000002be4f8 in __crash_kexec (regs=0x0) at
kernel/crash_core.c:122
#2 0xc00000000016c254 in panic (fmt=0xc0000000015eef20 "sysrq triggered
crash\n") at kernel/panic.c:373
#3 0xc000000000a708b8 in sysrq_handle_crash (key=<optimized out>) at
drivers/tty/sysrq.c:154
#4 0xc000000000a713d4 in __handle_sysrq (key=key@entry=99 'c',
check_mask=check_mask@entry=false) at drivers/tty/sysrq.c:612
#5 0xc000000000a71e94 in write_sysrq_trigger (file=<optimized out>,
buf=<optimized out>, count=2, ppos=<optimized out>) at
drivers/tty/sysrq.c:1181
#6 0xc00000000073260c in pde_write (pde=0xc00000000af9cc00,
file=<optimized out>, buf=<optimized out>, count=<optimized out>,
ppos=<optimized out>) at fs/proc/inode.c:334
#7 proc_reg_write (file=<optimized out>, buf=<optimized out>,
count=<optimized out>, ppos=<optimized out>) at fs/proc/inode.c:346
#8 0xc00000000063c0e0 in vfs_write (file=0xc0000000092d2900,
buf=0x10012536f60 <error: Cannot access memory at address 0x10012536f60>,
count=2, pos=0xc00000003264bd30) at fs/read_write.c:588
#9 vfs_write (file=0xc0000000092d2900, buf=0x10012536f60 <error: Cannot
access memory at address 0x10012536f60>, count=<optimized out>,
pos=0xc00000003264bd30) at fs/read_write.c:570
#10 0xc00000000063c690 in ksys_write (fd=<optimized out>, buf=0x10012536f60
<error: Cannot access memory at address 0x10012536f60>, count=2) at
fs/read_write.c:643
#11 0xc000000000031a28 in system_call_exception (regs=0xc00000003264be80,
r0=<optimized out>) at arch/powerpc/kernel/syscall.c:153
#12 0xc00000000000d05c in system_call_vectored_common () at
arch/powerpc/kernel/interrupt_64.S:198
crash> ps
PID PPID CPU TASK ST %MEM VSZ RSS COMM
0 0 0 c000000002bda980 RU 0.0 0 0
[swapper/0]
> 0 0 1 c000000003864c80 RU 0.0 0 0
[swapper/1]
...
8017 923 0 c000000043a20000 IN 0.2 22528 16256
sshd-session
8025 8017 6 c000000032271880 IN 0.1 22784 11840
sshd-session
> 8026 8025 0 c000000043a26600 RU 0.1 9664 6208 bash
...
11645 2 3 c000000032264c80 ID 0.0 0 0
[kworker/u32:2]
11738 6188 2 c00000003811b180 IN 0.1 43520 9408 pickup
12326 2 0 c00000003226b280 ID 0.0 0 0
[kworker/0:1]
13112 6089 2 c00000000c809900 IN 0.0 7232 3456 sleep
Let's take the "pickup" task as an example:
crash> set 11738
PID: 11738
COMMAND: "pickup"
TASK: c00000003811b180 [THREAD_INFO: c00000003811b180]
CPU: 2
STATE: TASK_INTERRUPTIBLE
crash> gdb bt
#0 0xc0000000a7f876a0 in ?? ()
gdb: gdb request failed: bt
crash> set gdb on
gdb: on
gdb> bt
#0 0xc0000000a7f876a0 in ?? ()
gdb>
Anyway, I ran the same test on x86_64 and aarch64, and it works as
expected there. Could you please double-check on the ppc64 architecture?
x86_64:
crash> set 14599
PID: 14599
COMMAND: "pickup"
TASK: ffff8f57a0d7c180 [THREAD_INFO: ffff8f57a0d7c180]
CPU: 41
STATE: TASK_INTERRUPTIBLE
crash> gdb bt
#0 0xffffffff8b3efe29 in context_switch (rq=0xffff8f6f1f835900,
prev=0xffff8f57a0d7c180, next=0xffff8f5786720000, rf=0xffff9df22fea7b80) at
kernel/sched/core.c:5208
#1 __schedule (sched_mode=sched_mode@entry=0) at kernel/sched/core.c:6549
#2 0xffffffff8b3f0217 in __schedule_loop (sched_mode=<optimized out>) at
kernel/sched/core.c:6626
#3 schedule () at kernel/sched/core.c:6641
#4 0xffffffff8b3f6eef in schedule_hrtimeout_range_clock
(expires=expires@entry=0xffff9df22fea7cb0, delta=<optimized out>,
delta@entry=99999999, mode=mode@entry=HRTIMER_MODE_ABS,
clock_id=clock_id@entry=1) at kernel/time/hrtimer.c:2293
#5 0xffffffff8b3f7003 in schedule_hrtimeout_range
(expires=expires@entry=0xffff9df22fea7cb0,
delta=delta@entry=99999999, mode=mode@entry=HRTIMER_MODE_ABS) at
kernel/time/hrtimer.c:2340
#6 0xffffffff8aae301c in ep_poll (ep=0xffff8f5790d15d40,
events=events@entry=0x7ffea91b6b90, maxevents=maxevents@entry=100,
timeout=timeout@entry=0xffff9df22fea7d58) at fs/eventpoll.c:2062
#7 0xffffffff8aae3138 in do_epoll_wait (epfd=epfd@entry=8,
events=events@entry=0x7ffea91b6b90, maxevents=maxevents@entry=100,
to=0xffff9df22fea7d58) at fs/eventpoll.c:2464
#8 0xffffffff8aae44a1 in __do_sys_epoll_wait (epfd=<optimized out>,
events=0x7ffea91b6b90, maxevents=<optimized out>, timeout=<optimized out>)
at fs/eventpoll.c:2476
#9 __se_sys_epoll_wait (epfd=<optimized out>, events=<optimized out>,
maxevents=<optimized out>, timeout=<optimized out>) at fs/eventpoll.c:2471
#10 __x64_sys_epoll_wait (regs=<optimized out>) at fs/eventpoll.c:2471
#11 0xffffffff8b3e293d in do_syscall_x64 (regs=0xffff9df22fea7f48, nr=232)
at arch/x86/entry/common.c:52
#12 do_syscall_64 (regs=0xffff9df22fea7f48, nr=232) at
arch/x86/entry/common.c:83
#13 0xffffffff8b40012f in entry_SYSCALL_64 () at
arch/x86/entry/entry_64.S:121
crash>
aarch64:
crash> set 9338
PID: 9338
COMMAND: "pickup"
TASK: ffff0000c7b05400 [THREAD_INFO: ffff0000c7b05400]
CPU: 3
STATE: TASK_INTERRUPTIBLE
crash> gdb bt
#0 __switch_to (prev=<unavailable>, prev@entry=0xffff0000c7b05400,
next=next@entry=<unavailable>) at arch/arm64/kernel/process.c:555
#1 0xffffafc5b5ebd744 in context_switch (rq=0xffff00077bbd0ec0,
prev=0xffff0000c7b05400, next=<unavailable>, rf=0xffff80008ac63a60) at
kernel/sched/core.c:5208
#2 __schedule (sched_mode=sched_mode@entry=0) at kernel/sched/core.c:6549
#3 0xffffafc5b5ebdc2c in __schedule_loop (sched_mode=<optimized out>) at
kernel/sched/core.c:6626
#4 schedule () at kernel/sched/core.c:6641
#5 0xffffafc5b5ec6030 in schedule_hrtimeout_range_clock
(expires=expires@entry=0xffff80008ac63be8, delta=delta@entry=99999999,
mode=mode@entry=HRTIMER_MODE_ABS, clock_id=clock_id@entry=1) at
kernel/time/hrtimer.c:2293
#6 0xffffafc5b5ec618c in schedule_hrtimeout_range
(expires=expires@entry=0xffff80008ac63be8,
delta=delta@entry=99999999, mode=mode@entry=HRTIMER_MODE_ABS) at
kernel/time/hrtimer.c:2340
#7 0xffffafc5b545d33c in ep_poll (ep=<unavailable>,
events=events@entry=0xffffde5c3f68,
maxevents=maxevents@entry=100, timeout=timeout@entry=0xffff80008ac63ce0) at
fs/eventpoll.c:2062
#8 0xffffafc5b545d4e4 in do_epoll_wait (epfd=epfd@entry=8,
events=events@entry=0xffffde5c3f68, maxevents=maxevents@entry=100,
to=to@entry=0xffff80008ac63ce0) at fs/eventpoll.c:2464
#9 0xffffafc5b545d534 in do_epoll_pwait (epfd=epfd@entry=8,
events=events@entry=0xffffde5c3f68, maxevents=maxevents@entry=100,
to=to@entry=0xffff80008ac63ce0, sigsetsize=<optimized out>,
sigmask=<optimized out>) at fs/eventpoll.c:2498
#10 0xffffafc5b545e7c8 in do_epoll_pwait (epfd=8, events=0xffffde5c3f68,
maxevents=100, to=0xffff80008ac63ce0, sigmask=<optimized out>,
sigsetsize=<optimized out>) at fs/eventpoll.c:2495
#11 __do_sys_epoll_pwait (epfd=8, events=0xffffde5c3f68, maxevents=100,
timeout=<optimized out>, sigmask=<optimized out>, sigsetsize=<optimized
out>) at fs/eventpoll.c:2511
#12 __se_sys_epoll_pwait (epfd=8, events=281474412330856, maxevents=100,
timeout=<optimized out>, sigmask=<optimized out>, sigsetsize=<optimized
out>) at fs/eventpoll.c:2505
#13 __arm64_sys_epoll_pwait (regs=<optimized out>) at fs/eventpoll.c:2505
#14 0xffffafc5b4fa99bc in __invoke_syscall (regs=0xffff80008ac63eb0,
syscall_fn=<optimized out>) at arch/arm64/kernel/syscall.c:35
#15 invoke_syscall (regs=regs@entry=0xffff80008ac63eb0, scno=<optimized
out>, sc_nr=sc_nr@entry=463, syscall_table=<optimized out>) at
arch/arm64/kernel/syscall.c:49
#16 0xffffafc5b4fa9ac8 in el0_svc_common (sc_nr=463,
syscall_table=<optimized out>, regs=0xffff80008ac63eb0, scno=<optimized
out>) at arch/arm64/kernel/syscall.c:132
#17 do_el0_svc (regs=regs@entry=0xffff80008ac63eb0) at
arch/arm64/kernel/syscall.c:151
#18 0xffffafc5b5eb6fa4 in el0_svc (regs=0xffff80008ac63eb0) at
arch/arm64/kernel/entry-common.c:712
#19 0xffffafc5b5eb74c0 in el0t_64_sync_handler (regs=<optimized out>) at
arch/arm64/kernel/entry-common.c:730
#20 0xffffafc5b4f91634 in el0t_64_sync () at arch/arm64/kernel/entry.S:598
crash>
BTW: the other changes look fine to me.
Thanks
Lianbo
On Wed, Sep 4, 2024 at 3:54 PM <devel-request(a)lists.crash-utility.osci.io>
wrote:
> Date: Wed, 4 Sep 2024 19:49:25 +1200
> From: Tao Liu <ltao(a)redhat.com>
> Subject: [Crash-utility] [PATCH v7 00/15] gdb stack unwinding support
> for crash utility
> To: devel(a)lists.crash-utility.osci.io
> Cc: Tao Liu <ltao(a)redhat.com>
> Message-ID: <20240904074940.21331-1-ltao(a)redhat.com>
> Content-Type: text/plain; charset=UTF-8
>
> This patchset is a rebase/merged version of the following 3 patchsets:
>
> 1): [PATCH v10 0/5] Improve stack unwind on ppc64 [1]
> 2): [PATCH 0/5] x86_64 gdb stack unwinding support [2]
> 3): Clean up on top of one-thread-v2 [3]
>
> A complete description of gdb stack unwinding support for crash can be
> found in [1].
>
> This patchset can be divided into the following 3 parts:
>
> 1) part1: preparations before stack unwinding support, some
> bugs/regressions found when drafting this patchset.
> 2) part2: common part for all CPU archs, mainly dealing with
> crash_target.c/gdb_interface.c files, in order to
> support different archs.
> 3) part3: arch specific, for each ppc64/x86_64/arm64/vmware
> stack unwinding support.
>
> === part 3
> arm64: Add gdb stack unwinding support
> vmware_guestdump: Various format versions support
> x86_64: Add gdb stack unwinding support
> ppc64: correct gdb passthroughs by implementing
> machdep->get_current_task_reg
>
> === part 2
> Conditionally output gdb stack unwinding stop reasons
> Stop stack unwinding at non-kernel address
> Print task pid/command instead of CPU index
> Rename get_cpu_reg to get_current_task_reg
> Let crash change gdb context
> Leave only one gdb thread for crash
> Remove 'frame' from prohibited commands list
>
> === part 1
> Fix gdb_interface: restore gdb's output streams at end of gdb_interface
> x86_64: Fix invalid input "=>" for bt command
> Fix cpumask_t recursive dependence issue
> Fix the regression of cpumask_t for xen hyper
> ===
>
> v7 -> v6:
> 1) Reorganised the patchset, dividing it into 3 parts instead of the
> previous 2 parts.
> 2) Reworked the cpumask_t part, which resolves comment No.4
> raised by lianbo in [4].
> 3) Add conditional output for the failing message of gdb stack unwinding.
> see [PATCH 11/15] Conditionally output gdb stack unwinding stop reasons
> 4) Redraft the commit messages, updated some outdated info.
> 5) Merged "Let crash change gdb context" and "set_context(): check if
> context is already current" into one.
>
> [4]:
> https://www.mail-archive.com/devel@lists.crash-utility.osci.io/msg01067.html
>
> v6 -> v5:
> 1) Refactor patch 4 & 9, which changed the function signature of struct
> get_cpu_reg/get_current_task_reg, and let each patch compile with no
> error when added on.
> 2) Rebased the patchset on top of latest upstream:
> ("79b93ecb2e72ec Fix a "Bus error" issue caused by 'crash --osrelease'
> or
> crash loading")
>
> v5 -> v4:
> 1) Plenty of code refactoring based on Lianbo's comments on v4.
> 2) Removed the magic number when dealing with regs bitmap, see [6].
> 3) Rebased the patchset on top of latest upstream:
> ("1c6da3eaff8207 arm64: Fix bt command show wrong stacktrace on ramdump
> source")
>
> v4 -> v3:
> Fixed the author issue in [PATCH v3 06/16] Fix gdb_interface: restore gdb's
> output streams at end of gdb_interface.
>
> v3 -> v2:
> 1) Updated CC list as pointed out in [4]
> 2) Compiling issues as in [5]
>
> v2 -> v1:
> 1) Added the patch: x86_64: Fix invalid input "=>" for bt command,
> thanks for Kazu's testing.
> 2) Modify the patch: x86_64: Add gdb stack unwinding support, added the
> pcp_save, spp_save and sp, for restoring the value in match of the
> original
> code logic.
>
> [1]:
> https://www.mail-archive.com/devel@lists.crash-utility.osci.io/msg00469.html
> [2]:
> https://www.mail-archive.com/devel@lists.crash-utility.osci.io/msg00488.html
> [3]:
> https://www.mail-archive.com/devel@lists.crash-utility.osci.io/msg00554.html
> [4]:
> https://www.mail-archive.com/devel@lists.crash-utility.osci.io/msg00681.html
> [5]:
> https://www.mail-archive.com/devel@lists.crash-utility.osci.io/msg00715.html
> [6]:
> https://www.mail-archive.com/devel@lists.crash-utility.osci.io/msg00819.html
>
> Aditya Gupta (3):
> Fix gdb_interface: restore gdb's output streams at end of
> gdb_interface
> Remove 'frame' from prohibited commands list
> ppc64: correct gdb passthroughs by implementing
> machdep->get_current_task_reg
>
> Alexey Makhalov (1):
> vmware_guestdump: Various format versions support
>
> Tao Liu (11):
> Fix the regression of cpumask_t for xen hyper
> Fix cpumask_t recursive dependence issue
> x86_64: Fix invalid input "=>" for bt command
> Leave only one gdb thread for crash
> Let crash change gdb context
> Rename get_cpu_reg to get_current_task_reg
> Print task pid/command instead of CPU index
> Stop stack unwinding at non-kernel address
> Conditionally output gdb stack unwinding stop reasons
> x86_64: Add gdb stack unwinding support
> arm64: Add gdb stack unwinding support
>
> arm64.c | 120 +++++++++++++++--
> crash_target.c | 71 ++++++----
> defs.h | 194 ++++++++++++++++++++++++++-
> gdb-10.2.patch | 96 ++++++++++++++
> gdb_interface.c | 39 ++----
> kernel.c | 63 +++++++--
> ppc64.c | 174 +++++++++++++++++++++++-
> symbols.c | 15 +++
> task.c | 34 +++--
> tools.c | 16 ++-
> unwind_x86_64.h | 4 -
> vmware_guestdump.c | 321 +++++++++++++++++++++++++++++++-------------
> x86_64.c | 323 ++++++++++++++++++++++++++++++++++++++++-----
> 13 files changed, 1247 insertions(+), 223 deletions(-)
>
> --
> 2.40.1
>
2 weeks, 5 days
[PATCH] RISCV64: add panic signature to panic_msg to properly display the PANIC message
by Austin Kim
Using the 'sys' command, we can view the panic message along with general
system information. With a RISCV64-based vmcore, however, the PANIC message
is not properly displayed.
The reason is that "Unable to handle kernel" is the first message printed in
the kernel log when an exception occurs in a RISC-V based Linux kernel. The
corresponding kernel commit is 21733cb518471.
Without the patch:
crash> sys
KERNEL: vmlinux [TAINTED]
DUMPFILE: vmcore
CPUS: 4
DATE: Thu Aug 22 16:13:08 KST 2024
UPTIME: 00:33:25
LOAD AVERAGE: 0.07, 0.07, 0.02
TASKS: 385
NODENAME: starfive
RELEASE: 6.6.20+
VERSION: #13 SMP Mon Aug 19 12:58:52 KST 2024
MACHINE: riscv64 (unknown Mhz)
MEMORY: 4 GB
PANIC: ""
With the patch:
crash> sys
KERNEL: vmlinux [TAINTED]
DUMPFILE: vmcore
CPUS: 4
DATE: Thu Aug 22 16:13:08 KST 2024
UPTIME: 00:33:25
LOAD AVERAGE: 0.07, 0.07, 0.02
TASKS: 385
NODENAME: starfive
RELEASE: 6.6.20+
VERSION: #13 SMP Mon Aug 19 12:58:52 KST 2024
MACHINE: riscv64 (unknown Mhz)
MEMORY: 4 GB
PANIC: "Unable to handle kernel access to user memory without uaccess routines at virtual address 0000000000000000"
Signed-off-by: Austin Kim <austindh.kim(a)gmail.com>
---
task.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/task.c b/task.c
index d52ce0b..443f488 100644
--- a/task.c
+++ b/task.c
@@ -6330,6 +6330,7 @@ static const char* panic_msg[] = {
"[Hardware Error]: ",
"Bad mode in ",
"Oops: ",
+ "Unable to handle kernel access ",
};
#define ARRAY_SIZE(a) (sizeof (a) / sizeof ((a)[0]))
--
2.17.1
3 weeks, 6 days
[Question] about gdb version
by Guanyou Chen
Hi lianbo, tao
Is there a plan to upgrade the version of GDB?
GDB-10.2 doesn't seem to support "--with-zstd".
err: BFD: /xxx/vmlinux: unable to initialize decompress status for section
.debug_aranges
Guanyou
Thanks.
4 weeks, 1 day
[PATCH] mod: introduce -v option to display modules with valid version
by Sun Feng
With this option, we can easily get the module version from a kdump,
which is helpful when developing external modules.
crash> mod -v
NAME VERSION
ahci 3.0
vxlan 0.1.2.1
dca 1.12.1
...
Signed-off-by: Sun Feng <loyou85(a)gmail.com>
---
defs.h | 3 +++
help.c | 12 +++++++++++-
kernel.c | 46 +++++++++++++++++++++++++++++++++++++++++++++-
symbols.c | 44 +++++++++++++++++++++++++++++++++++++++-----
4 files changed, 98 insertions(+), 7 deletions(-)
diff --git a/defs.h b/defs.h
index e2a9278..f14fcdf 100644
--- a/defs.h
+++ b/defs.h
@@ -2244,6 +2244,7 @@ struct offset_table { /* stash of commonly-used offsets */
long rb_list_head;
long file_f_inode;
long page_page_type;
+ long module_version;
};
struct size_table { /* stash of commonly-used sizes */
@@ -2935,6 +2936,7 @@ struct symbol_table_data {
#define MAX_MOD_NAMELIST (256)
#define MAX_MOD_NAME (64)
+#define MAX_MOD_VERSION (64)
#define MAX_MOD_SEC_NAME (64)
#define MOD_EXT_SYMS (0x1)
@@ -2984,6 +2986,7 @@ struct load_module {
long mod_size;
char mod_namelist[MAX_MOD_NAMELIST];
char mod_name[MAX_MOD_NAME];
+ char mod_version[MAX_MOD_VERSION];
ulong mod_flags;
struct syment *mod_symtable;
struct syment *mod_symend;
diff --git a/help.c b/help.c
index e95ac1d..1bac5e1 100644
--- a/help.c
+++ b/help.c
@@ -5719,7 +5719,7 @@ NULL
char *help_mod[] = {
"mod",
"module information and loading of symbols and debugging data",
-"-s module [objfile] | -d module | -S [directory] [-D|-t|-r|-R|-o|-g]",
+"-s module [objfile] | -d module | -S [directory] [-D|-t|-r|-R|-o|-g|-v]",
" With no arguments, this command displays basic information of the currently",
" installed modules, consisting of the module address, name, base address,",
" size, the object file name (if known), and whether the module was compiled",
@@ -5791,6 +5791,7 @@ char *help_mod[] = {
" -g When used with -s or -S, add a module object's section",
" start and end addresses to its symbol list.",
" -o Load module symbols with old mechanism.",
+" -v Display modules with valid version.",
" ",
" If the %s session was invoked with the \"--mod <directory>\" option, or",
" a CRASH_MODULE_PATH environment variable exists, then /lib/modules/<release>",
@@ -5881,6 +5882,15 @@ char *help_mod[] = {
" vxglm P(U)",
" vxgms P(U)",
" vxodm P(U)",
+" ",
+" Display modules with valid version:",
+" ",
+" %s> mod -v",
+" NAME VERSION",
+" ahci 3.0",
+" vxlan 0.1.2.1",
+" dca 1.12.1",
+" ...",
NULL
};
diff --git a/kernel.c b/kernel.c
index adb19ad..91eef2a 100644
--- a/kernel.c
+++ b/kernel.c
@@ -3593,6 +3593,9 @@ module_init(void)
MEMBER_OFFSET_INIT(module_num_gpl_syms, "module",
"num_gpl_syms");
+ if (MEMBER_EXISTS("module", "version"))
+ MEMBER_OFFSET_INIT(module_version, "module", "version");
+
if (MEMBER_EXISTS("module", "mem")) { /* 6.4 and later */
kt->flags2 |= KMOD_MEMORY; /* MODULE_MEMORY() can be used. */
@@ -4043,6 +4046,7 @@ irregularity:
#define REMOTE_MODULE_SAVE_MSG (6)
#define REINIT_MODULES (7)
#define LIST_ALL_MODULE_TAINT (8)
+#define LIST_ALL_MODULE_VERSION (9)
void
cmd_mod(void)
@@ -4117,7 +4121,7 @@ cmd_mod(void)
address = 0;
flag = LIST_MODULE_HDR;
- while ((c = getopt(argcnt, args, "Rd:Ds:Sot")) != EOF) {
+ while ((c = getopt(argcnt, args, "Rd:Ds:Sotv")) != EOF) {
switch(c)
{
case 'R':
@@ -4195,6 +4199,13 @@ cmd_mod(void)
flag = LIST_ALL_MODULE_TAINT;
break;
+ case 'v':
+ if (flag)
+ cmd_usage(pc->curcmd, SYNOPSIS);
+ else
+ flag = LIST_ALL_MODULE_VERSION;
+ break;
+
default:
argerrs++;
break;
@@ -4578,10 +4589,12 @@ do_module_cmd(ulong flag, char *modref, ulong address,
struct load_module *lm, *lmp;
int maxnamelen;
int maxsizelen;
+ int maxversionlen;
char buf1[BUFSIZE];
char buf2[BUFSIZE];
char buf3[BUFSIZE];
char buf4[BUFSIZE];
+ char buf5[BUFSIZE];
if (NO_MODULES())
return;
@@ -4744,6 +4757,37 @@ do_module_cmd(ulong flag, char *modref, ulong address,
case LIST_ALL_MODULE_TAINT:
show_module_taint();
break;
+
+ case LIST_ALL_MODULE_VERSION:
+ maxnamelen = maxversionlen = 0;
+
+ for (i = 0; i < kt->mods_installed; i++) {
+ lm = &st->load_modules[i];
+ maxnamelen = strlen(lm->mod_name) > maxnamelen ?
+ strlen(lm->mod_name) : maxnamelen;
+
+ maxversionlen = strlen(lm->mod_version) > maxversionlen ?
+ strlen(lm->mod_version) : maxversionlen;
+ }
+
+ fprintf(fp, "%s %s\n",
+ mkstring(buf2, maxnamelen, LJUST, "NAME"),
+ mkstring(buf5, maxversionlen, LJUST, "VERSION"));
+
+ for (i = 0; i < kt->mods_installed; i++) {
+ lm = &st->load_modules[i];
+ if ((!address || (lm->module_struct == address) ||
+ (lm->mod_base == address)) &&
+ strlen(lm->mod_version)) {
+ fprintf(fp, "%s ", mkstring(buf2, maxnamelen,
+ LJUST, lm->mod_name));
+ fprintf(fp, "%s ", mkstring(buf5, maxversionlen,
+ LJUST, lm->mod_version));
+
+ fprintf(fp, "\n");
+ }
+ }
+ break;
}
}
diff --git a/symbols.c b/symbols.c
index d00fbd7..9d90df7 100644
--- a/symbols.c
+++ b/symbols.c
@@ -1918,6 +1918,7 @@ store_module_symbols_6_4(ulong total, int mods_installed)
{
int i, m, t;
ulong mod, mod_next;
+ ulong version;
char *mod_name;
uint nsyms, ngplsyms;
ulong syms, gpl_syms;
@@ -1930,6 +1931,7 @@ store_module_symbols_6_4(ulong total, int mods_installed)
struct load_module *lm;
char buf1[BUFSIZE];
char buf2[BUFSIZE];
+ char mod_version[BUFSIZE];
char *strbuf = NULL, *modbuf, *modsymbuf;
struct syment *sp;
ulong first, last;
@@ -1980,6 +1982,13 @@ store_module_symbols_6_4(ulong total, int mods_installed)
mod_name = modbuf + OFFSET(module_name);
+ BZERO(mod_version, BUFSIZE);
+ if (MEMBER_EXISTS("module", "version")) {
+ version = ULONG(modbuf + OFFSET(module_version));
+ if (version)
+ read_string(version, mod_version, BUFSIZE - 1);
+ }
+
lm = &st->load_modules[m++];
BZERO(lm, sizeof(struct load_module));
@@ -2003,9 +2012,15 @@ store_module_symbols_6_4(ulong total, int mods_installed)
error(INFO, "module name greater than MAX_MOD_NAME: %s\n", mod_name);
strncpy(lm->mod_name, mod_name, MAX_MOD_NAME-1);
}
+ if (strlen(mod_version) < MAX_MOD_VERSION)
+ strcpy(lm->mod_version, mod_version);
+ else {
+ error(INFO, "module version greater than MAX_MOD_VERSION: %s\n", mod_version);
+ strncpy(lm->mod_version, mod_version, MAX_MOD_VERSION-1);
+ }
if (CRASHDEBUG(3))
- fprintf(fp, "%lx (%lx): %s syms: %d gplsyms: %d ksyms: %ld\n",
- mod, lm->mod_base, lm->mod_name, nsyms, ngplsyms, nksyms);
+ fprintf(fp, "%lx (%lx): %s syms: %d gplsyms: %d ksyms: %ld version: %s\n",
+ mod, lm->mod_base, lm->mod_name, nsyms, ngplsyms, nksyms, lm->mod_version);
lm->mod_flags = MOD_EXT_SYMS;
lm->mod_ext_symcnt = mcnt;
@@ -2271,6 +2286,7 @@ store_module_symbols_v2(ulong total, int mods_installed)
{
int i, m;
ulong mod, mod_next;
+ ulong version;
char *mod_name;
uint nsyms, ngplsyms;
ulong syms, gpl_syms;
@@ -2285,6 +2301,7 @@ store_module_symbols_v2(ulong total, int mods_installed)
char buf2[BUFSIZE];
char buf3[BUFSIZE];
char buf4[BUFSIZE];
+ char mod_version[BUFSIZE];
char *strbuf, *modbuf, *modsymbuf;
struct syment *sp;
ulong first, last;
@@ -2344,6 +2361,13 @@ store_module_symbols_v2(ulong total, int mods_installed)
mod_name = modbuf + OFFSET(module_name);
+ BZERO(mod_version, BUFSIZE);
+ if (MEMBER_EXISTS("module", "version")) {
+ version = ULONG(modbuf + OFFSET(module_version));
+ if (version)
+ read_string(version, mod_version, BUFSIZE - 1);
+ }
+
lm = &st->load_modules[m++];
BZERO(lm, sizeof(struct load_module));
lm->mod_base = ULONG(modbuf + MODULE_OFFSET2(module_module_core, rx));
@@ -2357,11 +2381,19 @@ store_module_symbols_v2(ulong total, int mods_installed)
mod_name);
strncpy(lm->mod_name, mod_name, MAX_MOD_NAME-1);
}
+ if (strlen(mod_version) < MAX_MOD_VERSION)
+ strcpy(lm->mod_version, mod_version);
+ else {
+ error(INFO,
+ "module version greater than MAX_MOD_VERSION: %s\n",
+ mod_version);
+ strncpy(lm->mod_version, mod_version, MAX_MOD_VERSION-1);
+ }
if (CRASHDEBUG(3))
fprintf(fp,
- "%lx (%lx): %s syms: %d gplsyms: %d ksyms: %ld\n",
- mod, lm->mod_base, lm->mod_name, nsyms,
- ngplsyms, nksyms);
+ "%lx (%lx): %s syms: %d gplsyms: %d ksyms: %ld version: %s\n",
+ mod, lm->mod_base, lm->mod_name, nsyms,
+ ngplsyms, nksyms, lm->mod_version);
lm->mod_flags = MOD_EXT_SYMS;
lm->mod_ext_symcnt = mcnt;
lm->mod_init_module_ptr = ULONG(modbuf +
@@ -10177,6 +10209,8 @@ dump_offset_table(char *spec, ulong makestruct)
OFFSET(module_next));
fprintf(fp, " module_name: %ld\n",
OFFSET(module_name));
+ fprintf(fp, " module_version: %ld\n",
+ OFFSET(module_version));
fprintf(fp, " module_syms: %ld\n",
OFFSET(module_syms));
fprintf(fp, " module_nsyms: %ld\n",
--
2.43.0
1 month
Re: [PATCH v2 1/2] kmem: introduce -t flag to get page owner
by lijiang
On Thu, Sep 12, 2024 at 11:41 AM <devel-request(a)lists.crash-utility.osci.io>
wrote:
> Date: Thu, 12 Sep 2024 03:36:53 -0000
> From: qiwu.chen(a)transsion.com
> Subject: [Crash-utility] [PATCH v2 1/2] kmem: introduce -t flag to get
> page owner
> To: devel(a)lists.crash-utility.osci.io
> Message-ID: <20240912033653.15869.73753(a)lists.crash-utility.osci.io>
> Content-Type: text/plain; charset="utf-8"
>
> Introduce -t flag for kmem command to get page owner.
>
Thank you for the patch, qiwu.
Kazu implemented an owner command (see: *page_owner.c
<https://raw.githubusercontent.com/k-hagio/crash-pageowner/main/page_owner.c>*),
which is a similar feature. Please refer to this section: page_owner.c
https://crash-utility.github.io/extensions.html
Thanks
Lianbo
Here is the user help manual:
>
> 1. Dump page_owner allocated stack trace for each allocated page in
> buddy system when used with "kmem -pt":
> crash> kmem -pt
> Page allocated via order 0, mask 0x1112c4a, pid 1, ts 16155269152 ns
> PFN 0x40000, type Movable, Flags 0xffff00000020836
> set_page_owner+84
> post_alloc_hook+308
> prep_new_page+48
> get_page_from_freelist+736
> __alloc_pages+348
> alloc_pages+280
> __page_cache_alloc+120
> page_cache_ra_unbounded+272
> do_page_cache_ra+172
> do_sync_mmap_readahead+492
> filemap_fault+340
> __do_fault+64
> __handle_mm_fault+528
> handle_mm_fault+208
> __do_page_fault+232
> do_page_fault+1264
> ......
>
> 2. Dump page_owner allocated/freed trace for an allocated page when used
> "kmem -pt" with a page address.
> crash> kmem -pt fffffc00001f9e40
> PAGE PHYSICAL MAPPING INDEX CNT FLAGS
> fffffc00001f9e40 47e79000 dead000000000008 0 0 ffff00000000000
> page_owner tracks the page 0xfffffc00001f9e40 as allocated
> Page allocated via order 3, mask 0xd20c0, pid 163, ts 39197221904 ns
> PFN 0x47e79, type Unmovable, Flags 0xffff00000000000
> set_page_owner+84
> post_alloc_hook+308
> prep_new_page+48
> get_page_from_freelist+736
> __alloc_pages+348
> alloc_pages+280
> alloc_slab_page+60
> allocate_slab+212
> new_slab+200
> ___slab_alloc+1432
> __slab_alloc+60
> kmem_cache_alloc_node+528
> alloc_task_struct_node+36
> dup_task_struct+56
> copy_process+724
> kernel_clone+276
>
> page last free ts 38730338480 ns, stack trace:
> set_page_owner+84
> post_alloc_hook+308
> prep_new_page+48
> get_page_from_freelist+736
> __alloc_pages+348
> alloc_pages+280
> alloc_slab_page+60
> allocate_slab+212
> new_slab+200
> ___slab_alloc+1432
> __slab_alloc+60
> kmem_cache_alloc_node+528
> alloc_task_struct_node+36
> dup_task_struct+56
> copy_process+724
> kernel_clone+276
>
> With this patch, the page allocation times can be sorted by page_owner_sort
> tool easily.
>
> Signed-off-by: qiwu.chen <qiwu.chen(a)transsion.com>
> ---
> defs.h | 43 ++++++
> help.c | 4 +-
> memory.c | 434 ++++++++++++++++++++++++++++++++++++++++++++++++++++---
> 3 files changed, 461 insertions(+), 20 deletions(-)
>
> diff --git a/defs.h b/defs.h
> index 2231cb6..3d729c8 100644
> --- a/defs.h
> +++ b/defs.h
> @@ -206,6 +206,34 @@ static inline int string_exists(char *s) { return (s
> ? TRUE : FALSE); }
> #undef roundup
> #endif
> #define roundup(x, y) ((((x)+((y)-1))/(y))*(y))
> +#define __round_mask(x, y) ((__typeof__(x))((y)-1))
> +#define rounddown(x, y) ((x) & ~__round_mask(x, y))
> +#define IS_ALIGNED(x, y) (((x) & ((typeof(x))(y) - 1)) == 0)
> +
> +/* stack depot macros before kernel commit 8151c7a35d8bd */
> +#define STACK_ALLOC_ALIGN 4
> +#define STACK_ALLOC_NULL_PROTECTION_BITS 1
> +#define STACK_ALLOC_ORDER 2 /* 'Slab' size order for stack depot, 4 pages
> */
> +#define STACK_ALLOC_OFFSET_BITS (STACK_ALLOC_ORDER + PAGESHIFT() -
> STACK_ALLOC_ALIGN)
> +#define DEPOT_STACK_BITS (sizeof(uint) * 8)
> +#define STACK_ALLOC_INDEX_BITS (DEPOT_STACK_BITS - \
> + STACK_ALLOC_NULL_PROTECTION_BITS - STACK_ALLOC_OFFSET_BITS)
> +
> +/* stack depot macros since kernel commit 8151c7a35d8bd */
> +#define STACK_DEPOT_EXTRA_BITS 5
> +#define DEPOT_HANDLE_BITS (sizeof(uint) * 8)
> +#define DEPOT_POOL_ORDER 2 /* Pool size order, 4 pages */
> +#define DEPOT_POOL_SIZE (1LL << (PAGESHIFT() + DEPOT_POOL_ORDER))
> +#define DEPOT_STACK_ALIGN 4
> +#define DEPOT_OFFSET_BITS (DEPOT_POOL_ORDER + PAGESHIFT() -
> DEPOT_STACK_ALIGN)
> +#define DEPOT_POOL_INDEX_BITS (DEPOT_HANDLE_BITS - DEPOT_OFFSET_BITS - \
> + STACK_DEPOT_EXTRA_BITS)
> +
> +/* GFP flags */
> +#define __GFP_RECLAIMABLE 0x10u
> +#define __GFP_MOVABLE 0x08u
> +#define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE)
> +#define GFP_MOVABLE_SHIFT 3
>
> typedef uint64_t physaddr_t;
>
> @@ -2243,6 +2271,18 @@ struct offset_table { /* stash
> of commonly-used offsets */
> long vmap_node_busy;
> long rb_list_head;
> long file_f_inode;
> + long pglist_data_node_page_ext;
> + long stack_record_size;
> + long stack_record_entries;
> + long stack_record_count;
> + long page_owner_order;
> + long page_owner_gfp_mask;
> + long page_owner_ts_nsec;
> + long page_owner_free_ts_nsec;
> + long page_owner_pid;
> + long page_owner_handle;
> + long page_owner_free_handle;
> + long mem_section_page_ext;
> };
>
> struct size_table { /* stash of commonly-used sizes */
> @@ -2419,6 +2459,9 @@ struct size_table { /* stash of
> commonly-used sizes */
> long module_memory;
> long fred_frame;
> long vmap_node;
> + long page_ext;
> + long page_owner;
> + long stack_record;
> };
>
> struct array_table {
> diff --git a/help.c b/help.c
> index e95ac1d..f8ec62f 100644
> --- a/help.c
> +++ b/help.c
> @@ -6815,7 +6815,7 @@ NULL
> char *help_kmem[] = {
> "kmem",
> "kernel memory",
> -"[-f|-F|-c|-C|-i|-v|-V|-n|-z|-o|-h] [-p | -m member[,member]]\n"
> +"[-f|-F|-c|-C|-i|-v|-V|-n|-z|-o|-h] [-p|-t | -m member[,member]]\n"
> " [[-s|-S|-S=cpu[s]|-r] [slab] [-I slab[,slab]]] [-g [flags]] [[-P]
> address]]",
> " This command displays information about the use of kernel memory.\n",
> " -f displays the contents of the system free memory headers.",
> @@ -6845,6 +6845,7 @@ char *help_kmem[] = {
> " mem_map[] array, made up of the page struct address, its
> associated",
> " physical address, the page.mapping, page.index, page._count
> and",
> " page.flags fields.",
> +" -t displays page_owner allocated stack trace for each allocated
> page.",
> " -m member similar to -p, but displays page structure contents
> specified by",
> " a comma-separated list of one or more struct page members.
> The",
> " \"flags\" member will always be expressed in hexadecimal
> format, and",
> @@ -6899,6 +6900,7 @@ char *help_kmem[] = {
> " address when used with -p, the address can be either a page pointer,
> a",
> " physical address, or a kernel virtual address; its basic
> mem_map",
> " page information is displayed.",
> +" when added extra -t, display the page_owner traces for this
> page",
> " address when used with -m, the address can be either a page pointer,
> a",
> " physical address, or a kernel virtual address; the
> specified",
> " members of the associated page struct are displayed.",
> diff --git a/memory.c b/memory.c
> index 967a9cf..6c69b6a 100644
> --- a/memory.c
> +++ b/memory.c
> @@ -323,6 +323,11 @@ static ulong kmem_cache_nodelists(ulong);
> static void dump_hstates(void);
> static ulong freelist_ptr(struct meminfo *, ulong, ulong);
> static ulong handle_each_vm_area(struct handle_each_vm_area_args *);
> +static void page_owner_init(void);
> +static int page_owner_enabled(void);
> +static void stack_depot_init(void);
> +static void dump_page_owner(struct meminfo *, ulong, physaddr_t);
> +enum track_item { TRACK_ALLOC, TRACK_FREE, TRACK_ALL };
>
> /*
> * Memory display modes specific to this file.
> @@ -983,6 +988,8 @@ vm_init(void)
> vt->flags |= DISCONTIGMEM;
>
> sparse_mem_init();
> + page_owner_init();
> + stack_depot_init();
>
> vt->vmalloc_start = machdep->vmalloc_start();
> if (IS_VMALLOC_ADDR(vt->mem_map))
> @@ -1099,6 +1106,8 @@ vm_init(void)
> MEMBER_OFFSET_INIT(pglist_data_bdata, "pglist_data",
> "bdata");
> MEMBER_OFFSET_INIT(pglist_data_nr_zones, "pglist_data",
> "nr_zones");
> + MEMBER_OFFSET_INIT(pglist_data_node_page_ext,
> "pglist_data",
> + "node_page_ext");
> MEMBER_OFFSET_INIT(pglist_data_node_start_pfn,
> "pglist_data",
> "node_start_pfn");
> MEMBER_OFFSET_INIT(pglist_data_pgdat_next, "pglist_data",
> @@ -5037,6 +5046,7 @@ get_task_mem_usage(ulong task, struct task_mem_usage
> *tm)
> #define SLAB_BITFIELD (ADDRESS_SPECIFIED << 25)
> #define SLAB_GATHER_FAILURE (ADDRESS_SPECIFIED << 26)
> #define GET_SLAB_ROOT_CACHES (ADDRESS_SPECIFIED << 27)
> +#define GET_PAGE_OWNER (ADDRESS_SPECIFIED << 28)
>
> #define GET_ALL \
>
> (GET_SHARED_PAGES|GET_TOTALRAM_PAGES|GET_BUFFERS_PAGES|GET_SLAB_PAGES)
> @@ -5048,7 +5058,7 @@ cmd_kmem(void)
> int c;
> int sflag, Sflag, pflag, fflag, Fflag, vflag, zflag, oflag, gflag;
> int nflag, cflag, Cflag, iflag, lflag, Lflag, Pflag, Vflag, hflag;
> - int rflag;
> + int rflag, tflag;
> struct meminfo meminfo;
> ulonglong value[MAXARGS];
> char buf[BUFSIZE];
> @@ -5061,13 +5071,13 @@ cmd_kmem(void)
> spec_addr = choose_cpu = 0;
> sflag = Sflag = pflag = fflag = Fflag = Pflag = zflag =
> oflag = 0;
> vflag = Cflag = cflag = iflag = nflag = lflag = Lflag = Vflag = 0;
> - gflag = hflag = rflag = 0;
> + gflag = hflag = rflag = tflag = 0;
> escape = FALSE;
> BZERO(&meminfo, sizeof(struct meminfo));
> BZERO(&value[0], sizeof(ulonglong)*MAXARGS);
> pc->curcmd_flags &= ~HEADER_PRINTED;
>
> - while ((c = getopt(argcnt, args, "gI:sS::rFfm:pvczCinl:L:PVoh"))
> != EOF) {
> + while ((c = getopt(argcnt, args, "gI:sS::rFfm:pvczCinl:L:PVoht"))
> != EOF) {
> switch(c)
> {
> case 'V':
> @@ -5204,6 +5214,10 @@ cmd_kmem(void)
> gflag = 1;
> break;
>
> + case 't':
> + tflag = 1;
> + break;
> +
> default:
> argerrs++;
> break;
> @@ -5213,7 +5227,7 @@ cmd_kmem(void)
> if (argerrs)
> cmd_usage(pc->curcmd, SYNOPSIS);
>
> - if ((sflag + Sflag + pflag + fflag + Fflag + Vflag + oflag +
> + if ((fflag + Fflag + Vflag + oflag +
> vflag + Cflag + cflag + iflag + lflag + Lflag + gflag +
> hflag + rflag) > 1) {
> error(INFO, "only one flag allowed!\n");
> @@ -5264,10 +5278,13 @@ cmd_kmem(void)
> if (pflag) {
> meminfo.spec_addr = value[i];
> meminfo.flags = ADDRESS_SPECIFIED;
> + if (tflag)
> + meminfo.flags |= GET_PAGE_OWNER;
> dump_mem_map(&meminfo);
> pflag++;
> }
>
> +
> if (sflag || Sflag) {
> if (vt->flags & KMEM_CACHE_UNAVAIL)
> error(FATAL,
> @@ -5346,25 +5363,25 @@ cmd_kmem(void)
> gflag++;
> }
>
> - /*
> - * no value arguments allowed!
> - */
> - if (zflag || nflag || iflag || Fflag || Cflag || Lflag ||
> + /*
> + * no value arguments allowed!
> + */
> + if (zflag || nflag || iflag || Fflag || Cflag || Lflag ||
> Vflag || oflag || hflag || rflag) {
> error(INFO,
> "no address arguments allowed with this
> option\n");
> cmd_usage(pc->curcmd, SYNOPSIS);
> }
>
> - if (!(sflag + Sflag + pflag + fflag + vflag + cflag +
> - lflag + Lflag + gflag)) {
> + if (!(sflag + Sflag + pflag + fflag + vflag + cflag +
> + lflag + Lflag + gflag + tflag)) {
> meminfo.spec_addr = value[i];
> - meminfo.flags = ADDRESS_SPECIFIED;
> - if (meminfo.calls++)
> - fprintf(fp, "\n");
> + meminfo.flags = ADDRESS_SPECIFIED;
> + if (meminfo.calls++)
> + fprintf(fp, "\n");
> else
> kmem_cache_init();
> - kmem_search(&meminfo);
> + kmem_search(&meminfo);
> }
>
> }
> @@ -5372,8 +5389,11 @@ cmd_kmem(void)
> if (iflag == 1)
> dump_kmeminfo();
>
> - if (pflag == 1)
> + if (pflag == 1) {
> + if (tflag)
> + meminfo.flags = GET_PAGE_OWNER;
> dump_mem_map(&meminfo);
> + }
>
> if (fflag == 1)
> vt->dump_free_pages(&meminfo);
> @@ -5457,7 +5477,7 @@ cmd_kmem(void)
> if (!(sflag + Sflag + pflag + fflag + Fflag + vflag +
> Vflag + zflag + oflag + cflag + Cflag + iflag +
> nflag + lflag + Lflag + gflag + hflag + rflag +
> - meminfo.calls))
> + tflag + meminfo.calls))
> cmd_usage(pc->curcmd, SYNOPSIS);
>
> }
> @@ -5749,7 +5769,8 @@ dump_mem_map_SPARSEMEM(struct meminfo *mi)
>
> switch (mi->flags)
> {
> - case ADDRESS_SPECIFIED:
> + case ADDRESS_SPECIFIED:
> + case ADDRESS_SPECIFIED|GET_PAGE_OWNER:
> switch (mi->memtype)
> {
> case KVADDR:
> @@ -5774,6 +5795,10 @@ dump_mem_map_SPARSEMEM(struct meminfo *mi)
> print_hdr = TRUE;
> break;
>
> + case GET_PAGE_OWNER:
> + print_hdr = FALSE;
> + break;
> +
> case GET_ALL:
> shared = 0;
> reserved = 0;
> @@ -5959,6 +5984,10 @@ dump_mem_map_SPARSEMEM(struct meminfo *mi)
> shared++;
> }
> continue;
> +
> + case GET_PAGE_OWNER:
> + dump_page_owner(mi, pp, phys);
> + continue;
> }
> page_mapping = VALID_MEMBER(page_mapping);
>
> @@ -6083,6 +6112,7 @@ display_members:
>
> if (done)
> break;
> +
> }
>
> if (done)
> @@ -6119,7 +6149,10 @@ display_members:
> break;
>
> case ADDRESS_SPECIFIED:
> + case ADDRESS_SPECIFIED|GET_PAGE_OWNER:
> mi->retval = done;
> + if (mi->flags & GET_PAGE_OWNER)
> + dump_page_owner(mi, pp, phys);
> break;
> }
>
> @@ -6129,6 +6162,345 @@ display_members:
> FREEBUF(page_cache);
> }
>
> +static int stack_depot_enabled(void)
> +{
> + struct syment *sp;
> + int disable = TRUE;
> +
> + if ((sp = symbol_search("stack_depot_disable")))
> + readmem(sp->value, KVADDR, &disable, sizeof(int),
> + "stack_depot_disable", RETURN_ON_ERROR);
> + else if ((sp = symbol_search("stack_depot_disabled")))
> + readmem(sp->value, KVADDR, &disable, sizeof(int),
> + "stack_depot_disabled", RETURN_ON_ERROR);
> + else if ((sp = symbol_search("stack_slabs")))
> + return sp->value ? FALSE : TRUE;
> +
> + return !disable;
> +}
> +
> +static void stack_depot_init(void)
> +{
> + if (stack_depot_enabled()) {
> + STRUCT_SIZE_INIT(stack_record, "stack_record");
> + MEMBER_OFFSET_INIT(stack_record_size, "stack_record",
> "size");
> + MEMBER_OFFSET_INIT(stack_record_entries, "stack_record",
> "entries");
> + if (MEMBER_EXISTS("stack_record", "count"))
> + MEMBER_OFFSET_INIT(stack_record_count,
> "stack_record", "count");
> + }
> +}
> +
> +/* Fetch stack entries from a depot. */
> +static unsigned int stack_depot_fetch(uint handle, ulong *entries)
> +{
> + struct syment *sp;
> + uint valid, offset, slabindex, poolindex, pools_num,
> stack_record_count;
> + uint stack_size = 0;
> + ulong stack_record_addr, sym_value;
> +
> + if (!handle)
> + return 0;
> +
> + if ((sp = symbol_search("stack_slabs"))) {
> + valid = (handle >> (STACK_ALLOC_INDEX_BITS +
> STACK_ALLOC_OFFSET_BITS))
> + & STACK_ALLOC_NULL_PROTECTION_BITS;
> + if (!valid)
> + return 0;
> +
> + slabindex = handle & ((1 << STACK_ALLOC_INDEX_BITS) - 1);
> + readmem(sp->value + slabindex * sizeof(void *), KVADDR,
> &stack_record_addr,
> + sizeof(void *), "stack_record_addr",
> FAULT_ON_ERROR);
> +
> + offset = (handle >> STACK_ALLOC_INDEX_BITS) &
> + ((1 << STACK_ALLOC_OFFSET_BITS) - 1);
> + stack_record_addr += (offset << STACK_ALLOC_ALIGN);
> + *entries = stack_record_addr +
> OFFSET(stack_record_entries);
> + readmem(stack_record_addr + OFFSET(stack_record_size),
> KVADDR, &stack_size,
> + sizeof(stack_size), "stack_record_entries",
> FAULT_ON_ERROR);
> + } else if ((sp = symbol_search("stack_pools")) &&
> + (sym_value = symbol_value("pools_num"))) {
> + poolindex = handle & ((1 << DEPOT_POOL_INDEX_BITS) - 1);
> + readmem(sym_value, KVADDR, &pools_num, sizeof(int),
> + "pools_num", RETURN_ON_ERROR);
> + if (poolindex >= pools_num) {
> + error(INFO, "pool index %d out of bounds (%d) for
> stack id %08x\n",
> + poolindex, pools_num, handle);
> + return 0;
> + }
> +
> + readmem(sp->value + (poolindex-1) * sizeof(void *),
> KVADDR, &stack_record_addr,
> + sizeof(void *), "stack_record_addr",
> FAULT_ON_ERROR);
> + if (!stack_record_addr)
> + return 0;
> +
> + offset = (handle >> DEPOT_POOL_INDEX_BITS) & ((1 <<
> DEPOT_OFFSET_BITS) - 1);
> + stack_record_addr += (offset << DEPOT_STACK_ALIGN);
> + readmem(stack_record_addr + OFFSET(stack_record_count),
> KVADDR, &stack_record_count,
> + sizeof(stack_record_count), "stack_record_count",
> FAULT_ON_ERROR);
> + if (!stack_record_count)
> + return 0;
> +
> + *entries = stack_record_addr +
> OFFSET(stack_record_entries);
> + readmem(stack_record_addr + OFFSET(stack_record_size),
> KVADDR, &stack_size,
> + sizeof(stack_size), "stack_record_entries",
> FAULT_ON_ERROR);
> + }
> +
> + return stack_size;
> +}
> +
> +static void stack_trace_print(ulong entries, uint nr_entries)
> +{
> + int i;
> + struct syment *sp;
> + ulong value, offset;
> + char buf[BUFSIZE];
> +
> + if (!nr_entries)
> + return;
> +
> + for (i = 0; i < nr_entries; i++) {
> + if (!readmem(entries, KVADDR, &value, sizeof(value),
> + "stack_trace", FAULT_ON_ERROR))
> + break;
> +
> + entries += sizeof(ulong);
> + sp = value_search(value, &offset);
> + if (!sp)
> + break;
> +
> + fprintf(fp, "%s\n", value_to_symstr(sp->value+offset, buf,
> 0));
> + }
> + fprintf(fp, "\n");
> +}
> +
> +static ulong gfp_migratetype(ulong gfp_flags)
> +{
> + struct syment *sp;
> + int page_group_by_mobility_disabled;
> +
> + if ((sp = symbol_search("page_group_by_mobility_disabled"))) {
> + readmem(sp->value, KVADDR,
> &page_group_by_mobility_disabled, sizeof(int),
> + "page_group_by_mobility_disabled",
> RETURN_ON_ERROR);
> + if (page_group_by_mobility_disabled) {
> + ulong migrate_unmovable;
> +
> + enumerator_value("MIGRATE_UNMOVABLE",
> &migrate_unmovable);
> + return migrate_unmovable;
> + }
> + }
> +
> + return (gfp_flags & GFP_MOVABLE_MASK) >> GFP_MOVABLE_SHIFT;
> +}
> +
> +static void migratetype_name(ulong migratetype, char *buf)
> +{
> + struct syment *sp;
> + ulong migratetype_name_addr;
> +
> + sp = symbol_search("migratetype_names");
> + if (!sp)
> + return;
> +
> + readmem(sp->value + migratetype * sizeof(ulong), KVADDR,
> &migratetype_name_addr,
> + sizeof(ulong), "migratetype_name", RETURN_ON_ERROR);
> + read_string(migratetype_name_addr, buf, BUFSIZE-1);
> +}
> +
> +static void print_page_owner(ulong pfn, ulong page, char *page_owner,
> enum track_item alloc)
> +{
> + int i, pid;
> + ushort order;
> + uint handle, free_handle, gfp_mask, nr_entries;
> + u64 ts_nsec, free_ts_nsec;
> + ulong entries, offset, page_flags;
> + struct syment *sp;
> + char buf[BUFSIZE];
> +
> + order = USHORT(page_owner + OFFSET(page_owner_order));
> + gfp_mask = UINT(page_owner + OFFSET(page_owner_gfp_mask));
> + handle = UINT(page_owner + OFFSET(page_owner_handle));
> + free_handle = UINT(page_owner + OFFSET(page_owner_free_handle));
> + ts_nsec = ULONGLONG(page_owner + OFFSET(page_owner_ts_nsec));
> + free_ts_nsec = ULONGLONG(page_owner +
> OFFSET(page_owner_free_ts_nsec));
> + pid = INT(page_owner + OFFSET(page_owner_pid));
> +
> + if (handle && (alloc != TRACK_FREE)) {
> + fprintf(fp, "Page allocated via order %u, mask %#x, pid
> %d, ts %llu ns\n",
> + order, gfp_mask, pid, ts_nsec);
> + migratetype_name(gfp_migratetype(gfp_mask), buf);
> + readmem(page+OFFSET(page_flags), KVADDR, &page_flags,
> sizeof(ulong),
> + "page.flags", FAULT_ON_ERROR);
> + fprintf(fp, "PFN %#lx, type %s, Flags %#lx\n", pfn, buf,
> page_flags);
> + nr_entries = stack_depot_fetch(handle, &entries);
> + stack_trace_print(entries, nr_entries);
> + }
> +
> + if (alloc != TRACK_ALLOC &&
> + (free_handle = UINT(page_owner +
> OFFSET(page_owner_free_handle)))) {
> + nr_entries = stack_depot_fetch(handle, &entries);
> + fprintf(fp, "page last free ts %llu ns, stack trace:\n",
> free_ts_nsec);
> + stack_trace_print(entries, nr_entries);
> + }
> +}
> +
> +/* Get the max order for zoned buddy allocator */
> +static inline ulong get_max_order(void)
> +{
> + char *string;
> +
> + if ((kt->ikconfig_flags & IKCONFIG_AVAIL) &&
> + get_kernel_config("CONFIG_FORCE_MAX_ZONEORDER", &string) ==
> IKCONFIG_STR)
> + return atol(string);
> +
> + return 11;
> +}
> +
> +#define MAX_ORDER get_max_order()
> +#define MAX_ORDER_NR_PAGES (1 << (MAX_ORDER - 1))
> +
> +static int lookup_page_ext(ulong pfn, ulong pp, ulong *page_ext)
> +{
> + int node;
> + ulong page_ext_size, section, section_nr, pgdat;
> + ulong node_page_ext, node_start_pfn, page_ext_idx;
> +
> + if (!kernel_symbol_exists("page_ext_size"))
> + return FALSE;
> +
> + readmem(symbol_value("page_ext_size"), KVADDR, &page_ext_size,
> + sizeof(page_ext_size), "page_ext_size", FAULT_ON_ERROR);
> +
> + if (IS_SPARSEMEM()) {
> + section_nr = pfn_to_section_nr(pfn);
> + if (!(section = valid_section_nr(section_nr)))
> + return FALSE;
> +
> + readmem(section + OFFSET(mem_section_page_ext), KVADDR,
> &node_page_ext,
> + sizeof(ulong), "mem_section page_ext",
> FAULT_ON_ERROR);
> + if (!node_page_ext)
> + return FALSE;
> +
> + *page_ext = node_page_ext + pfn * page_ext_size;
> + return TRUE;
> + }
> +
> + if ((node = page_to_nid(pp) >= 0)) {
> + pgdat = vt->node_table[node].pgdat;
> + if (!VALID_MEMBER(pglist_data_node_page_ext) ||
> + !VALID_MEMBER(pglist_data_node_start_pfn))
> + return FALSE;
> +
> + readmem(pgdat + OFFSET(pglist_data_node_page_ext), KVADDR,
> &node_page_ext,
> + sizeof(ulong), "pglist node_page_ext",
> FAULT_ON_ERROR);
> + if (!node_page_ext)
> + return FALSE;
> +
> + readmem(pgdat + OFFSET(pglist_data_node_start_pfn),
> KVADDR, &node_start_pfn,
> + sizeof(ulong), "pglist node_start_pfn",
> FAULT_ON_ERROR);
> + if (!node_start_pfn)
> + return FALSE;
> +
> + page_ext_idx = pfn - rounddown(node_start_pfn,
> MAX_ORDER_NR_PAGES);
> + *page_ext = node_page_ext + pfn * page_ext_size;
> + return TRUE;
> + }
> +
> + return FALSE;
> +}
> +
> +static ulong get_page_owner(ulong page_ext)
> +{
> + struct syment *sp;
> + ulong page_owner_ops_offset;
> +
> + sp = symbol_search("page_owner_ops");
> + if (!sp)
> + return FALSE;
> +
> + readmem(sp->value, KVADDR, &page_owner_ops_offset, sizeof(ulong),
> + "page_owner_ops_offset", RETURN_ON_ERROR);
> +
> + return page_ext + page_owner_ops_offset;
> +}
> +
> +static int page_owner_enabled(void)
> +{
> + struct syment *sp;
> + int enabled;
> +
> + if ((sp = symbol_search("page_owner_enabled")) &&
> + readmem(sp->value, KVADDR, &enabled, sizeof(int),
> + "page_owner_enabled", RETURN_ON_ERROR))
> + return enabled;
> +
> + if ((sp = symbol_search("page_owner_inited")) &&
> + readmem(sp->value, KVADDR, &enabled, sizeof(int),
> + "page_owner_inited", RETURN_ON_ERROR))
> + return enabled;
> +
> + return FALSE;
> +}
> +
> +static void page_owner_init(void)
> +{
> + if (page_owner_enabled()) {
> + STRUCT_SIZE_INIT(page_ext, "page_ext");
> + STRUCT_SIZE_INIT(page_owner, "page_owner");
> + MEMBER_OFFSET_INIT(mem_section_page_ext, "mem_section",
> "page_ext");
> + MEMBER_OFFSET_INIT(page_owner_handle, "page_owner",
> "handle");
> + MEMBER_OFFSET_INIT(page_owner_free_handle, "page_owner",
> "free_handle");
> + MEMBER_OFFSET_INIT(page_owner_order, "page_owner",
> "order");
> + MEMBER_OFFSET_INIT(page_owner_gfp_mask, "page_owner",
> "gfp_mask");
> + MEMBER_OFFSET_INIT(page_owner_ts_nsec, "page_owner",
> "ts_nsec");
> + MEMBER_OFFSET_INIT(page_owner_free_ts_nsec, "page_owner",
> "free_ts_nsec");
> + MEMBER_OFFSET_INIT(page_owner_pid, "page_owner", "pid");
> + }
> +}
> +
> +static void dump_page_owner(struct meminfo *mi, ulong pp, physaddr_t phys)
> +{
> + ulong pfn, page_ext_addr, page_owner_addr, page_ext;
> + long page_ext_owner, page_ext_owner_allocated;
> + char *page_owner;
> +
> + pfn = BTOP(phys);
> + if (!lookup_page_ext(pfn, pp, &page_ext_addr))
> + return;
> +
> + page_owner_addr = get_page_owner(page_ext_addr);
> + if (!page_owner_addr)
> + return;
> +
> + page_owner = (char *)GETBUF(SIZE(page_owner));
> + readmem(page_owner_addr, KVADDR, page_owner, SIZE(page_owner),
> + "page_owner", FAULT_ON_ERROR);
> +
> + enumerator_value("PAGE_EXT_OWNER", &page_ext_owner);
> + readmem(page_ext_addr, KVADDR, &page_ext, sizeof(ulong),
> + "page_ext", FAULT_ON_ERROR);
> + if (!(page_ext & (1 << page_ext_owner)))
> + goto exit;
> +
> + enumerator_value("PAGE_EXT_OWNER_ALLOCATED",
> &page_ext_owner_allocated);
> + if (mi->flags == GET_PAGE_OWNER) {
> + if (!(page_ext & (1 << page_ext_owner_allocated)) ||
> + !IS_ALIGNED(pfn, 1 << USHORT(page_owner +
> OFFSET(page_owner_order))))
> + goto exit;
> +
> + /* dump allocated page owner for current memory usage */
> + print_page_owner(pfn, pp, page_owner, TRACK_ALLOC);
> + } else {
> + if (page_ext & (1 << page_ext_owner_allocated))
> + fprintf(fp, "page_owner tracks the page 0x%lx as
> allocated\n", pp);
> + else
> + fprintf(fp, "page_owner tracks the page 0x%lx as
> freed\n", pp);
> + print_page_owner(pfn, pp, page_owner, TRACK_ALL);
> + }
> +
> +exit:
> + FREEBUF(page_owner);
> +}
> +
> static void
> dump_mem_map(struct meminfo *mi)
> {
> @@ -6161,6 +6533,19 @@ dump_mem_map(struct meminfo *mi)
> char style3[100];
> char style4[100];
>
> + if (mi->flags & GET_PAGE_OWNER) {
> + if (!page_owner_enabled()) {
> + error(INFO, "page_owner is disabled\n");
> + return;
> + }
> +
> + /* TODO: support page owner for early kernels without
> stack depot */
> + if (!stack_depot_enabled()) {
> + error(INFO, "stack_depot is disabled\n");
> + return;
> + }
> + }
> +
> if (IS_SPARSEMEM()) {
> dump_mem_map_SPARSEMEM(mi);
> return;
> @@ -6238,7 +6623,8 @@ dump_mem_map(struct meminfo *mi)
>
> switch (mi->flags)
> {
> - case ADDRESS_SPECIFIED:
> + case ADDRESS_SPECIFIED:
> + case ADDRESS_SPECIFIED|GET_PAGE_OWNER:
> switch (mi->memtype)
> {
> case KVADDR:
> @@ -6263,6 +6649,10 @@ dump_mem_map(struct meminfo *mi)
> print_hdr = TRUE;
> break;
>
> + case GET_PAGE_OWNER:
> + print_hdr = FALSE;
> + break;
> +
> case GET_ALL:
> shared = 0;
> reserved = 0;
> @@ -6376,6 +6766,10 @@ dump_mem_map(struct meminfo *mi)
>
> /* FALLTHROUGH */
>
> + case GET_PAGE_OWNER:
> + dump_page_owner(mi, pp, phys);
> + break;
> +
> case GET_SLAB_PAGES:
> if (v22) {
> if ((flags >> v22_PG_Slab) & 1)
> @@ -6570,7 +6964,10 @@ display_members:
> break;
>
> case ADDRESS_SPECIFIED:
> + case ADDRESS_SPECIFIED|GET_PAGE_OWNER:
> mi->retval = done;
> + if (mi->flags & GET_PAGE_OWNER)
> + dump_page_owner(mi, pp, phys);
> break;
> }
>
> @@ -19776,7 +20173,6 @@ do_slab_slub(struct meminfo *si, int verbose)
> if (is_free && (cpu_slab >= 0))
> fprintf(fp, "(cpu %d cache)", cpu_slab);
> fprintf(fp, "\n");
> -
> }
>
> return TRUE;
> --
> 2.25.1
>
1 month
Revert "arm64: section_size_bits compatible with macro definitions"
by Guanyou Chen
Hi tao
When crash loads a vmcore from a kernel older than 5.12, it parses
section_size_bits as 27, but it should be 30. This happens because
"CONFIG_ARM64_4K_PAGES" is defined. Commit "568c6f04" was added for
compatibility with Android GKI, so we should revert it.
Example:
Before:
crash vmcore vmlinux -d1
...
xtime timespec.tv_sec: 603549d0: Wed Feb 24 02:30:40 CST 2021
utsname:
sysname: Linux
nodename: localhost
release: 4.14.180-perf-g4483caa8ae80-dirty
version: #1 SMP PREEMPT Wed Feb 24 03:16:01 CST 2021
machine: aarch64
domainname: localdomain
...
SECTION_SIZE_BITS: 27
...
After:
crash vmcore vmlinux -d1
...
xtime timespec.tv_sec: 603549d0: Wed Feb 24 02:30:40 CST 2021
utsname:
sysname: Linux
nodename: localhost
release: 4.14.180-perf-g4483caa8ae80-dirty
version: #1 SMP PREEMPT Wed Feb 24 03:16:01 CST 2021
machine: aarch64
domainname: localdomain
...
SECTION_SIZE_BITS: 30
...
diff --git a/arm64.c b/arm64.c
index 06e7451..05ffdb3 100644
--- a/arm64.c
+++ b/arm64.c
@@ -1698,14 +1698,7 @@ arm64_get_section_size_bits(void)
if ((ret = get_kernel_config("CONFIG_MEMORY_HOTPLUG",
NULL)) == IKCONFIG_Y) {
if ((ret =
get_kernel_config("CONFIG_HOTPLUG_SIZE_BITS", &string)) == IKCONFIG_STR)
machdep->section_size_bits = atol(string);
- }
-
- /* arm64: reduce section size for sparsemem */
- if ((ret = get_kernel_config("CONFIG_ARM64_4K_PAGES",
NULL)) == IKCONFIG_Y
- || (ret =
get_kernel_config("CONFIG_ARM64_16K_PAGES", NULL)) == IKCONFIG_Y)
- machdep->section_size_bits =
_SECTION_SIZE_BITS_5_12;
- else if ((ret = get_kernel_config("CONFIG_ARM64_64K_PAGES",
NULL)) == IKCONFIG_Y)
- machdep->section_size_bits =
_SECTION_SIZE_BITS_5_12_64K;
+ }
}
if (CRASHDEBUG(1))
Thanks,
Guanyou.
1 month, 2 weeks
[PATCH v2 0/2] ppc64: A few bug fixes for 6.x kernel
by Tao Liu
There are 3 bugs caused by newer kernel changes. They were found when
testing the gdb stack unwinding support[1] for ppc64 against the
6.x kernels. See the discussion in [2][3]. It is better to fix them
in a batch.
The 3 bugs are:
1) STACK_SWITCH_FRAME_REGS replaced STACK_FRAME_OVERHEAD for determining
the offset of the register values within the stack. Fixed in patch 1/2.
2) EXCP_FRAME_MARKER is outdated for determining the eframe stack. Fixed in
patch 2/2.
3) Inline functions are not inlined in the kernel. This should be fixed on the
kernel side rather than in crash; it is currently unhandled.
By the way, currently the gdb stack unwinding support requires the
following patchsets to be merged first:
1. This patchset;
2. [Crash-utility] [PATCH v2] Fix the regression of cpumask_t for xen
hyper;
3. Patch for the No.3 bug, if we want to test against the 6.X el10 kernel
against gdb stack unwinding. Since the patch is not ready, 5.X el9 kernel
can be tested instead.
[1]: https://www.mail-archive.com/devel@lists.crash-utility.osci.io/msg01079.html
[2]: https://www.mail-archive.com/devel@lists.crash-utility.osci.io/msg01116.html
[3]: https://www.mail-archive.com/devel@lists.crash-utility.osci.io/msg01126.html
v1 -> v2:
1) Get rid of instruction disassembly to determine if abi_v2 enabled.
2) Updated kernel commit hash into the crash patches.
Tao Liu (2):
ppc64: Fix bt printing error stack trace
ppc64: fix the bug eframe won't print for newer kernel
defs.h | 9 +++++--
netdump.c | 14 +++++++----
ppc64.c | 71 +++++++++++++++++++++++++++++++++++++++++++++----------
symbols.c | 5 ++--
4 files changed, 78 insertions(+), 21 deletions(-)
--
2.46.2
1 month, 2 weeks
Re: [PATCH v1 1/3] ppc64: Fix bt printing error stack trace
by lijiang
Hi, Tao
Thank you for the fix.
On Wed, Sep 18, 2024 at 7:44 AM <devel-request(a)lists.crash-utility.osci.io>
wrote:
> Date: Wed, 18 Sep 2024 11:42:03 +1200
> From: Tao Liu <ltao(a)redhat.com>
> Subject: [Crash-utility] [PATCH v1 1/3] ppc64: Fix bt printing error
> stack trace
> To: devel(a)lists.crash-utility.osci.io
> Cc: adityag(a)linux.ibm.com, Tao Liu <ltao(a)redhat.com>
> Message-ID: <20240917234205.7783-2-ltao(a)redhat.com>
> Content-Type: text/plain; charset="US-ASCII"; x-default=true
>
> An erroneous stack trace from the bt command was observed:
>
> crash> bt 1
> PID: 1 TASK: c000000003714b80 CPU: 2 COMMAND: "systemd"
> #0 [c0000000037735c0] _end at c0000000037154b0 (unreliable)
> #1 [c000000003773770] __switch_to at c00000000001fa9c
> #2 [c0000000037737d0] __schedule at c00000000112e4ec
> #3 [c0000000037738b0] schedule at c00000000112ea80
> ...
>
> The #0 stack trace is incorrect, the function address shouldn't exceed
> _end.
> The reason is for kernel>=v6.2, the offset of pt_regs to sp changed from
> STACK_FRAME_OVERHEAD, i.e 112, to STACK_SWITCH_FRAME_REGS. For
> CONFIG_PPC64_ELF_ABI_V1, it's 112, for ABI_V2, it's 48. So the nip will
> read a
> wrong value from stack when ABI_V2 enabled.
>
>
Can you help to add the related kernel commits to patch log? That will help
me a lot to review the patches.
> Determining whether ABI_V2 is enabled is tricky. This patch does it by checking the
> following:
>
Can you try to read the value of e_flags from the elf header and to
determine what ABI version it is?
Thanks
Lianbo
>
> In arch/powerpc/include/asm/ppc_asm.h:
> #ifdef CONFIG_PPC64_ELF_ABI_V2
> #define STK_GOT 24
> #else
> #define STK_GOT 40
>
> In arch/powerpc/kernel/tm.S:
> _GLOBAL(tm_reclaim)
> mfcr r5
> mflr r0
> stw r5, 8(r1)
> std r0, 16(r1)
> std r2, STK_GOT(r1)
> ...
>
> So the approach used is to disassemble tm_reclaim and extract the STK_GOT
> value from the std instruction.
>
> After the patch:
> crash> bt 1
> PID: 1 TASK: c000000003714b80 CPU: 2 COMMAND: "systemd"
> #0 [c0000000037737d0] __schedule at c00000000112e4ec
> #1 [c0000000037738b0] schedule at c00000000112ea80
> ...
>
> Signed-off-by: Tao Liu <ltao(a)redhat.com>
> ---
> defs.h | 1 +
> ppc64.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++---
> 2 files changed, 58 insertions(+), 3 deletions(-)
>
> diff --git a/defs.h b/defs.h
> index 2231cb6..d5cb8cc 100644
> --- a/defs.h
> +++ b/defs.h
> @@ -4643,6 +4643,7 @@ struct efi_memory_desc_t {
> #define MSR_PR_LG 14 /* Problem State / Privilege Level */
> /* Used to find the user or kernel-mode
> frame*/
>
> +#define STACK_SWITCH_FRAME_REGS 48
> #define STACK_FRAME_OVERHEAD 112
> #define EXCP_FRAME_MARKER 0x7265677368657265
>
> diff --git a/ppc64.c b/ppc64.c
> index e8930a1..6e5f155 100644
> --- a/ppc64.c
> +++ b/ppc64.c
> @@ -72,6 +72,7 @@ static ulong pud_page_vaddr_l4(ulong pud);
> static ulong pmd_page_vaddr_l4(ulong pmd);
> static int is_opal_context(ulong sp, ulong nip);
> void opalmsg(void);
> +static bool is_ppc64_elf_abi_v2(void);
>
> static int is_opal_context(ulong sp, ulong nip)
> {
> @@ -2813,6 +2814,51 @@ ppc64_get_sp(ulong task)
> return sp;
> }
>
> +static bool
> +is_ppc64_elf_abi_v2(void)
> +{
> + char buf1[BUFSIZE];
> + char *pos1, *pos2;
> + int errflag = 0;
> + ulong stk_got = 0;
> + static bool ret = false;
> + static bool checked = false;
> +
> + if (checked == true || !symbol_exists("tm_reclaim"))
> + return ret;
> +
> + sprintf(buf1, "x/16i tm_reclaim");
> + open_tmpfile();
> + if (!gdb_pass_through(buf1, pc->tmpfile, GNU_RETURN_ON_ERROR))
> + goto out;
> + checked = true;
> + rewind(pc->tmpfile);
> + while (fgets(buf1, BUFSIZE, pc->tmpfile)) {
> + // "std r2, STK_GOT(r1)" is expected
> + if (strstr(buf1, "std") &&
> + strstr(buf1, "(r1)") &&
> + (pos1 = strstr(buf1, "r2,"))) {
> + pos1 += strlen("r2,");
> + for (pos2 = pos1; *pos2 != '\0' && *pos2 != '(';
> pos2++);
> + *pos2 = '\0';
> + stk_got = stol(pos1, RETURN_ON_ERROR|QUIET,
> &errflag);
> + break;
> + }
> + }
> +
> + if (!errflag) {
> + switch (stk_got) {
> + case 24:
> + ret = true;
> + case 40:
> + goto out;
> + }
> + }
> + error(WARNING, "Unstable elf_abi v1/v2 detection.\n");
> +out:
> + close_tmpfile();
> + return ret;
> +}
>
> /*
> * get the SP and PC values for idle tasks.
> @@ -2834,9 +2880,17 @@ get_ppc64_frame(struct bt_info *bt, ulong *getpc,
> ulong *getsp)
> sp = ppc64_get_sp(task);
> if (!INSTACK(sp, bt))
> goto out;
> - readmem(sp+STACK_FRAME_OVERHEAD, KVADDR, &regs,
> - sizeof(struct ppc64_pt_regs),
> - "PPC64 pt_regs", FAULT_ON_ERROR);
> +
> + if (THIS_KERNEL_VERSION >= LINUX(6,2,0) && is_ppc64_elf_abi_v2()) {
> + readmem(sp+STACK_SWITCH_FRAME_REGS, KVADDR, &regs,
> + sizeof(struct ppc64_pt_regs),
> + "PPC64 pt_regs", FAULT_ON_ERROR);
> + } else {
> + readmem(sp+STACK_FRAME_OVERHEAD, KVADDR, &regs,
> + sizeof(struct ppc64_pt_regs),
> + "PPC64 pt_regs", FAULT_ON_ERROR);
> + }
> +
> ip = regs.nip;
> closest = closest_symbol(ip);
> if (STREQ(closest, ".__switch_to") || STREQ(closest,
> "__switch_to")) {
> --
> 2.40.1
>
1 month, 3 weeks