[PATCH] Fix the "ps -m" command shows wrong duration of RU task
by Kenneth Yin
The RU/TASK_RUNNING state means the task is runnable.
It is either currently running or on a run queue waiting to run.
Currently, the crash tool uses the "rq_clock - sched_info->last_arrival" formula to
calculate how long a task has been in the RU state. This is correct for a task that is running on a CPU.
But for a task that is waiting in the CPU run queue (for some reason,
for example because the cfs/rt queue is throttled), this formula gives a misleading result.
For example:
[ 220 10:36:38.026] [RU] PID: 12345 TASK: ffff8d674ab6b180 CPU: 1 COMMAND: "task"
Looking closer:
crash> rq.clock ffff8de438a5acc0
clock = 87029229985307234,
crash> task -R sched_info,se.exec_start
PID: 12345 TASK: ffff8d674ab6b180 CPU: 1 COMMAND: "task"
sched_info = {
pcount = 33,
run_delay = 0,
last_arrival = 67983031958439673,
last_queued = 87029224561119369
},
se.exec_start = 67983031958476937,
67983031 67983031 87029224 87029229
|<- running on CPU ->| <- IN ->|<- waiting in queue ->|
In this scenario, the "task" had been waiting in the CPU run queue for only 5 seconds, so
we should use the "rq_clock - sched_info->last_queued" formula.
We can trust sched_info->last_queued because it is only set when the task enters the CPU run queue.
Furthermore, it is reset to 0 when the task starts running on a CPU or is dequeued from the CPU run queue.
Therefore, my idea is simple:
If a task is in the RU state and sched_info->last_queued is non-zero (!= 0),
it means this task is waiting in the run queue, so use "rq_clock - sched_info->last_queued".
Otherwise, if a task is in the RU state, sched_info->last_queued = 0,
and sched_info->last_arrival has a value (it must), it means this task is running on the CPU, so
use "rq_clock - sched_info->last_arrival".
Signed-off-by: Kenneth Yin <kyin(a)redhat.com>
---
defs.h | 1 +
symbols.c | 2 ++
task.c | 21 +++++++++++++++------
3 files changed, 18 insertions(+), 6 deletions(-)
diff --git a/defs.h b/defs.h
index 4cf169c..66f5ce4 100644
--- a/defs.h
+++ b/defs.h
@@ -1787,6 +1787,7 @@ struct offset_table { /* stash of commonly-used offsets */
long vcpu_struct_rq;
long task_struct_sched_info;
long sched_info_last_arrival;
+ long sched_info_last_queued;
long page_objects;
long kmem_cache_oo;
long char_device_struct_cdev;
diff --git a/symbols.c b/symbols.c
index e30fafe..fb5035f 100644
--- a/symbols.c
+++ b/symbols.c
@@ -9930,6 +9930,8 @@ dump_offset_table(char *spec, ulong makestruct)
OFFSET(sched_rt_entity_run_list));
fprintf(fp, " sched_info_last_arrival: %ld\n",
OFFSET(sched_info_last_arrival));
+ fprintf(fp, " sched_info_last_queued: %ld\n",
+ OFFSET(sched_info_last_queued));
fprintf(fp, " task_struct_thread_info: %ld\n",
OFFSET(task_struct_thread_info));
fprintf(fp, " task_struct_stack: %ld\n",
diff --git a/task.c b/task.c
index 3bafe79..f5386ac 100644
--- a/task.c
+++ b/task.c
@@ -332,9 +332,12 @@ task_init(void)
MEMBER_OFFSET_INIT(task_struct_last_run, "task_struct", "last_run");
MEMBER_OFFSET_INIT(task_struct_timestamp, "task_struct", "timestamp");
MEMBER_OFFSET_INIT(task_struct_sched_info, "task_struct", "sched_info");
- if (VALID_MEMBER(task_struct_sched_info))
+ if (VALID_MEMBER(task_struct_sched_info)) {
MEMBER_OFFSET_INIT(sched_info_last_arrival,
"sched_info", "last_arrival");
+ MEMBER_OFFSET_INIT(sched_info_last_queued,
+ "sched_info", "last_queued");
+ }
if (VALID_MEMBER(task_struct_last_run) ||
VALID_MEMBER(task_struct_timestamp) ||
VALID_MEMBER(sched_info_last_arrival)) {
@@ -6035,7 +6038,7 @@ ulonglong
task_last_run(ulong task)
{
ulong last_run;
- ulonglong timestamp;
+ ulonglong timestamp,last_queued;
timestamp = 0;
fill_task_struct(task);
@@ -6047,10 +6050,16 @@ task_last_run(ulong task)
} else if (VALID_MEMBER(task_struct_timestamp))
timestamp = tt->last_task_read ? ULONGLONG(tt->task_struct +
OFFSET(task_struct_timestamp)) : 0;
- else if (VALID_MEMBER(sched_info_last_arrival))
- timestamp = tt->last_task_read ? ULONGLONG(tt->task_struct +
- OFFSET(task_struct_sched_info) +
- OFFSET(sched_info_last_arrival)) : 0;
+ else if (VALID_MEMBER(sched_info_last_queued))
+ last_queued = ULONGLONG(tt->task_struct +
+ OFFSET(task_struct_sched_info) +
+ OFFSET(sched_info_last_queued));
+ if (last_queued != 0) {
+ timestamp = tt->last_task_read ? last_queued : 0;
+ } else if (VALID_MEMBER(sched_info_last_arrival))
+ timestamp = tt->last_task_read ? ULONGLONG(tt->task_struct +
+ OFFSET(task_struct_sched_info) +
+ OFFSET(sched_info_last_arrival)) : 0;
return timestamp;
}
--
2.31.1
2 days, 17 hours
[PATCH] vmware_guestdump: Version 7 support
by Alexey Makhalov
ESXi 9.0 updated the debug.guest format. A CPU architecture type field was
introduced, and several header fields not used by crash
were moved around. The format is now version 7.
Make the corresponding changes in the debug.guest parser and keep it
backward compatible with older versions.
Fix typos in comments and log messages as well.
Signed-off-by: Alexey Makhalov <alexey.makhalov(a)broadcom.com>
---
vmware_guestdump.c | 48 ++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 44 insertions(+), 4 deletions(-)
diff --git a/vmware_guestdump.c b/vmware_guestdump.c
index 78f37fb..1a6ef9b 100644
--- a/vmware_guestdump.c
+++ b/vmware_guestdump.c
@@ -30,6 +30,7 @@
* 2. Number of Virtual CPUs (4 bytes) } - struct guestdumpheader
* 3. Reserved gap
* 4. Main Memory information - struct mainmeminfo{,_old}
+ * 5. Reserved gap #2. Only in v7+
* (use get_vcpus_offset() to get total size of guestdumpheader)
* vcpus_offset: ---------\
* 1. struct vcpu_state1 \
@@ -111,6 +112,22 @@ struct vcpu_state2 {
uint8_t reserved3[65];
} __attribute__((packed));
+typedef enum {
+ CPU_ARCH_AARCH64,
+ CPU_ARCH_X86,
+} cpu_arch;
+
+/*
+ * Returns the size of reserved gap #2 in the header right after the Main Mem.
+ */
+static inline long
+get_gap2_size(uint32_t version)
+{
+ if (version == 7)
+ return 11;
+ return 0;
+}
+
/*
* Returns the size of the guest dump header.
*/
@@ -128,6 +145,9 @@ get_vcpus_offset(uint32_t version, int mem_holes)
return sizeof(struct guestdumpheader) + 14 + sizeof(struct mainmeminfo);
case 6: /* ESXi 8.0u2 */
return sizeof(struct guestdumpheader) + 15 + sizeof(struct mainmeminfo);
+ case 7: /* ESXi 9.0 */
+ return sizeof(struct guestdumpheader) + 8 + sizeof(struct mainmeminfo) +
+ get_gap2_size(version);
}
return 0;
@@ -155,10 +175,10 @@ get_vcpu_gapsize(uint32_t version)
*
* guestdump (debug.guest) is a simplified version of the *.vmss which does
* not contain a full VM state, but minimal guest state, such as a memory
- * layout and CPUs state, needed for debugger. is_vmware_guestdump()
+ * layout and CPUs state, needed for the debugger. is_vmware_guestdump()
* and vmware_guestdump_init() functions parse guestdump header and
* populate vmss data structure (from vmware_vmss.c). In result, all
- * handlers (except mempry_dump) from vmware_vmss.c can be reused.
+ * handlers (except memory_dump) from vmware_vmss.c can be reused.
*
* debug.guest does not have a dedicated header magic or file format signature
* To probe debug.guest we need to perform series of validations. In addition,
@@ -225,7 +245,8 @@ is_vmware_guestdump(char *filename)
/* vcpu_offset adjustment for mem_holes is required only for version 1. */
vcpus_offset = get_vcpus_offset(hdr.version, mmi.mem_holes);
} else {
- if (fseek(fp, vcpus_offset - sizeof(struct mainmeminfo), SEEK_SET) == -1) {
+ if (fseek(fp, vcpus_offset - sizeof(struct mainmeminfo) - get_gap2_size(hdr.version),
+ SEEK_SET) == -1) {
if (CRASHDEBUG(1))
error(INFO, LOGPRX"Failed to fseek '%s': [Error %d] %s\n",
filename, errno, strerror(errno));
@@ -240,6 +261,25 @@ is_vmware_guestdump(char *filename)
fclose(fp);
return FALSE;
}
+
+ /* Check CPU architecture field. Next 4 bytes after the Main Mem */
+ if (hdr.version >= 7) {
+ cpu_arch arch;
+ if (fread(&arch, sizeof(cpu_arch), 1, fp) != 1) {
+ if (CRASHDEBUG(1))
+ error(INFO, LOGPRX"Failed to read '%s' from file '%s': [Error %d] %s\n",
+ "CPU arch", filename, errno, strerror(errno));
+ fclose(fp);
+ return FALSE;
+ }
+ if (arch != CPU_ARCH_X86) {
+ if (CRASHDEBUG(1))
+ error(INFO,
+ LOGPRX"Invalid or unsupported CPU architecture: %d\n", arch);
+ fclose(fp);
+ return FALSE;
+ }
+ }
}
if (fseek(fp, 0L, SEEK_END) == -1) {
if (CRASHDEBUG(1))
@@ -300,7 +340,7 @@ vmware_guestdump_init(char *filename, FILE *ofp)
if (!machine_type("X86") && !machine_type("X86_64")) {
error(INFO,
- LOGPRX"Invalid or unsupported host architecture for .vmss file: %s\n",
+ LOGPRX"Invalid or unsupported host architecture for .guest file: %s\n",
MACHINE_TYPE);
result = FALSE;
goto exit;
--
2.43.5
2 weeks
[PATCH v2 0/5] gdb multi-stack unwinding support
by Tao Liu
This patchset is based on Alexey's work [1], and is the follow-up to the
previous "gdb stack unwinding support for crash utility" patchset.
Currently the gdb target analyzes only one task at a time, and it backtraces
only a single contiguous stack until the end of that stack. If stacks were chained
during exceptions or interrupts, gdb bt will show only the topmost one.
This patchset introduces multiple-stack support for gdb stack unwinding;
each stack is presented as a different thread from gdb's perspective. A
short usage summary is as follows:
'set <PID>' - to switch to a specific task
'gdb info threads' - to see list of in-kernel stacks of this task.
'gdb thread <ID>' - to switch to the stack.
'gdb bt' - to unwind it.
E.g, with the patchset:
crash> bt
PID: 17636 TASK: ffff88032e0742c0 CPU: 11 COMMAND: "kworker/11:4"
#0 [ffff88037fca6b58] machine_kexec at ffffffff8103cef2
#1 [ffff88037fca6ba8] crash_kexec at ffffffff810c9aa3
#2 [ffff88037fca6c70] panic at ffffffff815f0444
...
#9 [ffff88037fca6ec8] do_nmi at ffffffff815fd980
#10 [ffff88037fca6ef0] end_repeat_nmi at ffffffff815fcec1
[exception RIP: memcpy+13]
RIP: ffffffff812f5b1d RSP: ffff88034f2a9728 RFLAGS: 00010046
RAX: ffffc900139fe000 RBX: ffff880374b7a1b0 RCX: 0000000000000030
RBP: ffff88034f2a9778 R8: 000000007fffffff R9: 00000000ffffffff
...
ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018
--- <NMI exception stack> ---
#11 [ffff88034f2a9728] memcpy at ffffffff812f5b1d
#12 [ffff88034f2a9728] mga_dirty_update at ffffffffa024ad2b [mgag200]
#13 [ffff88034f2a9780] mga_imageblit at ffffffffa024ae3f [mgag200]
#14 [ffff88034f2a97a0] bit_putcs at ffffffff813424ef
...
crash> info threads
Id Target Id Frame
* 1 17636 kworker/11:4 (stack 0) crash_setup_regs (oldregs=0x0, newregs=0xffff88037fca6bb0)
2 17636 kworker/11:4 (stack 1) 0xffffffff812f5b1d in memcpy ()
crash> thread 2
crash> gdb bt
#0 0xffffffff812f5b1d in memcpy () at arch/x86/lib/memcpy_64.S:69
...
There are 2 stacks of the current task, and we can list/switch-to/unwind
each stack.
[1]: https://www.mail-archive.com/devel@lists.crash-utility.osci.io/msg01204.html
v1 -> v2: 1) Rebase this patchset onto gdb-16.2 [2].
2) Improved silent_call_bt() to catch FATAL errors.
[2]: https://www.mail-archive.com/devel@lists.crash-utility.osci.io/msg01354.html
Tao Liu (5):
Add multi-threads support in crash target
Call cmd_bt silently after "set pid"
x86_64: Add gdb multi-stack unwind support
arm64: Add gdb multi-stack unwind support
ppc64: Add gdb multi-stack unwind support
arm64.c | 85 +++++++++++++++++++++++++++++++--
crash_target.c | 49 +++++++++++++++++--
defs.h | 3 +-
gdb_interface.c | 6 +--
kernel.c | 43 +++++++++++++++++
ppc64.c | 70 +++++++++++++++++++++++----
task.c | 4 +-
x86_64.c | 123 +++++++++++++++++++++++++++++++++++++++++++++---
8 files changed, 354 insertions(+), 29 deletions(-)
--
2.47.0
3 weeks, 2 days
[ANNOUNCE] crash-9.0.0 is available
by lijiang
Hi,
Thank you all for your contributions to the crash-utility, crash-9.0.0 is
now available.
Download from:
https://crash-utility.github.io/
or
https://github.com/crash-utility/crash/releases
The GitHub master branch serves as a development branch that will
contain all patches that are queued for the next release:
$ git clone https://github.com/crash-utility/crash.git
Changelog:
64f6efda29c8 ("crash-8.0.6 -> crash-9.0.0")
42fbae46441c ("Fix a regression of "help -r" which fail to print regs")
3340224deb12 ("Fix "log -c" option on Linux 6.14 and later kernels")
b982ddc4d66d ("Fix bad relocations when module sh_addr is nonzero")
2cf1a93805a8 ("Fix module section load address when sh_addr != 0")
974868ca4e64 ("RISCV64: Add more system properties to the 'mach' command")
25828e83d5f8 ("symbols: redetermine the end of kernel range for
in_ksymbol_range")
87887dbef251 ("Add a PR closer")
39ee29b71513 ("Prevent double-free strbuf when nsyms is 0")
90e1921a76f5 ("extensions/Makefile: eliminate race condition")
efb15d58faa2 ("Doc: correct spelling under "help list"")
f30f97dd8466 ("Makefile: Pass CFLAGS when building extensions")
a312f58feb05 ("Add ci-build.yml")
1248bfa80602 ("Add cross compilation support")
3115791c6894 ("gdb:disable building gdbserver in crash-utility")
bc1fc6ac218c ("sparc64: fix build failure")
ca0b70ea3cbe (".gitignore: add gdb-16.2 directory")
dfb2bb55e530 ("Update to gdb 16.2")
080b4baf5d5e ("Fix the failing of cmd "runq -g" for v6.14-rc1 kernel")
2795136a5154 ("Fix the failing of cmd "files" for v6.14-rc1 kernel")
2724bb1d0260 ("Fix build failure on 32bit machine(i686)")
325a9d1b3b4c ("tools.c: do not use keywords 'nullptr' as a variable in code")
772fbb102291 ("Fix build failure in readline lib")
e72b0ab8ebad ("x86_64: Fix 'bt -S/-I' segfault issue")
0f39e33d3504 ("arm64: add pac mask to better support gdb stack unwind")
88453095a3dd ("Fix misleading CPU count in display_sys_stats()")
b39f8b558f9c ("Enhance "kmem -i[=shared]" to display(or not) shared pages")
a713368a3474 ("kmem: fix the determination of slab page due to invalid
page_type")
8eb5279fdd2e ("Fix "net -a" option on Linux 6.13 and later kernels")
7357d822124c ("Doc: add compilation requirements note in README")
69aef36ff4bc ("Fix incorrect 'bt -v' output suggesting overflow")
8a11aa07d7b0 ("x86_64: Mark #VC stack unavailable when
CONFIG_AMD_MEM_ENCRYPT is not set")
f07032991f3d ("arm64: add cpu context registers to better support gdb
stack unwind")
aa9f7248075c ("Fix for "help -r" segfault in case of ramdump")
e44a9a9d808c ("Fix infinite loop during module symbols initialization")
7c5c795b0d67 ("Mark start of 8.0.7 development phase with version 8.0.6++")
Full ChangeLog:
https://crash-utility.github.io/changelog/ChangeLog-9.0.0.txt
or
https://github.com/crash-utility/crash/compare/8.0.6...9.0.0
1 month
[PATCH] Fix a regression of "help -r" which fail to print regs
by Tao Liu
This patch fixes a regression introduced by commit aa9f724 ("Fix for "help -r"
segfault in case of ramdump"), which overlooked the case where nd->nt_prstatus
may contain the register notes. As a result, crash fails to print the registers
in that case.
Before:
crash> help -r
CPU 0:
help: registers not collected for cpu 0
CPU 1: [OFFLINE]
After:
crash> help -r
CPU 0:
RIP: ffffffff800c4d92 RSP: ffff810066cc7d68 RFLAGS: 00010246
RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000
RDX: 650001bc7e664240 RSI: 0000000000000000 RDI: 0000000000000000
RBP: 650001bc7e664240 R8: 0000000000000006 R9: ffff810069dd03d4
R10: ffff81007d942080 R11: ffffffff80154235 R12: 0000000000000000
R13: 0000000000000000 R14: 000000000001e5fe R15: 0000000000000001
CS: 0010 SS: 0018
CPU 1: [OFFLINE]
Signed-off-by: Tao Liu <ltao(a)redhat.com>
---
netdump.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/netdump.c b/netdump.c
index b67bdad..c7ff009 100644
--- a/netdump.c
+++ b/netdump.c
@@ -2768,9 +2768,10 @@ display_regs_from_elf_notes(int cpu, FILE *ofp)
}
}
- if (((cpu < 0 ) || (!nd->nt_prstatus_percpu[cpu]) ||
- (cpu - skipped_count) >= nd->num_prstatus_notes) &&
- !machine_type("MIPS")) {
+ if (((cpu < 0 ) ||
+ ((!nd->nt_prstatus_percpu[cpu]) && (!nd->nt_prstatus)) ||
+ (cpu - skipped_count) >= nd->num_prstatus_notes)
+ && !machine_type("MIPS")) {
error(INFO, "registers not collected for cpu %d\n", cpu);
return;
}
--
2.47.0
1 month
Re: [PATCH] Fix "log -c" option on Linux 6.14 and later kernels
by lijiang
Thank you for the fix, Kazu.
On Wed, Apr 23, 2025 at 1:29 PM <devel-request(a)lists.crash-utility.osci.io>
wrote:
> Date: Wed, 23 Apr 2025 05:27:58 +0000
> From: HAGIO KAZUHITO(萩尾 一仁) <k-hagio-ab(a)nec.com>
> Subject: [Crash-utility] [PATCH] Fix "log -c" option on Linux 6.14 and
> later kernels
> To: "devel(a)lists.crash-utility.osci.io"
> <devel(a)lists.crash-utility.osci.io>
> Message-ID: <1745386073-6196-1-git-send-email-k-hagio-ab(a)nec.com>
> Content-Type: text/plain; charset="iso-2022-jp"
>
> From: Kazuhito Hagio <k-hagio-ab(a)nec.com>
>
> Kernel commit 7863dcc72d0f ("pid: allow pid_max to be set per pid
> namespace") moved the pid_max variable into init_pid_ns. Without the
> patch, the "log -c" option fails with the following error:
>
> crash> log -c
> log: cannot resolve: "pid_max"
>
> While it is possible to track the pid_max value to init_pid_ns.pid_max,
> considering the option's availability, it might be better not to do so
> just for the sake of printing width. Furthermore, the current
> PID_MAX_LIMIT is 4194304, which does not exceed PID_CHARS_DEFAULT(8).
>
> Signed-off-by: Kazuhito Hagio <k-hagio-ab(a)nec.com>
> ---
> printk.c | 5 +++--
> 1 file changed, 3 insertions(+), 2 deletions(-)
>
>
For the patch: Ack.
Tao, Can we include the current patch in the new release this time? Maybe
tomorrow.
Thanks
Lianbo
diff --git a/printk.c b/printk.c
> index be842a759549..95db7e607e4c 100644
> --- a/printk.c
> +++ b/printk.c
> @@ -285,8 +285,9 @@ dump_lockless_record_log(int msg_flags)
> if (msg_flags & SHOW_LOG_CALLER) {
> unsigned int pidmax;
>
> - get_symbol_data("pid_max", sizeof(pidmax), &pidmax);
> - if (pidmax <= 99999)
> + if (!try_get_symbol_data("pid_max", sizeof(pidmax),
> &pidmax))
> + m.pid_max_chars = PID_CHARS_DEFAULT;
> + else if (pidmax <= 99999)
> m.pid_max_chars = 6;
> else if (pidmax <= 999999)
> m.pid_max_chars = 7;
> --
> 2.31.1
>
1 month
[PATCH] Fix "log -c" option on Linux 6.14 and later kernels
by HAGIO KAZUHITO(萩尾 一仁)
From: Kazuhito Hagio <k-hagio-ab(a)nec.com>
Kernel commit 7863dcc72d0f ("pid: allow pid_max to be set per pid
namespace") moved the pid_max variable into init_pid_ns. Without the
patch, the "log -c" option fails with the following error:
crash> log -c
log: cannot resolve: "pid_max"
While it is possible to track the pid_max value to init_pid_ns.pid_max,
considering the option's availability, it might be better not to do so
just for the sake of printing width. Furthermore, the current
PID_MAX_LIMIT is 4194304, which does not exceed PID_CHARS_DEFAULT(8).
Signed-off-by: Kazuhito Hagio <k-hagio-ab(a)nec.com>
---
printk.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/printk.c b/printk.c
index be842a759549..95db7e607e4c 100644
--- a/printk.c
+++ b/printk.c
@@ -285,8 +285,9 @@ dump_lockless_record_log(int msg_flags)
if (msg_flags & SHOW_LOG_CALLER) {
unsigned int pidmax;
- get_symbol_data("pid_max", sizeof(pidmax), &pidmax);
- if (pidmax <= 99999)
+ if (!try_get_symbol_data("pid_max", sizeof(pidmax), &pidmax))
+ m.pid_max_chars = PID_CHARS_DEFAULT;
+ else if (pidmax <= 99999)
m.pid_max_chars = 6;
else if (pidmax <= 999999)
m.pid_max_chars = 7;
--
2.31.1
1 month
[PATCH CRASH] Fix module section load address when sh_addr != 0
by Stephen Brennan
A user reported that crash was reporting the address for certain module
variables incorrectly. I was able to track it down specifically to
variables which were located in the .data section of a kernel module.
While the "sym" command gave the correct value, printing the address of
the variable or expressions based on it with "p" would give an incorrect
value. For example, the "ata_dummy_port_ops" variable is
included in the .data section of libata.ko when built as a module:
$ sudo grep '\bata_dummy_port_ops\b' /proc/kallsyms
ffffffffc0a71580 d ata_dummy_port_ops [libata]
$ sudo crash /usr/lib/debug/lib/modules/$(uname -r)/vmlinux /proc/kcore
crash> sym ata_dummy_port_ops
ffffffffc0a71580 (?) ata_dummy_port_ops [libata]
crash> mod -s libata
MODULE NAME TEXT_BASE SIZE OBJECT FILE
ffffffffc0a7b640 libata ffffffffc0a47000 520192
/usr/lib/debug/lib/modules/6.12.0-0.11.8.el9uek.x86_64/kernel/drivers/ata/libata.ko.debug
crash> sym ata_dummy_port_ops
ffffffffc0a71580 (B) ata_dummy_port_ops [libata]
crash> p/x &ata_dummy_port_ops
$1 = 0xffffffffc0a6fe80
The symbol value (from kallsyms) is correct, but its address provided by
GDB is incorrect. It turns out that the .data section has an sh_addr
which is non-zero. The result of this is that calculate_load_order_6_4()
incorrectly calculates the base address for the .data section. This
patch fixes the base address which is later provided to GDB via
add-symbol-file.
The impact here is interesting. Only variables within sections that have
a non-zero sh_addr are impacted. It turns out that this is relatively
common since Linux kernel commit 22d407b164ff7 ("lib: add allocation
tagging support for memory allocation profiling"), which was merged in
6.10. That commit added an entry to the scripts/module.lds.S linker
script, without specifying a base address of zero. I believe that is the
reason for the non-zero base addresses.
I was able to verify that, in addition to the Oracle Linux kernel where
we initially noticed the issue, kernel modules on Arch Linux and Fedora
also have non-zero .data sh_addr values. This is likely the case for
most non-clang kernels since 6.10, but those were the only two distros I
checked. While my reading of the module.lds.S seems to indicate that
kernels built with CONFIG_LTO_CLANG=y should also have non-zero .data,
.bss, and .rodata section addresses, I haven't been able to reproduce
this with clang LTO kernels. Regardless, crash should properly handle
non-zero sh_addr since it exists in the real world now.
The core of the issue is that the symbol value returned by BFD includes
the sh_addr of the section containing the symbol. For example, suppose
a symbol with address 0 is located within a section with virtual address
0xa00. Then, the resulting symbol value will be 0xa00, not 0.
calculate_load_order_6_4() computes the base address of each section by
using a kallsyms symbol known to be within that section, and then
subtracting the value of the symbol from the object file. This
implicitly assumes that the section sh_addr is zero, and thus the symbol
value is just an offset. To fix the computation, add in the section base
address, to account for cases where it is non-zero.
Signed-off-by: Stephen Brennan <stephen.s.brennan(a)oracle.com>
---
symbols.c | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/symbols.c b/symbols.c
index 5adbc30..e30fafe 100644
--- a/symbols.c
+++ b/symbols.c
@@ -12808,6 +12808,7 @@ calculate_load_order_6_4(struct load_module *lm, bfd *bfd, int dynamic,
asymbol *store;
asymbol *sym;
symbol_info syminfo;
+ bfd_vma secaddr;
char *secname;
int i, t;
@@ -12860,6 +12861,7 @@ calculate_load_order_6_4(struct load_module *lm, bfd *bfd, int dynamic,
}
if (strcmp(syminfo.name, s1->name) == 0) {
secname = (char *)bfd_section_name(sym->section);
+ secaddr = bfd_section_vma(sym->section);
break;
}
@@ -12890,14 +12892,14 @@ calculate_load_order_6_4(struct load_module *lm, bfd *bfd, int dynamic,
}
/* Update the offset information for the section */
- sec_start = s1->value - syminfo.value;
+ sec_start = s1->value - syminfo.value + secaddr;
/* keep the address instead of offset */
lm->mod_section_data[i].addr = sec_start;
lm->mod_section_data[i].flags |= SEC_FOUND;
if (CRASHDEBUG(2))
- fprintf(fp, "update sec offset sym %s @ %lx val %lx section %s\n",
- s1->name, s1->value, (ulong)syminfo.value, secname);
+ fprintf(fp, "update sec offset sym %s @ %lx val %lx section %s @ %lx\n",
+ s1->name, s1->value, (ulong)syminfo.value, secname, secaddr);
if (strcmp(secname, ".text") == 0)
lm->mod_text_start = sec_start;
--
2.43.5
1 month, 1 week
Re: [PATCH] RISCV64: Add more system properties to the 'mach' command
by lijiang
On Wed, Apr 2, 2025 at 10:24 AM <devel-request(a)lists.crash-utility.osci.io>
wrote:
> Date: Wed, 2 Apr 2025 15:22:41 +1300
> From: Tao Liu <ltao(a)redhat.com>
> Subject: [Crash-utility] Re: [PATCH] RISCV64: Add more system
> properties to the 'mach' command
> To: Austin Kim <austindh.kim(a)gmail.com>
> Cc: devel(a)lists.crash-utility.osci.io
> Message-ID:
> <
> CAO7dBbWhevdJ8bBBnLp265-PuNjwVgq_SMyP+2ojnO0X4jEUpg(a)mail.gmail.com>
> Content-Type: text/plain; charset="UTF-8"
>
> Hi Austin,
>
> On Tue, Apr 1, 2025 at 3:07 PM Austin Kim <austindh.kim(a)gmail.com> wrote:
> >
> > Currently, 'mach' command displays only basic system properties for
> > RISC-V-based vmcores. This commit enhances the mach command by adding
> > additional system details, including virtual memory addresses, IRQ
> stacks,
> > and overflow stacks.
> >
> > (before)
> > crash> mach
> > MACHINE TYPE: riscv64
> > MEMORY SIZE: 4 GB
> > CPUS: 4
> > PROCESSOR SPEED: (unknown)
> > HZ: 100
> > PAGE SIZE: 4096
> > KERNEL STACK SIZE: 16384
> >
> > (after)
> > crash> mach
> > MACHINE TYPE: riscv64
> > MEMORY SIZE: 4 GB
> > CPUS: 4
> > PROCESSOR SPEED: (unknown)
> > HZ: 100
> > PAGE SIZE: 4096
> > KERNEL VIRTUAL BASE: ffffffd800000000
> > KERNEL MODULES BASE: ffffffff01d08000
> > KERNEL VMALLOC BASE: ffffffc800000000
> > KERNEL VMEMMAP BASE: ffffffc700000000
> > KERNEL STACK SIZE: 16384
> > IRQ STACK SIZE: 16384
> > IRQ STACKS:
> > CPU 0: ffffffc800000000
> > CPU 1: ffffffc800008000
> > CPU 2: ffffffc800010000
> > CPU 3: ffffffc800018000
> > OVERFLOW STACK SIZE: 4096
> > OVERFLOW STACKS:
> > CPU 0: ffffffd8fc7433c0
> > CPU 1: ffffffd8fc75f3c0
> > CPU 2: ffffffd8fc77b3c0
> > CPU 3: ffffffd8fc7973c0
> >
> > Signed-off-by: Austin Kim <austindh.kim(a)gmail.com>
>
> The patch LGTM, so ack.
Applied:
https://github.com/crash-utility/crash/commit/974868ca4e64f2cbb70976538db...
Thanks
Lianbo
> Thanks,
> Tao Liu
>
1 month, 1 week
[PATCH] RISCV64: Add more system properties to the 'mach' command
by Austin Kim
Currently, the 'mach' command displays only basic system properties for
RISC-V-based vmcores. This commit enhances the 'mach' command by adding
additional system details, including virtual memory addresses, IRQ stacks,
and overflow stacks.
(before)
crash> mach
MACHINE TYPE: riscv64
MEMORY SIZE: 4 GB
CPUS: 4
PROCESSOR SPEED: (unknown)
HZ: 100
PAGE SIZE: 4096
KERNEL STACK SIZE: 16384
(after)
crash> mach
MACHINE TYPE: riscv64
MEMORY SIZE: 4 GB
CPUS: 4
PROCESSOR SPEED: (unknown)
HZ: 100
PAGE SIZE: 4096
KERNEL VIRTUAL BASE: ffffffd800000000
KERNEL MODULES BASE: ffffffff01d08000
KERNEL VMALLOC BASE: ffffffc800000000
KERNEL VMEMMAP BASE: ffffffc700000000
KERNEL STACK SIZE: 16384
IRQ STACK SIZE: 16384
IRQ STACKS:
CPU 0: ffffffc800000000
CPU 1: ffffffc800008000
CPU 2: ffffffc800010000
CPU 3: ffffffc800018000
OVERFLOW STACK SIZE: 4096
OVERFLOW STACKS:
CPU 0: ffffffd8fc7433c0
CPU 1: ffffffd8fc75f3c0
CPU 2: ffffffd8fc77b3c0
CPU 3: ffffffd8fc7973c0
Signed-off-by: Austin Kim <austindh.kim(a)gmail.com>
---
riscv64.c | 25 +++++++++++++++++++++++++
1 file changed, 25 insertions(+)
diff --git a/riscv64.c b/riscv64.c
index 98bf02a..d934b22 100644
--- a/riscv64.c
+++ b/riscv64.c
@@ -135,6 +135,7 @@ static void riscv64_get_struct_page_size(struct machine_specific *ms)
static void
riscv64_display_machine_stats(void)
{
+ int i, pad;
struct new_utsname *uts;
char buf[BUFSIZE];
ulong mhz;
@@ -151,7 +152,31 @@ riscv64_display_machine_stats(void)
fprintf(fp, "(unknown)\n");
fprintf(fp, " HZ: %d\n", machdep->hz);
fprintf(fp, " PAGE SIZE: %d\n", PAGESIZE());
+ fprintf(fp, "KERNEL VIRTUAL BASE: %lx\n", machdep->machspec->page_offset);
+ fprintf(fp, "KERNEL MODULES BASE: %lx\n", machdep->machspec->modules_vaddr);
+ fprintf(fp, "KERNEL VMALLOC BASE: %lx\n", machdep->machspec->vmalloc_start_addr);
+ fprintf(fp, "KERNEL VMEMMAP BASE: %lx\n", machdep->machspec->vmemmap_vaddr);
fprintf(fp, " KERNEL STACK SIZE: %ld\n", STACKSIZE());
+ if (machdep->machspec->irq_stack_size) {
+ fprintf(fp, " IRQ STACK SIZE: %ld\n",
+ machdep->machspec->irq_stack_size);
+ fprintf(fp, " IRQ STACKS:\n");
+ for (i = 0; i < kt->cpus; i++) {
+ pad = (i < 10) ? 3 : (i < 100) ? 2 : (i < 1000) ? 1 : 0;
+ fprintf(fp, "%s CPU %d: %lx\n", space(pad), i,
+ machdep->machspec->irq_stacks[i]);
+ }
+ }
+ if (machdep->machspec->overflow_stack_size) {
+ fprintf(fp, "OVERFLOW STACK SIZE: %ld\n",
+ machdep->machspec->overflow_stack_size);
+ fprintf(fp, " OVERFLOW STACKS:\n");
+ for (i = 0; i < kt->cpus; i++) {
+ pad = (i < 10) ? 3 : (i < 100) ? 2 : (i < 1000) ? 1 : 0;
+ fprintf(fp, "%s CPU %d: %lx\n", space(pad), i,
+ machdep->machspec->overflow_stacks[i]);
+ }
+ }
}
static void
--
2.34.1
1 month, 1 week