[PATCH] vmcoreinfo: read vmcoreinfo using 'vmcoreinfo_data' when unavailable in elf note
by Aditya Gupta
Some vmcores, such as those created using virsh-dump, don't have a
vmcoreinfo ELF note.
On architectures such as PowerPC64, vmcoreinfo is mandatory to fetch the
first_vmalloc_address, for vmcores of upstream linux, since crash-utility commit:
commit 5b24e363a898 ("get vmalloc start address from vmcoreinfo")
If the vmcoreinfo that crash tries to read for a diskdump/netdump is
empty or missing, try reading from the 'vmcoreinfo_data' symbol instead.
The approach of reading 'vmcoreinfo_data' is already used for a live
kernel, and it can be reused when the vmcoreinfo note is missing, as the
'vmcoreinfo_data' symbol is available with a vmcore too.
Note, though, that until the GDB interface is initialised, the
vmcoreinfo_data symbol is not read, so until then the behaviour is the
same as it previously was with no vmcoreinfo.
Hence rename 'vmcoreinfo_read_string' in kernel.c to
'vmcoreinfo_read_from_memory', and use it in netdump.c and diskdump.c
too.
Reported-by: Anushree Mathur <anushree.mathur(a)linux.ibm.com>
Reported-by: Kowshik Jois <kowsjois(a)linux.ibm.com>
Tested-by: Anushree Mathur <anushree.mathur(a)linux.ibm.com>
Tested-by: Kowshik Jois <kowsjois(a)linux.ibm.com>
Signed-off-by: Aditya Gupta <adityag(a)linux.ibm.com>
---
defs.h | 1 +
diskdump.c | 18 ++++++++++++++++++
kernel.c | 17 ++++++++++++-----
netdump.c | 19 +++++++++++++++++++
4 files changed, 50 insertions(+), 5 deletions(-)
diff --git a/defs.h b/defs.h
index 2fdb4db56a05..fbd09e19103f 100644
--- a/defs.h
+++ b/defs.h
@@ -6213,6 +6213,7 @@ void dump_kernel_table(int);
void dump_bt_info(struct bt_info *, char *where);
void dump_log(int);
void parse_kernel_version(char *);
+char *vmcoreinfo_read_from_memory(const char *);
#define LOG_LEVEL(v) ((v) & 0x07)
#define SHOW_LOG_LEVEL (0x1)
diff --git a/diskdump.c b/diskdump.c
index ce3cbb7b12dd..3be56248c7a9 100644
--- a/diskdump.c
+++ b/diskdump.c
@@ -1041,6 +1041,13 @@ pfn_to_pos(ulong pfn)
return desc_pos;
}
+/**
+ * Check if vmcoreinfo in vmcore is missing/empty
+ */
+static bool is_vmcoreinfo_empty(void)
+{
+ return (dd->sub_header_kdump->size_vmcoreinfo == 0);
+}
/*
* Determine whether a file is a diskdump creation, and if TRUE,
@@ -1088,6 +1095,17 @@ is_diskdump(char *file)
pc->read_vmcoreinfo = vmcoreinfo_read_string;
+ /*
+ * vmcoreinfo can be empty in case of dump collected via virsh-dump
+ *
+ * check if vmcoreinfo is not available in vmcore, and try to read
+ * the vmcoreinfo from memory, using "vmcoreinfo_data" symbol
+ */
+ if (is_vmcoreinfo_empty()) {
+ error(WARNING, "vmcoreinfo is empty, will read from symbols\n");
+ pc->read_vmcoreinfo = vmcoreinfo_read_from_memory;
+ }
+
if ((pc->flags2 & GET_LOG) && KDUMP_CMPRS_VALID()) {
pc->dfd = dd->dfd;
pc->readmem = read_diskdump;
diff --git a/kernel.c b/kernel.c
index b8d3b7999974..b296487ea036 100644
--- a/kernel.c
+++ b/kernel.c
@@ -99,7 +99,6 @@ static ulong dump_audit_skb_queue(ulong);
static ulong __dump_audit(char *);
static void dump_audit(void);
static void dump_printk_safe_seq_buf(int);
-static char *vmcoreinfo_read_string(const char *);
static void check_vmcoreinfo(void);
static int is_pvops_xen(void);
static int get_linux_banner_from_vmlinux(char *, size_t);
@@ -11892,8 +11891,8 @@ dump_printk_safe_seq_buf(int msg_flags)
* Returns a string (that has to be freed by the caller) that contains the
* value for key or NULL if the key has not been found.
*/
-static char *
-vmcoreinfo_read_string(const char *key)
+char *
+vmcoreinfo_read_from_memory(const char *key)
{
char *buf, *value_string, *p1, *p2;
size_t value_length;
@@ -11903,6 +11902,14 @@ vmcoreinfo_read_string(const char *key)
buf = value_string = NULL;
+ if (!(pc->flags & GDB_INIT)) {
+ /*
+ * GDB interface hasn't been initialised yet, so can't
+ * access vmcoreinfo_data
+ */
+ return NULL;
+ }
+
switch (get_symbol_type("vmcoreinfo_data", NULL, NULL))
{
case TYPE_CODE_PTR:
@@ -11958,10 +11965,10 @@ check_vmcoreinfo(void)
switch (get_symbol_type("vmcoreinfo_data", NULL, NULL))
{
case TYPE_CODE_PTR:
- pc->read_vmcoreinfo = vmcoreinfo_read_string;
+ pc->read_vmcoreinfo = vmcoreinfo_read_from_memory;
break;
case TYPE_CODE_ARRAY:
- pc->read_vmcoreinfo = vmcoreinfo_read_string;
+ pc->read_vmcoreinfo = vmcoreinfo_read_from_memory;
break;
}
}
diff --git a/netdump.c b/netdump.c
index c7ff009e7f90..c9f0e4eaa580 100644
--- a/netdump.c
+++ b/netdump.c
@@ -111,6 +111,14 @@ map_cpus_to_prstatus(void)
FREEBUF(nt_ptr);
}
+/**
+ * Check if vmcoreinfo in vmcore is missing/empty
+ */
+static bool is_vmcoreinfo_empty(void)
+{
+ return (nd->size_vmcoreinfo == 0);
+}
+
/*
* Determine whether a file is a netdump/diskdump/kdump creation,
* and if TRUE, initialize the vmcore_data structure.
@@ -464,6 +472,17 @@ is_netdump(char *file, ulong source_query)
pc->read_vmcoreinfo = vmcoreinfo_read_string;
+ /*
+ * vmcoreinfo can be empty in case of dump collected via virsh-dump
+ *
+ * check if vmcoreinfo is not available in vmcore, and try to read
+ * the vmcoreinfo from memory, using "vmcoreinfo_data" symbol
+ */
+ if (is_vmcoreinfo_empty()) {
+ error(WARNING, "vmcoreinfo is empty, will read from symbols\n");
+ pc->read_vmcoreinfo = vmcoreinfo_read_from_memory;
+ }
+
if ((source_query == KDUMP_LOCAL) &&
(pc->flags2 & GET_OSRELEASE))
kdump_get_osrelease();
--
2.49.0
8 hours, 59 minutes
[PATCH] Update requirements for building on Fedora
by Charles Haithcock
Attempting to build on Fedora fails with the following error:
$ make
TARGET: RISCV64
CRASH: 9.0.0++
GDB: 16.2
Saving 'gdb-16.2.tar.gz'
[...]
checking for the correct version of gmp.h... no
configure: error: Building GDB requires GMP 4.2+, and MPFR 3.1.0+.
Try the --with-gmp and/or --with-mpfr options to specify
their locations. If you obtained GMP and/or MPFR from a vendor
distribution package, make sure that you have installed both the libraries
and the header files. They may be located in separate packages.
make[2]: *** No targets specified and no makefile found. Stop.
crash build failed
make[1]: *** [Makefile:316: gdb_merge] Error 1
make: *** [Makefile:307: all] Error 2
Installing gmp-devel and mpfr-devel fixed this, so this patch updates the
requirements for building on Fedora.
---
README | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README b/README
index 2e34fbb..f9824c7 100644
--- a/README
+++ b/README
@@ -73,7 +73,7 @@
that is created in the top-level kernel build directory must be saved.
o Requirements for building:
- Fedora: make gcc gcc-c++ ncurses-devel zlib-devel lzo-devel snappy-devel bison wget patch texinfo libzstd-devel
+ Fedora: make gcc gcc-c++ ncurses-devel zlib-devel lzo-devel snappy-devel bison wget patch texinfo libzstd-devel gmp-devel mpfr-devel
Ubuntu/Debian: make gcc g++ libncurses-dev zlib1g-dev liblzo2-dev libsnappy-dev bison wget patch texinfo libzstd-dev
Arch Linux: make gcc ncurses zlib lzo snappy bison wget patch texinfo zstd
openSUSE: make gcc gcc-c++ ncurses-devel zlib-devel lzo-devel snappy-devel bison wget patch texinfo libzstd-devel
--
2.50.0
20 hours, 33 minutes
[PATCH] Disable DT_DEBUG lookup by GDB inside the vmcore
by Shivang Upadhyay
When running crash with GDB 16.2, the following warnings are printed:
crash>
crash: page excluded: kernel virtual address: c0000000022d6098 type: "gdb_readmem_callback"
crash: page excluded: kernel virtual address: c0000000022d6098 type: "gdb_readmem_callback"
This occurs because the elf_locate_base function in GDB 16.2 attempts
to read the address of the dynamic linker runtime structure, which is
present in the .dynamic section of the executable. However, this section
may be excluded from the dump by makedumpfile.
Commit e906eaca2b1a ("Fix the issue of "page excluded" messages flooding")
attempted to fix this by suppressing these warnings for regular users, but
the warnings still appear when crash is started in debug mode.
To fix this, remove the elf_locate_base call in GDB that tries to read the
.dynamic section, as this information is not useful for debugging kernel
images in either dump or live kernel scenarios.
Cc: Sourabh Jain <sourabhjain(a)linux.ibm.com>
Cc: Lianbo Jiang <lijiang(a)redhat.com>
Signed-off-by: Shivang Upadhyay <shivangu(a)linux.ibm.com>
---
gdb-16.2.patch | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
diff --git a/gdb-16.2.patch b/gdb-16.2.patch
index 151e4e2..7b79cdf 100644
--- a/gdb-16.2.patch
+++ b/gdb-16.2.patch
@@ -1952,3 +1952,19 @@ exit 0
}
/* Remember the bfd indexes for the .text, .data, .bss and
+--- gdb-16.2/gdb/solib-svr4.c.orig
++++ gdb-16.2/gdb/solib-svr4.c
+@@ -742,11 +742,13 @@ elf_locate_base (void)
+ return extract_typed_address (pbuf, ptr_type);
+ }
+
++#ifndef CRASH_MERGE
+ /* Find DT_DEBUG. */
+ if (gdb_bfd_scan_elf_dyntag (DT_DEBUG, current_program_space->exec_bfd (),
+ &dyn_ptr, NULL)
+ || scan_dyntag_auxv (DT_DEBUG, &dyn_ptr, NULL))
+ return dyn_ptr;
++#endif
+
+ /* This may be a static executable. Look for the symbol
+ conventionally named _r_debug, as a last resort. */
--
2.49.0
3 days, 14 hours
[Crash-utility][PATCH] Fix crash initialization failure on LoongArch with recent GDB versions
by Ming Wang
The crash tool failed to initialize on LoongArch64 when using
GDB 16.2 (and likely other recent GDB versions that have enhanced
LoongArch support) due to the error:
"fatal error: buffer size is not enough to fit register value".
This occurs in supply_registers() because GDB now correctly
reports the size of LoongArch LASX (256-bit) vector registers
(xr0-xr31) as 32 bytes. The `regval` buffer in `crash_target.c`
was previously fixed at 16 bytes.
This patch increases the `regval` buffer size to 32 bytes to
accommodate the largest LoongArch registers reported by GDB.
This allows crash to initialize successfully.
Signed-off-by: Ming Wang <wangming01(a)loongson.cn>
---
crash_target.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/crash_target.c b/crash_target.c
index 5966b7b..d93d58c 100644
--- a/crash_target.c
+++ b/crash_target.c
@@ -71,7 +71,7 @@ public:
static void supply_registers(struct regcache *regcache, int regno)
{
- gdb_byte regval[16];
+ gdb_byte regval[32];
struct gdbarch *arch = regcache->arch ();
const char *regname = gdbarch_register_name(arch, regno);
int regsize = register_size(arch, regno);
--
2.41.3
3 days, 16 hours
Re: [PATCH] Fix a regression for eppic extension on gdb-16.2
by lijiang
Hi, Tao
On Wed, Jul 9, 2025 at 1:42 PM <devel-request(a)lists.crash-utility.osci.io>
wrote:
> Date: Tue, 8 Jul 2025 13:26:38 +1200
> From: Tao Liu <ltao(a)redhat.com>
> Subject: [Crash-utility] [PATCH] Fix a regression for eppic extension
> on gdb-16.2
> To: devel(a)lists.crash-utility.osci.io
> Cc: Tao Liu <ltao(a)redhat.com>
> Message-ID: <20250708012638.97698-1-ltao(a)redhat.com>
> Content-Type: text/plain; charset="US-ASCII"; x-default=true
>
> There is a regression found when testing eppic extension on gdb-16.2
> crash:
>
> crash> cgroup
> /root/.eppic/cgroup.c : line 99 : Error: undefined variable
> 'cgroup_roots'
>
> The root cause is when doing gdb upgrading, the replacement of
> gdb_get_datatype() is incorrect:
>
> The original gdb-10.2 version:
>
> long value = SYMBOL_VALUE(expr->elts[2].symbol);
>
> The incorrect gdb-16.2 replacement:
>
> long value = value_as_long(expr->evaluate());
>
> According to gdb/tracepoint.c, the correct gdb-16.2 replacement should be:
>
> symbol *sym;
> expr::var_value_operation *vvop
> = (gdb::checked_static_cast<expr::var_value_operation *>
> (exp->op.get ()));
> sym = vvop->get_symbol ();
> long value = sym->value_longest ();
>
> Otherwise, the value_as_long() will throw an exception when trying to
> convert a struct into long, such as "cgroup_roots". The reason why this
> issue only observed on crash extensions, is the faulty code block
> triggered with "req->tcb", which is a callback for gdb_interface(), and
> the callback is used by eppic extension, but the normal crash internal
> calls
> hardly use it.
>
> After:
> crash> cgroup
> 0:/user.slice/user-1000.slice/session-2.scope
>
> Signed-off-by: Tao Liu <ltao(a)redhat.com>
> ---
> gdb-16.2.patch | 29 +++++++++++++++++++++++++++++
> 1 file changed, 29 insertions(+)
>
> diff --git a/gdb-16.2.patch b/gdb-16.2.patch
> index 151e4e2..eb620f7 100644
> --- a/gdb-16.2.patch
> +++ b/gdb-16.2.patch
>
Can you help to add the gdb-16.2/gdb/symtab.c to gdb-16.2.patch, and it
looks like this:
# to all subsequent patch applications.
tar xvzmf gdb-16.2.tar.gz \
- gdb-16.2/gdb/symfile.c
+ gdb-16.2/gdb/symfile.c \
+ gdb-16.2/gdb/symtab.c
exit 0
In addition, also please add a prefix "gdb: " to patch title, E.g:
gdb: Fix a regression for eppic extension on gdb-16.2
Other changes are fine to me. So: Ack(with the above change)
Thanks
Lianbo
> @@ -1952,3 +1952,32 @@ exit 0
> }
>
> /* Remember the bfd indexes for the .text, .data, .bss and
> +--- gdb-16.2/gdb/symtab.c.orig
> ++++ gdb-16.2/gdb/symtab.c
> +@@ -7690,7 +7690,11 @@
> + console("expr->first_opcode(): OP_VAR_VALUE\n");
> + type = expr->evaluate_type()->type();
> + if (req->tcb) {
> +- long value = value_as_long(expr->evaluate());
> ++ expr::var_value_operation *vvop
> ++ =
> (gdb::checked_static_cast<expr::var_value_operation *>
> ++ (expr->op.get ()));
> ++ sym = vvop->get_symbol ();
> ++ long value = sym->value_longest ();
> + /* callback with symbol value */
> + req->typecode = TYPE_CODE(type);
> + req->tcb(EOP_VALUE, req, &value, 0, 0, 0);
> +@@ -7701,8 +7705,12 @@
> + req->length = type->length();
> + }
> + if (TYPE_CODE(type) == TYPE_CODE_ENUM) {
> ++ expr::var_value_operation *vvop
> ++ =
> (gdb::checked_static_cast<expr::var_value_operation *>
> ++ (expr->op.get ()));
> ++ sym = vvop->get_symbol ();
> + req->typecode = TYPE_CODE(type);
> +- req->value =
> value_as_long(expr->evaluate());
> ++ req->value = sym->value_longest ();
> + req->tagname = (char
> *)TYPE_TAG_NAME(type);
> + if (!req->tagname) {
> + val = expr->evaluate_type();
> --
> 2.47.0
>
1 week, 3 days
[PATCH] x86_64: filter unwanted warning message for "bt -T" cmd
by Tao Liu
After the patch "x86_64: Add gdb multi-stack unwind support" was applied, a
warning message is observed for the "bt -T" cmd:
crash> bt -T
bt: seek error: kernel virtual address: fffffffffffffffb type: "gdb_readmem_callback"
[ffffbaebc60d6fa8] srso_return_thunk at ffffffff82246fa5
...
The root cause is, "bt -T" will set BT_TEXT_SYMBOLS_ALL for bt->flags,
and eip is set to be 0 in kernel.c:back_trace(). Later in
x86_64_low_budget_back_trace_cmd(), eip - 5, or 0xfffffffffffffffb is
used for address disassembly by gdb "x/1i 0x%lx". This address is invalid so
the warning message is output.
In fact, multi-stack unwind isn't designed for the "bt -T" and eip = 0 case.
To avoid the warning message, let's simply bypass the "bt -T" case for
x86_64. Other archs (arm64/ppc64) aren't affected by the issue because
the gdb "x/1i 0x%lx" command is not used on those archs.
After applying the patch:
crash> bt -T
[ffffbaebc60d6fa8] srso_return_thunk at ffffffff82246fa5
...
Signed-off-by: Tao Liu <ltao(a)redhat.com>
---
x86_64.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/x86_64.c b/x86_64.c
index cfefe3f..d7da536 100644
--- a/x86_64.c
+++ b/x86_64.c
@@ -3636,7 +3636,8 @@ x86_64_low_budget_back_trace_cmd(struct bt_info *bt_in)
level++;
}
- if (is_task_active(bt->task) && bt->flags & BT_DUMPFILE_SEARCH) {
+ if (is_task_active(bt->task) && bt->flags & BT_DUMPFILE_SEARCH &&
+ !(bt->flags & BT_TEXT_SYMBOLS_ALL)) {
if (!extra_stacks_regs[extra_stacks_idx]) {
extra_stacks_regs[extra_stacks_idx] =
(struct user_regs_bitmap_struct *)
--
2.47.0
1 week, 5 days
[PATCH] Fix a regression for eppic extension on gdb-16.2
by Tao Liu
There is a regression found when testing eppic extension on gdb-16.2
crash:
crash> cgroup
/root/.eppic/cgroup.c : line 99 : Error: undefined variable 'cgroup_roots'
The root cause is that, during the gdb upgrade, the replacement of
gdb_get_datatype() was done incorrectly:
The original gdb-10.2 version:
long value = SYMBOL_VALUE(expr->elts[2].symbol);
The incorrect gdb-16.2 replacement:
long value = value_as_long(expr->evaluate());
According to gdb/tracepoint.c, the correct gdb-16.2 replacement should be:
symbol *sym;
expr::var_value_operation *vvop
= (gdb::checked_static_cast<expr::var_value_operation *>
(exp->op.get ()));
sym = vvop->get_symbol ();
long value = sym->value_longest ();
Otherwise, value_as_long() will throw an exception when trying to
convert a struct, such as "cgroup_roots", into a long. The reason why this
issue is only observed with crash extensions is that the faulty code block
is triggered only when "req->tcb" is set; it is a callback for
gdb_interface() that is used by the eppic extension, whereas normal crash
internal calls hardly ever use it.
After:
crash> cgroup
0:/user.slice/user-1000.slice/session-2.scope
Signed-off-by: Tao Liu <ltao(a)redhat.com>
---
gdb-16.2.patch | 29 +++++++++++++++++++++++++++++
1 file changed, 29 insertions(+)
diff --git a/gdb-16.2.patch b/gdb-16.2.patch
index 151e4e2..eb620f7 100644
--- a/gdb-16.2.patch
+++ b/gdb-16.2.patch
@@ -1952,3 +1952,32 @@ exit 0
}
/* Remember the bfd indexes for the .text, .data, .bss and
+--- gdb-16.2/gdb/symtab.c.orig
++++ gdb-16.2/gdb/symtab.c
+@@ -7690,7 +7690,11 @@
+ console("expr->first_opcode(): OP_VAR_VALUE\n");
+ type = expr->evaluate_type()->type();
+ if (req->tcb) {
+- long value = value_as_long(expr->evaluate());
++ expr::var_value_operation *vvop
++ = (gdb::checked_static_cast<expr::var_value_operation *>
++ (expr->op.get ()));
++ sym = vvop->get_symbol ();
++ long value = sym->value_longest ();
+ /* callback with symbol value */
+ req->typecode = TYPE_CODE(type);
+ req->tcb(EOP_VALUE, req, &value, 0, 0, 0);
+@@ -7701,8 +7705,12 @@
+ req->length = type->length();
+ }
+ if (TYPE_CODE(type) == TYPE_CODE_ENUM) {
++ expr::var_value_operation *vvop
++ = (gdb::checked_static_cast<expr::var_value_operation *>
++ (expr->op.get ()));
++ sym = vvop->get_symbol ();
+ req->typecode = TYPE_CODE(type);
+- req->value = value_as_long(expr->evaluate());
++ req->value = sym->value_longest ();
+ req->tagname = (char *)TYPE_TAG_NAME(type);
+ if (!req->tagname) {
+ val = expr->evaluate_type();
--
2.47.0
1 week, 6 days