Re: [PATCH v4 10/16] Parse stack by inactive_stack_frame priorily if the struct is valid
by lijiang
On Fri, May 31, 2024 at 5:30 PM <devel-request(a)lists.crash-utility.osci.io>
wrote:
> Date: Fri, 31 May 2024 17:19:33 +0800
> From: Tao Liu <ltao(a)redhat.com>
> Subject: [Crash-utility] [PATCH v4 10/16] Parse stack by
> inactive_stack_frame priorily if the struct is valid
> To: devel(a)lists.crash-utility.osci.io
> Cc: Mahesh J Salgaonkar <mahesh(a)linux.ibm.com>, "Naveen N . Rao"
> <naveen.n.rao(a)linux.vnet.ibm.com>, Lianbo Jiang <
> lijiang(a)redhat.com>,
> Alexey Makhalov <alexey.makhalov(a)broadcom.com>
> Message-ID: <20240531091939.97828-11-ltao(a)redhat.com>
> Content-Type: text/plain; charset=UTF-8
>
> Cc: Sourabh Jain <sourabhjain(a)linux.ibm.com>
> Cc: Hari Bathini <hbathini(a)linux.ibm.com>
> Cc: Mahesh J Salgaonkar <mahesh(a)linux.ibm.com>
> Cc: Naveen N. Rao <naveen.n.rao(a)linux.vnet.ibm.com>
> Cc: Lianbo Jiang <lijiang(a)redhat.com>
> Cc: HAGIO KAZUHITO(萩尾 一仁) <k-hagio-ab(a)nec.com>
> Cc: Tao Liu <ltao(a)redhat.com>
> Cc: Alexey Makhalov <alexey.makhalov(a)broadcom.com>
> Signed-off-by: Tao Liu <ltao(a)redhat.com>
> ---
> x86_64.c | 22 +++++++++++-----------
> 1 file changed, 11 insertions(+), 11 deletions(-)
>
> diff --git a/x86_64.c b/x86_64.c
> index 4ba0b40..54c69fd 100644
> --- a/x86_64.c
> +++ b/x86_64.c
> @@ -6519,6 +6519,17 @@ x86_64_ORC_init(void)
> };
> struct ORC_data *orc;
>
> + MEMBER_OFFSET_INIT(inactive_task_frame_bp, "inactive_task_frame",
> "bp");
> + MEMBER_OFFSET_INIT(inactive_task_frame_ret_addr,
> "inactive_task_frame", "ret_addr");
> + MEMBER_OFFSET_INIT(inactive_task_frame_r15, "inactive_task_frame",
> "r15");
> + MEMBER_OFFSET_INIT(inactive_task_frame_r14, "inactive_task_frame",
> "r14");
> + MEMBER_OFFSET_INIT(inactive_task_frame_r13, "inactive_task_frame",
> "r13");
> + MEMBER_OFFSET_INIT(inactive_task_frame_r12, "inactive_task_frame",
> "r12");
> + MEMBER_OFFSET_INIT(inactive_task_frame_flags,
> "inactive_task_frame", "flags");
> + MEMBER_OFFSET_INIT(inactive_task_frame_si, "inactive_task_frame",
> "si");
> + MEMBER_OFFSET_INIT(inactive_task_frame_di, "inactive_task_frame",
> "di");
> + MEMBER_OFFSET_INIT(inactive_task_frame_bx, "inactive_task_frame",
> "bx");
> +
> if (machdep->flags & FRAMEPOINTER)
> return;
>
> @@ -6576,17 +6587,6 @@ x86_64_ORC_init(void)
> orc->__stop_orc_unwind = symbol_value("__stop_orc_unwind");
> orc->orc_lookup = symbol_value("orc_lookup");
>
> - MEMBER_OFFSET_INIT(inactive_task_frame_bp, "inactive_task_frame",
> "bp");
> - MEMBER_OFFSET_INIT(inactive_task_frame_ret_addr,
> "inactive_task_frame", "ret_addr");
> - MEMBER_OFFSET_INIT(inactive_task_frame_r15, "inactive_task_frame",
> "r15");
> - MEMBER_OFFSET_INIT(inactive_task_frame_r14, "inactive_task_frame",
> "r14");
> - MEMBER_OFFSET_INIT(inactive_task_frame_r13, "inactive_task_frame",
> "r13");
> - MEMBER_OFFSET_INIT(inactive_task_frame_r12, "inactive_task_frame",
> "r12");
> - MEMBER_OFFSET_INIT(inactive_task_frame_flags,
> "inactive_task_frame", "flags");
> - MEMBER_OFFSET_INIT(inactive_task_frame_si, "inactive_task_frame",
> "si");
> - MEMBER_OFFSET_INIT(inactive_task_frame_di, "inactive_task_frame",
> "di");
> - MEMBER_OFFSET_INIT(inactive_task_frame_bx, "inactive_task_frame",
> "bx");
> -
> orc->has_signal = MEMBER_EXISTS("orc_entry", "signal"); /* added
> at 6.3 */
> orc->has_end = MEMBER_EXISTS("orc_entry", "end"); /* removed
> at 6.4 */
>
I would suggest folding the current patch into [PATCH v4 09/16].
Thanks
Lianbo
>
> --
> 2.40.1
>
4 months
Re: [PATCH v4 07/16] Stop stack unwinding at non-kernel address
by lijiang
On Fri, May 31, 2024 at 5:30 PM <devel-request(a)lists.crash-utility.osci.io>
wrote:
> Date: Fri, 31 May 2024 17:19:30 +0800
> From: Tao Liu <ltao(a)redhat.com>
> Subject: [Crash-utility] [PATCH v4 07/16] Stop stack unwinding at
> non-kernel address
> To: devel(a)lists.crash-utility.osci.io
> Cc: Mahesh J Salgaonkar <mahesh(a)linux.ibm.com>, "Naveen N . Rao"
> <naveen.n.rao(a)linux.vnet.ibm.com>, Lianbo Jiang <
> lijiang(a)redhat.com>,
> Alexey Makhalov <alexey.makhalov(a)broadcom.com>
> Message-ID: <20240531091939.97828-8-ltao(a)redhat.com>
> Content-Type: text/plain; charset=UTF-8
>
> The stack unwinding is for kernel addresses only. If non-kernel address
> encountered, it is usually a user space address, or non-address value
> like a function call parameter. So stopping stack unwinding at non-kernel
> address will decrease the invalid unwind results.
>
> Before:
> crash> gdb bt
> #0 0xffffffff816a8f65 in context_switch ...
> #1 __schedule () ...
> #2 0xffffffff816a94e9 in schedule ...
> #3 0xffffffff816a86fd in schedule_hrtimeout_range_clock ...
> #4 0xffffffff816a8733 in schedule_hrtimeout_range ...
> #5 0xffffffff8124bb7e in ep_poll ...
> #6 0xffffffff8124d00d in SYSC_epoll_wait ...
> #7 SyS_epoll_wait ...
> #8 <signal handler called>
> #9 0x00007f0449407923 in ?? ()
> #10 0xffff880100000001 in ?? ()
> #11 0xffff880169b3c010 in ?? ()
> #12 0x0000000000000040 in irq_stack_union ()
> #13 0xffff880169b3c058 in ?? ()
> #14 0xffff880169b3c048 in ?? ()
> #15 0xffff880169b3c050 in ?? ()
> #16 0x0000000000000000 in ?? ()
>
> After:
> crash> gdb bt
> #0 0xffffffff816a8f65 in context_switch ...
> #1 __schedule () ...
> #2 0xffffffff816a94e9 in schedule () ...
> #3 0xffffffff816a86fd in schedule_hrtimeout_range_clock ...
> #4 0xffffffff816a8733 in schedule_hrtimeout_range ...
> #5 0xffffffff8124bb7e in ep_poll ...
> #6 0xffffffff8124d00d in SYSC_epoll_wait ...
> #7 SyS_epoll_wait ...
> #8 <signal handler called>
> #9 0x00007f0449407923 in ?? ()
>
>
It seems that there are still some non-kernel addresses that do not get
filtered. Can you help double check?
For example:
crash> gdb bt
#0 crash_setup_regs (newregs=0xffffb5bb4f197938, oldregs=0x0) at
./arch/x86/include/asm/kexec.h:114
#1 0xffffffff8e61e32e in __crash_kexec (regs=regs@entry=0x0) at
kernel/crash_core.c:122
#2 0xffffffff8e51a64d in panic (fmt=fmt@entry=0xffffffff8fa51609 "sysrq
triggered crash\n") at kernel/panic.c:366
#3 0xffffffff8ec21f86 in sysrq_handle_crash (key=<optimized out>) at
drivers/tty/sysrq.c:154
#4 0xffffffff8ec22550 in __handle_sysrq (key=<optimized out>,
check_mask=check_mask@entry=false) at drivers/tty/sysrq.c:612
#5 0xffffffff8ec22bf5 in write_sysrq_trigger (file=<optimized out>,
buf=<optimized out>, count=2, ppos=<optimized out>) at
drivers/tty/sysrq.c:1183
#6 0xffffffff8e935ae5 in pde_write (ppos=<optimized out>, count=<optimized
out>, buf=<optimized out>, file=0xffffb5bb4f197938, pde=0xffff98338b78e0c0)
at fs/proc/inode.c:334
#7 proc_reg_write (file=0xffffb5bb4f197938, buf=0x0, count=1, ppos=0x0) at
fs/proc/inode.c:346
#8 0xffffffff8e88d382 in vfs_write (file=file@entry=0xffff98338b789200,
buf=buf@entry=0x5614d58a22c0 <error: Cannot access memory at address
0x5614d58a22c0>, count=count@entry=2, pos=pos@entry=0xffffb5bb4f197b78) at
fs/read_write.c:588
#9 0xffffffff8e88d9ff in ksys_write (fd=<optimized out>,
buf=0x5614d58a22c0 <error: Cannot access memory at address 0x5614d58a22c0>,
count=2) at fs/read_write.c:643
#10 0xffffffff8f124429 in do_syscall_x64 (nr=1, regs=0xffffb5bb4f197f58) at
arch/x86/entry/common.c:52
#11 do_syscall_64 (regs=0xffffb5bb4f197f58, nr=1) at
arch/x86/entry/common.c:83
#12 0xffffffff8f20012b in entry_SYSCALL_64 () at
arch/x86/entry/entry_64.S:121
#13 0x00007f9a147f69e0 in ?? ()
The frame #13 looks like a non-kernel address.
Thanks
Lianbo
> Cc: Sourabh Jain <sourabhjain(a)linux.ibm.com>
> Cc: Hari Bathini <hbathini(a)linux.ibm.com>
> Cc: Mahesh J Salgaonkar <mahesh(a)linux.ibm.com>
> Cc: Naveen N. Rao <naveen.n.rao(a)linux.vnet.ibm.com>
> Cc: Lianbo Jiang <lijiang(a)redhat.com>
> Cc: HAGIO KAZUHITO(萩尾 一仁) <k-hagio-ab(a)nec.com>
> Cc: Tao Liu <ltao(a)redhat.com>
> Cc: Alexey Makhalov <alexey.makhalov(a)broadcom.com>
> Signed-off-by: Tao Liu <ltao(a)redhat.com>
> ---
> defs.h | 1 +
> gdb-10.2.patch | 26 ++++++++++++++++++++++++++
> gdb_interface.c | 6 ++++++
> 3 files changed, 33 insertions(+)
>
> diff --git a/defs.h b/defs.h
> index 012ffdc..c0e6a29 100644
> --- a/defs.h
> +++ b/defs.h
> @@ -7902,6 +7902,7 @@ extern unsigned char *gdb_prettyprint_arrays;
> extern unsigned int *gdb_repeat_count_threshold;
> extern unsigned char *gdb_stop_print_at_null;
> extern unsigned int *gdb_output_radix;
> +int is_kvaddr(ulong);
>
> /*
> * gdb/top.c
> diff --git a/gdb-10.2.patch b/gdb-10.2.patch
> index 0bed96a..3ed40c0 100644
> --- a/gdb-10.2.patch
> +++ b/gdb-10.2.patch
> @@ -16171,3 +16171,29 @@ exit 0
> }
>
> /*
> +--- gdb-10.2/gdb/frame.c.orig
> ++++ gdb-10.2/gdb/frame.c
> +@@ -2331,6 +2331,10 @@ inside_entry_func (frame_info *this_frame)
> + This function should not contain target-dependent tests, such as
> + checking whether the program-counter is zero. */
> +
> ++#ifdef CRASH_MERGE
> ++extern "C" int is_kvaddr(ulong);
> ++#endif
> ++
> + struct frame_info *
> + get_prev_frame (struct frame_info *this_frame)
> + {
> +@@ -2353,7 +2357,11 @@ get_prev_frame (struct frame_info *this_frame)
> + get_frame_id (this_frame);
> +
> + frame_pc_p = get_frame_pc_if_available (this_frame, &frame_pc);
> +-
> ++#ifdef CRASH_MERGE
> ++ if (!is_kvaddr(frame_pc)) {
> ++ return NULL;
> ++ }
> ++#endif
> + /* tausq/2004-12-07: Dummy frames are skipped because it doesn't make
> much
> + sense to stop unwinding at a dummy frame. One place where a dummy
> + frame may have an address "inside_main_func" is on HPUX. On HPUX,
> the
> diff --git a/gdb_interface.c b/gdb_interface.c
> index b13d5fd..e76ecc6 100644
> --- a/gdb_interface.c
> +++ b/gdb_interface.c
> @@ -947,6 +947,12 @@ gdb_lookup_module_symbol(ulong addr, ulong *offset)
> }
> }
>
> +int
> +is_kvaddr(ulong addr)
> +{
> + return IS_KVADDR(addr);
> +}
> +
> /*
> * Used by gdb_interface() to catch gdb-related errors, if desired.
> */
> --
> 2.40.1
>
4 months
[PATCH v6 07/24] arm64: Fix bt command show wrong stacktrace on ramdump source
by cb126yx@126.com
If we use crash to parse ramdump(Qcom phone device) rather than vmcore.
Start command should be like: crash vmlinux --kaslr=xxx DDRCS0_0.BIN@0x0000000080000000,... --machdep vabits_actual=39
Then We will see bt command show misleading backtrace information as:
crash> bt 16930
PID: 16930 TASK: ffffff89b3eada00 CPU: 2 COMMAND: "Firebase Backgr"
#0 [ffffffc034c437f0] __switch_to at ffffffe0036832d4
#1 [ffffffc034c43850] __kvm_nvhe_$d.2314 at 6be732e004cf05a0
#2 [ffffffc034c438b0] __kvm_nvhe_$d.2314 at 86c54c6004ceff80
#3 [ffffffc034c43950] __kvm_nvhe_$d.2314 at 55d6f96003a7b120
#4 [ffffffc034c439f0] __kvm_nvhe_$d.2314 at 9ccec46003a80a64
#5 [ffffffc034c43ac0] __kvm_nvhe_$d.2314 at 8cf41e6003a945c4
#6 [ffffffc034c43b10] __kvm_nvhe_$d.2314 at a8f181e00372c818
#7 [ffffffc034c43b40] __kvm_nvhe_$d.2314 at 6dedde600372c0d0
#8 [ffffffc034c43b90] __kvm_nvhe_$d.2314 at 62cc07e00373d0ac
#9 [ffffffc034c43c00] __kvm_nvhe_$d.2314 at 72fb1de00373bedc
...
PC: 00000073f5294840 LR: 00000070d8f39ba4 SP: 00000070d4afd5d0
X29: 00000070d4afd600 X28: b4000071efcda7f0 X27: 00000070d4afe000
X26: 0000000000000000 X25: 00000070d9616000 X24: 0000000000000000
X23: 0000000000000000 X22: 0000000000000000 X21: 0000000000000000
X20: b40000728fd27520 X19: b40000728fd27550 X18: 000000702daba000
X17: 00000073f5294820 X16: 00000070d940f9d8 X15: 00000000000000bf
X14: 0000000000000000 X13: 00000070d8ad2fac X12: b40000718fce5040
X11: 0000000000000000 X10: 0000000000000070 X9: 0000000000000001
X8: 0000000000000062 X7: 0000000000000020 X6: 0000000000000000
X5: 0000000000000000 X4: 0000000000000000 X3: 0000000000000000
X2: 0000000000000002 X1: 0000000000000080 X0: b40000728fd27550
ORIG_X0: b40000728fd27550 SYSCALLNO: ffffffff PSTATE: 40001000
By checking the raw data below, will see the lr (fp+8) data show the pointer which already been replaced by PAC prefix.
crash> bt -f
PID: 16930 TASK: ffffff89b3eada00 CPU: 2 COMMAND: "Firebase Backgr"
#0 [ffffffc034c437f0] __switch_to at ffffffe0036832d4
ffffffc034c437f0: ffffffc034c43850 6be732e004cf05a4
ffffffc034c43800: ffffffe006186108 a0ed07e004cf09c4
ffffffc034c43810: ffffff8a1a340000 ffffff8a8d343c00
ffffffc034c43820: ffffff89b3eada00 ffffff8b780db540
ffffffc034c43830: ffffff89b3eada00 0000000000000000
ffffffc034c43840: 0000000000000004 712b828118484a00
#1 [ffffffc034c43850] __kvm_nvhe_$d.2314 at 6be732e004cf05a0
ffffffc034c43850: ffffffc034c438b0 86c54c6004ceff84
ffffffc034c43860: 000000708070f000 ffffffc034c43938
ffffffc034c43870: ffffff88bd822878 ffffff89b3eada00
...
So we may search the CONFIG_ARM64_PTR_AUTH_KERNEL to double check if pac mechanism has been enabled on this ramdump.
Luckily, lijiang and tao give some valuable suggestion:
As IKCONFIG may not available in most distributions.
So we could check if the "struct ptrauth_keys_kernel" has already been embeded into arm's thread structure.
struct thread_struct {
struct cpu_context cpu_context; /* cpu context */
...
#ifdef CONFIG_ARM64_PTR_AUTH
struct ptrauth_keys_user keys_user;
#ifdef CONFIG_ARM64_PTR_AUTH_KERNEL
struct ptrauth_keys_kernel keys_kernel;
#endif
...
};
PAC feature should be enabled when "struct ptrauth_keys_kernel" exist, and we can use vabits to figure out pac bitmask.
The fix backtrace is shown below:
crash> bt 16930
PID: 16930 TASK: ffffff89b3eada00 CPU: 2 COMMAND: "Firebase Backgr"
#0 [ffffffc034c437f0] __switch_to at ffffffe0036832d4
#1 [ffffffc034c43850] __schedule at ffffffe004cf05a0
#2 [ffffffc034c438b0] preempt_schedule_common at ffffffe004ceff80
#3 [ffffffc034c43950] unmap_page_range at ffffffe003a7b120
#4 [ffffffc034c439f0] unmap_vmas at ffffffe003a80a64
#5 [ffffffc034c43ac0] exit_mmap at ffffffe003a945c4
#6 [ffffffc034c43b10] __mmput at ffffffe00372c818
#7 [ffffffc034c43b40] mmput at ffffffe00372c0d0
#8 [ffffffc034c43b90] exit_mm at ffffffe00373d0ac
#9 [ffffffc034c43c00] do_exit at ffffffe00373bedc
PC: 00000073f5294840 LR: 00000070d8f39ba4 SP: 00000070d4afd5d0
X29: 00000070d4afd600 X28: b4000071efcda7f0 X27: 00000070d4afe000
X26: 0000000000000000 X25: 00000070d9616000 X24: 0000000000000000
X23: 0000000000000000 X22: 0000000000000000 X21: 0000000000000000
X20: b40000728fd27520 X19: b40000728fd27550 X18: 000000702daba000
X17: 00000073f5294820 X16: 00000070d940f9d8 X15: 00000000000000bf
X14: 0000000000000000 X13: 00000070d8ad2fac X12: b40000718fce5040
X11: 0000000000000000 X10: 0000000000000070 X9: 0000000000000001
X8: 0000000000000062 X7: 0000000000000020 X6: 0000000000000000
X5: 0000000000000000 X4: 0000000000000000 X3: 0000000000000000
X2: 0000000000000002 X1: 0000000000000080 X0: b40000728fd27550
ORIG_X0: b40000728fd27550 SYSCALLNO: ffffffff PSTATE: 40001000
Here is gki's PAC related original commit:
Link: https://lore.kernel.org/all/1584090304-18043-12-git-send-email-amit.kachh...
Link: https://cs.android.com/android/_/android/kernel/common/+/689eae42afd7a916...
Signed-off-by: bevis_chen <bevis_chen(a)asus.com>
---
arm64.c | 27 +++++++++++++++++++++++++++
1 file changed, 27 insertions(+)
diff --git a/arm64.c b/arm64.c
index b3040d7..50dd458 100644
--- a/arm64.c
+++ b/arm64.c
@@ -92,6 +92,7 @@ static void arm64_get_crash_notes(void);
static void arm64_calc_VA_BITS(void);
static int arm64_is_uvaddr(ulong, struct task_context *);
static void arm64_calc_KERNELPACMASK(void);
+static void arm64_recalc_KERNELPACMASK(void);
static int arm64_get_vmcoreinfo(unsigned long *vaddr, const char *label, int base);
struct kernel_range {
@@ -581,6 +582,17 @@ arm64_init(int when)
if (!machdep->hz)
machdep->hz = 100;
+ /*
+ * In the case of using ramdump rather than vmcore,
+ * will fail to parse out KERNELPAC.
+ * So we check if the "sturct ptrauth_keys_kernel" exits
+ * as a basis for whether PAC feature is enabled or not.
+ * If yes, then we use vabits to figure out pac bitmask.
+ */
+ if(!machdep->machspec->CONFIG_ARM64_KERNELPACMASK)
+ arm64_recalc_KERNELPACMASK();
+
+
arm64_irq_stack_init();
arm64_overflow_stack_init();
arm64_stackframe_init();
@@ -4921,6 +4933,21 @@ static void arm64_calc_KERNELPACMASK(void)
}
}
+
+#define GENMASK_UL(h, l) \
+ (((~0UL) << (l)) & (~0UL >> (BITS_PER_LONG - 1 - (h))))
+
+static void arm64_recalc_KERNELPACMASK(void){
+ /* arm64: check if pac already enabled yet from related structure.*/
+ if (STRUCT_EXISTS("ptrauth_keys_kernel") && machdep->machspec->VA_BITS_ACTUAL){
+ machdep->machspec->CONFIG_ARM64_KERNELPACMASK =
+ GENMASK_UL(63, machdep->machspec->VA_BITS_ACTUAL);
+ if (CRASHDEBUG(1))
+ fprintf(fp, "CONFIG_ARM64_KERNELPACMASK: %lx\n",
+ machdep->machspec->CONFIG_ARM64_KERNELPACMASK);
+ }
+}
+
#endif /* ARM64 */
--
2.27.0
4 months
Re: [PATCH v2] arm64: fix a potential segfault when unwind frame
by lijiang
>
> Date: Wed, 24 Jul 2024 17:21:37 +1200
> From: Tao Liu <ltao(a)redhat.com>
> Subject: [Crash-utility] Re: [PATCH v2] arm64: fix a potential
> segfault when unwind frame
> To: qiwu.chen(a)transsion.com
> Cc: devel(a)lists.crash-utility.osci.io
> Message-ID:
> <
> CAO7dBbVRpMcyUcFumkeVUkE8qwBwb88qQhpwOVv8YbxdoCOymA(a)mail.gmail.com>
> Content-Type: text/plain; charset="UTF-8"
>
> Hi qiwu,
>
> The patch LGTM, so ack.
>
>
Applied:
https://github.com/crash-utility/crash/commit/af895b219876b293d551e6dec82...
Thanks
Lianbo
> Thanks,
> Tao Liu
>
> On Wed, Jul 24, 2024 at 1:41 PM <qiwu.chen(a)transsion.com> wrote:
> >
> > The range of frame->fp is checked insufficiently, which may lead to a
> wrong
> > next fp. As a result, bt->stackbuf will be accessed out of range, and
> segfault.
> >
> > crash> bt
> > [Detaching after fork from child process 11409]
> > PID: 7661 TASK: ffffff81858aa500 CPU: 4 COMMAND: "sh"
> > #0 [ffffffc008003f50] local_cpu_stop at ffffffdd7669444c
> >
> > Thread 1 "crash" received signal SIGSEGV, Segmentation fault.
> > 0x00005555558266cc in arm64_unwind_frame (bt=0x7fffffffd8f0,
> frame=0x7fffffffd080) at
> > arm64.c:2821
> > 2821 frame->fp = GET_STACK_ULONG(fp);
> > (gdb) bt
> > arm64.c:2821
> > out>) at main.c:1338
> > gdb_interface.c:81
> > (gdb) p /x *(struct bt_info*) 0x7fffffffd8f0
> > $3 = {task = 0xffffff81858aa500, flags = 0x0, instptr =
> 0xffffffdd76694450, stkptr =
> > 0xffffffc008003f40, bptr = 0x0, stackbase = 0xffffffc027288000,
> > stacktop = 0xffffffc02728c000, stackbuf = 0x555556115a40, tc =
> 0x55559d16fdc0, hp = 0x0,
> > textlist = 0x0, ref = 0x0, frameptr = 0xffffffc008003f50,
> > call_target = 0x0, machdep = 0x0, debug = 0x0, eframe_ip = 0x0, radix
> = 0x0, cpumask =
> > 0x0}
> > (gdb) p /x *(struct arm64_stackframe*) 0x7fffffffd080
> > $4 = {fp = 0xffffffc008003f50, sp = 0xffffffc008003f60, pc =
> 0xffffffdd76694450}
> > crash> bt -S 0xffffffc008003f50
> > PID: 7661 TASK: ffffff81858aa500 CPU: 4 COMMAND: "sh"
> > bt: non-process stack address for this task: ffffffc008003f50
> > (valid range: ffffffc027288000 - ffffffc02728c000)
> >
> > Check frame->fp value sufficiently before access it. Only frame->fp
> within
> > the range of bt->stackbase and bt->stacktop will be regarded as valid.
> >
> > Signed-off-by: qiwu.chen <qiwu.chen(a)transsion.com>
> > ---
> > arm64.c | 4 ++--
> > 1 file changed, 2 insertions(+), 2 deletions(-)
> >
> > diff --git a/arm64.c b/arm64.c
> > index b3040d7..624dba2 100644
> > --- a/arm64.c
> > +++ b/arm64.c
> > @@ -2814,7 +2814,7 @@ arm64_unwind_frame(struct bt_info *bt, struct
> arm64_stackframe *frame)
> > low = frame->sp;
> > high = (low + stack_mask) & ~(stack_mask);
> >
> > - if (fp < low || fp > high || fp & 0xf)
> > + if (fp < low || fp > high || fp & 0xf || !INSTACK(fp, bt))
> > return FALSE;
> >
> > frame->sp = fp + 0x10;
> > @@ -3024,7 +3024,7 @@ arm64_unwind_frame_v2(struct bt_info *bt, struct
> arm64_stackframe *frame,
> > low = frame->sp;
> > high = (low + stack_mask) & ~(stack_mask);
> >
> > - if (fp < low || fp > high || fp & 0xf)
> > + if (fp < low || fp > high || fp & 0xf || !INSTACK(fp, bt))
> > return FALSE;
> >
> > if (CRASHDEBUG(1))
> > --
> > 2.25.1
>
4 months
Re: [PATCH v2] arm64: fix a potential segfault when unwind frame
by Lianbo Jiang
On 7/24/24 9:36 AM, devel-request(a)lists.crash-utility.osci.io wrote:
> Date: Wed, 24 Jul 2024 01:36:09 -0000
> From:qiwu.chen@transsion.com
> Subject: [Crash-utility] [PATCH v2] arm64: fix a potential segfault
> when unwind frame
> To:devel@lists.crash-utility.osci.io
> Message-ID:<20240724013609.28594.37360(a)lists.crash-utility.osci.io>
> Content-Type: text/plain; charset="utf-8"
>
> The range of frame->fp is checked insufficiently, which may lead to a wrong
> next fp. As a result, bt->stackbuf will be accessed out of range, and segfault.
>
> crash> bt
> [Detaching after fork from child process 11409]
> PID: 7661 TASK: ffffff81858aa500 CPU: 4 COMMAND: "sh"
> #0 [ffffffc008003f50] local_cpu_stop at ffffffdd7669444c
>
> Thread 1 "crash" received signal SIGSEGV, Segmentation fault.
> 0x00005555558266cc in arm64_unwind_frame (bt=0x7fffffffd8f0, frame=0x7fffffffd080) at
> arm64.c:2821
> 2821 frame->fp = GET_STACK_ULONG(fp);
> (gdb) bt
> arm64.c:2821
> out>) at main.c:1338
> gdb_interface.c:81
> (gdb) p /x *(struct bt_info*) 0x7fffffffd8f0
> $3 = {task = 0xffffff81858aa500, flags = 0x0, instptr = 0xffffffdd76694450, stkptr =
> 0xffffffc008003f40, bptr = 0x0, stackbase = 0xffffffc027288000,
> stacktop = 0xffffffc02728c000, stackbuf = 0x555556115a40, tc = 0x55559d16fdc0, hp = 0x0,
> textlist = 0x0, ref = 0x0, frameptr = 0xffffffc008003f50,
> call_target = 0x0, machdep = 0x0, debug = 0x0, eframe_ip = 0x0, radix = 0x0, cpumask =
> 0x0}
> (gdb) p /x *(struct arm64_stackframe*) 0x7fffffffd080
> $4 = {fp = 0xffffffc008003f50, sp = 0xffffffc008003f60, pc = 0xffffffdd76694450}
> crash> bt -S 0xffffffc008003f50
> PID: 7661 TASK: ffffff81858aa500 CPU: 4 COMMAND: "sh"
> bt: non-process stack address for this task: ffffffc008003f50
> (valid range: ffffffc027288000 - ffffffc02728c000)
>
> Check frame->fp value sufficiently before access it. Only frame->fp within
> the range of bt->stackbase and bt->stacktop will be regarded as valid.
>
> Signed-off-by: qiwu.chen<qiwu.chen(a)transsion.com>
> ---
> arm64.c | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/arm64.c b/arm64.c
> index b3040d7..624dba2 100644
> --- a/arm64.c
> +++ b/arm64.c
> @@ -2814,7 +2814,7 @@ arm64_unwind_frame(struct bt_info *bt, struct arm64_stackframe *frame)
> low = frame->sp;
> high = (low + stack_mask) & ~(stack_mask);
>
> - if (fp < low || fp > high || fp & 0xf)
> + if (fp < low || fp > high || fp & 0xf || !INSTACK(fp, bt))
> return FALSE;
>
> frame->sp = fp + 0x10;
> @@ -3024,7 +3024,7 @@ arm64_unwind_frame_v2(struct bt_info *bt, struct arm64_stackframe *frame,
> low = frame->sp;
> high = (low + stack_mask) & ~(stack_mask);
>
> - if (fp < low || fp > high || fp & 0xf)
> + if (fp < low || fp > high || fp & 0xf || !INSTACK(fp, bt))
> return FALSE;
>
Thank you for the update. This looks good, so: Ack.
Lianbo
> if (CRASHDEBUG(1))
> -- 2.25.1
4 months
[PATCH v2] arm64: fix a potential segfault when unwind frame
by qiwu.chen@transsion.com
The range of frame->fp is checked insufficiently, which may lead to a wrong
next fp. As a result, bt->stackbuf will be accessed out of range, and segfault.
crash> bt
[Detaching after fork from child process 11409]
PID: 7661 TASK: ffffff81858aa500 CPU: 4 COMMAND: "sh"
#0 [ffffffc008003f50] local_cpu_stop at ffffffdd7669444c
Thread 1 "crash" received signal SIGSEGV, Segmentation fault.
0x00005555558266cc in arm64_unwind_frame (bt=0x7fffffffd8f0, frame=0x7fffffffd080) at
arm64.c:2821
2821 frame->fp = GET_STACK_ULONG(fp);
(gdb) bt
arm64.c:2821
out>) at main.c:1338
gdb_interface.c:81
(gdb) p /x *(struct bt_info*) 0x7fffffffd8f0
$3 = {task = 0xffffff81858aa500, flags = 0x0, instptr = 0xffffffdd76694450, stkptr =
0xffffffc008003f40, bptr = 0x0, stackbase = 0xffffffc027288000,
stacktop = 0xffffffc02728c000, stackbuf = 0x555556115a40, tc = 0x55559d16fdc0, hp = 0x0,
textlist = 0x0, ref = 0x0, frameptr = 0xffffffc008003f50,
call_target = 0x0, machdep = 0x0, debug = 0x0, eframe_ip = 0x0, radix = 0x0, cpumask =
0x0}
(gdb) p /x *(struct arm64_stackframe*) 0x7fffffffd080
$4 = {fp = 0xffffffc008003f50, sp = 0xffffffc008003f60, pc = 0xffffffdd76694450}
crash> bt -S 0xffffffc008003f50
PID: 7661 TASK: ffffff81858aa500 CPU: 4 COMMAND: "sh"
bt: non-process stack address for this task: ffffffc008003f50
(valid range: ffffffc027288000 - ffffffc02728c000)
Check frame->fp value sufficiently before access it. Only frame->fp within
the range of bt->stackbase and bt->stacktop will be regarded as valid.
Signed-off-by: qiwu.chen <qiwu.chen(a)transsion.com>
---
arm64.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arm64.c b/arm64.c
index b3040d7..624dba2 100644
--- a/arm64.c
+++ b/arm64.c
@@ -2814,7 +2814,7 @@ arm64_unwind_frame(struct bt_info *bt, struct arm64_stackframe *frame)
low = frame->sp;
high = (low + stack_mask) & ~(stack_mask);
- if (fp < low || fp > high || fp & 0xf)
+ if (fp < low || fp > high || fp & 0xf || !INSTACK(fp, bt))
return FALSE;
frame->sp = fp + 0x10;
@@ -3024,7 +3024,7 @@ arm64_unwind_frame_v2(struct bt_info *bt, struct arm64_stackframe *frame,
low = frame->sp;
high = (low + stack_mask) & ~(stack_mask);
- if (fp < low || fp > high || fp & 0xf)
+ if (fp < low || fp > high || fp & 0xf || !INSTACK(fp, bt))
return FALSE;
if (CRASHDEBUG(1))
--
2.25.1
4 months
Re: arm64: Fix bt command show wrong stacktrace on ramdump source
by Lianbo Jiang
Hi, bevis_chen
Thank you for the v2.
On 7/11/24 6:42 AM, devel-request(a)lists.crash-utility.osci.io wrote:
> Date: Wed, 10 Jul 2024 14:19:01 -0000
> From:cb126yx@126.com
> Subject: [Crash-utility] Re: arm64: Fix bt command show wrong
> stacktrace on ramdump source
> To:devel@lists.crash-utility.osci.io
> Message-ID:<20240710141901.30295.7874(a)lists.crash-utility.osci.io>
> Content-Type: text/plain; charset="utf-8"
>
> >From dd6b187ac15a237cefe863c4e5b432cf13b9883a Mon Sep 17 00:00:00 2001
> From: bevis_chen<bevis_chen(a)asus.com>
> Date: Wed, 3 Jul 2024 15:05:44 +0800
> Subject: [PATCH] arm64: Fix bt command show wrong stacktrace on ramdump
> source
>
> If we use crash to parse ramdump(Qcom phone device) rathen than vmcore.
> Start command should be like: crash vmlinux --kaslr=xxx
> DDRCS0_0.BIN@0x0000000080000000,... --machdep vabits_actual=39
> Then We will see bt command show misleading backtrace information below:
>
> crash> bt 16930
> PID: 16930 TASK: ffffff89b3eada00 CPU: 2 COMMAND: "Firebase
> Backgr"
> #0 [ffffffc034c437f0] __switch_to at ffffffe0036832d4
> #1 [ffffffc034c43850] __kvm_nvhe_$d.2314 at 6be732e004cf05a0
> #2 [ffffffc034c438b0] __kvm_nvhe_$d.2314 at 86c54c6004ceff80
> #3 [ffffffc034c43950] __kvm_nvhe_$d.2314 at 55d6f96003a7b120
> #4 [ffffffc034c439f0] __kvm_nvhe_$d.2314 at 9ccec46003a80a64
> #5 [ffffffc034c43ac0] __kvm_nvhe_$d.2314 at 8cf41e6003a945c4
> #6 [ffffffc034c43b10] __kvm_nvhe_$d.2314 at a8f181e00372c818
> #7 [ffffffc034c43b40] __kvm_nvhe_$d.2314 at 6dedde600372c0d0
> #8 [ffffffc034c43b90] __kvm_nvhe_$d.2314 at 62cc07e00373d0ac
> #9 [ffffffc034c43c00] __kvm_nvhe_$d.2314 at 72fb1de00373bedc
> ...
> PC: 00000073f5294840 LR: 00000070d8f39ba4 SP: 00000070d4afd5d0
> X29: 00000070d4afd600 X28: b4000071efcda7f0 X27: 00000070d4afe000
> X26: 0000000000000000 X25: 00000070d9616000 X24: 0000000000000000
> X23: 0000000000000000 X22: 0000000000000000 X21: 0000000000000000
> X20: b40000728fd27520 X19: b40000728fd27550 X18: 000000702daba000
> X17: 00000073f5294820 X16: 00000070d940f9d8 X15: 00000000000000bf
> X14: 0000000000000000 X13: 00000070d8ad2fac X12: b40000718fce5040
> X11: 0000000000000000 X10: 0000000000000070 X9: 0000000000000001
> X8: 0000000000000062 X7: 0000000000000020 X6: 0000000000000000
> X5: 0000000000000000 X4: 0000000000000000 X3: 0000000000000000
> X2: 0000000000000002 X1: 0000000000000080 X0: b40000728fd27550
> ORIG_X0: b40000728fd27550 SYSCALLNO: ffffffff PSTATE: 40001000
>
> By checking the raw data below, will see the lr (fp+8) data show the
> pointer which already been replaced by PAC prefix.
>
> crash> bt -f
> PID: 16930 TASK: ffffff89b3eada00 CPU: 2 COMMAND: "Firebase
> Backgr"
> #0 [ffffffc034c437f0] __switch_to at ffffffe0036832d4
> ffffffc034c437f0: ffffffc034c43850 6be732e004cf05a4
> ffffffc034c43800: ffffffe006186108 a0ed07e004cf09c4
> ffffffc034c43810: ffffff8a1a340000 ffffff8a8d343c00
> ffffffc034c43820: ffffff89b3eada00 ffffff8b780db540
> ffffffc034c43830: ffffff89b3eada00 0000000000000000
> ffffffc034c43840: 0000000000000004 712b828118484a00
> #1 [ffffffc034c43850] __kvm_nvhe_$d.2314 at 6be732e004cf05a0
> ffffffc034c43850: ffffffc034c438b0 86c54c6004ceff84
> ffffffc034c43860: 000000708070f000 ffffffc034c43938
> ffffffc034c43870: ffffff88bd822878 ffffff89b3eada00
> ...
>
> So we check the CONFIG_ARM64_PTR_AUTH and CONFIG_ARM64_PTR_AUTH_KERNEL
> to double check if pac mechanism been enabled on this ramdump.
> Then we use vabits to figure it out.
> Fix then show the right backtrace below:
> crash> bt 16930
> PID: 16930 TASK: ffffff89b3eada00 CPU: 2 COMMAND: "Firebase
> Backgr"
> #0 [ffffffc034c437f0] __switch_to at ffffffe0036832d4
> #1 [ffffffc034c43850] __schedule at ffffffe004cf05a0
> #2 [ffffffc034c438b0] preempt_schedule_common at ffffffe004ceff80
> #3 [ffffffc034c43950] unmap_page_range at ffffffe003a7b120
> #4 [ffffffc034c439f0] unmap_vmas at ffffffe003a80a64
> #5 [ffffffc034c43ac0] exit_mmap at ffffffe003a945c4
> #6 [ffffffc034c43b10] __mmput at ffffffe00372c818
> #7 [ffffffc034c43b40] mmput at ffffffe00372c0d0
> #8 [ffffffc034c43b90] exit_mm at ffffffe00373d0ac
> #9 [ffffffc034c43c00] do_exit at ffffffe00373bedc
> PC: 00000073f5294840 LR: 00000070d8f39ba4 SP: 00000070d4afd5d0
> X29: 00000070d4afd600 X28: b4000071efcda7f0 X27: 00000070d4afe000
> X26: 0000000000000000 X25: 00000070d9616000 X24: 0000000000000000
> X23: 0000000000000000 X22: 0000000000000000 X21: 0000000000000000
> X20: b40000728fd27520 X19: b40000728fd27550 X18: 000000702daba000
> X17: 00000073f5294820 X16: 00000070d940f9d8 X15: 00000000000000bf
> X14: 0000000000000000 X13: 00000070d8ad2fac X12: b40000718fce5040
> X11: 0000000000000000 X10: 0000000000000070 X9: 0000000000000001
> X8: 0000000000000062 X7: 0000000000000020 X6: 0000000000000000
> X5: 0000000000000000 X4: 0000000000000000 X3: 0000000000000000
> X2: 0000000000000002 X1: 0000000000000080 X0: b40000728fd27550
> ORIG_X0: b40000728fd27550 SYSCALLNO: ffffffff PSTATE: 40001000
>
> Let's use GENMASK to replace the pac pointer to fix it.
> gki related commit url here:
> https://lore.kernel.org/all/20230412160134.306148-4-mark.rutland@arm.com/
Can you help to add the kernel commit to patch log?
de1702f65feb ("arm64: move PAC masks to <asm/pointer_auth.h>")
> Signed-off-by: bevis_chen<bevis_chen(a)asus.com>
> ---
> arm64.c | 33 +++++++++++++++++++++++++++++++++
> 1 file changed, 33 insertions(+)
>
> diff --git a/arm64.c b/arm64.c
> index b3040d7..d8ce98f 100644
> --- a/arm64.c
> +++ b/arm64.c
> @@ -92,6 +92,7 @@ static void arm64_get_crash_notes(void);
> static void arm64_calc_VA_BITS(void);
> static int arm64_is_uvaddr(ulong, struct task_context *);
> static void arm64_calc_KERNELPACMASK(void);
> +static void arm64_recalc_KERNELPACMASK(void);
> static int arm64_get_vmcoreinfo(unsigned long *vaddr, const char *label, int base);
>
> struct kernel_range {
> @@ -581,6 +582,18 @@ arm64_init(int when)
> if (!machdep->hz)
> machdep->hz = 100;
>
> + /*
> + * In the case of using ramdump rather than vmcore,
> + * Will fail to parse out KERNELPAC.
> + * So let's try again from kconfig to ensure if PAC enabled.
> + * If yes, then we use vabits to figure it out.
> + * gki related commit url:
> + *https://lore.kernel.org/all/20230412160134.306148-4-mark.rutland@arm.com/
> + */
> + if(!machdep->machspec->CONFIG_ARM64_KERNELPACMASK)
> + arm64_recalc_KERNELPACMASK();
> +
> +
> arm64_irq_stack_init();
> arm64_overflow_stack_init();
> arm64_stackframe_init();
> @@ -4921,6 +4934,26 @@ static void arm64_calc_KERNELPACMASK(void)
> }
> }
>
> +
> +#define GENMASK_UL(h, l) \
> + (((~0UL) << (l)) & (~0UL >> (BITS_PER_LONG - 1 - (h))))
> +
BTW: I saw a similar version implemented in the kernel, can you help
double check if that is intentional?
#define __GENMASK(h, l) \
(((~_UL(0)) - (_UL(1) << (l)) + 1) & \
(~_UL(0) >> (__BITS_PER_LONG - 1 - (h))))
Thanks
Lianbo
> +static void arm64_recalc_KERNELPACMASK(void){
> + if (kt->ikconfig_flags & IKCONFIG_AVAIL){
> + /* arm64: check pac already enabled yet.*/
> + if ((get_kernel_config("CONFIG_ARM64_PTR_AUTH", NULL) == IKCONFIG_Y)
> + && (get_kernel_config("CONFIG_ARM64_PTR_AUTH_KERNEL", NULL) == IKCONFIG_Y)){
> + if (machdep->machspec->VA_BITS_ACTUAL){
> + machdep->machspec->CONFIG_ARM64_KERNELPACMASK =
> + GENMASK_UL(63, machdep->machspec->VA_BITS_ACTUAL);
> + if (CRASHDEBUG(1))
> + fprintf(fp, "CONFIG_ARM64_KERNELPACMASK: %lx\n",
> + machdep->machspec->CONFIG_ARM64_KERNELPACMASK);
> + }
> + }
> + }
> +}
> +
> #endif /* ARM64 */
>
>
> --
> 2.27.0
4 months
Re: [PATCH v4 09/16] x86_64: Add gdb stack unwinding support
by lijiang
On Fri, May 31, 2024 at 5:33 PM <devel-request(a)lists.crash-utility.osci.io>
wrote:
> Date: Fri, 31 May 2024 17:19:32 +0800
> From: Tao Liu <ltao(a)redhat.com>
> Subject: [Crash-utility] [PATCH v4 09/16] x86_64: Add gdb stack
> unwinding support
> To: devel(a)lists.crash-utility.osci.io
> Cc: Mahesh J Salgaonkar <mahesh(a)linux.ibm.com>, "Naveen N . Rao"
> <naveen.n.rao(a)linux.vnet.ibm.com>, Lianbo Jiang <
> lijiang(a)redhat.com>,
> Alexey Makhalov <alexey.makhalov(a)broadcom.com>
> Message-ID: <20240531091939.97828-10-ltao(a)redhat.com>
> Content-Type: text/plain; charset=UTF-8
>
> This patch will add stack unwinding support for x86_64 CPU arch. It
> follows the tech path of ppc64_get_stack_frame() & ppc64_get_cpu_reg()
> in ppc64.c, to get and consume the cpu regs.
>
> One different case is, we need to handle inactive_task_frame structure
> specifically in x86_64.
>
> If inactive_task_frame structure enabled, for inactive tasks, 7 regs can
> be get from inactive_task_frame in stack, and sp need to rewind back to
> skip inactive_task_frame structure (See code comments in
> x86_64.c:x86_64_get_stack_frame()); for active tasks, we get regs by the
> original way.
>
> If inactive_task_frame structure is not enabled, for inactive tasks, the
> stack frame is organized as linked list, and sp/bp can be get from stack;
> for active tasks, we get regs by the original way.
>
> Cc: Sourabh Jain <sourabhjain(a)linux.ibm.com>
> Cc: Hari Bathini <hbathini(a)linux.ibm.com>
> Cc: Mahesh J Salgaonkar <mahesh(a)linux.ibm.com>
> Cc: Naveen N. Rao <naveen.n.rao(a)linux.vnet.ibm.com>
> Cc: Lianbo Jiang <lijiang(a)redhat.com>
> Cc: HAGIO KAZUHITO(萩尾 一仁) <k-hagio-ab(a)nec.com>
> Cc: Tao Liu <ltao(a)redhat.com>
> Cc: Alexey Makhalov <alexey.makhalov(a)broadcom.com>
> Signed-off-by: Tao Liu <ltao(a)redhat.com>
> ---
> defs.h | 19 +++-
> x86_64.c | 291 ++++++++++++++++++++++++++++++++++++++++++++++++++-----
> 2 files changed, 283 insertions(+), 27 deletions(-)
>
> diff --git a/defs.h b/defs.h
> index 8450aea..ed52cc3 100644
> --- a/defs.h
> +++ b/defs.h
> @@ -2241,6 +2241,20 @@ struct offset_table { /* stash
> of commonly-used offsets */
> long mnt_namespace_nr_mounts;
> long mount_mnt_node;
> long log_caller_id;
> + long inactive_task_frame_r15;
> + long inactive_task_frame_r14;
> + long inactive_task_frame_r13;
> + long inactive_task_frame_r12;
> + long inactive_task_frame_flags;
> + long inactive_task_frame_si;
> + long inactive_task_frame_di;
> + long inactive_task_frame_bx;
> + long thread_struct_es;
> + long thread_struct_ds;
> + long thread_struct_fsbase;
> + long thread_struct_gsbase;
> + long thread_struct_fs;
> + long thread_struct_gs;
> };
>
> struct size_table { /* stash of commonly-used sizes */
> @@ -8008,7 +8022,7 @@ extern int have_full_symbols(void);
>
> /*
> * Register numbers must be in sync with gdb/features/i386/64bit-core.c
> - * to make crash_target->fetch_registers() ---> machdep->get_cpu_reg()
> + * to make crash_target->fetch_registers() --->
> machdep->get_current_task_reg()
> * working properly.
> */
> enum x86_64_regnum {
> @@ -8052,6 +8066,9 @@ enum x86_64_regnum {
> FOSEG_REGNUM,
> FOOFF_REGNUM,
> FOP_REGNUM,
> + FS_BASE_REGNUM = 152,
> + GS_BASE_REGNUM,
> + ORIG_RAX_REGNUM,
> LAST_REGNUM
> };
>
> diff --git a/x86_64.c b/x86_64.c
> index 5ab2852..4ba0b40 100644
> --- a/x86_64.c
> +++ b/x86_64.c
> @@ -126,7 +126,7 @@ static int x86_64_get_framesize(struct bt_info *,
> ulong, ulong, char *);
> static void x86_64_framesize_debug(struct bt_info *);
> static void x86_64_get_active_set(void);
> static int x86_64_get_kvaddr_ranges(struct vaddr_range *);
> -static int x86_64_get_cpu_reg(int, int, const char *, int, void *);
> +static int x86_64_get_current_task_reg(int, const char *, int, void *);
> static int x86_64_verify_paddr(uint64_t);
> static void GART_init(void);
> static void x86_64_exception_stacks_init(void);
> @@ -143,6 +143,29 @@ struct machine_specific x86_64_machine_specific = { 0
> };
> static const char *exception_functions_orig[];
> static const char *exception_functions_5_8[];
>
> +/* Use this hardwired version -- sometimes the
> + * debuginfo doesn't pick this up even though
> + * it exists in the kernel; it shouldn't change.
> + */
> +struct x86_64_user_regs_struct {
> + unsigned long r15, r14, r13, r12, bp, bx;
> + unsigned long r11, r10, r9, r8, ax, cx, dx;
> + unsigned long si, di, orig_ax, ip, cs;
> + unsigned long flags, sp, ss, fs_base;
> + unsigned long gs_base, ds, es, fs, gs;
> +};
> +
> +struct user_regs_bitmap_struct {
> + struct {
> + unsigned long r15, r14, r13, r12, bp, bx;
> + unsigned long r11, r10, r9, r8, ax, cx, dx;
> + unsigned long si, di, orig_ax, ip, cs;
> + unsigned long flags, sp, ss, fs_base;
> + unsigned long gs_base, ds, es, fs, gs;
> + };
> + ulong bitmap;
> +};
>
The struct user_regs_bitmap_struct can de defined as below:
+struct user_regs_bitmap_struct {
+ struct user_regs_bitmap_struct uregs;
+ ulong bitmap;
+};
+
> /*
> * Do all necessary machine-specific setup here. This is called several
> * times during initialization.
> @@ -195,7 +218,7 @@ x86_64_init(int when)
> machdep->machspec->irq_eframe_link = UNINITIALIZED;
> machdep->machspec->irq_stack_gap = UNINITIALIZED;
> machdep->get_kvaddr_ranges = x86_64_get_kvaddr_ranges;
> - machdep->get_current_task_reg = x86_64_get_cpu_reg;
> + machdep->get_current_task_reg =
> x86_64_get_current_task_reg;
> if (machdep->cmdline_args[0])
> parse_cmdline_args();
> if ((string = pc->read_vmcoreinfo("relocate"))) {
> @@ -500,6 +523,12 @@ x86_64_init(int when)
> MEMBER_OFFSET_INIT(thread_struct_rsp,
> "thread_struct", "sp");
> if (INVALID_MEMBER(thread_struct_rsp0))
> MEMBER_OFFSET_INIT(thread_struct_rsp0,
> "thread_struct", "sp0");
> + MEMBER_OFFSET_INIT(thread_struct_es, "thread_struct",
> "es");
> + MEMBER_OFFSET_INIT(thread_struct_ds, "thread_struct",
> "ds");
> + MEMBER_OFFSET_INIT(thread_struct_fsbase, "thread_struct",
> "fsbase");
> + MEMBER_OFFSET_INIT(thread_struct_gsbase, "thread_struct",
> "gsbase");
> + MEMBER_OFFSET_INIT(thread_struct_fs, "thread_struct",
> "fs");
> + MEMBER_OFFSET_INIT(thread_struct_gs, "thread_struct",
> "gs");
> STRUCT_SIZE_INIT(tss_struct, "tss_struct");
> MEMBER_OFFSET_INIT(tss_struct_ist, "tss_struct", "ist");
> if (INVALID_MEMBER(tss_struct_ist)) {
> @@ -583,17 +612,6 @@ x86_64_init(int when)
> "user_regs_struct", "r15");
> STRUCT_SIZE_INIT(user_regs_struct, "user_regs_struct");
> if (!VALID_STRUCT(user_regs_struct)) {
> - /* Use this hardwired version -- sometimes the
> - * debuginfo doesn't pick this up even though
> - * it exists in the kernel; it shouldn't change.
> - */
> - struct x86_64_user_regs_struct {
> - unsigned long r15, r14, r13, r12, bp, bx;
> - unsigned long r11, r10, r9, r8, ax, cx, dx;
> - unsigned long si, di, orig_ax, ip, cs;
> - unsigned long flags, sp, ss, fs_base;
> - unsigned long gs_base, ds, es, fs, gs;
> - };
> ASSIGN_SIZE(user_regs_struct) =
> sizeof(struct x86_64_user_regs_struct);
> ASSIGN_OFFSET(user_regs_struct_rip) =
> @@ -891,7 +909,7 @@ x86_64_dump_machdep_table(ulong arg)
> fprintf(fp, " is_page_ptr: x86_64_is_page_ptr()\n");
> fprintf(fp, " verify_paddr: x86_64_verify_paddr()\n");
> fprintf(fp, " get_kvaddr_ranges: x86_64_get_kvaddr_ranges()\n");
> - fprintf(fp, " get_cpu_reg: x86_64_get_cpu_reg()\n");
> + fprintf(fp, "get_current_task_reg:
> x86_64_get_current_task_reg()\n");
> fprintf(fp, " init_kernel_pgd: x86_64_init_kernel_pgd()\n");
> fprintf(fp, "clear_machdep_cache:
> x86_64_clear_machdep_cache()\n");
> fprintf(fp, " xendump_p2m_create: %s\n", PVOPS_XEN() ?
> @@ -4971,22 +4989,124 @@ x86_64_eframe_verify(struct bt_info *bt, long
> kvaddr, long cs, long ss,
> return FALSE;
> }
>
> +#define GET_REG_FROM_INACTIVE_TASK_FRAME(reg) \
> +({ \
> + ulong offset, reg_value = 0, rsp; \
> + if (VALID_MEMBER(inactive_task_frame_bp)) { \
> + offset = OFFSET(task_struct_thread) +
> OFFSET(thread_struct_rsp); \
> + readmem(bt->task + offset, KVADDR, &rsp, \
> + sizeof(ulong), "thread_struct.rsp",
> FAULT_ON_ERROR); \
> + readmem(rsp + OFFSET(inactive_task_frame_##reg), KVADDR,
> ®_value, \
> + sizeof(ulong), "inactive_task_frame.##reg",
> FAULT_ON_ERROR); \
> + } \
> + reg_value; \
> +})
>
I would suggest changing the above macro definition to a function, at least
there are two advantages:
[1] once the task_struct or inactive_task_frame is changed in the future,
it is easy to follow its changes.
[2] easy to debug, and more readable.
> +
> /*
> * Get a stack frame combination of pc and ra from the most relevent
> spot.
> */
> static void
> x86_64_get_stack_frame(struct bt_info *bt, ulong *pcp, ulong *spp)
> {
> - if (bt->flags & BT_DUMPFILE_SEARCH)
> - return x86_64_get_dumpfile_stack_frame(bt, pcp, spp);
> + struct user_regs_bitmap_struct *ur_bitmap;
> + ulong pcp_save = *pcp;
> + ulong spp_save = *spp;
> + ulong sp;
>
> if (bt->flags & BT_SKIP_IDLE)
> bt->flags &= ~BT_SKIP_IDLE;
> + if (pcp)
> + *pcp = x86_64_get_pc(bt);
> + if (spp)
> + sp = *spp = x86_64_get_sp(bt);
> + ur_bitmap = (struct user_regs_bitmap_struct
> *)GETBUF(sizeof(*ur_bitmap));
> + memset(ur_bitmap, 0, sizeof(struct user_regs_bitmap_struct));
>
Can you keep the same style?(sorry to be picky :-))
+ ur_bitmap = (struct user_regs_bitmap_struct
*)GETBUF(sizeof(*ur_bitmap));
+ memset(ur_bitmap, 0, sizeof(*ur_bitmap));
or
+ ur_bitmap = (struct user_regs_bitmap_struct *)GETBUF(sizeof(struct
user_regs_bitmap_struct)));
+ memset(ur_bitmap, 0, sizeof(struct user_regs_bitmap_struct));
+
> + if (VALID_MEMBER(inactive_task_frame_bp)) {
> + if (!is_task_active(bt->task)) {
> + /*
> + * For inactive tasks in live and dumpfile, regs
> can be
> + * get from inactive_task_frame struct.
> + */
> + ur_bitmap->r15 =
> GET_REG_FROM_INACTIVE_TASK_FRAME(r15);
> + ur_bitmap->r14 =
> GET_REG_FROM_INACTIVE_TASK_FRAME(r14);
> + ur_bitmap->r13 =
> GET_REG_FROM_INACTIVE_TASK_FRAME(r13);
> + ur_bitmap->r12 =
> GET_REG_FROM_INACTIVE_TASK_FRAME(r12);
> + ur_bitmap->bx =
> GET_REG_FROM_INACTIVE_TASK_FRAME(bx);
> + ur_bitmap->bp =
> GET_REG_FROM_INACTIVE_TASK_FRAME(bp);
> + /*
> + For inactive tasks:
> + crash> task -x 1|grep sp
> + sp = 0xffffc90000013d00
> + crash> rd ffffc90000013d00 32
> + ffffc90000013d00: ffff888104dad4a8
> 0000000000000000 r15,r14
> + ffffc90000013d10: ffff888100280000
> ffff888100216500 r13,r12
> + ffffc90000013d20: ffff888100217018
> ffff88817fd2c800 rbx,rbp
> + ffffc90000013d30: ffffffff81a6a1b3
> ffffc90000013de0 saved_rip,...
> + ffffc90000013d40: ffff888100000004
> 99ccbf53ea493000
> + ffffc90000013d50: ffff888100216500
> ffff888100216500
> +
> + crash> dis __schedule
> + ...
> + 0xffffffff81a6a1ab <__schedule+507>: mov
> %r13,%rsi
> + 0xffffffff81a6a1ae <__schedule+510>: call
> 0xffffffff81003490 <__switch_to_asm>
> + 0xffffffff81a6a1b3 <__schedule+515>: mov
> %rax,%rdi <<=== saved_rip
> + ...
> + crash> dis __switch_to_asm
> + 0xffffffff81003490 <__switch_to_asm>: push %rbp
> + 0xffffffff81003491 <__switch_to_asm+1>: push %rbx
> + 0xffffffff81003492 <__switch_to_asm+2>: push %r12
> + 0xffffffff81003494 <__switch_to_asm+4>: push %r13
> + 0xffffffff81003496 <__switch_to_asm+6>: push %r14
> + 0xffffffff81003498 <__switch_to_asm+8>: push %r15
> + 0xffffffff8100349a <__switch_to_asm+10>:
> mov %rsp,0x14d8(%rdi)
> + ...
> + Now saved_rip = ffffffff81a6a1b3, and we are
> starting
> + the stack unwind at saved_rip, which is function
> __schedule()
> + instead of function __switch_to_asm(), so the
> stack pointer should
> + be rewind from ffffc90000013d00 back to
> ffffc90000013d38,
> + aka *spp += 7 * reg_len. Otherwise we are
> unwinding function
> + __schedule() but with __switch_to_asm()'s stack
> frame, which
> + will fail.
> + */
> + sp += 7 * sizeof(unsigned long);
>
It seems this may still rely on the compiler, have you tried another
compiler? I'm not sure if there are the same results, just my concerns.
Anyway, If there is no better way or solution, at least we should comment
on it and point out the current limitations. What do you think?
+ ur_bitmap->bitmap += 0x3f;
>
Also why is it 0x3f? Can you explain the details?
> + } else {
> + /*
> + * For active tasks in dumpfile, we get regs
> through the
> + * original way. For active tasks in live, we only
> get
> + * ip and sp in the end of the function.
> + */
> + if (bt->flags & BT_DUMPFILE_SEARCH) {
> + FREEBUF(ur_bitmap);
> + bt->need_free = FALSE;
>
See the comments in the previous thread.
> + *pcp = pcp_save;
> + *spp = spp_save;
> + return x86_64_get_dumpfile_stack_frame(bt,
> pcp, spp);
> + }
> + }
> + } else {
> + if (!is_task_active(bt->task)) {
> + readmem(*spp, KVADDR, &(ur_bitmap->bp),
> + sizeof(ulong), "ur_bitmap->bp",
> FAULT_ON_ERROR);
> + ur_bitmap->bitmap += 0x10;
> + } else {
> + if (bt->flags & BT_DUMPFILE_SEARCH) {
> + FREEBUF(ur_bitmap);
> + bt->need_free = FALSE;
> + *pcp = pcp_save;
> + *spp = spp_save;
> + return x86_64_get_dumpfile_stack_frame(bt,
> pcp, spp);
> + }
> + }
> + }
>
> - if (pcp)
> - *pcp = x86_64_get_pc(bt);
> - if (spp)
> - *spp = x86_64_get_sp(bt);
> + ur_bitmap->ip = *pcp;
> + ur_bitmap->sp = sp;
> + ur_bitmap->bitmap += 0x50000;
> +
> + bt->machdep = ur_bitmap;
> + bt->need_free = TRUE;
> }
>
> /*
> @@ -6458,6 +6578,14 @@ x86_64_ORC_init(void)
>
> MEMBER_OFFSET_INIT(inactive_task_frame_bp, "inactive_task_frame",
> "bp");
> MEMBER_OFFSET_INIT(inactive_task_frame_ret_addr,
> "inactive_task_frame", "ret_addr");
> + MEMBER_OFFSET_INIT(inactive_task_frame_r15, "inactive_task_frame",
> "r15");
> + MEMBER_OFFSET_INIT(inactive_task_frame_r14, "inactive_task_frame",
> "r14");
> + MEMBER_OFFSET_INIT(inactive_task_frame_r13, "inactive_task_frame",
> "r13");
> + MEMBER_OFFSET_INIT(inactive_task_frame_r12, "inactive_task_frame",
> "r12");
> + MEMBER_OFFSET_INIT(inactive_task_frame_flags,
> "inactive_task_frame", "flags");
> + MEMBER_OFFSET_INIT(inactive_task_frame_si, "inactive_task_frame",
> "si");
> + MEMBER_OFFSET_INIT(inactive_task_frame_di, "inactive_task_frame",
> "di");
> + MEMBER_OFFSET_INIT(inactive_task_frame_bx, "inactive_task_frame",
> "bx");
>
> orc->has_signal = MEMBER_EXISTS("orc_entry", "signal"); /* added
> at 6.3 */
> orc->has_end = MEMBER_EXISTS("orc_entry", "end"); /* removed
> at 6.4 */
> @@ -9071,17 +9199,128 @@ x86_64_get_kvaddr_ranges(struct vaddr_range *vrp)
> return cnt;
> }
>
> +#define REG_CASE(R, r) \
> + case R##_REGNUM: \
> + if (size != sizeof(ur_bitmap->r)) { \
> + ret = FALSE; break; \
> + } else { \
> + memcpy(value, &ur_bitmap->r, size); \
> + ret = TRUE; break; \
> + }
> +
> static int
> -x86_64_get_cpu_reg(int cpu, int regno, const char *name,
> +x86_64_get_current_task_reg(int regno, const char *name,
> int size, void *value)
> {
> - if (regno >= LAST_REGNUM)
> - return FALSE;
> + struct bt_info bt_info, bt_setup;
> + struct task_context *tc;
> + struct user_regs_bitmap_struct *ur_bitmap;
> + ulong ip, sp;
> + bool ret = FALSE;
>
> - if (VMSS_DUMPFILE())
> - return vmware_vmss_get_cpu_reg(cpu, regno, name, size,
> value);
> + switch (regno) {
> + case RAX_REGNUM ... GS_REGNUM:
> + case FS_BASE_REGNUM ... ORIG_RAX_REGNUM:
> + break;
> + default:
> + return FALSE;
> + }
>
>
Ditto.
> - return FALSE;
> + tc = CURRENT_CONTEXT();
> + if (!tc)
> + return FALSE;
> +
> + if (VMSS_DUMPFILE())
> + return vmware_vmss_get_cpu_reg(tc->processor, regno, name,
> size, value);
> +
> + BZERO(&bt_setup, sizeof(struct bt_info));
> + clone_bt_info(&bt_setup, &bt_info, tc);
> + fill_stackbuf(&bt_info);
> +
> + // reusing the get_dumpfile_regs function to get pt regs structure
> + get_dumpfile_regs(&bt_info, &sp, &ip);
> + if (bt_info.stackbuf)
> + FREEBUF(bt_info.stackbuf);
> + ur_bitmap = (struct user_regs_bitmap_struct *)bt_info.machdep;
> + if (!ur_bitmap)
> + return FALSE;
> +
> + /* Get all registers from elf notes*/
> + if (!bt_info.need_free) {
> + goto get_all;
> + }
> +
>
Ditto.
> + /* Get subset registers from stack frame*/
> + switch (ur_bitmap->bitmap) {
> + case 0x5003f:
> + switch (regno) {
> + case R12_REGNUM ... R15_REGNUM:
> + case RBP_REGNUM:
> + case RBX_REGNUM:
> + case RIP_REGNUM:
> + case RSP_REGNUM:
> + break;
> + default:
> + return FALSE;
> + }
> + break;
> + case 0x50010:
> + switch (regno) {
> + case RBP_REGNUM:
> + case RIP_REGNUM:
> + case RSP_REGNUM:
> + break;
> + default:
> + return FALSE;
> + }
> + break;
> + case 0x50000:
> + switch (regno) {
> + case RIP_REGNUM:
> + case RSP_REGNUM:
> + break;
> + default:
> + return FALSE;
> + }
> + break;
> + }
>
Can you help to explain more details about several magic number values?
Such as 0x5003f, 0x50000, 0x50010. Or you may point out where they come
from, it may be helpful to add them here as code comment.
Thanks
Lianbo
+
> +get_all:
> + switch (regno) {
> + REG_CASE(RAX, ax);
> + REG_CASE(RBX, bx);
> + REG_CASE(RCX, cx);
> + REG_CASE(RDX, dx);
> + REG_CASE(RSI, si);
> + REG_CASE(RDI, di);
> + REG_CASE(RBP, bp);
> + REG_CASE(RSP, sp);
> + REG_CASE(R8, r8);
> + REG_CASE(R9, r9);
> + REG_CASE(R10, r10);
> + REG_CASE(R11, r11);
> + REG_CASE(R12, r12);
> + REG_CASE(R13, r13);
> + REG_CASE(R14, r14);
> + REG_CASE(R15, r15);
> + REG_CASE(RIP, ip);
> + REG_CASE(EFLAGS, flags);
> + REG_CASE(CS, cs);
> + REG_CASE(SS, ss);
> + REG_CASE(DS, ds);
> + REG_CASE(ES, es);
> + REG_CASE(FS, fs);
> + REG_CASE(GS, gs);
> + REG_CASE(FS_BASE, fs_base);
> + REG_CASE(GS_BASE, gs_base);
> + REG_CASE(ORIG_RAX, orig_ax);
> + }
> +
> + if (bt_info.need_free) {
> + FREEBUF(ur_bitmap);
> + bt_info.need_free = FALSE;
> + }
> + return ret;
> }
>
> /*
> --
> 2.40.1
>
4 months
Re: [PATCH v4 08/16] ppc64: correct gdb passthroughs by implementing machdep->get_cpu_reg
by lijiang
On Fri, May 31, 2024 at 5:33 PM <devel-request(a)lists.crash-utility.osci.io>
wrote:
> Date: Fri, 31 May 2024 17:19:31 +0800
> From: Tao Liu <ltao(a)redhat.com>
> Subject: [Crash-utility] [PATCH v4 08/16] ppc64: correct gdb
> passthroughs by implementing machdep->get_cpu_reg
> To: devel(a)lists.crash-utility.osci.io
> Cc: Mahesh J Salgaonkar <mahesh(a)linux.ibm.com>, "Naveen N . Rao"
> <naveen.n.rao(a)linux.vnet.ibm.com>, Lianbo Jiang <
> lijiang(a)redhat.com>,
> Alexey Makhalov <alexey.makhalov(a)broadcom.com>
> Message-ID: <20240531091939.97828-9-ltao(a)redhat.com>
> Content-Type: text/plain; charset=UTF-8
>
> From: Aditya Gupta <adityag(a)linux.ibm.com>
>
> Currently, gdb passthroughs of 'bt', 'frame', 'up', 'down', 'info
> locals' don't work. This is due to gdb not knowing the register values to
> unwind the stack frames
>
> Every gdb passthrough goes through `gdb_interface`. And then, gdb expects
> `crash_target::fetch_registers` to give it the register values, which is
> dependent on `machdep->get_cpu_reg` to read the register values for
> specific architecture.
>
> ----------------------------
> gdb passthrough (eg. "bt") | |
> crash -------------------------> | |
> | gdb_interface |
> | |
> | |
> | ---------------------- |
> fetch_registers | | | |
> crash_target<-------------------------+--| gdb | |
> --------------------------+->| | |
> Registers (SP,NIP, etc.)| | | |
> | | | |
> | ---------------------- |
> ----------------------------
>
> Implement `machdep->get_cpu_reg` on PPC64, so that crash provides the
> register values to gdb to unwind stack frames properly
>
> With these changes, on powerpc, 'bt' command output in gdb mode, will look
> like this:
>
> gdb> bt
> #0 0xc0000000002a53e8 in crash_setup_regs (oldregs=<optimized out>,
> newregs=0xc00000000486f8d8) at ./arch/powerpc/include/asm/kexec.h:69
> #1 __crash_kexec (regs=<optimized out>) at kernel/kexec_core.c:974
> #2 0xc000000000168918 in panic (fmt=<optimized out>) at
> kernel/panic.c:358
> #3 0xc000000000b735f8 in sysrq_handle_crash (key=<optimized out>) at
> drivers/tty/sysrq.c:155
> #4 0xc000000000b742cc in __handle_sysrq (key=key@entry=99,
> check_mask=check_mask@entry=false) at drivers/tty/sysrq.c:602
> #5 0xc000000000b7506c in write_sysrq_trigger (file=<optimized out>,
> buf=<optimized out>, count=2, ppos=<optimized out>) at
> drivers/tty/sysrq.c:1163
> #6 0xc00000000069a7bc in pde_write (ppos=<optimized out>,
> count=<optimized out>, buf=<optimized out>, file=<optimized out>,
> pde=0xc000000009ed3a80) at fs/proc/inode.c:340
> #7 proc_reg_write (file=<optimized out>, buf=<optimized out>,
> count=<optimized out>, ppos=<optimized out>) at fs/proc/inode.c:352
> #8 0xc0000000005b3bbc in vfs_write (file=file@entry=0xc00000009dda7d00,
> buf=buf@entry=0xebcfc7c6040 <error: Cannot access memory at address
> 0xebcfc7c6040>, count=count@entry=2, pos=pos@entry=0xc00000000486fda0) at
> fs/read_write.c:582
>
> instead of earlier output without this patch:
>
> gdb> bt
> #0 <unavailable> in ?? ()
> Backtrace stopped: previous frame identical to this frame (corrupt
> stack?)
>
> Also, 'get_dumpfile_regs' has been introduced to get registers from
> multiple supported vmcore formats. Correspondingly a flag
> 'BT_NO_PRINT_REGS'
> has been introduced to tell helper functions to get registers, to not
> print registers with every call to backtrace in gdb.
>
> Note: This feature to support GDB unwinding doesn't support live debugging
>
> Cc: Sourabh Jain <sourabhjain(a)linux.ibm.com>
> Cc: Hari Bathini <hbathini(a)linux.ibm.com>
> Cc: Mahesh J Salgaonkar <mahesh(a)linux.ibm.com>
> Cc: Naveen N. Rao <naveen.n.rao(a)linux.vnet.ibm.com>
> Cc: Lianbo Jiang <lijiang(a)redhat.com>
> Cc: HAGIO KAZUHITO(萩尾 一仁) <k-hagio-ab(a)nec.com>
> Cc: Tao Liu <ltao(a)redhat.com>
> Cc: Alexey Makhalov <alexey.makhalov(a)broadcom.com>
> Improved-by: Tao Liu <ltao(a)redhat.com>
> Signed-off-by: Aditya Gupta <adityag(a)linux.ibm.com>
> ---
> defs.h | 123 +++++++++++++++++++++++++++++++++++++++++
> kernel.c | 33 +++++++++++
> ppc64.c | 163 ++++++++++++++++++++++++++++++++++++++++++++++++++++---
> 3 files changed, 312 insertions(+), 7 deletions(-)
>
> diff --git a/defs.h b/defs.h
> index c0e6a29..8450aea 100644
> --- a/defs.h
> +++ b/defs.h
> @@ -982,6 +982,7 @@ struct bt_info {
> ulong eframe_ip;
> ulong radix;
> ulong *cpumask;
> + bool need_free;
Comment on it later.
> };
>
> #define STACK_OFFSET_TYPE(OFF) \
> @@ -6096,6 +6097,7 @@ int load_module_symbols_helper(char *);
> void unlink_module(struct load_module *);
> int check_specified_module_tree(char *, char *);
> int is_system_call(char *, ulong);
> +void get_dumpfile_regs(struct bt_info*, ulong*, ulong*);
> void generic_dump_irq(int);
> void generic_get_irq_affinity(int);
> void generic_show_interrupts(int, ulong *);
> @@ -6195,6 +6197,7 @@ ulong cpu_map_addr(const char *type);
> #define BT_REGS_NOT_FOUND (0x4000000000000ULL)
> #define BT_OVERFLOW_STACK (0x8000000000000ULL)
> #define BT_SKIP_IDLE (0x10000000000000ULL)
> +#define BT_NO_PRINT_REGS (0x20000000000000ULL)
> #define BT_SYMBOL_OFFSET (BT_SYMBOLIC_ARGS)
>
> #define BT_REF_HEXVAL (0x1)
> @@ -8052,6 +8055,126 @@ enum x86_64_regnum {
> LAST_REGNUM
> };
>
> +/*
> + * Register numbers to make crash_target->fetch_registers()
> + * ---> machdep->get_current_task_reg() work properly.
> + *
> + * These register numbers and names are given according to output of
> + * `rs6000_register_name`, because that is what was being used by
> + * crash_target::fetch_registers in case of PPC64
> + */
> +enum ppc64_regnum {
> + PPC64_R0_REGNUM = 0,
> + PPC64_R1_REGNUM,
> + PPC64_R2_REGNUM,
> + PPC64_R3_REGNUM,
> + PPC64_R4_REGNUM,
> + PPC64_R5_REGNUM,
> + PPC64_R6_REGNUM,
> + PPC64_R7_REGNUM,
> + PPC64_R8_REGNUM,
> + PPC64_R9_REGNUM,
> + PPC64_R10_REGNUM,
> + PPC64_R11_REGNUM,
> + PPC64_R12_REGNUM,
> + PPC64_R13_REGNUM,
> + PPC64_R14_REGNUM,
> + PPC64_R15_REGNUM,
> + PPC64_R16_REGNUM,
> + PPC64_R17_REGNUM,
> + PPC64_R18_REGNUM,
> + PPC64_R19_REGNUM,
> + PPC64_R20_REGNUM,
> + PPC64_R21_REGNUM,
> + PPC64_R22_REGNUM,
> + PPC64_R23_REGNUM,
> + PPC64_R24_REGNUM,
> + PPC64_R25_REGNUM,
> + PPC64_R26_REGNUM,
> + PPC64_R27_REGNUM,
> + PPC64_R28_REGNUM,
> + PPC64_R29_REGNUM,
> + PPC64_R30_REGNUM,
> + PPC64_R31_REGNUM,
> +
> + PPC64_F0_REGNUM = 32,
> + PPC64_F1_REGNUM,
> + PPC64_F2_REGNUM,
> + PPC64_F3_REGNUM,
> + PPC64_F4_REGNUM,
> + PPC64_F5_REGNUM,
> + PPC64_F6_REGNUM,
> + PPC64_F7_REGNUM,
> + PPC64_F8_REGNUM,
> + PPC64_F9_REGNUM,
> + PPC64_F10_REGNUM,
> + PPC64_F11_REGNUM,
> + PPC64_F12_REGNUM,
> + PPC64_F13_REGNUM,
> + PPC64_F14_REGNUM,
> + PPC64_F15_REGNUM,
> + PPC64_F16_REGNUM,
> + PPC64_F17_REGNUM,
> + PPC64_F18_REGNUM,
> + PPC64_F19_REGNUM,
> + PPC64_F20_REGNUM,
> + PPC64_F21_REGNUM,
> + PPC64_F22_REGNUM,
> + PPC64_F23_REGNUM,
> + PPC64_F24_REGNUM,
> + PPC64_F25_REGNUM,
> + PPC64_F26_REGNUM,
> + PPC64_F27_REGNUM,
> + PPC64_F28_REGNUM,
> + PPC64_F29_REGNUM,
> + PPC64_F30_REGNUM,
> + PPC64_F31_REGNUM,
> +
> + PPC64_PC_REGNUM = 64,
> + PPC64_MSR_REGNUM = 65,
> + PPC64_CR_REGNUM = 66,
> + PPC64_LR_REGNUM = 67,
> + PPC64_CTR_REGNUM = 68,
> + PPC64_XER_REGNUM = 69,
> + PPC64_FPSCR_REGNUM = 70,
> +
> + PPC64_VR0_REGNUM = 106,
> + PPC64_VR1_REGNUM,
> + PPC64_VR2_REGNUM,
> + PPC64_VR3_REGNUM,
> + PPC64_VR4_REGNUM,
> + PPC64_VR5_REGNUM,
> + PPC64_VR6_REGNUM,
> + PPC64_VR7_REGNUM,
> + PPC64_VR8_REGNUM,
> + PPC64_VR9_REGNUM,
> + PPC64_VR10_REGNUM,
> + PPC64_VR11_REGNUM,
> + PPC64_VR12_REGNUM,
> + PPC64_VR13_REGNUM,
> + PPC64_VR14_REGNUM,
> + PPC64_VR15_REGNUM,
> + PPC64_VR16_REGNUM,
> + PPC64_VR17_REGNUM,
> + PPC64_VR18_REGNUM,
> + PPC64_VR19_REGNUM,
> + PPC64_VR20_REGNUM,
> + PPC64_VR21_REGNUM,
> + PPC64_VR22_REGNUM,
> + PPC64_VR23_REGNUM,
> + PPC64_VR24_REGNUM,
> + PPC64_VR25_REGNUM,
> + PPC64_VR26_REGNUM,
> + PPC64_VR27_REGNUM,
> + PPC64_VR28_REGNUM,
> + PPC64_VR29_REGNUM,
> + PPC64_VR30_REGNUM,
> + PPC64_VR31_REGNUM,
> +
> + PPC64_VSCR_REGNUM = 138,
> + PPC64_VRSAVE_REGNU = 139
> +};
> +
>
Is it possible to move the regnum definitions to ppc64.c? I did not see
them used in other modules. But, not sure if there are any compilation
issues, I haven't tried it.
/* crash_target.c */
> extern int gdb_change_thread_context ();
>
> diff --git a/kernel.c b/kernel.c
> index 78b7b1e..3730c55 100644
> --- a/kernel.c
> +++ b/kernel.c
> @@ -3533,6 +3533,39 @@ get_lkcd_regs(struct bt_info *bt, ulong *eip, ulong
> *esp)
> machdep->get_stack_frame(bt, eip, esp);
> }
>
> +void
> +get_dumpfile_regs(struct bt_info *bt, ulong *eip, ulong *esp)
> +{
> + bt->flags |= BT_NO_PRINT_REGS;
> +
> + if (NETDUMP_DUMPFILE())
> + get_netdump_regs(bt, eip, esp);
> + else if (KDUMP_DUMPFILE())
> + get_kdump_regs(bt, eip, esp);
> + else if (DISKDUMP_DUMPFILE())
> + get_diskdump_regs(bt, eip, esp);
> + else if (KVMDUMP_DUMPFILE())
> + get_kvmdump_regs(bt, eip, esp);
> + else if (LKCD_DUMPFILE())
> + get_lkcd_regs(bt, eip, esp);
> + else if (XENDUMP_DUMPFILE())
> + get_xendump_regs(bt, eip, esp);
> + else if (SADUMP_DUMPFILE())
> + get_sadump_regs(bt, eip, esp);
> + else if (VMSS_DUMPFILE())
> + get_vmware_vmss_regs(bt, eip, esp);
> + else if (REMOTE_PAUSED()) {
> + if (!is_task_active(bt->task) || !get_remote_regs(bt, eip,
> esp))
> + machdep->get_stack_frame(bt, eip, esp);
> + } else
> + machdep->get_stack_frame(bt, eip, esp);
> +
> + bt->flags &= ~BT_NO_PRINT_REGS;
> +
> + bt->instptr = *eip;
> + bt->stkptr = *esp;
> +}
> +
>
> /*
> * Store the head of the kernel module list for future use.
> diff --git a/ppc64.c b/ppc64.c
> index e8930a1..ee1995a 100644
> --- a/ppc64.c
> +++ b/ppc64.c
> @@ -55,6 +55,8 @@ static void ppc64_set_bt_emergency_stack(enum
> emergency_stack_type type,
> static char * ppc64_check_eframe(struct ppc64_pt_regs *);
> static void ppc64_print_eframe(char *, struct ppc64_pt_regs *,
> struct bt_info *);
> +static int ppc64_get_current_task_reg(int regno, const char *name, int
> size,
> + void *value);
> static void parse_cmdline_args(void);
> static int ppc64_paca_percpu_offset_init(int);
> static void ppc64_init_cpu_info(void);
> @@ -73,6 +75,26 @@ static ulong pmd_page_vaddr_l4(ulong pmd);
> static int is_opal_context(ulong sp, ulong nip);
> void opalmsg(void);
>
> +struct user_regs_bitmap_struct {
> + struct {
> + long gpr[32];
> + long nip;
> + long msr;
> + long orig_gpr3; /* Used for restarting system calls */
> + long ctr;
> + long link;
> + long xer;
> + long ccr;
> + long mq; /* 601 only (not used at present) */
> + /* Used on APUS to hold IPL value.
> */
> + long trap; /* Reason for being here */
> + long dar; /* Fault registers */
> + long dsisr;
> + long result; /* Result of a system call */
> + };
> + ulong bitmap;
> +};
> +
> static int is_opal_context(ulong sp, ulong nip)
> {
> uint64_t opal_start, opal_end;
> @@ -704,6 +726,8 @@ ppc64_init(int when)
> error(FATAL, "cannot malloc hwirqstack
> buffer space.");
> }
>
> + machdep->get_current_task_reg = ppc64_get_current_task_reg;
> +
> ppc64_init_paca_info();
>
> if (!machdep->hz) {
> @@ -2501,6 +2525,120 @@ ppc64_print_eframe(char *efrm_str, struct
> ppc64_pt_regs *regs,
> ppc64_print_nip_lr(regs, 1);
> }
>
> +static int
> +ppc64_get_current_task_reg(int regno, const char *name, int size,
> + void *value)
> +{
> + struct bt_info bt_info, bt_setup;
> + struct task_context *tc;
> + ulong task;
> + struct user_regs_bitmap_struct *ur_bitmap;
> + ulong ip, sp;
> + bool ret = FALSE;
> +
> + /* Currently only handling registers available in ppc64_pt_regs:
> + *
> + * 0-31: r0-r31
> + * 64: pc/nip
> + * 65: msr
> + *
> + * 67: lr
> + * 68: ctr
> + */
> + switch (regno) {
> + case PPC64_R0_REGNUM ... PPC64_R31_REGNUM:
> +
> + case PPC64_PC_REGNUM:
> + case PPC64_MSR_REGNUM:
> + case PPC64_LR_REGNUM:
> + case PPC64_CTR_REGNUM:
> + break;
> +
> + default:
> + // return false if we can't get that register
> + if (CRASHDEBUG(1))
> + error(WARNING, "unsupported register, regno=%d\n",
> regno);
> + return FALSE;
> + }
> +
>
The above switch-case code block can be defined as a new static function to
check the validity of regnum, for example:
static int sanity_check_regs(int regno)
And then it can be invoked in the current function. Just an idea, maybe
this looks more concise and readable. Any thoughts?
+ tc = CURRENT_CONTEXT();
> + if (!tc)
> + return FALSE;
> + BZERO(&bt_setup, sizeof(struct bt_info));
> + clone_bt_info(&bt_setup, &bt_info, tc);
> + fill_stackbuf(&bt_info);
> +
> + // reusing the get_dumpfile_regs function to get pt regs structure
> + get_dumpfile_regs(&bt_info, &sp, &ip);
> + if (bt_info.stackbuf)
> + FREEBUF(bt_info.stackbuf);
> + ur_bitmap = (struct user_regs_bitmap_struct *)bt_info.machdep;
> +
> + if (!ur_bitmap) {
> + error(WARNING, "pt_regs not available for cpu %d\n",
> tc->processor);
> + return FALSE;
> + }
> + if (!bt_info.need_free) {
> + goto get_all;
> + }
>
Could you please explain why the need_free is used to determine if it
should jump into the get_all code block? The flag looks very strange, or
is there any another solution?
+
> + switch (ur_bitmap->bitmap) {
> + case 0x100000002:
> + switch (regno) {
> + case PPC64_R1_REGNUM:
> + case PPC64_PC_REGNUM:
> + break;
> + default:
> + return FALSE;
> + }
> + break;
> + }
>
The switch-case code block should be equivalent to the implementation
below(if I understood correctly), and looks more readable?
+ /* Only for R1 and PC values */
+ if ((ur_bitmap->bitmap == 0x100000002) &&
+ (regno != PPC64_R1_REGNUM && regno != PPC64_PC_REGNUM))
+ return FALSE;
> +
> +get_all:
> + switch (regno) {
> + case PPC64_R0_REGNUM ... PPC64_R31_REGNUM:
> + if (size != sizeof(ur_bitmap->gpr[regno])) {
> + ret = FALSE; break; // size mismatch
> + }
>
In fact , the variable ret can be initialized to FALSE one time at start of
this function, and then it can be simplified as below:
+ switch (regno) {
+ case PPC64_R0_REGNUM ... PPC64_R31_REGNUM:
+ if (size != sizeof(ur_bitmap->gpr[regno]))
+ break;
+ memcpy(value, &ur_bitmap->gpr[regno], size);
+ ret = TRUE;
+ break;
+ memcpy(value, &ur_bitmap->gpr[regno], size);
> + ret = TRUE; break;
> +
> + case PPC64_PC_REGNUM:
> + if (size != sizeof(ur_bitmap->nip)) {
> + ret = FALSE; break; // size mismatch
> + }
> + memcpy(value, &ur_bitmap->nip, size);
> + ret = TRUE; break;
> +
> + case PPC64_MSR_REGNUM:
> + if (size != sizeof(ur_bitmap->msr)) {
> + ret = FALSE; break; // size mismatch
> + }
> + memcpy(value, &ur_bitmap->msr, size);
> + ret = TRUE; break;
> +
> + case PPC64_LR_REGNUM:
> + if (size != sizeof(ur_bitmap->link)) {
> + ret = FALSE; break; // size mismatch
> + }
> + memcpy(value, &ur_bitmap->link, size);
> + ret = TRUE; break;
> +
> + case PPC64_CTR_REGNUM:
> + if (size != sizeof(ur_bitmap->ctr)) {
> + ret = FALSE; break; // size mismatch
> + }
> + memcpy(value, &ur_bitmap->ctr, size);
> + ret = TRUE; break;
> + }
> + if (bt_info.need_free) {
> + FREEBUF(ur_bitmap);
> + bt_info.need_free = FALSE;
> + }
> +
> + return ret;
> +}
> +
> /*
> * For vmcore typically saved with KDump or FADump, get SP and IP values
> * from the saved ptregs.
> @@ -2613,9 +2751,11 @@ ppc64_get_dumpfile_stack_frame(struct bt_info
> *bt_in, ulong *nip, ulong *ksp)
> pt_regs = (struct ppc64_pt_regs *)bt->machdep;
> ur_nip = pt_regs->nip;
> ur_ksp = pt_regs->gpr[1];
> - /* Print the collected regs for panic task. */
> - ppc64_print_regs(pt_regs);
> - ppc64_print_nip_lr(pt_regs, 1);
> + if (!(bt->flags & BT_NO_PRINT_REGS)) {
> + /* Print the collected regs for panic task. */
> + ppc64_print_regs(pt_regs);
> + ppc64_print_nip_lr(pt_regs, 1);
> + }
> } else if ((pc->flags & KDUMP) ||
> ((pc->flags & DISKDUMP) &&
> (*diskdump_flags & KDUMP_CMPRS_LOCAL))) {
> @@ -2779,19 +2919,28 @@ static void
> ppc64_get_stack_frame(struct bt_info *bt, ulong *pcp, ulong *spp)
> {
> ulong ksp, nip;
> -
> + struct user_regs_bitmap_struct *ur_bitmap;
> +
> nip = ksp = 0;
>
> - if (DUMPFILE() && is_task_active(bt->task))
> + if (DUMPFILE() && is_task_active(bt->task)) {
> ppc64_get_dumpfile_stack_frame(bt, &nip, &ksp);
> - else
> + bt->need_free = FALSE;
> + } else {
> get_ppc64_frame(bt, &nip, &ksp);
> + ur_bitmap = (struct user_regs_bitmap_struct
> *)GETBUF(sizeof(*ur_bitmap));
> + memset(ur_bitmap, 0, sizeof(*ur_bitmap));
> + ur_bitmap->nip = nip;
> + ur_bitmap->gpr[1] = ksp;
> + ur_bitmap->bitmap += 0x100000002;
>
This confused me a little bit. Maybe it's good enough like this?
+ ur_bitmap->bitmap = 0x100000002;
Thanks
Lianbo
+ bt->machdep = ur_bitmap;
> + bt->need_free = TRUE;
> + }
>
> if (pcp)
> *pcp = nip;
> if (spp)
> *spp = ksp;
> -
> }
>
> static ulong
> --
> 2.40.1
>
4 months