Re: [PATCH v1 1/3] ppc64: Fix bt printing error stack trace
by lijiang
Hi, Tao
Thank you for the fix.
On Wed, Sep 18, 2024 at 7:44 AM <devel-request(a)lists.crash-utility.osci.io>
wrote:
> Date: Wed, 18 Sep 2024 11:42:03 +1200
> From: Tao Liu <ltao(a)redhat.com>
> Subject: [Crash-utility] [PATCH v1 1/3] ppc64: Fix bt printing error
> stack trace
> To: devel(a)lists.crash-utility.osci.io
> Cc: adityag(a)linux.ibm.com, Tao Liu <ltao(a)redhat.com>
> Message-ID: <20240917234205.7783-2-ltao(a)redhat.com>
> Content-Type: text/plain; charset="US-ASCII"; x-default=true
>
> An erroneous stack trace from the bt command was observed:
>
> crash> bt 1
> PID: 1 TASK: c000000003714b80 CPU: 2 COMMAND: "systemd"
> #0 [c0000000037735c0] _end at c0000000037154b0 (unreliable)
> #1 [c000000003773770] __switch_to at c00000000001fa9c
> #2 [c0000000037737d0] __schedule at c00000000112e4ec
> #3 [c0000000037738b0] schedule at c00000000112ea80
> ...
>
> The #0 stack trace is incorrect: a function address should never exceed
> _end.
> The reason is that for kernel >= v6.2, the offset of pt_regs from sp
> changed from STACK_FRAME_OVERHEAD, i.e. 112, to STACK_SWITCH_FRAME_REGS.
> For CONFIG_PPC64_ELF_ABI_V1 it is 112; for ABI_V2 it is 48. So nip is
> read from the wrong stack location when ABI_V2 is enabled.
>
>
Could you add the related kernel commits to the patch log? That would help
me a lot in reviewing the patches.
> Determining whether ABI_V2 is enabled is tricky. This patch does it by
> checking the following:
>
Can you try reading the value of e_flags from the ELF header to
determine which ABI version it is?
Thanks
Lianbo
>
> In arch/powerpc/include/asm/ppc_asm.h:
> #ifdef CONFIG_PPC64_ELF_ABI_V2
> #define STK_GOT 24
> #else
> #define STK_GOT 40
>
> In arch/powerpc/kernel/tm.S:
> _GLOBAL(tm_reclaim)
> mfcr r5
> mflr r0
> stw r5, 8(r1)
> std r0, 16(r1)
> std r2, STK_GOT(r1)
> ...
>
> So the approach used is to disassemble tm_reclaim and extract the STK_GOT
> value from the std instruction.
>
> After the patch:
> crash> bt 1
> PID: 1 TASK: c000000003714b80 CPU: 2 COMMAND: "systemd"
> #0 [c0000000037737d0] __schedule at c00000000112e4ec
> #1 [c0000000037738b0] schedule at c00000000112ea80
> ...
>
> Signed-off-by: Tao Liu <ltao(a)redhat.com>
> ---
> defs.h | 1 +
> ppc64.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++---
> 2 files changed, 58 insertions(+), 3 deletions(-)
>
> diff --git a/defs.h b/defs.h
> index 2231cb6..d5cb8cc 100644
> --- a/defs.h
> +++ b/defs.h
> @@ -4643,6 +4643,7 @@ struct efi_memory_desc_t {
> #define MSR_PR_LG 14 /* Problem State / Privilege Level */
> /* Used to find the user or kernel-mode
> frame*/
>
> +#define STACK_SWITCH_FRAME_REGS 48
> #define STACK_FRAME_OVERHEAD 112
> #define EXCP_FRAME_MARKER 0x7265677368657265
>
> diff --git a/ppc64.c b/ppc64.c
> index e8930a1..6e5f155 100644
> --- a/ppc64.c
> +++ b/ppc64.c
> @@ -72,6 +72,7 @@ static ulong pud_page_vaddr_l4(ulong pud);
> static ulong pmd_page_vaddr_l4(ulong pmd);
> static int is_opal_context(ulong sp, ulong nip);
> void opalmsg(void);
> +static bool is_ppc64_elf_abi_v2(void);
>
> static int is_opal_context(ulong sp, ulong nip)
> {
> @@ -2813,6 +2814,51 @@ ppc64_get_sp(ulong task)
> return sp;
> }
>
> +static bool
> +is_ppc64_elf_abi_v2(void)
> +{
> + char buf1[BUFSIZE];
> + char *pos1, *pos2;
> + int errflag = 0;
> + ulong stk_got = 0;
> + static bool ret = false;
> + static bool checked = false;
> +
> + if (checked == true || !symbol_exists("tm_reclaim"))
> + return ret;
> +
> + sprintf(buf1, "x/16i tm_reclaim");
> + open_tmpfile();
> + if (!gdb_pass_through(buf1, pc->tmpfile, GNU_RETURN_ON_ERROR))
> + goto out;
> + checked = true;
> + rewind(pc->tmpfile);
> + while (fgets(buf1, BUFSIZE, pc->tmpfile)) {
> + // "std r2, STK_GOT(r1)" is expected
> + if (strstr(buf1, "std") &&
> + strstr(buf1, "(r1)") &&
> + (pos1 = strstr(buf1, "r2,"))) {
> + pos1 += strlen("r2,");
> + for (pos2 = pos1; *pos2 != '\0' && *pos2 != '(';
> pos2++);
> + *pos2 = '\0';
> + stk_got = stol(pos1, RETURN_ON_ERROR|QUIET,
> &errflag);
> + break;
> + }
> + }
> +
> + if (!errflag) {
> + switch (stk_got) {
> + case 24:
> + ret = true;
> + case 40:
> + goto out;
> + }
> + }
> + error(WARNING, "Unstable elf_abi v1/v2 detection.\n");
> +out:
> + close_tmpfile();
> + return ret;
> +}
>
> /*
> * get the SP and PC values for idle tasks.
> @@ -2834,9 +2880,17 @@ get_ppc64_frame(struct bt_info *bt, ulong *getpc,
> ulong *getsp)
> sp = ppc64_get_sp(task);
> if (!INSTACK(sp, bt))
> goto out;
> - readmem(sp+STACK_FRAME_OVERHEAD, KVADDR, &regs,
> - sizeof(struct ppc64_pt_regs),
> - "PPC64 pt_regs", FAULT_ON_ERROR);
> +
> + if (THIS_KERNEL_VERSION >= LINUX(6,2,0) && is_ppc64_elf_abi_v2()) {
> + readmem(sp+STACK_SWITCH_FRAME_REGS, KVADDR, &regs,
> + sizeof(struct ppc64_pt_regs),
> + "PPC64 pt_regs", FAULT_ON_ERROR);
> + } else {
> + readmem(sp+STACK_FRAME_OVERHEAD, KVADDR, &regs,
> + sizeof(struct ppc64_pt_regs),
> + "PPC64 pt_regs", FAULT_ON_ERROR);
> + }
> +
> ip = regs.nip;
> closest = closest_symbol(ip);
> if (STREQ(closest, ".__switch_to") || STREQ(closest,
> "__switch_to")) {
> --
> 2.40.1
>
4 days, 1 hour
Re: [PATCH v7 00/15] gdb stack unwinding support for crash utility
by lijiang
Hi, Tao
Thank you for the update.
The following patch fixes a regression, so I would prefer to discuss it as a
separate patch.
[PATCH v7 01/15] Fix the regression of cpumask_t for xen hyper
In addition, I found another issue in my tests (on ppc64le): gdb bt can
display the backtrace for the panic task, but when I switch to another
task, gdb bt cannot display the backtrace:
crash> gdb bt
#0 0xc0000000002bde04 in crash_setup_regs (newregs=0xc00000003264b858,
oldregs=0x0) at ./arch/powerpc/include/asm/kexec.h:133
#1 0xc0000000002be4f8 in __crash_kexec (regs=0x0) at
kernel/crash_core.c:122
#2 0xc00000000016c254 in panic (fmt=0xc0000000015eef20 "sysrq triggered
crash\n") at kernel/panic.c:373
#3 0xc000000000a708b8 in sysrq_handle_crash (key=<optimized out>) at
drivers/tty/sysrq.c:154
#4 0xc000000000a713d4 in __handle_sysrq (key=key@entry=99 'c',
check_mask=check_mask@entry=false) at drivers/tty/sysrq.c:612
#5 0xc000000000a71e94 in write_sysrq_trigger (file=<optimized out>,
buf=<optimized out>, count=2, ppos=<optimized out>) at
drivers/tty/sysrq.c:1181
#6 0xc00000000073260c in pde_write (pde=0xc00000000af9cc00,
file=<optimized out>, buf=<optimized out>, count=<optimized out>,
ppos=<optimized out>) at fs/proc/inode.c:334
#7 proc_reg_write (file=<optimized out>, buf=<optimized out>,
count=<optimized out>, ppos=<optimized out>) at fs/proc/inode.c:346
#8 0xc00000000063c0e0 in vfs_write (file=0xc0000000092d2900,
buf=0x10012536f60 <error: Cannot access memory at address 0x10012536f60>,
count=2, pos=0xc00000003264bd30) at fs/read_write.c:588
#9 vfs_write (file=0xc0000000092d2900, buf=0x10012536f60 <error: Cannot
access memory at address 0x10012536f60>, count=<optimized out>,
pos=0xc00000003264bd30) at fs/read_write.c:570
#10 0xc00000000063c690 in ksys_write (fd=<optimized out>, buf=0x10012536f60
<error: Cannot access memory at address 0x10012536f60>, count=2) at
fs/read_write.c:643
#11 0xc000000000031a28 in system_call_exception (regs=0xc00000003264be80,
r0=<optimized out>) at arch/powerpc/kernel/syscall.c:153
#12 0xc00000000000d05c in system_call_vectored_common () at
arch/powerpc/kernel/interrupt_64.S:198
crash> ps
PID PPID CPU TASK ST %MEM VSZ RSS COMM
0 0 0 c000000002bda980 RU 0.0 0 0
[swapper/0]
> 0 0 1 c000000003864c80 RU 0.0 0 0
[swapper/1]
...
8017 923 0 c000000043a20000 IN 0.2 22528 16256
sshd-session
8025 8017 6 c000000032271880 IN 0.1 22784 11840
sshd-session
> 8026 8025 0 c000000043a26600 RU 0.1 9664 6208 bash
...
11645 2 3 c000000032264c80 ID 0.0 0 0
[kworker/u32:2]
11738 6188 2 c00000003811b180 IN 0.1 43520 9408 pickup
12326 2 0 c00000003226b280 ID 0.0 0 0
[kworker/0:1]
13112 6089 2 c00000000c809900 IN 0.0 7232 3456 sleep
Let's take the "pickup" task as an example:
crash> set 11738
PID: 11738
COMMAND: "pickup"
TASK: c00000003811b180 [THREAD_INFO: c00000003811b180]
CPU: 2
STATE: TASK_INTERRUPTIBLE
crash> gdb bt
#0 0xc0000000a7f876a0 in ?? ()
gdb: gdb request failed: bt
crash> set gdb on
gdb: on
gdb> bt
#0 0xc0000000a7f876a0 in ?? ()
gdb>
Anyway, I ran the same test on x86_64 and aarch64, and it works as
expected there. Can you help to double-check on the ppc64 architecture?
X86 64:
crash> set 14599
PID: 14599
COMMAND: "pickup"
TASK: ffff8f57a0d7c180 [THREAD_INFO: ffff8f57a0d7c180]
CPU: 41
STATE: TASK_INTERRUPTIBLE
crash> gdb bt
#0 0xffffffff8b3efe29 in context_switch (rq=0xffff8f6f1f835900,
prev=0xffff8f57a0d7c180, next=0xffff8f5786720000, rf=0xffff9df22fea7b80) at
kernel/sched/core.c:5208
#1 __schedule (sched_mode=sched_mode@entry=0) at kernel/sched/core.c:6549
#2 0xffffffff8b3f0217 in __schedule_loop (sched_mode=<optimized out>) at
kernel/sched/core.c:6626
#3 schedule () at kernel/sched/core.c:6641
#4 0xffffffff8b3f6eef in schedule_hrtimeout_range_clock
(expires=expires@entry=0xffff9df22fea7cb0, delta=<optimized out>,
delta@entry=99999999, mode=mode@entry=HRTIMER_MODE_ABS,
clock_id=clock_id@entry=1) at kernel/time/hrtimer.c:2293
#5 0xffffffff8b3f7003 in schedule_hrtimeout_range
(expires=expires@entry=0xffff9df22fea7cb0,
delta=delta@entry=99999999, mode=mode@entry=HRTIMER_MODE_ABS) at
kernel/time/hrtimer.c:2340
#6 0xffffffff8aae301c in ep_poll (ep=0xffff8f5790d15d40,
events=events@entry=0x7ffea91b6b90, maxevents=maxevents@entry=100,
timeout=timeout@entry=0xffff9df22fea7d58) at fs/eventpoll.c:2062
#7 0xffffffff8aae3138 in do_epoll_wait (epfd=epfd@entry=8,
events=events@entry=0x7ffea91b6b90, maxevents=maxevents@entry=100,
to=0xffff9df22fea7d58) at fs/eventpoll.c:2464
#8 0xffffffff8aae44a1 in __do_sys_epoll_wait (epfd=<optimized out>,
events=0x7ffea91b6b90, maxevents=<optimized out>, timeout=<optimized out>)
at fs/eventpoll.c:2476
#9 __se_sys_epoll_wait (epfd=<optimized out>, events=<optimized out>,
maxevents=<optimized out>, timeout=<optimized out>) at fs/eventpoll.c:2471
#10 __x64_sys_epoll_wait (regs=<optimized out>) at fs/eventpoll.c:2471
#11 0xffffffff8b3e293d in do_syscall_x64 (regs=0xffff9df22fea7f48, nr=232)
at arch/x86/entry/common.c:52
#12 do_syscall_64 (regs=0xffff9df22fea7f48, nr=232) at
arch/x86/entry/common.c:83
#13 0xffffffff8b40012f in entry_SYSCALL_64 () at
arch/x86/entry/entry_64.S:121
crash>
aarch64:
crash> set 9338
PID: 9338
COMMAND: "pickup"
TASK: ffff0000c7b05400 [THREAD_INFO: ffff0000c7b05400]
CPU: 3
STATE: TASK_INTERRUPTIBLE
crash> gdb bt
#0 __switch_to (prev=<unavailable>, prev@entry=0xffff0000c7b05400,
next=next@entry=<unavailable>) at arch/arm64/kernel/process.c:555
#1 0xffffafc5b5ebd744 in context_switch (rq=0xffff00077bbd0ec0,
prev=0xffff0000c7b05400, next=<unavailable>, rf=0xffff80008ac63a60) at
kernel/sched/core.c:5208
#2 __schedule (sched_mode=sched_mode@entry=0) at kernel/sched/core.c:6549
#3 0xffffafc5b5ebdc2c in __schedule_loop (sched_mode=<optimized out>) at
kernel/sched/core.c:6626
#4 schedule () at kernel/sched/core.c:6641
#5 0xffffafc5b5ec6030 in schedule_hrtimeout_range_clock
(expires=expires@entry=0xffff80008ac63be8, delta=delta@entry=99999999,
mode=mode@entry=HRTIMER_MODE_ABS, clock_id=clock_id@entry=1) at
kernel/time/hrtimer.c:2293
#6 0xffffafc5b5ec618c in schedule_hrtimeout_range
(expires=expires@entry=0xffff80008ac63be8,
delta=delta@entry=99999999, mode=mode@entry=HRTIMER_MODE_ABS) at
kernel/time/hrtimer.c:2340
#7 0xffffafc5b545d33c in ep_poll (ep=<unavailable>,
events=events@entry=0xffffde5c3f68,
maxevents=maxevents@entry=100, timeout=timeout@entry=0xffff80008ac63ce0) at
fs/eventpoll.c:2062
#8 0xffffafc5b545d4e4 in do_epoll_wait (epfd=epfd@entry=8,
events=events@entry=0xffffde5c3f68, maxevents=maxevents@entry=100,
to=to@entry=0xffff80008ac63ce0) at fs/eventpoll.c:2464
#9 0xffffafc5b545d534 in do_epoll_pwait (epfd=epfd@entry=8,
events=events@entry=0xffffde5c3f68, maxevents=maxevents@entry=100,
to=to@entry=0xffff80008ac63ce0, sigsetsize=<optimized out>,
sigmask=<optimized out>) at fs/eventpoll.c:2498
#10 0xffffafc5b545e7c8 in do_epoll_pwait (epfd=8, events=0xffffde5c3f68,
maxevents=100, to=0xffff80008ac63ce0, sigmask=<optimized out>,
sigsetsize=<optimized out>) at fs/eventpoll.c:2495
#11 __do_sys_epoll_pwait (epfd=8, events=0xffffde5c3f68, maxevents=100,
timeout=<optimized out>, sigmask=<optimized out>, sigsetsize=<optimized
out>) at fs/eventpoll.c:2511
#12 __se_sys_epoll_pwait (epfd=8, events=281474412330856, maxevents=100,
timeout=<optimized out>, sigmask=<optimized out>, sigsetsize=<optimized
out>) at fs/eventpoll.c:2505
#13 __arm64_sys_epoll_pwait (regs=<optimized out>) at fs/eventpoll.c:2505
#14 0xffffafc5b4fa99bc in __invoke_syscall (regs=0xffff80008ac63eb0,
syscall_fn=<optimized out>) at arch/arm64/kernel/syscall.c:35
#15 invoke_syscall (regs=regs@entry=0xffff80008ac63eb0, scno=<optimized
out>, sc_nr=sc_nr@entry=463, syscall_table=<optimized out>) at
arch/arm64/kernel/syscall.c:49
#16 0xffffafc5b4fa9ac8 in el0_svc_common (sc_nr=463,
syscall_table=<optimized out>, regs=0xffff80008ac63eb0, scno=<optimized
out>) at arch/arm64/kernel/syscall.c:132
#17 do_el0_svc (regs=regs@entry=0xffff80008ac63eb0) at
arch/arm64/kernel/syscall.c:151
#18 0xffffafc5b5eb6fa4 in el0_svc (regs=0xffff80008ac63eb0) at
arch/arm64/kernel/entry-common.c:712
#19 0xffffafc5b5eb74c0 in el0t_64_sync_handler (regs=<optimized out>) at
arch/arm64/kernel/entry-common.c:730
#20 0xffffafc5b4f91634 in el0t_64_sync () at arch/arm64/kernel/entry.S:598
crash>
BTW: other changes are fine to me.
Thanks
Lianbo
On Wed, Sep 4, 2024 at 3:54 PM <devel-request(a)lists.crash-utility.osci.io>
wrote:
> Date: Wed, 4 Sep 2024 19:49:25 +1200
> From: Tao Liu <ltao(a)redhat.com>
> Subject: [Crash-utility] [PATCH v7 00/15] gdb stack unwinding support
> for crash utility
> To: devel(a)lists.crash-utility.osci.io
> Cc: Tao Liu <ltao(a)redhat.com>
> Message-ID: <20240904074940.21331-1-ltao(a)redhat.com>
> Content-Type: text/plain; charset=UTF-8
>
> This patchset is a rebase/merged version of the following 3 patchsets:
>
> 1): [PATCH v10 0/5] Improve stack unwind on ppc64 [1]
> 2): [PATCH 0/5] x86_64 gdb stack unwinding support [2]
> 3): Clean up on top of one-thread-v2 [3]
>
> A complete description of gdb stack unwinding support for crash can be
> found in [1].
>
> This patchset can be divided into the following 3 parts:
>
> 1) part1: preparations before stack unwinding support, some
> bugs/regressions found when drafting this patchset.
> 2) part2: common part for all CPU archs, mainly dealing with
> crash_target.c/gdb_interface.c files, in order to
> support different archs.
> 3) part3: arch specific, for each ppc64/x86_64/arm64/vmware
> stack unwinding support.
>
> === part 3
> arm64: Add gdb stack unwinding support
> vmware_guestdump: Various format versions support
> x86_64: Add gdb stack unwinding support
> ppc64: correct gdb passthroughs by implementing
> machdep->get_current_task_reg
>
> === part 2
> Conditionally output gdb stack unwinding stop reasons
> Stop stack unwinding at non-kernel address
> Print task pid/command instead of CPU index
> Rename get_cpu_reg to get_current_task_reg
> Let crash change gdb context
> Leave only one gdb thread for crash
> Remove 'frame' from prohibited commands list
>
> === part 1
> Fix gdb_interface: restore gdb's output streams at end of gdb_interface
> x86_64: Fix invalid input "=>" for bt command
> Fix cpumask_t recursive dependence issue
> Fix the regression of cpumask_t for xen hyper
> ===
>
> v7 -> v6:
> 1) Reorganised the patchset, re-dividing it into 3 parts instead of the
> previous 2 parts.
> 2) Reworked the cpumask_t part, which resolves comment No.4
> pointed out by lianbo in [4].
> 3) Add conditional output for the failing message of gdb stack unwinding.
> see [PATCH 11/15] Conditionally output gdb stack unwinding stop reasons
> 4) Redraft the commit messages, updated some outdated info.
> 5) Merged "Let crash change gdb context" and "set_context(): check if
> context is already current" into one.
>
> [4]:
> https://www.mail-archive.com/devel@lists.crash-utility.osci.io/msg01067.html
>
> v6 -> v5:
> 1) Refactor patch 4 & 9, which changed the function signature of struct
> get_cpu_reg/get_current_task_reg, and let each patch compile with no
> error when added on.
> 2) Rebased the patchset on top of latest upstream:
> ("79b93ecb2e72ec Fix a "Bus error" issue caused by 'crash --osrelease'
> or
> crash loading")
>
> v5 -> v4:
> 1) Plenty of code refactoring based on Lianbo's comments on v4.
> 2) Removed the magic number when dealing with regs bitmap, see [6].
> 3) Rebased the patchset on top of latest upstream:
> ("1c6da3eaff8207 arm64: Fix bt command show wrong stacktrace on ramdump
> source")
>
> v4 -> v3:
> Fixed the author issue in [PATCH v3 06/16] Fix gdb_interface: restore gdb's
> output streams at end of gdb_interface.
>
> v3 -> v2:
> 1) Updated CC list as pointed out in [4]
> 2) Compiling issues as in [5]
>
> v2 -> v1:
> 1) Added the patch: x86_64: Fix invalid input "=>" for bt command,
> thanks for Kazu's testing.
> 2) Modify the patch: x86_64: Add gdb stack unwinding support, added the
> pcp_save, spp_save and sp, for restoring the value in match of the
> original
> code logic.
>
> [1]:
> https://www.mail-archive.com/devel@lists.crash-utility.osci.io/msg00469.html
> [2]:
> https://www.mail-archive.com/devel@lists.crash-utility.osci.io/msg00488.html
> [3]:
> https://www.mail-archive.com/devel@lists.crash-utility.osci.io/msg00554.html
> [4]:
> https://www.mail-archive.com/devel@lists.crash-utility.osci.io/msg00681.html
> [5]:
> https://www.mail-archive.com/devel@lists.crash-utility.osci.io/msg00715.html
> [6]:
> https://www.mail-archive.com/devel@lists.crash-utility.osci.io/msg00819.html
>
> Aditya Gupta (3):
> Fix gdb_interface: restore gdb's output streams at end of
> gdb_interface
> Remove 'frame' from prohibited commands list
> ppc64: correct gdb passthroughs by implementing
> machdep->get_current_task_reg
>
> Alexey Makhalov (1):
> vmware_guestdump: Various format versions support
>
> Tao Liu (11):
> Fix the regression of cpumask_t for xen hyper
> Fix cpumask_t recursive dependence issue
> x86_64: Fix invalid input "=>" for bt command
> Leave only one gdb thread for crash
> Let crash change gdb context
> Rename get_cpu_reg to get_current_task_reg
> Print task pid/command instead of CPU index
> Stop stack unwinding at non-kernel address
> Conditionally output gdb stack unwinding stop reasons
> x86_64: Add gdb stack unwinding support
> arm64: Add gdb stack unwinding support
>
> arm64.c | 120 +++++++++++++++--
> crash_target.c | 71 ++++++----
> defs.h | 194 ++++++++++++++++++++++++++-
> gdb-10.2.patch | 96 ++++++++++++++
> gdb_interface.c | 39 ++----
> kernel.c | 63 +++++++--
> ppc64.c | 174 +++++++++++++++++++++++-
> symbols.c | 15 +++
> task.c | 34 +++--
> tools.c | 16 ++-
> unwind_x86_64.h | 4 -
> vmware_guestdump.c | 321 +++++++++++++++++++++++++++++++-------------
> x86_64.c | 323 ++++++++++++++++++++++++++++++++++++++++-----
> 13 files changed, 1247 insertions(+), 223 deletions(-)
>
> --
> 2.40.1
>
1 week
Re: [PATCH] X86 64: fix the method for determining whether to enable kalsr
by lijiang
On Tue, Sep 24, 2024 at 11:42 AM <devel-request(a)lists.crash-utility.osci.io>
wrote:
> Date: Tue, 24 Sep 2024 11:32:47 +1200
> From: Tao Liu <ltao(a)redhat.com>
> Subject: [Crash-utility] Re: [PATCH] X86 64: fix the method for
> determining whether to enable kalsr
> To: 1127955419(a)qq.com
> Cc: devel(a)lists.crash-utility.osci.io, Zach Wade
> <zachwade.k(a)gmail.com>
> Message-ID:
> <CAO7dBbWEmV29R-KJeoJ6=
> Nq0u3EBa6ua-qyOAA20h4pE9q+THA(a)mail.gmail.com>
> Content-Type: text/plain; charset="UTF-8"
>
> Hi XingYang,
>
> On Sun, Sep 22, 2024 at 5:16 AM <1127955419(a)qq.com> wrote:
> >
> > From: Li XingYang <1127955419(a)qq.com>
> >
> > The recent commit 6752571d8d78 fixed the issue where linux kernel with
> > 223b5e57d0d5 ("mm/execmem, arch: convert remaining overrides of
> module_alloc to execmem")
> > could not load crash, but it did not work in the following two
> situations:
> > 1: Kernel enables KASAN
> > 2: The kernel set CONFIG_RANDOMIZE_BASE but not set
> CONFIG_RANDOMIZE_MEMORY
> >
> > crash: seek error: kernel virtual address: ffffffff826bb418 type:
> "page_offset_base"
> >
>
> Thanks for the fix, looking good to me, ack.
>
>
Applied:
https://github.com/crash-utility/crash/commit/7b5c8bca7d05b72b252756ff902...
Thanks
Lianbo
> Thanks,
> Tao Liu
>
> > In both cases, kaslr_regions will not be exported in /proc/kallsyms,
> > but kaslr_get_random_long will still be exported in /proc/kallsyms.
> > So use kaslr_get_random_long instead of kaslr_regions
> > to determine whether to enable KASLR.
> >
> > Signed-off-by: Li XingYang <1127955419(a)qq.com>
> > Signed-off-by: Zach Wade <zachwade.k(a)gmail.com>
> > ---
> > symbols.c | 8 ++++----
> > 1 file changed, 4 insertions(+), 4 deletions(-)
> >
> > diff --git a/symbols.c b/symbols.c
> > index 69a1fbb..02359a4 100644
> > --- a/symbols.c
> > +++ b/symbols.c
> > @@ -619,7 +619,7 @@ strip_symbol_end(const char *name, char *buf)
> > * or in /proc/kallsyms on a live system.
> > *
> > * Setting KASLR_CHECK will trigger a search for "module_load_offset"
> > - * or "kaslr_regions" during the initial symbol sort operation, and
> > + * or "kaslr_get_random_long" during the initial symbol sort
> operation, and
> > * if found, will set (RELOC_AUTO|KASLR). On live systems, the search
> > * is done here by checking /proc/kallsyms.
> > */
> > @@ -646,7 +646,7 @@ kaslr_init(void)
> > st->_stext_vmlinux = UNINITIALIZED;
> >
> > if (ACTIVE() && /* Linux 3.15 */
> > - ((symbol_value_from_proc_kallsyms("kaslr_regions") !=
> BADVAL) ||
> > + ((symbol_value_from_proc_kallsyms("kaslr_get_random_long")
> != BADVAL) ||
> > (symbol_value_from_proc_kallsyms("module_load_offset") !=
> BADVAL))) {
> > kt->flags2 |= (RELOC_AUTO|KASLR);
> > st->_stext_vmlinux = UNINITIALIZED;
> > @@ -14253,8 +14253,8 @@ numeric_forward(const void *P_x, const void *P_y)
> > st->_stext_vmlinux = valueof(y);
> > }
> > if (kt->flags2 & KASLR_CHECK) {
> > - if (STREQ(x->name, "kaslr_regions") ||
> > - STREQ(y->name, "kaslr_regions") ||
> > + if (STREQ(x->name, "kaslr_get_random_long") ||
> > + STREQ(y->name, "kaslr_get_random_long") ||
> > STREQ(x->name, "module_load_offset") ||
> > STREQ(y->name, "module_load_offset")) {
> > kt->flags2 &= ~KASLR_CHECK;
> > --
> > 2.46.1
> >
>
1 week, 6 days
Re: [PATCH v3] kmem: fix the determination for slab page
by lijiang
On Tue, Sep 24, 2024 at 11:42 AM <devel-request(a)lists.crash-utility.osci.io>
wrote:
> Date: Tue, 24 Sep 2024 15:30:00 +1200
> From: Tao Liu <ltao(a)redhat.com>
> Subject: [Crash-utility] Re: [PATCH v3] kmem: fix the determination
> for slab page
> To: lijiang <lijiang(a)redhat.com>
> Cc: devel(a)lists.crash-utility.osci.io
> Message-ID:
> <CAO7dBbVf-GeGZXs011x3LEf_c=
> 0L7jGq8RRv5QRgsN_MEF2UqQ(a)mail.gmail.com>
> Content-Type: text/plain; charset="UTF-8"
>
> Hi lianbo & qiwu,
>
> For the v3, LGTM, so ack.
>
Applied:
https://github.com/crash-utility/crash/commit/9babe985a7eb001ec398a3734c1...
Thanks
Lianbo
>
> Thanks,
> Tao Liu
>
>
> On Fri, Sep 20, 2024 at 6:55 PM lijiang <lijiang(a)redhat.com> wrote:
> >
> > On Fri, Sep 20, 2024 at 9:30 AM <
> devel-request(a)lists.crash-utility.osci.io> wrote:
> >>
> >> Date: Fri, 20 Sep 2024 01:28:32 -0000
> >> From: qiwu.chen(a)transsion.com
> >> Subject: [Crash-utility] [PATCH v3] kmem: fix the determination for
> >> slab page
> >> To: devel(a)lists.crash-utility.osci.io
> >> Message-ID: <20240920012832.29184.28326(a)lists.crash-utility.osci.io>
> >> Content-Type: text/plain; charset="utf-8"
> >>
> >> The determination for a slab page has changed due to changing
> >> PG_slab from a page flag to a page type since kernel commit
> >> 46df8e73a4a3.
> >>
> >> Before applying this patch:
> >> crash> kmem -s ffff000002aa4100
> >> kmem: address is not allocated in slab subsystem: ffff000002aa4100
> >>
> >> After applying this patch:
> >> crash> kmem -s ffff000002aa4100
> >> CACHE OBJSIZE ALLOCATED TOTAL SLABS SSIZE NAME
> >> ffff00000140f900 4096 94 126 18 32k
> task_struct
> >> SLAB MEMORY NODE TOTAL ALLOCATED FREE
> >> fffffdffc00aa800 ffff000002aa0000 0 7 5 2
> >> FREE / [ALLOCATED]
> >> [ffff000002aa4100]
> >>
> >> Signed-off-by: qiwu.chen <qiwu.chen(a)transsion.com>
> >> ---
> >> defs.h | 7 ++++++
> >> memory.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++---------
> >> symbols.c | 2 ++
> >> 3 files changed, 66 insertions(+), 11 deletions(-)
> >>
> >
> > Thank you for the update, qiwu.
> >
> > I have no other issues, for the v3: Ack
> >
> > Thanks
> > Lianbo
> >
> >>
> >> diff --git a/defs.h b/defs.h
> >> index 2231cb6..e2a9278 100644
> >> --- a/defs.h
> >> +++ b/defs.h
> >> @@ -2243,6 +2243,7 @@ struct offset_table { /* stash
> of commonly-used offsets */
> >> long vmap_node_busy;
> >> long rb_list_head;
> >> long file_f_inode;
> >> + long page_page_type;
> >> };
> >>
> >> struct size_table { /* stash of commonly-used sizes */
> >> @@ -2651,6 +2652,7 @@ struct vm_table { /* kernel
> VM-related data */
> >> ulong max_mem_section_nr;
> >> ulong zero_paddr;
> >> ulong huge_zero_paddr;
> >> + uint page_type_base;
> >> };
> >>
> >> #define NODES (0x1)
> >> @@ -2684,6 +2686,11 @@ struct vm_table { /* kernel
> VM-related data */
> >> #define SLAB_CPU_CACHE (0x10000000)
> >> #define SLAB_ROOT_CACHES (0x20000000)
> >> #define USE_VMAP_NODES (0x40000000)
> >> +/*
> >> + * The SLAB_PAGEFLAGS flag is introduced to detect the change of
> >> + * PG_slab's type from a page flag to a page type.
> >> + */
> >> +#define SLAB_PAGEFLAGS (0x80000000)
> >>
> >> #define IS_FLATMEM() (vt->flags & FLATMEM)
> >> #define IS_DISCONTIGMEM() (vt->flags & DISCONTIGMEM)
> >> diff --git a/memory.c b/memory.c
> >> index 967a9cf..8befe8c 100644
> >> --- a/memory.c
> >> +++ b/memory.c
> >> @@ -351,6 +351,43 @@ static ulong handle_each_vm_area(struct
> handle_each_vm_area_args *);
> >>
> >> static ulong DISPLAY_DEFAULT;
> >>
> >> +/*
> >> + * Before kernel commit ff202303c398e, the value is defined as a
> macro, so copy it here;
> >> + * After this commit, the value is defined as an enum, which can be
> evaluated at runtime.
> >> + */
> >> +#define PAGE_TYPE_BASE 0xf0000000
> >> +#define PageType(page_type, flag)
> \
> >> + ((page_type & (vt->page_type_base | flag)) ==
> vt->page_type_base)
> >> +
> >> +static void page_type_init(void)
> >> +{
> >> + if (!enumerator_value("PAGE_TYPE_BASE", (long
> *)&vt->page_type_base))
> >> + vt->page_type_base = PAGE_TYPE_BASE;
> >> +}
> >> +
> >> +/*
> >> + * The PG_slab's type has changed from a page flag to a page type
> >> + * since kernel commit 46df8e73a4a3.
> >> + */
> >> +static bool page_slab(ulong page, ulong flags)
> >> +{
> >> + if (vt->flags & SLAB_PAGEFLAGS) {
> >> + if ((flags >> vt->PG_slab) & 1)
> >> + return TRUE;
> >> + }
> >> +
> >> + if (VALID_MEMBER(page_page_type)) {
> >> + uint page_type;
> >> +
> >> + readmem(page+OFFSET(page_page_type), KVADDR, &page_type,
> >> + sizeof(page_type), "page_type", FAULT_ON_ERROR);
> >> + if (PageType(page_type, (uint)vt->PG_slab))
> >> + return TRUE;
> >> + }
> >> +
> >> + return FALSE;
> >> +}
> >> +
> >> /*
> >> * Verify that the sizeof the primitive types are reasonable.
> >> */
> >> @@ -504,6 +541,7 @@ vm_init(void)
> >> ANON_MEMBER_OFFSET_INIT(page_compound_head, "page",
> "compound_head");
> >> MEMBER_OFFSET_INIT(page_private, "page", "private");
> >> MEMBER_OFFSET_INIT(page_freelist, "page", "freelist");
> >> + MEMBER_OFFSET_INIT(page_page_type, "page", "page_type");
> >>
> >> MEMBER_OFFSET_INIT(mm_struct_pgd, "mm_struct", "pgd");
> >>
> >> @@ -1278,6 +1316,8 @@ vm_init(void)
> >>
> >> page_flags_init();
> >>
> >> + page_type_init();
> >> +
> >> rss_page_types_init();
> >>
> >> vt->flags |= VM_INIT;
> >> @@ -5931,7 +5971,7 @@ dump_mem_map_SPARSEMEM(struct meminfo *mi)
> >> if ((flags >> v22_PG_Slab) & 1)
> >> slabs++;
> >> } else if (vt->PG_slab) {
> >> - if ((flags >> vt->PG_slab) & 1)
> >> + if (page_slab(pp, flags))
> >> slabs++;
> >> } else {
> >> if ((flags >> v24_PG_slab) & 1)
> >> @@ -6381,7 +6421,7 @@ dump_mem_map(struct meminfo *mi)
> >> if ((flags >> v22_PG_Slab) & 1)
> >> slabs++;
> >> } else if (vt->PG_slab) {
> >> - if ((flags >> vt->PG_slab) & 1)
> >> + if (page_slab(pp, flags))
> >> slabs++;
> >> } else {
> >> if ((flags >> v24_PG_slab) & 1)
> >> @@ -6694,7 +6734,6 @@ dump_hstates()
> >> FREEBUF(hstate);
> >> }
> >>
> >> -
> >> static void
> >> page_flags_init(void)
> >> {
> >> @@ -6775,6 +6814,9 @@ page_flags_init_from_pageflag_names(void)
> >> vt->pageflags_data[i].name = nameptr;
> >> vt->pageflags_data[i].mask = mask;
> >>
> >> + if (!strncmp(nameptr, "slab", 4))
> >> + vt->flags |= SLAB_PAGEFLAGS;
> >> +
> >> if (CRASHDEBUG(1)) {
> >> fprintf(fp, " %08lx %s\n",
> >> vt->pageflags_data[i].mask,
> >> @@ -6835,8 +6877,9 @@ page_flags_init_from_pageflags_enum(void)
> >> }
> >> strcpy(nameptr, arglist[0] + strlen("PG_"));
> >> vt->pageflags_data[p].name = nameptr;
> >> - vt->pageflags_data[p].mask = 1 <<
> atoi(arglist[2]);
> >> -
> >> + vt->pageflags_data[p].mask = 1 <<
> atoi(arglist[2]);
> >> + if (!strncmp(nameptr, "slab", 4))
> >> + vt->flags |= SLAB_PAGEFLAGS;
> >> p++;
> >> }
> >> } else
> >> @@ -9736,14 +9779,14 @@ vaddr_to_kmem_cache(ulong vaddr, char *buf, int
> verbose)
> >> readmem(page+OFFSET(page_flags), KVADDR,
> >> &page_flags, sizeof(ulong), "page.flags",
> >> FAULT_ON_ERROR);
> >> - if (!(page_flags & (1 << vt->PG_slab))) {
> >> + if (!page_slab(page, page_flags)) {
> >> if (((vt->flags & KMALLOC_SLUB) ||
> VALID_MEMBER(page_compound_head)) ||
> >> ((vt->flags & KMALLOC_COMMON) &&
> >> VALID_MEMBER(page_slab) &&
> VALID_MEMBER(page_first_page))) {
> >>
> readmem(compound_head(page)+OFFSET(page_flags), KVADDR,
> >> &page_flags, sizeof(ulong),
> "page.flags",
> >> FAULT_ON_ERROR);
> >> - if (!(page_flags & (1 << vt->PG_slab)))
> >> + if (!page_slab(compound_head(page),
> page_flags))
> >> return NULL;
> >> } else
> >> return NULL;
> >> @@ -14108,6 +14151,8 @@ dump_vm_table(int verbose)
> >> fprintf(fp, "%sNODELISTS_IS_PTR", others++ ? "|" : "");\
> >> if (vt->flags & VM_INIT)
> >> fprintf(fp, "%sVM_INIT", others++ ? "|" : "");\
> >> + if (vt->flags & SLAB_PAGEFLAGS)
> >> + fprintf(fp, "%sSLAB_PAGEFLAGS", others++ ? "|" : "");\
> >>
> >> fprintf(fp, ")\n");
> >> if (vt->kernel_pgd[0] == vt->kernel_pgd[1])
> >> @@ -14237,6 +14282,7 @@ dump_vm_table(int verbose)
> >> vt->pageflags_data[i].mask,
> >> vt->pageflags_data[i].name);
> >> }
> >> + fprintf(fp, " page_type_base: %x\n", vt->page_type_base);
> >>
> >> dump_vma_cache(VERBOSE);
> >> }
> >> @@ -20195,7 +20241,7 @@ char *
> >> is_slab_page(struct meminfo *si, char *buf)
> >> {
> >> int i, cnt;
> >> - ulong page_slab, page_flags, name;
> >> + ulong pg_slab, page_flags, name;
> >> ulong *cache_list;
> >> char *retval;
> >>
> >> @@ -20210,11 +20256,11 @@ is_slab_page(struct meminfo *si, char *buf)
> >> RETURN_ON_ERROR|QUIET))
> >> return NULL;
> >>
> >> - if (!(page_flags & (1 << vt->PG_slab)))
> >> + if (!page_slab(si->spec_addr, page_flags))
> >> return NULL;
> >>
> >> if (!readmem(si->spec_addr + OFFSET(page_slab), KVADDR,
> >> - &page_slab, sizeof(ulong), "page.slab",
> >> + &pg_slab, sizeof(ulong), "page.slab",
> >> RETURN_ON_ERROR|QUIET))
> >> return NULL;
> >>
> >> @@ -20222,7 +20268,7 @@ is_slab_page(struct meminfo *si, char *buf)
> >> cnt = get_kmem_cache_list(&cache_list);
> >>
> >> for (i = 0; i < cnt; i++) {
> >> - if (page_slab == cache_list[i]) {
> >> + if (pg_slab == cache_list[i]) {
> >> if (!readmem(cache_list[i] +
> OFFSET(kmem_cache_name),
> >> KVADDR, &name, sizeof(char *),
> >> "kmem_cache.name", QUIET|RETURN_ON_ERROR))
> >> diff --git a/symbols.c b/symbols.c
> >> index 69a1fbb..014cd29 100644
> >> --- a/symbols.c
> >> +++ b/symbols.c
> >> @@ -10339,6 +10339,8 @@ dump_offset_table(char *spec, ulong makestruct)
> >> fprintf(fp, " page_compound_head: %ld\n",
> >> OFFSET(page_compound_head));
> >> fprintf(fp, " page_private: %ld\n",
> OFFSET(page_private));
> >> + fprintf(fp, " page_page_type: %ld\n",
> >> + OFFSET(page_page_type));
> >>
> >> fprintf(fp, " trace_print_flags_mask: %ld\n",
> >> OFFSET(trace_print_flags_mask));
> >> --
> >> 2.25.1
>
2 weeks
Re: [PATCH v3] kmem: fix the determination for slab page
by lijiang
On Fri, Sep 20, 2024 at 9:30 AM <devel-request(a)lists.crash-utility.osci.io>
wrote:
> Date: Fri, 20 Sep 2024 01:28:32 -0000
> From: qiwu.chen(a)transsion.com
> Subject: [Crash-utility] [PATCH v3] kmem: fix the determination for
> slab page
> To: devel(a)lists.crash-utility.osci.io
> Message-ID: <20240920012832.29184.28326(a)lists.crash-utility.osci.io>
> Content-Type: text/plain; charset="utf-8"
>
> The determination for a slab page has changed due to changing
> PG_slab from a page flag to a page type since kernel commit
> 46df8e73a4a3.
>
> Before apply this patch:
> crash> kmem -s ffff000002aa4100
> kmem: address is not allocated in slab subsystem: ffff000002aa4100
>
> After apply this patch:
> crash> kmem -s ffff000002aa4100
> CACHE OBJSIZE ALLOCATED TOTAL SLABS SSIZE NAME
> ffff00000140f900 4096 94 126 18 32k task_struct
> SLAB MEMORY NODE TOTAL ALLOCATED FREE
> fffffdffc00aa800 ffff000002aa0000 0 7 5 2
> FREE / [ALLOCATED]
> [ffff000002aa4100]
>
> Signed-off-by: qiwu.chen <qiwu.chen(a)transsion.com>
> ---
> defs.h | 7 ++++++
> memory.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++---------
> symbols.c | 2 ++
> 3 files changed, 66 insertions(+), 11 deletions(-)
>
>
Thank you for the update, qiwu.
I have no other issues, for the v3: Ack
Thanks
Lianbo
> diff --git a/defs.h b/defs.h
> index 2231cb6..e2a9278 100644
> --- a/defs.h
> +++ b/defs.h
> @@ -2243,6 +2243,7 @@ struct offset_table { /* stash of
> commonly-used offsets */
> long vmap_node_busy;
> long rb_list_head;
> long file_f_inode;
> + long page_page_type;
> };
>
> struct size_table { /* stash of commonly-used sizes */
> @@ -2651,6 +2652,7 @@ struct vm_table { /* kernel
> VM-related data */
> ulong max_mem_section_nr;
> ulong zero_paddr;
> ulong huge_zero_paddr;
> + uint page_type_base;
> };
>
> #define NODES (0x1)
> @@ -2684,6 +2686,11 @@ struct vm_table { /* kernel
> VM-related data */
> #define SLAB_CPU_CACHE (0x10000000)
> #define SLAB_ROOT_CACHES (0x20000000)
> #define USE_VMAP_NODES (0x40000000)
> +/*
> + * The SLAB_PAGEFLAGS flag is introduced to detect the change of
> + * PG_slab's type from a page flag to a page type.
> + */
> +#define SLAB_PAGEFLAGS (0x80000000)
>
> #define IS_FLATMEM() (vt->flags & FLATMEM)
> #define IS_DISCONTIGMEM() (vt->flags & DISCONTIGMEM)
> diff --git a/memory.c b/memory.c
> index 967a9cf..8befe8c 100644
> --- a/memory.c
> +++ b/memory.c
> @@ -351,6 +351,43 @@ static ulong handle_each_vm_area(struct
> handle_each_vm_area_args *);
>
> static ulong DISPLAY_DEFAULT;
>
> +/*
> + * Before kernel commit ff202303c398e, the value is defined as a macro,
> so copy it here;
> + * After this commit, the value is defined as an enum, which can be
> evaluated at runtime.
> + */
> +#define PAGE_TYPE_BASE 0xf0000000
> +#define PageType(page_type, flag)
> \
> + ((page_type & (vt->page_type_base | flag)) == vt->page_type_base)
> +
> +static void page_type_init(void)
> +{
> + if (!enumerator_value("PAGE_TYPE_BASE", (long
> *)&vt->page_type_base))
> + vt->page_type_base = PAGE_TYPE_BASE;
> +}
> +
> +/*
> + * The PG_slab's type has changed from a page flag to a page type
> + * since kernel commit 46df8e73a4a3.
> + */
> +static bool page_slab(ulong page, ulong flags)
> +{
> + if (vt->flags & SLAB_PAGEFLAGS) {
> + if ((flags >> vt->PG_slab) & 1)
> + return TRUE;
> + }
> +
> + if (VALID_MEMBER(page_page_type)) {
> + uint page_type;
> +
> + readmem(page+OFFSET(page_page_type), KVADDR, &page_type,
> + sizeof(page_type), "page_type", FAULT_ON_ERROR);
> + if (PageType(page_type, (uint)vt->PG_slab))
> + return TRUE;
> + }
> +
> + return FALSE;
> +}
> +
> /*
> * Verify that the sizeof the primitive types are reasonable.
> */
> @@ -504,6 +541,7 @@ vm_init(void)
> ANON_MEMBER_OFFSET_INIT(page_compound_head, "page",
> "compound_head");
> MEMBER_OFFSET_INIT(page_private, "page", "private");
> MEMBER_OFFSET_INIT(page_freelist, "page", "freelist");
> + MEMBER_OFFSET_INIT(page_page_type, "page", "page_type");
>
> MEMBER_OFFSET_INIT(mm_struct_pgd, "mm_struct", "pgd");
>
> @@ -1278,6 +1316,8 @@ vm_init(void)
>
> page_flags_init();
>
> + page_type_init();
> +
> rss_page_types_init();
>
> vt->flags |= VM_INIT;
> @@ -5931,7 +5971,7 @@ dump_mem_map_SPARSEMEM(struct meminfo *mi)
> if ((flags >> v22_PG_Slab) & 1)
> slabs++;
> } else if (vt->PG_slab) {
> - if ((flags >> vt->PG_slab) & 1)
> + if (page_slab(pp, flags))
> slabs++;
> } else {
> if ((flags >> v24_PG_slab) & 1)
> @@ -6381,7 +6421,7 @@ dump_mem_map(struct meminfo *mi)
> if ((flags >> v22_PG_Slab) & 1)
> slabs++;
> } else if (vt->PG_slab) {
> - if ((flags >> vt->PG_slab) & 1)
> + if (page_slab(pp, flags))
> slabs++;
> } else {
> if ((flags >> v24_PG_slab) & 1)
> @@ -6694,7 +6734,6 @@ dump_hstates()
> FREEBUF(hstate);
> }
>
> -
> static void
> page_flags_init(void)
> {
> @@ -6775,6 +6814,9 @@ page_flags_init_from_pageflag_names(void)
> vt->pageflags_data[i].name = nameptr;
> vt->pageflags_data[i].mask = mask;
>
> + if (!strncmp(nameptr, "slab", 4))
> + vt->flags |= SLAB_PAGEFLAGS;
> +
> if (CRASHDEBUG(1)) {
> fprintf(fp, " %08lx %s\n",
> vt->pageflags_data[i].mask,
> @@ -6835,8 +6877,9 @@ page_flags_init_from_pageflags_enum(void)
> }
> strcpy(nameptr, arglist[0] + strlen("PG_"));
> vt->pageflags_data[p].name = nameptr;
> - vt->pageflags_data[p].mask = 1 <<
> atoi(arglist[2]);
> -
> + vt->pageflags_data[p].mask = 1 << atoi(arglist[2]);
> + if (!strncmp(nameptr, "slab", 4))
> + vt->flags |= SLAB_PAGEFLAGS;
> p++;
> }
> } else
> @@ -9736,14 +9779,14 @@ vaddr_to_kmem_cache(ulong vaddr, char *buf, int
> verbose)
> readmem(page+OFFSET(page_flags), KVADDR,
> &page_flags, sizeof(ulong), "page.flags",
> FAULT_ON_ERROR);
> - if (!(page_flags & (1 << vt->PG_slab))) {
> + if (!page_slab(page, page_flags)) {
> if (((vt->flags & KMALLOC_SLUB) ||
> VALID_MEMBER(page_compound_head)) ||
> ((vt->flags & KMALLOC_COMMON) &&
> VALID_MEMBER(page_slab) &&
> VALID_MEMBER(page_first_page))) {
>
> readmem(compound_head(page)+OFFSET(page_flags), KVADDR,
> &page_flags, sizeof(ulong),
> "page.flags",
> FAULT_ON_ERROR);
> - if (!(page_flags & (1 << vt->PG_slab)))
> + if (!page_slab(compound_head(page),
> page_flags))
> return NULL;
> } else
> return NULL;
> @@ -14108,6 +14151,8 @@ dump_vm_table(int verbose)
> fprintf(fp, "%sNODELISTS_IS_PTR", others++ ? "|" : "");\
> if (vt->flags & VM_INIT)
> fprintf(fp, "%sVM_INIT", others++ ? "|" : "");\
> + if (vt->flags & SLAB_PAGEFLAGS)
> + fprintf(fp, "%sSLAB_PAGEFLAGS", others++ ? "|" : "");\
>
> fprintf(fp, ")\n");
> if (vt->kernel_pgd[0] == vt->kernel_pgd[1])
> @@ -14237,6 +14282,7 @@ dump_vm_table(int verbose)
> vt->pageflags_data[i].mask,
> vt->pageflags_data[i].name);
> }
> + fprintf(fp, " page_type_base: %x\n", vt->page_type_base);
>
> dump_vma_cache(VERBOSE);
> }
> @@ -20195,7 +20241,7 @@ char *
> is_slab_page(struct meminfo *si, char *buf)
> {
> int i, cnt;
> - ulong page_slab, page_flags, name;
> + ulong pg_slab, page_flags, name;
> ulong *cache_list;
> char *retval;
>
> @@ -20210,11 +20256,11 @@ is_slab_page(struct meminfo *si, char *buf)
> RETURN_ON_ERROR|QUIET))
> return NULL;
>
> - if (!(page_flags & (1 << vt->PG_slab)))
> + if (!page_slab(si->spec_addr, page_flags))
> return NULL;
>
> if (!readmem(si->spec_addr + OFFSET(page_slab), KVADDR,
> - &page_slab, sizeof(ulong), "page.slab",
> + &pg_slab, sizeof(ulong), "page.slab",
> RETURN_ON_ERROR|QUIET))
> return NULL;
>
> @@ -20222,7 +20268,7 @@ is_slab_page(struct meminfo *si, char *buf)
> cnt = get_kmem_cache_list(&cache_list);
>
> for (i = 0; i < cnt; i++) {
> - if (page_slab == cache_list[i]) {
> + if (pg_slab == cache_list[i]) {
> if (!readmem(cache_list[i] +
> OFFSET(kmem_cache_name),
> KVADDR, &name, sizeof(char *),
> "kmem_cache.name", QUIET|RETURN_ON_ERROR))
> diff --git a/symbols.c b/symbols.c
> index 69a1fbb..014cd29 100644
> --- a/symbols.c
> +++ b/symbols.c
> @@ -10339,6 +10339,8 @@ dump_offset_table(char *spec, ulong makestruct)
> fprintf(fp, " page_compound_head: %ld\n",
> OFFSET(page_compound_head));
> fprintf(fp, " page_private: %ld\n",
> OFFSET(page_private));
> + fprintf(fp, " page_page_type: %ld\n",
> + OFFSET(page_page_type));
>
> fprintf(fp, " trace_print_flags_mask: %ld\n",
> OFFSET(trace_print_flags_mask));
> --
> 2.25.1
>
2 weeks
Re: [PATCH] X86 64: fix the method for determining whether to enable kalsr
by Tao Liu
Hi XingYang,
On Sun, Sep 22, 2024 at 5:16 AM <1127955419(a)qq.com> wrote:
>
> From: Li XingYang <1127955419(a)qq.com>
>
> The recent commit 6752571d8d78 fixed the issue where linux kernel with
> 223b5e57d0d5 ("mm/execmem, arch: convert remaining overrides of module_alloc to execmem")
> could not load crash, but it did not work in the following two situations:
> 1: Kernel enables KASAN
> 2: The kernel set CONFIG_RANDOMIZE_BASE but not set CONFIG_RANDOMIZE_MEMORY
>
> crash: seek error: kernel virtual address: ffffffff826bb418 type: "page_offset_base"
>
Thanks for the fix, looking good to me, ack.
Thanks,
Tao Liu
> In both cases, kaslr_regions will not be exported in /proc/kallsyms,
> but kaslr_get_random_long will still be exported in /proc/kallsyms.
> So use kaslr_get_random_long instead of kaslr_regions
> to determine whether to enable KASLR.
>
> Signed-off-by: Li XingYang <1127955419(a)qq.com>
> Signed-off-by: Zach Wade <zachwade.k(a)gmail.com>
> ---
> symbols.c | 8 ++++----
> 1 file changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/symbols.c b/symbols.c
> index 69a1fbb..02359a4 100644
> --- a/symbols.c
> +++ b/symbols.c
> @@ -619,7 +619,7 @@ strip_symbol_end(const char *name, char *buf)
> * or in /proc/kallsyms on a live system.
> *
> * Setting KASLR_CHECK will trigger a search for "module_load_offset"
> - * or "kaslr_regions" during the initial symbol sort operation, and
> + * or "kaslr_get_random_long" during the initial symbol sort operation, and
> * if found, will set (RELOC_AUTO|KASLR). On live systems, the search
> * is done here by checking /proc/kallsyms.
> */
> @@ -646,7 +646,7 @@ kaslr_init(void)
> st->_stext_vmlinux = UNINITIALIZED;
>
> if (ACTIVE() && /* Linux 3.15 */
> - ((symbol_value_from_proc_kallsyms("kaslr_regions") != BADVAL) ||
> + ((symbol_value_from_proc_kallsyms("kaslr_get_random_long") != BADVAL) ||
> (symbol_value_from_proc_kallsyms("module_load_offset") != BADVAL))) {
> kt->flags2 |= (RELOC_AUTO|KASLR);
> st->_stext_vmlinux = UNINITIALIZED;
> @@ -14253,8 +14253,8 @@ numeric_forward(const void *P_x, const void *P_y)
> st->_stext_vmlinux = valueof(y);
> }
> if (kt->flags2 & KASLR_CHECK) {
> - if (STREQ(x->name, "kaslr_regions") ||
> - STREQ(y->name, "kaslr_regions") ||
> + if (STREQ(x->name, "kaslr_get_random_long") ||
> + STREQ(y->name, "kaslr_get_random_long") ||
> STREQ(x->name, "module_load_offset") ||
> STREQ(y->name, "module_load_offset")) {
> kt->flags2 &= ~KASLR_CHECK;
> --
> 2.46.1
>
2 weeks
Re: [PATCH] X86 64: fix the method for determining whether to enable kalsr
by lijiang
Thank you for the patch, XingYang.
On Sun, Sep 22, 2024 at 1:08 AM <root(a)lists.crash-utility.osci.io> wrote:
> From: 1127955419(a)qq.com
> To: devel(a)lists.crash-utility.osci.io, ltao(a)redhat.com
> Cc: Li XingYang <1127955419(a)qq.com>, Zach Wade <zachwade.k(a)gmail.com>
> Bcc:
> Date: Sun, 22 Sep 2024 01:00:29 +0800
> Subject: [PATCH] X86 64: fix the method for determining whether to enable
> kalsr
> From: Li XingYang <1127955419(a)qq.com>
>
> The recent commit 6752571d8d78 fixed the issue where linux kernel with
> 223b5e57d0d5 ("mm/execmem, arch: convert remaining overrides of
> module_alloc to execmem")
> could not load crash, but it did not work in the following two situations:
> 1: Kernel enables KASAN
> 2: The kernel set CONFIG_RANDOMIZE_BASE but not set CONFIG_RANDOMIZE_MEMORY
>
> crash: seek error: kernel virtual address: ffffffff826bb418 type:
> "page_offset_base"
>
> In both cases, kaslr_regions will not be exported in /proc/kallsyms,
> but kaslr_get_random_long will still be exported in /proc/kallsyms.
> So use kaslr_get_random_long instead of kaslr_regions
> to determine whether to enable KASLR.
>
The kaslr_get_random_long() function is not exported by EXPORT_SYMBOL(), so I am
not sure whether it could be optimized out (eliminated) by the compiler; we have
observed that the kaslr_regions symbol can be optimized out in some kernels.
But anyway, I do not have a better solution for the time being. So I agree
to this change: Ack.
BTW: The following functions in the kernel really misled me.
static inline bool kaslr_enabled(void)
{
return IS_ENABLED(CONFIG_RANDOMIZE_MEMORY) &&
!!(boot_params.hdr.loadflags & KASLR_FLAG);
}
/*
* Apply no randomization if KASLR was disabled at boot or if KASAN
* is enabled. KASAN shadow mappings rely on regions being PGD aligned.
*/
static inline bool kaslr_memory_enabled(void)
{
return kaslr_enabled() && !IS_ENABLED(CONFIG_KASAN);
}
Thanks
Lianbo
> Signed-off-by: Li XingYang <1127955419(a)qq.com>
> Signed-off-by: Zach Wade <zachwade.k(a)gmail.com>
> ---
> symbols.c | 8 ++++----
> 1 file changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/symbols.c b/symbols.c
> index 69a1fbb..02359a4 100644
> --- a/symbols.c
> +++ b/symbols.c
> @@ -619,7 +619,7 @@ strip_symbol_end(const char *name, char *buf)
> * or in /proc/kallsyms on a live system.
> *
> * Setting KASLR_CHECK will trigger a search for "module_load_offset"
> - * or "kaslr_regions" during the initial symbol sort operation, and
> + * or "kaslr_get_random_long" during the initial symbol sort operation,
> and
> * if found, will set (RELOC_AUTO|KASLR). On live systems, the search
> * is done here by checking /proc/kallsyms.
> */
> @@ -646,7 +646,7 @@ kaslr_init(void)
> st->_stext_vmlinux = UNINITIALIZED;
>
> if (ACTIVE() && /* Linux 3.15 */
> - ((symbol_value_from_proc_kallsyms("kaslr_regions") != BADVAL)
> ||
> + ((symbol_value_from_proc_kallsyms("kaslr_get_random_long") !=
> BADVAL) ||
> (symbol_value_from_proc_kallsyms("module_load_offset") !=
> BADVAL))) {
> kt->flags2 |= (RELOC_AUTO|KASLR);
> st->_stext_vmlinux = UNINITIALIZED;
> @@ -14253,8 +14253,8 @@ numeric_forward(const void *P_x, const void *P_y)
> st->_stext_vmlinux = valueof(y);
> }
> if (kt->flags2 & KASLR_CHECK) {
> - if (STREQ(x->name, "kaslr_regions") ||
> - STREQ(y->name, "kaslr_regions") ||
> + if (STREQ(x->name, "kaslr_get_random_long") ||
> + STREQ(y->name, "kaslr_get_random_long") ||
> STREQ(x->name, "module_load_offset") ||
> STREQ(y->name, "module_load_offset")) {
> kt->flags2 &= ~KASLR_CHECK;
> --
> 2.46.1
>
2 weeks
Revert "arm64: section_size_bits compatible with macro definitions"
by Guanyou Chen
Hi tao
When crash loads a vmcore from a kernel older than 5.12, it parses
section_size_bits as 27 because "CONFIG_ARM64_4K_PAGES" is defined, but the
value should be 30. Commit "568c6f04" was added for compatibility with Android
GKI, so we should revert it.
Exp:
Before:
crash vmcore vmlinux -d1
...
xtime timespec.tv_sec: 603549d0: Wed Feb 24 02:30:40 CST 2021
utsname:
sysname: Linux
nodename: localhost
release: 4.14.180-perf-g4483caa8ae80-dirty
version: #1 SMP PREEMPT Wed Feb 24 03:16:01 CST 2021
machine: aarch64
domainname: localdomain
...
SECTION_SIZE_BITS: 27
...
After:
crash vmcore vmlinux -d1
...
xtime timespec.tv_sec: 603549d0: Wed Feb 24 02:30:40 CST 2021
utsname:
sysname: Linux
nodename: localhost
release: 4.14.180-perf-g4483caa8ae80-dirty
version: #1 SMP PREEMPT Wed Feb 24 03:16:01 CST 2021
machine: aarch64
domainname: localdomain
...
SECTION_SIZE_BITS: 30
...
diff --git a/arm64.c b/arm64.c
index 06e7451..05ffdb3 100644
--- a/arm64.c
+++ b/arm64.c
@@ -1698,14 +1698,7 @@ arm64_get_section_size_bits(void)
if ((ret = get_kernel_config("CONFIG_MEMORY_HOTPLUG",
NULL)) == IKCONFIG_Y) {
if ((ret =
get_kernel_config("CONFIG_HOTPLUG_SIZE_BITS", &string)) == IKCONFIG_STR)
machdep->section_size_bits = atol(string);
- }
-
- /* arm64: reduce section size for sparsemem */
- if ((ret = get_kernel_config("CONFIG_ARM64_4K_PAGES",
NULL)) == IKCONFIG_Y
- || (ret =
get_kernel_config("CONFIG_ARM64_16K_PAGES", NULL)) == IKCONFIG_Y)
- machdep->section_size_bits =
_SECTION_SIZE_BITS_5_12;
- else if ((ret = get_kernel_config("CONFIG_ARM64_64K_PAGES",
NULL)) == IKCONFIG_Y)
- machdep->section_size_bits =
_SECTION_SIZE_BITS_5_12_64K;
+ }
}
if (CRASHDEBUG(1))
Thanks,
Guanyou.
2 weeks, 3 days
[PATCH v3] kmem: fix the determination for slab page
by qiwu.chen@transsion.com
The determination for a slab page has changed due to changing
PG_slab from a page flag to a page type since kernel commit
46df8e73a4a3.
Before applying this patch:
crash> kmem -s ffff000002aa4100
kmem: address is not allocated in slab subsystem: ffff000002aa4100
After applying this patch:
crash> kmem -s ffff000002aa4100
CACHE OBJSIZE ALLOCATED TOTAL SLABS SSIZE NAME
ffff00000140f900 4096 94 126 18 32k task_struct
SLAB MEMORY NODE TOTAL ALLOCATED FREE
fffffdffc00aa800 ffff000002aa0000 0 7 5 2
FREE / [ALLOCATED]
[ffff000002aa4100]
Signed-off-by: qiwu.chen <qiwu.chen(a)transsion.com>
---
defs.h | 7 ++++++
memory.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++---------
symbols.c | 2 ++
3 files changed, 66 insertions(+), 11 deletions(-)
diff --git a/defs.h b/defs.h
index 2231cb6..e2a9278 100644
--- a/defs.h
+++ b/defs.h
@@ -2243,6 +2243,7 @@ struct offset_table { /* stash of commonly-used offsets */
long vmap_node_busy;
long rb_list_head;
long file_f_inode;
+ long page_page_type;
};
struct size_table { /* stash of commonly-used sizes */
@@ -2651,6 +2652,7 @@ struct vm_table { /* kernel VM-related data */
ulong max_mem_section_nr;
ulong zero_paddr;
ulong huge_zero_paddr;
+ uint page_type_base;
};
#define NODES (0x1)
@@ -2684,6 +2686,11 @@ struct vm_table { /* kernel VM-related data */
#define SLAB_CPU_CACHE (0x10000000)
#define SLAB_ROOT_CACHES (0x20000000)
#define USE_VMAP_NODES (0x40000000)
+/*
+ * The SLAB_PAGEFLAGS flag is introduced to detect the change of
+ * PG_slab's type from a page flag to a page type.
+ */
+#define SLAB_PAGEFLAGS (0x80000000)
#define IS_FLATMEM() (vt->flags & FLATMEM)
#define IS_DISCONTIGMEM() (vt->flags & DISCONTIGMEM)
diff --git a/memory.c b/memory.c
index 967a9cf..8befe8c 100644
--- a/memory.c
+++ b/memory.c
@@ -351,6 +351,43 @@ static ulong handle_each_vm_area(struct handle_each_vm_area_args *);
static ulong DISPLAY_DEFAULT;
+/*
+ * Before kernel commit ff202303c398e, the value is defined as a macro, so copy it here;
+ * After this commit, the value is defined as an enum, which can be evaluated at runtime.
+ */
+#define PAGE_TYPE_BASE 0xf0000000
+#define PageType(page_type, flag) \
+ ((page_type & (vt->page_type_base | flag)) == vt->page_type_base)
+
+static void page_type_init(void)
+{
+ if (!enumerator_value("PAGE_TYPE_BASE", (long *)&vt->page_type_base))
+ vt->page_type_base = PAGE_TYPE_BASE;
+}
+
+/*
+ * The PG_slab's type has changed from a page flag to a page type
+ * since kernel commit 46df8e73a4a3.
+ */
+static bool page_slab(ulong page, ulong flags)
+{
+ if (vt->flags & SLAB_PAGEFLAGS) {
+ if ((flags >> vt->PG_slab) & 1)
+ return TRUE;
+ }
+
+ if (VALID_MEMBER(page_page_type)) {
+ uint page_type;
+
+ readmem(page+OFFSET(page_page_type), KVADDR, &page_type,
+ sizeof(page_type), "page_type", FAULT_ON_ERROR);
+ if (PageType(page_type, (uint)vt->PG_slab))
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
/*
* Verify that the sizeof the primitive types are reasonable.
*/
@@ -504,6 +541,7 @@ vm_init(void)
ANON_MEMBER_OFFSET_INIT(page_compound_head, "page", "compound_head");
MEMBER_OFFSET_INIT(page_private, "page", "private");
MEMBER_OFFSET_INIT(page_freelist, "page", "freelist");
+ MEMBER_OFFSET_INIT(page_page_type, "page", "page_type");
MEMBER_OFFSET_INIT(mm_struct_pgd, "mm_struct", "pgd");
@@ -1278,6 +1316,8 @@ vm_init(void)
page_flags_init();
+ page_type_init();
+
rss_page_types_init();
vt->flags |= VM_INIT;
@@ -5931,7 +5971,7 @@ dump_mem_map_SPARSEMEM(struct meminfo *mi)
if ((flags >> v22_PG_Slab) & 1)
slabs++;
} else if (vt->PG_slab) {
- if ((flags >> vt->PG_slab) & 1)
+ if (page_slab(pp, flags))
slabs++;
} else {
if ((flags >> v24_PG_slab) & 1)
@@ -6381,7 +6421,7 @@ dump_mem_map(struct meminfo *mi)
if ((flags >> v22_PG_Slab) & 1)
slabs++;
} else if (vt->PG_slab) {
- if ((flags >> vt->PG_slab) & 1)
+ if (page_slab(pp, flags))
slabs++;
} else {
if ((flags >> v24_PG_slab) & 1)
@@ -6694,7 +6734,6 @@ dump_hstates()
FREEBUF(hstate);
}
-
static void
page_flags_init(void)
{
@@ -6775,6 +6814,9 @@ page_flags_init_from_pageflag_names(void)
vt->pageflags_data[i].name = nameptr;
vt->pageflags_data[i].mask = mask;
+ if (!strncmp(nameptr, "slab", 4))
+ vt->flags |= SLAB_PAGEFLAGS;
+
if (CRASHDEBUG(1)) {
fprintf(fp, " %08lx %s\n",
vt->pageflags_data[i].mask,
@@ -6835,8 +6877,9 @@ page_flags_init_from_pageflags_enum(void)
}
strcpy(nameptr, arglist[0] + strlen("PG_"));
vt->pageflags_data[p].name = nameptr;
- vt->pageflags_data[p].mask = 1 << atoi(arglist[2]);
-
+ vt->pageflags_data[p].mask = 1 << atoi(arglist[2]);
+ if (!strncmp(nameptr, "slab", 4))
+ vt->flags |= SLAB_PAGEFLAGS;
p++;
}
} else
@@ -9736,14 +9779,14 @@ vaddr_to_kmem_cache(ulong vaddr, char *buf, int verbose)
readmem(page+OFFSET(page_flags), KVADDR,
&page_flags, sizeof(ulong), "page.flags",
FAULT_ON_ERROR);
- if (!(page_flags & (1 << vt->PG_slab))) {
+ if (!page_slab(page, page_flags)) {
if (((vt->flags & KMALLOC_SLUB) || VALID_MEMBER(page_compound_head)) ||
((vt->flags & KMALLOC_COMMON) &&
VALID_MEMBER(page_slab) && VALID_MEMBER(page_first_page))) {
readmem(compound_head(page)+OFFSET(page_flags), KVADDR,
&page_flags, sizeof(ulong), "page.flags",
FAULT_ON_ERROR);
- if (!(page_flags & (1 << vt->PG_slab)))
+ if (!page_slab(compound_head(page), page_flags))
return NULL;
} else
return NULL;
@@ -14108,6 +14151,8 @@ dump_vm_table(int verbose)
fprintf(fp, "%sNODELISTS_IS_PTR", others++ ? "|" : "");\
if (vt->flags & VM_INIT)
fprintf(fp, "%sVM_INIT", others++ ? "|" : "");\
+ if (vt->flags & SLAB_PAGEFLAGS)
+ fprintf(fp, "%sSLAB_PAGEFLAGS", others++ ? "|" : "");\
fprintf(fp, ")\n");
if (vt->kernel_pgd[0] == vt->kernel_pgd[1])
@@ -14237,6 +14282,7 @@ dump_vm_table(int verbose)
vt->pageflags_data[i].mask,
vt->pageflags_data[i].name);
}
+ fprintf(fp, " page_type_base: %x\n", vt->page_type_base);
dump_vma_cache(VERBOSE);
}
@@ -20195,7 +20241,7 @@ char *
is_slab_page(struct meminfo *si, char *buf)
{
int i, cnt;
- ulong page_slab, page_flags, name;
+ ulong pg_slab, page_flags, name;
ulong *cache_list;
char *retval;
@@ -20210,11 +20256,11 @@ is_slab_page(struct meminfo *si, char *buf)
RETURN_ON_ERROR|QUIET))
return NULL;
- if (!(page_flags & (1 << vt->PG_slab)))
+ if (!page_slab(si->spec_addr, page_flags))
return NULL;
if (!readmem(si->spec_addr + OFFSET(page_slab), KVADDR,
- &page_slab, sizeof(ulong), "page.slab",
+ &pg_slab, sizeof(ulong), "page.slab",
RETURN_ON_ERROR|QUIET))
return NULL;
@@ -20222,7 +20268,7 @@ is_slab_page(struct meminfo *si, char *buf)
cnt = get_kmem_cache_list(&cache_list);
for (i = 0; i < cnt; i++) {
- if (page_slab == cache_list[i]) {
+ if (pg_slab == cache_list[i]) {
if (!readmem(cache_list[i] + OFFSET(kmem_cache_name),
KVADDR, &name, sizeof(char *),
"kmem_cache.name", QUIET|RETURN_ON_ERROR))
diff --git a/symbols.c b/symbols.c
index 69a1fbb..014cd29 100644
--- a/symbols.c
+++ b/symbols.c
@@ -10339,6 +10339,8 @@ dump_offset_table(char *spec, ulong makestruct)
fprintf(fp, " page_compound_head: %ld\n",
OFFSET(page_compound_head));
fprintf(fp, " page_private: %ld\n", OFFSET(page_private));
+ fprintf(fp, " page_page_type: %ld\n",
+ OFFSET(page_page_type));
fprintf(fp, " trace_print_flags_mask: %ld\n",
OFFSET(trace_print_flags_mask));
--
2.25.1
2 weeks, 4 days