On 2023/02/06 16:01, Aureau, Georges (Kernel Tools ERT) wrote:
Hello,
I've submitted
https://github.com/crash-utility/crash/issues/130 describing this
"kmem -s/-S" problem.
For CONFIG_SLAB_FREELIST_HARDENED, the crash memory.c:freelist_ptr() code is checking for
an additional bswap using a simple release test eg. THIS_KERNEL_VERSION >=
LINUX(5,7,0), basically checking for RHEL 9 and beyond.
However, for RHEL 8.6/8.7, we have CONFIG_SLAB_FREELIST_HARDENED=y, and we also have the
additional bswap, but the current crash memory.c:freelist_ptr() code is not handling this
case, hence kmem -s/-S will not not work properly for RHEL 8.6/8.7, and free objects will
not be counted nor reported properly.
Thank you for the patch!
I see RHEL8 crash has a patch additionally for this issue, but
it's better also for upstream crash to handle it.
An example from a RHEL 8.6 x86_64 kdump, a kmem cache with a single slab having 42
objects, only the freelist head is seen as free as crash can't walk freelist next
pointers, and crash is wrongly reporting 41 allocated objects:
crash> sys | grep RELEASE
RELEASE: 4.18.0-372.9.1.el8.x86_64
crash> kmem -s nfs_commit_data
CACHE OBJSIZE ALLOCATED TOTAL SLABS SSIZE NAME
ffff9ad40c7cb2c0 728 41 42 1 32k nfs_commit_data
When properly accounting for the additional bswap, we can walk the freelist and find 38
free objects, and crash is now reporting only 4 allocated objects:
crash> kmem -s nfs_commit_data
CACHE OBJSIZE ALLOCATED TOTAL SLABS SSIZE NAME
ffff9ad40c7cb2c0 728 4 42 1 32k nfs_commit_data
See also pull request
https://github.com/crash-utility/crash/pull/131 and diff/patch
below.
-- georges(a)hpe.com (hpux/linux kernel tools)
diff --git a/defs.h b/defs.h
index 33a823b..5d01f69 100644
--- a/defs.h
+++ b/defs.h
@@ -731,6 +731,7 @@ struct kernel_table { /* kernel data */
#define ONLINE_MAP (ONLINE)
#define ACTIVE_MAP (0x10)
int BUG_bytes;
+ int freelist_ptr_has_bswap;
I would like to suggest adding a flag to vm_table.flag because it's
0 or 1 and related to slab, e.g.
#define FREELIST_PTR_BSWAP (0x40000000)
and showing this with "help -v".
ulong xen_flags;
#define WRITABLE_PAGE_TABLES (0x1)
#define SHADOW_PAGE_TABLES (0x2)
diff --git a/kernel.c b/kernel.c
index a42e6ad..6658370 100644
--- a/kernel.c
+++ b/kernel.c
@@ -81,6 +81,7 @@ static void read_in_kernel_config_err(int, char *);
static void BUG_bytes_init(void);
static int BUG_x86(void);
static int BUG_x86_64(void);
+static void freelist_ptr_init();
Please add "void", otherwise:
$ make clean ; make warn
...
kernel.c:84:1: warning: function declaration isn't a prototype [-Wstrict-prototypes]
static void freelist_ptr_init();
^~~~~~
kernel.c:2402:1: warning: 'freelist_ptr_init' was used with no prototype before
its definition [-Wmissing-prototypes]
freelist_ptr_init(void)
^~~~~~~~~~~~~~~~~
static void cpu_maps_init(void);
static void get_xtime(struct timespec *);
static char *log_from_idx(uint32_t, char *);
@@ -766,6 +767,7 @@ kernel_init()
STRUCT_SIZE_INIT(mem_section, "mem_section");
BUG_bytes_init();
+ freelist_ptr_init();
I think it's better to be in KMALLOC_SLUB block in vm_init().
/*
* for hrtimer
@@ -2352,6 +2354,59 @@ BUG_x86_64(void)
}
+/*
+ * With CONFIG_SLAB_FREELIST_HARDENED, freelist_ptr's are crypted with xor's,
+ * and for recent release with an additionnal bswap. Some releases prio to 5.7.0
+ * may be using the additionnal bswap. The only easy and reliable way to tell is
+ * to inspect assembly code (eg. "__slab_free") for a bswap instruction.
Nice way :)
Thanks,
Kazu
> + */
> +static int
> +freelist_ptr_has_bswap_x86(void)
> +{
> + struct syment *sp, *spn;
> + char buf1[BUFSIZE];
> + char buf2[BUFSIZE];
> + char *arglist[MAXARGS];
> + ulong vaddr;
> + int found;
> +
> + if (!(sp = symbol_search("__slab_free")) ||
> + !(spn = next_symbol(NULL, sp)))
> + return FALSE;
> +
> + sprintf(buf1, "x/%ldi 0x%lx", spn->value - sp->value,
sp->value);
> +
> + found = FALSE;
> + vaddr = 0;
> + open_tmpfile();
> + gdb_pass_through(buf1, pc->tmpfile, GNU_RETURN_ON_ERROR);
> + rewind(pc->tmpfile);
> + while (fgets(buf2, BUFSIZE, pc->tmpfile)) {
> + if (parse_line(buf2, arglist) < 3)
> + continue;
> +
> + if ((vaddr = htol(strip_ending_char(arglist[0], ':'),
> + RETURN_ON_ERROR|QUIET, NULL)) >= spn->value)
> + continue;
> +
> + if (STREQ(arglist[2], "bswap")) {
> + found = TRUE;
> + break;
> + }
> + }
> + close_tmpfile();
> + return found;
> +}
> +
> +static void
> +freelist_ptr_init(void)
> +{
> + if (THIS_KERNEL_VERSION >= LINUX(5,7,0))
> + kt->freelist_ptr_has_bswap = TRUE;
> + else if (machine_type("X86") || machine_type("X86_64"))
> + kt->freelist_ptr_has_bswap = freelist_ptr_has_bswap_x86();
> +}
> +
> /*
> * Callback from gdb disassembly code.
> */
> diff --git a/memory.c b/memory.c
> index 5141fbe..c835614 100644
> --- a/memory.c
> +++ b/memory.c
> @@ -19525,7 +19525,7 @@ freelist_ptr(struct meminfo *si, ulong ptr, ulong ptr_addr)
> if (VALID_MEMBER(kmem_cache_random)) {
> /* CONFIG_SLAB_FREELIST_HARDENED */
>
> - if (THIS_KERNEL_VERSION >= LINUX(5,7,0))
> + if (kt->freelist_ptr_has_bswap)
> ptr_addr = (sizeof(long) == 8) ? bswap_64(ptr_addr)
> : bswap_32(ptr_addr);
> return (ptr ^ si->random ^ ptr_addr);
>
> --
> Crash-utility mailing list
> Crash-utility(a)redhat.com
>
https://listman.redhat.com/mailman/listinfo/crash-utility
> Contribution Guidelines:
https://github.com/crash-utility/crash/wiki