Hello,
I've submitted
https://github.com/crash-utility/crash/issues/130 describing this
"kmem -s/-S" problem.
For CONFIG_SLAB_FREELIST_HARDENED, the crash memory.c:freelist_ptr() code is checking for
an additional bswap using a simple release test eg. THIS_KERNEL_VERSION >=
LINUX(5,7,0), basically checking for RHEL 9 and beyond.
However, for RHEL 8.6/8.7, we have CONFIG_SLAB_FREELIST_HARDENED=y, and we also have the
additional bswap, but the current crash memory.c:freelist_ptr() code is not handling this
case, hence kmem -s/-S will not not work properly for RHEL 8.6/8.7, and free objects will
not be counted nor reported properly.
An example from a RHEL 8.6 x86_64 kdump, a kmem cache with a single slab having 42
objects, only the freelist head is seen as free as crash can't walk freelist next
pointers, and crash is wrongly reporting 41 allocated objects:
crash> sys | grep RELEASE
RELEASE: 4.18.0-372.9.1.el8.x86_64
crash> kmem -s nfs_commit_data
CACHE OBJSIZE ALLOCATED TOTAL SLABS SSIZE NAME
ffff9ad40c7cb2c0 728 41 42 1 32k nfs_commit_data
When properly accounting for the additional bswap, we can walk the freelist and find 38
free objects, and crash is now reporting only 4 allocated objects:
crash> kmem -s nfs_commit_data
CACHE OBJSIZE ALLOCATED TOTAL SLABS SSIZE NAME
ffff9ad40c7cb2c0 728 4 42 1 32k nfs_commit_data
See also pull request
https://github.com/crash-utility/crash/pull/131 and diff/patch
below.
-- georges(a)hpe.com (hpux/linux kernel tools)
diff --git a/defs.h b/defs.h
index 33a823b..5d01f69 100644
--- a/defs.h
+++ b/defs.h
@@ -731,6 +731,7 @@ struct kernel_table { /* kernel data */
#define ONLINE_MAP (ONLINE)
#define ACTIVE_MAP (0x10)
int BUG_bytes;
+ int freelist_ptr_has_bswap;
ulong xen_flags;
#define WRITABLE_PAGE_TABLES (0x1)
#define SHADOW_PAGE_TABLES (0x2)
diff --git a/kernel.c b/kernel.c
index a42e6ad..6658370 100644
--- a/kernel.c
+++ b/kernel.c
@@ -81,6 +81,7 @@ static void read_in_kernel_config_err(int, char *);
static void BUG_bytes_init(void);
static int BUG_x86(void);
static int BUG_x86_64(void);
+static void freelist_ptr_init();
static void cpu_maps_init(void);
static void get_xtime(struct timespec *);
static char *log_from_idx(uint32_t, char *);
@@ -766,6 +767,7 @@ kernel_init()
STRUCT_SIZE_INIT(mem_section, "mem_section");
BUG_bytes_init();
+ freelist_ptr_init();
/*
* for hrtimer
@@ -2352,6 +2354,59 @@ BUG_x86_64(void)
}
+/*
+ * With CONFIG_SLAB_FREELIST_HARDENED, freelist_ptr's are crypted with xor's,
+ * and for recent release with an additionnal bswap. Some releases prio to 5.7.0
+ * may be using the additionnal bswap. The only easy and reliable way to tell is
+ * to inspect assembly code (eg. "__slab_free") for a bswap instruction.
+ */
+static int
+freelist_ptr_has_bswap_x86(void)
+{
+ struct syment *sp, *spn;
+ char buf1[BUFSIZE];
+ char buf2[BUFSIZE];
+ char *arglist[MAXARGS];
+ ulong vaddr;
+ int found;
+
+ if (!(sp = symbol_search("__slab_free")) ||
+ !(spn = next_symbol(NULL, sp)))
+ return FALSE;
+
+ sprintf(buf1, "x/%ldi 0x%lx", spn->value - sp->value,
sp->value);
+
+ found = FALSE;
+ vaddr = 0;
+ open_tmpfile();
+ gdb_pass_through(buf1, pc->tmpfile, GNU_RETURN_ON_ERROR);
+ rewind(pc->tmpfile);
+ while (fgets(buf2, BUFSIZE, pc->tmpfile)) {
+ if (parse_line(buf2, arglist) < 3)
+ continue;
+
+ if ((vaddr = htol(strip_ending_char(arglist[0], ':'),
+ RETURN_ON_ERROR|QUIET, NULL)) >= spn->value)
+ continue;
+
+ if (STREQ(arglist[2], "bswap")) {
+ found = TRUE;
+ break;
+ }
+ }
+ close_tmpfile();
+ return found;
+}
+
+static void
+freelist_ptr_init(void)
+{
+ if (THIS_KERNEL_VERSION >= LINUX(5,7,0))
+ kt->freelist_ptr_has_bswap = TRUE;
+ else if (machine_type("X86") || machine_type("X86_64"))
+ kt->freelist_ptr_has_bswap = freelist_ptr_has_bswap_x86();
+}
+
/*
* Callback from gdb disassembly code.
*/
diff --git a/memory.c b/memory.c
index 5141fbe..c835614 100644
--- a/memory.c
+++ b/memory.c
@@ -19525,7 +19525,7 @@ freelist_ptr(struct meminfo *si, ulong ptr, ulong ptr_addr)
if (VALID_MEMBER(kmem_cache_random)) {
/* CONFIG_SLAB_FREELIST_HARDENED */
- if (THIS_KERNEL_VERSION >= LINUX(5,7,0))
+ if (kt->freelist_ptr_has_bswap)
ptr_addr = (sizeof(long) == 8) ? bswap_64(ptr_addr)
: bswap_32(ptr_addr);
return (ptr ^ si->random ^ ptr_addr);