On Fri, Dec 16, 2022 at 8:00 PM <crash-utility-request@redhat.com> wrote:
Date: Fri, 16 Dec 2022 05:03:46 +0000
From: HAGIO KAZUHITO(萩尾　一仁)  <k-hagio-ab@nec.com>
To: "crash-utility@redhat.com" <crash-utility@redhat.com>,
        "lijiang@redhat.com" <lijiang@redhat.com>
Subject: [Crash-utility] [PATCH] SLUB: Fix for offset change of struct
        slab members on Linux 6.2-rc1
Message-ID: <1671167016-29225-1-git-send-email-k-hagio-ab@nec.com>
Content-Type: text/plain; charset="iso-2022-jp"

From: Kazuhito Hagio <k-hagio-ab@nec.com>

The following kernel commits split slab info from struct page into
struct slab in Linux 5.17.

  d122019bf061 ("mm: Split slab into its own type")
  07f910f9b729 ("mm: Remove slab from struct page")

Crash commit 5f390ed811b0 followed the change for SLUB, but crash still
uses the offset of page.lru inappropriately.  This happened to work
only because page.lru had the same offset as slab.slab_list up to
Linux 6.1.

However, kernel commit 130d4df57390 ("mm/sl[au]b: rearrange struct slab
fields to allow larger rcu_head") in Linux 6.2-rc1 changed the offset of
slab.slab_list.  As a result, without the patch, "kmem -s|-S" options
print the following errors and fail to print values correctly for
kernels configured with CONFIG_SLUB.

  crash> kmem -S filp
  CACHE             OBJSIZE  ALLOCATED     TOTAL  SLABS  SSIZE  NAME
  kmem: filp: partial list slab: ffffcc650405ab88 invalid page.inuse: -1
  ffff8fa0401eca00      232       1267      1792     56     8k  filp
  ...
  KMEM_CACHE_NODE   NODE  SLABS  PARTIAL  PER-CPU
  ffff8fa0401cb8c0     0     56       24        8
  NODE 0 PARTIAL:
    SLAB              MEMORY            NODE  TOTAL  ALLOCATED  FREE
  kmem: filp: invalid partial list slab pointer: ffffcc650405ab88


Thank you for the fix, Kazu.
 
After applying the patch, I got a different error with a kernel based on the latest kernel commit 9d2f6060fe4c3b49d0cdc1dce1c99296f33379c8:

 crash> kmem -S filp
CACHE             OBJSIZE  ALLOCATED     TOTAL  SLABS  SSIZE  NAME
ffff9d80c030a100      232       1125      3936    123     8k  filp
CPU 0 KMEM_CACHE_CPU:
  ffff9d81e7a38470
CPU 0 SLAB:
  SLAB              MEMORY            NODE  TOTAL  ALLOCATED  FREE
  fffff0f3841a7700  ffff9d80c69dc000     0     32         10    22
  FREE / [ALLOCATED]
kmem: filp: slab: fffff0f3841a7700 invalid freepointer: 4404c55079ecb7fe
CPU 1 KMEM_CACHE_CPU:
  ffff9d81e7a78470
CPU 1 SLAB:
  SLAB              MEMORY            NODE  TOTAL  ALLOCATED  FREE
  fffff0f38446a600  ffff9d80d1a98000     0     32          0    32
  FREE / [ALLOCATED]
...

This issue cannot always be reproduced: I tested it more than ten times, and the above error was observed only once or twice on my side.

Anyway, I'm curious whether this is a separate issue. Could you please double-check it as well?

Thanks.
Lianbo

Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
---
 defs.h    |  1 +
 memory.c  | 16 ++++++++++------
 symbols.c |  1 +
 3 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/defs.h b/defs.h
index 04476b3ff62e..57c1acc4e8df 100644
--- a/defs.h
+++ b/defs.h
@@ -2182,6 +2182,7 @@ struct offset_table {                    /* stash of commonly-used offsets */
        long blk_mq_tags_rqs;
        long request_queue_hctx_table;
        long percpu_counter_counters;
+       long slab_slab_list;
 };

 struct size_table {         /* stash of commonly-used sizes */
diff --git a/memory.c b/memory.c
index 9d003713534b..d05737cc1429 100644
--- a/memory.c
+++ b/memory.c
@@ -781,6 +781,8 @@ vm_init(void)
                if (INVALID_MEMBER(page_slab))
                        MEMBER_OFFSET_INIT(page_slab, "slab", "slab_cache");

+               MEMBER_OFFSET_INIT(slab_slab_list, "slab", "slab_list");
+
                MEMBER_OFFSET_INIT(page_slab_page, "page", "slab_page");
                if (INVALID_MEMBER(page_slab_page))
                        ANON_MEMBER_OFFSET_INIT(page_slab_page, "page", "slab_page");
@@ -19474,6 +19476,7 @@ do_node_lists_slub(struct meminfo *si, ulong node_ptr, int node)
 {
        ulong next, last, list_head, flags;
        int first;
+       long list_off = VALID_MEMBER(slab_slab_list) ? OFFSET(slab_slab_list) : OFFSET(page_lru);

        if (!node_ptr)
                return;
@@ -19487,7 +19490,7 @@ do_node_lists_slub(struct meminfo *si, ulong node_ptr, int node)
                next == list_head ? "  (empty)\n" : "");
        first = 0;
         while (next != list_head) {
-               si->slab = last = next - OFFSET(page_lru);
+               si->slab = last = next - list_off;
                if (first++ == 0)
                        fprintf(fp, "  %s", slab_hdr);

@@ -19510,7 +19513,7 @@ do_node_lists_slub(struct meminfo *si, ulong node_ptr, int node)

                if (!IS_KVADDR(next) ||
                    ((next != list_head) &&
-                    !is_page_ptr(next - OFFSET(page_lru), NULL))) {
+                    !is_page_ptr(next - list_off, NULL))) {
                        error(INFO,
                            "%s: partial list slab: %lx invalid page.lru.next: %lx\n",
                                si->curname, last, next);
@@ -19537,7 +19540,7 @@ do_node_lists_slub(struct meminfo *si, ulong node_ptr, int node)
                next == list_head ? "  (empty)\n" : "");
        first = 0;
         while (next != list_head) {
-               si->slab = next - OFFSET(page_lru);
+               si->slab = next - list_off;
                if (first++ == 0)
                        fprintf(fp, "  %s", slab_hdr);

@@ -19754,6 +19757,7 @@ count_partial(ulong node, struct meminfo *si, ulong *free)
        short inuse, objects;
        ulong total_inuse;
        ulong count = 0;
+       long list_off = VALID_MEMBER(slab_slab_list) ? OFFSET(slab_slab_list) : OFFSET(page_lru);

        count = 0;
        total_inuse = 0;
@@ -19765,12 +19769,12 @@ count_partial(ulong node, struct meminfo *si, ulong *free)
        hq_open();

        while (next != list_head) {
-               if (!readmem(next - OFFSET(page_lru) + OFFSET(page_inuse),
+               if (!readmem(next - list_off + OFFSET(page_inuse),
                    KVADDR, &inuse, sizeof(ushort), "page.inuse", RETURN_ON_ERROR)) {
                        hq_close();
                        return -1;
                }
-               last = next - OFFSET(page_lru);
+               last = next - list_off;

                if (inuse == -1) {
                        error(INFO,
@@ -19796,7 +19800,7 @@ count_partial(ulong node, struct meminfo *si, ulong *free)
                }
                if (!IS_KVADDR(next) ||
                    ((next != list_head) &&
-                    !is_page_ptr(next - OFFSET(page_lru), NULL))) {
+                    !is_page_ptr(next - list_off, NULL))) {
                        error(INFO, "%s: partial list slab: %lx invalid page.lru.next: %lx\n",
                                si->curname, last, next);
                        break;
diff --git a/symbols.c b/symbols.c
index e279cfa68490..66158dcf1744 100644
--- a/symbols.c
+++ b/symbols.c
@@ -9700,6 +9700,7 @@ dump_offset_table(char *spec, ulong makestruct)
                 OFFSET(slab_inuse));
         fprintf(fp, "                     slab_free: %ld\n",
                 OFFSET(slab_free));
+        fprintf(fp, "                slab_slab_list: %ld\n", OFFSET(slab_slab_list));

         fprintf(fp, "               kmem_cache_size: %ld\n",
                 OFFSET(kmem_cache_size));
--
2.31.1