[PATCH v2 2/2] kmem: introduce -t flag to get slab debug trace
by qiwu.chen@transsion.com
Introduce a -t flag for the kmem command to get the slab debug trace.
Usage examples:
1. Dump the slab debug trace when "-st" is used with an allocated slab object address:
crash> kmem -st ffff000007e79d00
CACHE OBJSIZE ALLOCATED TOTAL SLABS SSIZE NAME
ffff000001c0ed00 3392 93 104 13 32k task_struct
SLAB MEMORY NODE TOTAL ALLOCATED FREE
fffffc00001f9e00 ffff000007e78000 0 8 6 2
FREE / [ALLOCATED]
[ffff000007e79d00]
object ffff000007e79d00 allocated in alloc_task_struct_node+36 when=4294915270 cpu=2 pid=415
__slab_alloc+60
kmem_cache_alloc_node+528
alloc_task_struct_node+36
dup_task_struct+56
copy_process+724
kernel_clone+276
__do_sys_clone+152
__se_sys_clone+60
__arm64_sys_clone+88
__invoke_syscall+36
invoke_syscall+284
el0_svc_common+248
do_el0_svc+56
el0_svc+248
el0t_64_sync_handler+92
el0t_64_sync+344
object ffff000007e79d00 freed in free_task_struct+32 when=4294911569 cpu=1 pid=0
kmem_cache_free+780
free_task_struct+32
free_task+164
__put_task_struct+328
put_task_struct+44
delayed_put_task_struct+64
rcu_do_batch+972
rcu_core+592
rcu_core_si+24
__softirqentry_text_start+388
do_softirq_own_stack+12
invoke_softirq+216
__irq_exit_rcu+164
irq_exit+20
handle_domain_irq+120
gic_handle_irq+312
2. Dump the slab debug trace for each allocated object that belongs to this slab
when "-st" is used with a slab page address:
crash> kmem -st fffffc00001f9e00
CACHE OBJSIZE ALLOCATED TOTAL SLABS SSIZE NAME
ffff000001c0ed00 3392 93 104 13 32k task_struct
SLAB MEMORY NODE TOTAL ALLOCATED FREE
fffffc00001f9e00 ffff000007e78000 0 8 6 2
FREE / [ALLOCATED]
[ffff000007e78000]
object ffff000007e78000 allocated in alloc_task_struct_node+36 when=4294911106 cpu=3 pid=1
__slab_alloc+60
kmem_cache_alloc_node+528
alloc_task_struct_node+36
dup_task_struct+56
copy_process+724
kernel_clone+276
__do_sys_clone+152
__se_sys_clone+60
__arm64_sys_clone+88
__invoke_syscall+36
invoke_syscall+284
el0_svc_common+248
do_el0_svc+56
el0_svc+248
el0t_64_sync_handler+92
el0t_64_sync+344
object ffff000007e78000 freed in free_task_struct+32 when=4294911104 cpu=1 pid=0
kmem_cache_free+780
free_task_struct+32
free_task+164
__put_task_struct+328
put_task_struct+44
delayed_put_task_struct+64
rcu_do_batch+972
rcu_core+592
rcu_core_si+24
__softirqentry_text_start+388
do_softirq_own_stack+12
invoke_softirq+216
__irq_exit_rcu+164
irq_exit+20
handle_domain_irq+120
gic_handle_irq+312
3. Dump the slab debug trace for each allocated object that belongs to a slab cache
when "-S -t" is used with a slab cache address.
crash> kmem -S -t ffff000001c0ed00
CACHE OBJSIZE ALLOCATED TOTAL SLABS SSIZE NAME
ffff000001c0ed00 3392 93 104 13 32k task_struct
CPU 0 KMEM_CACHE_CPU:
ffff00003fd6b7a0
CPU 0 SLAB:
(empty)
CPU 0 PARTIAL:
(empty)
CPU 1 KMEM_CACHE_CPU:
ffff00003fd8a7a0
CPU 1 SLAB:
(empty)
CPU 1 PARTIAL:
(empty)
CPU 2 KMEM_CACHE_CPU:
ffff00003fda97a0
CPU 2 SLAB:
(empty)
CPU 2 PARTIAL:
(empty)
CPU 3 KMEM_CACHE_CPU:
ffff00003fdc87a0
CPU 3 SLAB:
(empty)
CPU 3 PARTIAL:
(empty)
KMEM_CACHE_NODE NODE SLABS PARTIAL PER-CPU
ffff000001eeb200 0 13 5 0
NODE 0 PARTIAL:
SLAB MEMORY NODE TOTAL ALLOCATED FREE
fffffc00000e5e00 ffff000003978000 0 8 5 3
fffffc00000e5e00 ffff000003978000 0 8 5 3
FREE / [ALLOCATED]
[ffff000003978000]
object ffff000003978000 allocated in alloc_task_struct_node+36 when=4294914449 cpu=1 pid=1
__slab_alloc+60
kmem_cache_alloc_node+528
alloc_task_struct_node+36
dup_task_struct+56
copy_process+724
kernel_clone+276
__do_sys_clone+152
__se_sys_clone+60
__arm64_sys_clone+88
__invoke_syscall+36
invoke_syscall+284
el0_svc_common+248
do_el0_svc+56
el0_svc+248
el0t_64_sync_handler+92
el0t_64_sync+344
With this patch, the slab allocation/free times can be sorted by a script,
which is helpful for inspecting slab memory leaks.
Signed-off-by: qiwu.chen <qiwu.chen(a)transsion.com>
---
defs.h | 7 ++++
help.c | 4 ++-
memory.c | 101 +++++++++++++++++++++++++++++++++++++++++++++++++++++--
3 files changed, 108 insertions(+), 4 deletions(-)
diff --git a/defs.h b/defs.h
index 3d729c8..a46c702 100644
--- a/defs.h
+++ b/defs.h
@@ -2283,6 +2283,12 @@ struct offset_table { /* stash of commonly-used offsets */
long page_owner_handle;
long page_owner_free_handle;
long mem_section_page_ext;
+ long track_addr;
+ long track_addrs;
+ long track_pid;
+ long track_cpu;
+ long track_when;
+ long track_handle;
};
struct size_table { /* stash of commonly-used sizes */
@@ -2462,6 +2468,7 @@ struct size_table { /* stash of commonly-used sizes */
long page_ext;
long page_owner;
long stack_record;
+ long track;
};
struct array_table {
diff --git a/help.c b/help.c
index f8ec62f..81c70af 100644
--- a/help.c
+++ b/help.c
@@ -6816,7 +6816,7 @@ char *help_kmem[] = {
"kmem",
"kernel memory",
"[-f|-F|-c|-C|-i|-v|-V|-n|-z|-o|-h] [-p|-t | -m member[,member]]\n"
-" [[-s|-S|-S=cpu[s]|-r] [slab] [-I slab[,slab]]] [-g [flags]] [[-P] address]]",
+" [[-s|-S|-S=cpu[s]|-r|-t] [slab] [-I slab[,slab]]] [-g [flags]] [[-P] address]]",
" This command displays information about the use of kernel memory.\n",
" -f displays the contents of the system free memory headers.",
" also verifies that the page count equals nr_free_pages.",
@@ -6894,6 +6894,8 @@ char *help_kmem[] = {
" address when used with -s or -S, searches the kmalloc() slab subsystem",
" for the slab containing of this virtual address, showing whether",
" it is in use or free.",
+" when added extra -t, displays the slab debug trace for the allocated",
+" object belongs to this slab",
" address when used with -f, the address can be either a page pointer,",
" a physical address, or a kernel virtual address; the free_area",
" header containing the page (if any) is displayed.",
diff --git a/memory.c b/memory.c
index 6c69b6a..3c4766b 100644
--- a/memory.c
+++ b/memory.c
@@ -865,6 +865,15 @@ vm_init(void)
"kmem_cache_node", "partial");
MEMBER_OFFSET_INIT(kmem_cache_node_full,
"kmem_cache_node", "full");
+ STRUCT_SIZE_INIT(track, "track");
+ MEMBER_OFFSET_INIT(track_addr, "track", "addr");
+ if (MEMBER_EXISTS("track", "addrs"))
+ MEMBER_OFFSET_INIT(track_addrs, "track", "addrs");
+ if (MEMBER_EXISTS("track", "handle"))
+ MEMBER_OFFSET_INIT(track_handle, "track", "handle");
+ MEMBER_OFFSET_INIT(track_when, "track", "when");
+ MEMBER_OFFSET_INIT(track_cpu, "track", "cpu");
+ MEMBER_OFFSET_INIT(track_pid, "track", "pid");
} else {
MEMBER_OFFSET_INIT(kmem_cache_s_c_nextp,
"kmem_cache_s", "c_nextp");
@@ -5047,6 +5056,7 @@ get_task_mem_usage(ulong task, struct task_mem_usage *tm)
#define SLAB_GATHER_FAILURE (ADDRESS_SPECIFIED << 26)
#define GET_SLAB_ROOT_CACHES (ADDRESS_SPECIFIED << 27)
#define GET_PAGE_OWNER (ADDRESS_SPECIFIED << 28)
+#define GET_SLAB_DEBUG_TRACE (ADDRESS_SPECIFIED << 29)
#define GET_ALL \
(GET_SHARED_PAGES|GET_TOTALRAM_PAGES|GET_BUFFERS_PAGES|GET_SLAB_PAGES)
@@ -5309,6 +5319,8 @@ cmd_kmem(void)
meminfo.reqname = p1;
meminfo.cache = value[i];
meminfo.flags |= CACHE_SET;
+ if (tflag)
+ meminfo.flags |= GET_SLAB_DEBUG_TRACE;
if ((i+1) == spec_addr) { /* done? */
if (meminfo.calls++)
fprintf(fp, "\n");
@@ -5318,6 +5330,8 @@ cmd_kmem(void)
} else {
meminfo.spec_addr = value[i];
meminfo.flags = ADDRESS_SPECIFIED;
+ if (tflag)
+ meminfo.flags |= GET_SLAB_DEBUG_TRACE;
if (Sflag && (vt->flags & KMALLOC_SLUB))
meminfo.flags |= VERBOSE;
if (meminfo.calls++)
@@ -20015,6 +20029,85 @@ do_kmem_cache_slub(struct meminfo *si)
FREEBUF(per_cpu);
}
+/*
+ * Return offset of the end of info block which is inuse + free pointer if
+ * not overlapping with object.
+ */
+static inline uint get_info_end(struct meminfo *si)
+{
+ uint inuse = UINT(si->cache_buf + OFFSET(kmem_cache_inuse));
+ uint offset = UINT(si->cache_buf + OFFSET(kmem_cache_offset));
+
+ if (offset >= inuse)
+ return inuse + sizeof(void *);
+ else
+ return inuse;
+}
+
+#define TRACK_ADDRS_COUNT 16
+void print_track(struct meminfo *si, char *track, ulong object, enum track_item alloc)
+{
+ ulong track_addr, addr, addrs, when, entries, nr_entries;
+ uint i, cpu, pid, handle;
+ ulonglong jiffies;
+ char buf[BUFSIZE];
+
+ track_addr = object + get_info_end(si) + alloc * STRUCT_SIZE("track");
+ if (!readmem(track_addr, KVADDR, track, SIZE(track), "track", FAULT_ON_ERROR))
+ return;
+
+ addr = ULONG(track + OFFSET(track_addr));
+ if (addr) {
+ when = ULONG(track + OFFSET(track_when));
+ cpu = UINT(track + OFFSET(track_cpu));
+ pid = UINT(track + OFFSET(track_pid));
+ get_uptime(NULL, &jiffies);
+ fprintf(fp, "object %lx %s in %s when=%lu cpu=%u pid=%d\n",
+ object, alloc ? "freed" : "allocated",
+ value_to_symstr(addr, buf, 0),
+ when, cpu, pid);
+ if (VALID_MEMBER(track_addrs)) {
+ addrs = track_addr + OFFSET(track_addrs);
+ stack_trace_print(addrs, TRACK_ADDRS_COUNT);
+ } else if (VALID_MEMBER(track_handle)) {
+ handle = UINT(track + OFFSET(track_handle));
+ nr_entries = stack_depot_fetch(handle, &entries);
+ stack_trace_print(entries, nr_entries);
+ } else {
+ fprintf(fp, "stack trace missing\n");
+ handle = track_addr + OFFSET(track_handle);
+ nr_entries = stack_depot_fetch(handle, &entries);
+ stack_trace_print(entries, nr_entries);
+ }
+ }
+}
+
+#define SLAB_STORE_USER (0x00010000UL)
+static ulong get_slab_store_user_flag(void)
+{
+ ulong slab_store_user_flag;
+
+ if (enumerator_value("_SLAB_STORE_USER", &slab_store_user_flag))
+ return (1 << slab_store_user_flag);
+ else
+ return SLAB_STORE_USER;
+}
+
+static void slab_debug_trace_show(struct meminfo *si, ulong object)
+{
+ ulong flags;
+ char *track;
+
+ flags = ULONG(si->cache_buf + OFFSET(kmem_cache_flags));
+ if (!(flags & get_slab_store_user_flag()))
+ return;
+
+ track = (char *)GETBUF(SIZE(track));
+ print_track(si, track, object, TRACK_ALLOC);
+ print_track(si, track, object, TRACK_FREE);
+ FREEBUF(track);
+}
+
#define DUMP_SLAB_INFO_SLUB() \
{ \
char b1[BUFSIZE], b2[BUFSIZE]; \
@@ -20070,7 +20163,8 @@ do_slab_slub(struct meminfo *si, int verbose)
if (!verbose) {
DUMP_SLAB_INFO_SLUB();
- return TRUE;
+ if (!(si->flags & GET_SLAB_DEBUG_TRACE))
+ return TRUE;
}
cpu_freelist = 0;
@@ -20173,6 +20267,8 @@ do_slab_slub(struct meminfo *si, int verbose)
if (is_free && (cpu_slab >= 0))
fprintf(fp, "(cpu %d cache)", cpu_slab);
fprintf(fp, "\n");
+ if (!is_free && (si->flags & GET_SLAB_DEBUG_TRACE))
+ slab_debug_trace_show(si, p + red_left_pad);
}
return TRUE;
@@ -20283,11 +20379,10 @@ do_node_lists_slub(struct meminfo *si, ulong node_ptr, int node)
}
-#define SLAB_STORE_USER (0x00010000UL)
flags = ULONG(si->cache_buf + OFFSET(kmem_cache_flags));
if (INVALID_MEMBER(kmem_cache_node_full) ||
- !(flags & SLAB_STORE_USER)) {
+ !(flags & get_slab_store_user_flag())) {
fprintf(fp, "NODE %d FULL:\n (not tracked)\n", node);
return;
}
--
2.25.1
2 months, 2 weeks
[PATCH v2 1/2] kmem: introduce -t flag to get page owner
by qiwu.chen@transsion.com
Introduce a -t flag for the kmem command to get page owner information.
Usage examples:
1. Dump the page_owner allocation stack trace for each allocated page in
the buddy system when "kmem -pt" is used:
crash> kmem -pt
Page allocated via order 0, mask 0x1112c4a, pid 1, ts 16155269152 ns
PFN 0x40000, type Movable, Flags 0xffff00000020836
set_page_owner+84
post_alloc_hook+308
prep_new_page+48
get_page_from_freelist+736
__alloc_pages+348
alloc_pages+280
__page_cache_alloc+120
page_cache_ra_unbounded+272
do_page_cache_ra+172
do_sync_mmap_readahead+492
filemap_fault+340
__do_fault+64
__handle_mm_fault+528
handle_mm_fault+208
__do_page_fault+232
do_page_fault+1264
......
2. Dump the page_owner allocation/free traces for an allocated page when
"kmem -pt" is used with a page address.
crash> kmem -pt fffffc00001f9e40
PAGE PHYSICAL MAPPING INDEX CNT FLAGS
fffffc00001f9e40 47e79000 dead000000000008 0 0 ffff00000000000
page_owner tracks the page 0xfffffc00001f9e40 as allocated
Page allocated via order 3, mask 0xd20c0, pid 163, ts 39197221904 ns
PFN 0x47e79, type Unmovable, Flags 0xffff00000000000
set_page_owner+84
post_alloc_hook+308
prep_new_page+48
get_page_from_freelist+736
__alloc_pages+348
alloc_pages+280
alloc_slab_page+60
allocate_slab+212
new_slab+200
___slab_alloc+1432
__slab_alloc+60
kmem_cache_alloc_node+528
alloc_task_struct_node+36
dup_task_struct+56
copy_process+724
kernel_clone+276
page last free ts 38730338480 ns, stack trace:
set_page_owner+84
post_alloc_hook+308
prep_new_page+48
get_page_from_freelist+736
__alloc_pages+348
alloc_pages+280
alloc_slab_page+60
allocate_slab+212
new_slab+200
___slab_alloc+1432
__slab_alloc+60
kmem_cache_alloc_node+528
alloc_task_struct_node+36
dup_task_struct+56
copy_process+724
kernel_clone+276
With this patch, the page allocation times can easily be sorted by the
page_owner_sort tool.
Signed-off-by: qiwu.chen <qiwu.chen(a)transsion.com>
---
defs.h | 43 ++++++
help.c | 4 +-
memory.c | 434 ++++++++++++++++++++++++++++++++++++++++++++++++++++---
3 files changed, 461 insertions(+), 20 deletions(-)
diff --git a/defs.h b/defs.h
index 2231cb6..3d729c8 100644
--- a/defs.h
+++ b/defs.h
@@ -206,6 +206,34 @@ static inline int string_exists(char *s) { return (s ? TRUE : FALSE); }
#undef roundup
#endif
#define roundup(x, y) ((((x)+((y)-1))/(y))*(y))
+#define __round_mask(x, y) ((__typeof__(x))((y)-1))
+#define rounddown(x, y) ((x) & ~__round_mask(x, y))
+#define IS_ALIGNED(x, y) (((x) & ((typeof(x))(y) - 1)) == 0)
+
+/* stack depot macros before kernel commit 8151c7a35d8bd */
+#define STACK_ALLOC_ALIGN 4
+#define STACK_ALLOC_NULL_PROTECTION_BITS 1
+#define STACK_ALLOC_ORDER 2 /* 'Slab' size order for stack depot, 4 pages */
+#define STACK_ALLOC_OFFSET_BITS (STACK_ALLOC_ORDER + PAGESHIFT() - STACK_ALLOC_ALIGN)
+#define DEPOT_STACK_BITS (sizeof(uint) * 8)
+#define STACK_ALLOC_INDEX_BITS (DEPOT_STACK_BITS - \
+ STACK_ALLOC_NULL_PROTECTION_BITS - STACK_ALLOC_OFFSET_BITS)
+
+/* stack depot macros since kernel commit 8151c7a35d8bd */
+#define STACK_DEPOT_EXTRA_BITS 5
+#define DEPOT_HANDLE_BITS (sizeof(uint) * 8)
+#define DEPOT_POOL_ORDER 2 /* Pool size order, 4 pages */
+#define DEPOT_POOL_SIZE (1LL << (PAGESHIFT() + DEPOT_POOL_ORDER))
+#define DEPOT_STACK_ALIGN 4
+#define DEPOT_OFFSET_BITS (DEPOT_POOL_ORDER + PAGESHIFT() - DEPOT_STACK_ALIGN)
+#define DEPOT_POOL_INDEX_BITS (DEPOT_HANDLE_BITS - DEPOT_OFFSET_BITS - \
+ STACK_DEPOT_EXTRA_BITS)
+
+/* GFP flags */
+#define __GFP_RECLAIMABLE 0x10u
+#define __GFP_MOVABLE 0x08u
+#define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE)
+#define GFP_MOVABLE_SHIFT 3
typedef uint64_t physaddr_t;
@@ -2243,6 +2271,18 @@ struct offset_table { /* stash of commonly-used offsets */
long vmap_node_busy;
long rb_list_head;
long file_f_inode;
+ long pglist_data_node_page_ext;
+ long stack_record_size;
+ long stack_record_entries;
+ long stack_record_count;
+ long page_owner_order;
+ long page_owner_gfp_mask;
+ long page_owner_ts_nsec;
+ long page_owner_free_ts_nsec;
+ long page_owner_pid;
+ long page_owner_handle;
+ long page_owner_free_handle;
+ long mem_section_page_ext;
};
struct size_table { /* stash of commonly-used sizes */
@@ -2419,6 +2459,9 @@ struct size_table { /* stash of commonly-used sizes */
long module_memory;
long fred_frame;
long vmap_node;
+ long page_ext;
+ long page_owner;
+ long stack_record;
};
struct array_table {
diff --git a/help.c b/help.c
index e95ac1d..f8ec62f 100644
--- a/help.c
+++ b/help.c
@@ -6815,7 +6815,7 @@ NULL
char *help_kmem[] = {
"kmem",
"kernel memory",
-"[-f|-F|-c|-C|-i|-v|-V|-n|-z|-o|-h] [-p | -m member[,member]]\n"
+"[-f|-F|-c|-C|-i|-v|-V|-n|-z|-o|-h] [-p|-t | -m member[,member]]\n"
" [[-s|-S|-S=cpu[s]|-r] [slab] [-I slab[,slab]]] [-g [flags]] [[-P] address]]",
" This command displays information about the use of kernel memory.\n",
" -f displays the contents of the system free memory headers.",
@@ -6845,6 +6845,7 @@ char *help_kmem[] = {
" mem_map[] array, made up of the page struct address, its associated",
" physical address, the page.mapping, page.index, page._count and",
" page.flags fields.",
+" -t displays page_owner allocated stack trace for each allocated page.",
" -m member similar to -p, but displays page structure contents specified by",
" a comma-separated list of one or more struct page members. The",
" \"flags\" member will always be expressed in hexadecimal format, and",
@@ -6899,6 +6900,7 @@ char *help_kmem[] = {
" address when used with -p, the address can be either a page pointer, a",
" physical address, or a kernel virtual address; its basic mem_map",
" page information is displayed.",
+" when added extra -t, display the page_owner traces for this page",
" address when used with -m, the address can be either a page pointer, a",
" physical address, or a kernel virtual address; the specified",
" members of the associated page struct are displayed.",
diff --git a/memory.c b/memory.c
index 967a9cf..6c69b6a 100644
--- a/memory.c
+++ b/memory.c
@@ -323,6 +323,11 @@ static ulong kmem_cache_nodelists(ulong);
static void dump_hstates(void);
static ulong freelist_ptr(struct meminfo *, ulong, ulong);
static ulong handle_each_vm_area(struct handle_each_vm_area_args *);
+static void page_owner_init(void);
+static int page_owner_enabled(void);
+static void stack_depot_init(void);
+static void dump_page_owner(struct meminfo *, ulong, physaddr_t);
+enum track_item { TRACK_ALLOC, TRACK_FREE, TRACK_ALL };
/*
* Memory display modes specific to this file.
@@ -983,6 +988,8 @@ vm_init(void)
vt->flags |= DISCONTIGMEM;
sparse_mem_init();
+ page_owner_init();
+ stack_depot_init();
vt->vmalloc_start = machdep->vmalloc_start();
if (IS_VMALLOC_ADDR(vt->mem_map))
@@ -1099,6 +1106,8 @@ vm_init(void)
MEMBER_OFFSET_INIT(pglist_data_bdata, "pglist_data", "bdata");
MEMBER_OFFSET_INIT(pglist_data_nr_zones, "pglist_data",
"nr_zones");
+ MEMBER_OFFSET_INIT(pglist_data_node_page_ext, "pglist_data",
+ "node_page_ext");
MEMBER_OFFSET_INIT(pglist_data_node_start_pfn, "pglist_data",
"node_start_pfn");
MEMBER_OFFSET_INIT(pglist_data_pgdat_next, "pglist_data",
@@ -5037,6 +5046,7 @@ get_task_mem_usage(ulong task, struct task_mem_usage *tm)
#define SLAB_BITFIELD (ADDRESS_SPECIFIED << 25)
#define SLAB_GATHER_FAILURE (ADDRESS_SPECIFIED << 26)
#define GET_SLAB_ROOT_CACHES (ADDRESS_SPECIFIED << 27)
+#define GET_PAGE_OWNER (ADDRESS_SPECIFIED << 28)
#define GET_ALL \
(GET_SHARED_PAGES|GET_TOTALRAM_PAGES|GET_BUFFERS_PAGES|GET_SLAB_PAGES)
@@ -5048,7 +5058,7 @@ cmd_kmem(void)
int c;
int sflag, Sflag, pflag, fflag, Fflag, vflag, zflag, oflag, gflag;
int nflag, cflag, Cflag, iflag, lflag, Lflag, Pflag, Vflag, hflag;
- int rflag;
+ int rflag, tflag;
struct meminfo meminfo;
ulonglong value[MAXARGS];
char buf[BUFSIZE];
@@ -5061,13 +5071,13 @@ cmd_kmem(void)
spec_addr = choose_cpu = 0;
sflag = Sflag = pflag = fflag = Fflag = Pflag = zflag = oflag = 0;
vflag = Cflag = cflag = iflag = nflag = lflag = Lflag = Vflag = 0;
- gflag = hflag = rflag = 0;
+ gflag = hflag = rflag = tflag = 0;
escape = FALSE;
BZERO(&meminfo, sizeof(struct meminfo));
BZERO(&value[0], sizeof(ulonglong)*MAXARGS);
pc->curcmd_flags &= ~HEADER_PRINTED;
- while ((c = getopt(argcnt, args, "gI:sS::rFfm:pvczCinl:L:PVoh")) != EOF) {
+ while ((c = getopt(argcnt, args, "gI:sS::rFfm:pvczCinl:L:PVoht")) != EOF) {
switch(c)
{
case 'V':
@@ -5204,6 +5214,10 @@ cmd_kmem(void)
gflag = 1;
break;
+ case 't':
+ tflag = 1;
+ break;
+
default:
argerrs++;
break;
@@ -5213,7 +5227,7 @@ cmd_kmem(void)
if (argerrs)
cmd_usage(pc->curcmd, SYNOPSIS);
- if ((sflag + Sflag + pflag + fflag + Fflag + Vflag + oflag +
+ if ((fflag + Fflag + Vflag + oflag +
vflag + Cflag + cflag + iflag + lflag + Lflag + gflag +
hflag + rflag) > 1) {
error(INFO, "only one flag allowed!\n");
@@ -5264,10 +5278,13 @@ cmd_kmem(void)
if (pflag) {
meminfo.spec_addr = value[i];
meminfo.flags = ADDRESS_SPECIFIED;
+ if (tflag)
+ meminfo.flags |= GET_PAGE_OWNER;
dump_mem_map(&meminfo);
pflag++;
}
+
if (sflag || Sflag) {
if (vt->flags & KMEM_CACHE_UNAVAIL)
error(FATAL,
@@ -5346,25 +5363,25 @@ cmd_kmem(void)
gflag++;
}
- /*
- * no value arguments allowed!
- */
- if (zflag || nflag || iflag || Fflag || Cflag || Lflag ||
+ /*
+ * no value arguments allowed!
+ */
+ if (zflag || nflag || iflag || Fflag || Cflag || Lflag ||
Vflag || oflag || hflag || rflag) {
error(INFO,
"no address arguments allowed with this option\n");
cmd_usage(pc->curcmd, SYNOPSIS);
}
- if (!(sflag + Sflag + pflag + fflag + vflag + cflag +
- lflag + Lflag + gflag)) {
+ if (!(sflag + Sflag + pflag + fflag + vflag + cflag +
+ lflag + Lflag + gflag + tflag)) {
meminfo.spec_addr = value[i];
- meminfo.flags = ADDRESS_SPECIFIED;
- if (meminfo.calls++)
- fprintf(fp, "\n");
+ meminfo.flags = ADDRESS_SPECIFIED;
+ if (meminfo.calls++)
+ fprintf(fp, "\n");
else
kmem_cache_init();
- kmem_search(&meminfo);
+ kmem_search(&meminfo);
}
}
@@ -5372,8 +5389,11 @@ cmd_kmem(void)
if (iflag == 1)
dump_kmeminfo();
- if (pflag == 1)
+ if (pflag == 1) {
+ if (tflag)
+ meminfo.flags = GET_PAGE_OWNER;
dump_mem_map(&meminfo);
+ }
if (fflag == 1)
vt->dump_free_pages(&meminfo);
@@ -5457,7 +5477,7 @@ cmd_kmem(void)
if (!(sflag + Sflag + pflag + fflag + Fflag + vflag +
Vflag + zflag + oflag + cflag + Cflag + iflag +
nflag + lflag + Lflag + gflag + hflag + rflag +
- meminfo.calls))
+ tflag + meminfo.calls))
cmd_usage(pc->curcmd, SYNOPSIS);
}
@@ -5749,7 +5769,8 @@ dump_mem_map_SPARSEMEM(struct meminfo *mi)
switch (mi->flags)
{
- case ADDRESS_SPECIFIED:
+ case ADDRESS_SPECIFIED:
+ case ADDRESS_SPECIFIED|GET_PAGE_OWNER:
switch (mi->memtype)
{
case KVADDR:
@@ -5774,6 +5795,10 @@ dump_mem_map_SPARSEMEM(struct meminfo *mi)
print_hdr = TRUE;
break;
+ case GET_PAGE_OWNER:
+ print_hdr = FALSE;
+ break;
+
case GET_ALL:
shared = 0;
reserved = 0;
@@ -5959,6 +5984,10 @@ dump_mem_map_SPARSEMEM(struct meminfo *mi)
shared++;
}
continue;
+
+ case GET_PAGE_OWNER:
+ dump_page_owner(mi, pp, phys);
+ continue;
}
page_mapping = VALID_MEMBER(page_mapping);
@@ -6083,6 +6112,7 @@ display_members:
if (done)
break;
+
}
if (done)
@@ -6119,7 +6149,10 @@ display_members:
break;
case ADDRESS_SPECIFIED:
+ case ADDRESS_SPECIFIED|GET_PAGE_OWNER:
mi->retval = done;
+ if (mi->flags & GET_PAGE_OWNER)
+ dump_page_owner(mi, pp, phys);
break;
}
@@ -6129,6 +6162,345 @@ display_members:
FREEBUF(page_cache);
}
+static int stack_depot_enabled(void)
+{
+ struct syment *sp;
+ int disable = TRUE;
+
+ if ((sp = symbol_search("stack_depot_disable")))
+ readmem(sp->value, KVADDR, &disable, sizeof(int),
+ "stack_depot_disable", RETURN_ON_ERROR);
+ else if ((sp = symbol_search("stack_depot_disabled")))
+ readmem(sp->value, KVADDR, &disable, sizeof(int),
+ "stack_depot_disabled", RETURN_ON_ERROR);
+ else if ((sp = symbol_search("stack_slabs")))
+ return sp->value ? FALSE : TRUE;
+
+ return !disable;
+}
+
+static void stack_depot_init(void)
+{
+ if (stack_depot_enabled()) {
+ STRUCT_SIZE_INIT(stack_record, "stack_record");
+ MEMBER_OFFSET_INIT(stack_record_size, "stack_record", "size");
+ MEMBER_OFFSET_INIT(stack_record_entries, "stack_record", "entries");
+ if (MEMBER_EXISTS("stack_record", "count"))
+ MEMBER_OFFSET_INIT(stack_record_count, "stack_record", "count");
+ }
+}
+
+/* Fetch stack entries from a depot. */
+static unsigned int stack_depot_fetch(uint handle, ulong *entries)
+{
+ struct syment *sp;
+ uint valid, offset, slabindex, poolindex, pools_num, stack_record_count;
+ uint stack_size = 0;
+ ulong stack_record_addr, sym_value;
+
+ if (!handle)
+ return 0;
+
+ if ((sp = symbol_search("stack_slabs"))) {
+ valid = (handle >> (STACK_ALLOC_INDEX_BITS + STACK_ALLOC_OFFSET_BITS))
+ & STACK_ALLOC_NULL_PROTECTION_BITS;
+ if (!valid)
+ return 0;
+
+ slabindex = handle & ((1 << STACK_ALLOC_INDEX_BITS) - 1);
+ readmem(sp->value + slabindex * sizeof(void *), KVADDR, &stack_record_addr,
+ sizeof(void *), "stack_record_addr", FAULT_ON_ERROR);
+
+ offset = (handle >> STACK_ALLOC_INDEX_BITS) &
+ ((1 << STACK_ALLOC_OFFSET_BITS) - 1);
+ stack_record_addr += (offset << STACK_ALLOC_ALIGN);
+ *entries = stack_record_addr + OFFSET(stack_record_entries);
+ readmem(stack_record_addr + OFFSET(stack_record_size), KVADDR, &stack_size,
+ sizeof(stack_size), "stack_record_entries", FAULT_ON_ERROR);
+ } else if ((sp = symbol_search("stack_pools")) &&
+ (sym_value = symbol_value("pools_num"))) {
+ poolindex = handle & ((1 << DEPOT_POOL_INDEX_BITS) - 1);
+ readmem(sym_value, KVADDR, &pools_num, sizeof(int),
+ "pools_num", RETURN_ON_ERROR);
+ if (poolindex >= pools_num) {
+ error(INFO, "pool index %d out of bounds (%d) for stack id %08x\n",
+ poolindex, pools_num, handle);
+ return 0;
+ }
+
+ readmem(sp->value + (poolindex-1) * sizeof(void *), KVADDR, &stack_record_addr,
+ sizeof(void *), "stack_record_addr", FAULT_ON_ERROR);
+ if (!stack_record_addr)
+ return 0;
+
+ offset = (handle >> DEPOT_POOL_INDEX_BITS) & ((1 << DEPOT_OFFSET_BITS) - 1);
+ stack_record_addr += (offset << DEPOT_STACK_ALIGN);
+ readmem(stack_record_addr + OFFSET(stack_record_count), KVADDR, &stack_record_count,
+ sizeof(stack_record_count), "stack_record_count", FAULT_ON_ERROR);
+ if (!stack_record_count)
+ return 0;
+
+ *entries = stack_record_addr + OFFSET(stack_record_entries);
+ readmem(stack_record_addr + OFFSET(stack_record_size), KVADDR, &stack_size,
+ sizeof(stack_size), "stack_record_entries", FAULT_ON_ERROR);
+ }
+
+ return stack_size;
+}
+
+static void stack_trace_print(ulong entries, uint nr_entries)
+{
+ int i;
+ struct syment *sp;
+ ulong value, offset;
+ char buf[BUFSIZE];
+
+ if (!nr_entries)
+ return;
+
+ for (i = 0; i < nr_entries; i++) {
+ if (!readmem(entries, KVADDR, &value, sizeof(value),
+ "stack_trace", FAULT_ON_ERROR))
+ break;
+
+ entries += sizeof(ulong);
+ sp = value_search(value, &offset);
+ if (!sp)
+ break;
+
+ fprintf(fp, "%s\n", value_to_symstr(sp->value+offset, buf, 0));
+ }
+ fprintf(fp, "\n");
+}
+
+static ulong gfp_migratetype(ulong gfp_flags)
+{
+ struct syment *sp;
+ int page_group_by_mobility_disabled;
+
+ if ((sp = symbol_search("page_group_by_mobility_disabled"))) {
+ readmem(sp->value, KVADDR, &page_group_by_mobility_disabled, sizeof(int),
+ "page_group_by_mobility_disabled", RETURN_ON_ERROR);
+ if (page_group_by_mobility_disabled) {
+ ulong migrate_unmovable;
+
+ enumerator_value("MIGRATE_UNMOVABLE", &migrate_unmovable);
+ return migrate_unmovable;
+ }
+ }
+
+ return (gfp_flags & GFP_MOVABLE_MASK) >> GFP_MOVABLE_SHIFT;
+}
+
+static void migratetype_name(ulong migratetype, char *buf)
+{
+ struct syment *sp;
+ ulong migratetype_name_addr;
+
+ sp = symbol_search("migratetype_names");
+ if (!sp)
+ return;
+
+ readmem(sp->value + migratetype * sizeof(ulong), KVADDR, &migratetype_name_addr,
+ sizeof(ulong), "migratetype_name", RETURN_ON_ERROR);
+ read_string(migratetype_name_addr, buf, BUFSIZE-1);
+}
+
+static void print_page_owner(ulong pfn, ulong page, char *page_owner, enum track_item alloc)
+{
+ int i, pid;
+ ushort order;
+ uint handle, free_handle, gfp_mask, nr_entries;
+ u64 ts_nsec, free_ts_nsec;
+ ulong entries, offset, page_flags;
+ struct syment *sp;
+ char buf[BUFSIZE];
+
+ order = USHORT(page_owner + OFFSET(page_owner_order));
+ gfp_mask = UINT(page_owner + OFFSET(page_owner_gfp_mask));
+ handle = UINT(page_owner + OFFSET(page_owner_handle));
+ free_handle = UINT(page_owner + OFFSET(page_owner_free_handle));
+ ts_nsec = ULONGLONG(page_owner + OFFSET(page_owner_ts_nsec));
+ free_ts_nsec = ULONGLONG(page_owner + OFFSET(page_owner_free_ts_nsec));
+ pid = INT(page_owner + OFFSET(page_owner_pid));
+
+ if (handle && (alloc != TRACK_FREE)) {
+ fprintf(fp, "Page allocated via order %u, mask %#x, pid %d, ts %llu ns\n",
+ order, gfp_mask, pid, ts_nsec);
+ migratetype_name(gfp_migratetype(gfp_mask), buf);
+ readmem(page+OFFSET(page_flags), KVADDR, &page_flags, sizeof(ulong),
+ "page.flags", FAULT_ON_ERROR);
+ fprintf(fp, "PFN %#lx, type %s, Flags %#lx\n", pfn, buf, page_flags);
+ nr_entries = stack_depot_fetch(handle, &entries);
+ stack_trace_print(entries, nr_entries);
+ }
+
+ if (alloc != TRACK_ALLOC &&
+ (free_handle = UINT(page_owner + OFFSET(page_owner_free_handle)))) {
+ nr_entries = stack_depot_fetch(handle, &entries);
+ fprintf(fp, "page last free ts %llu ns, stack trace:\n", free_ts_nsec);
+ stack_trace_print(entries, nr_entries);
+ }
+}
+
+/* Get the max order for zoned buddy allocator */
+static inline ulong get_max_order(void)
+{
+ char *string;
+
+ if ((kt->ikconfig_flags & IKCONFIG_AVAIL) &&
+ get_kernel_config("CONFIG_FORCE_MAX_ZONEORDER", &string) == IKCONFIG_STR)
+ return atol(string);
+
+ return 11;
+}
+
+#define MAX_ORDER get_max_order()
+#define MAX_ORDER_NR_PAGES (1 << (MAX_ORDER - 1))
+
+static int lookup_page_ext(ulong pfn, ulong pp, ulong *page_ext)
+{
+ int node;
+ ulong page_ext_size, section, section_nr, pgdat;
+ ulong node_page_ext, node_start_pfn, page_ext_idx;
+
+ if (!kernel_symbol_exists("page_ext_size"))
+ return FALSE;
+
+ readmem(symbol_value("page_ext_size"), KVADDR, &page_ext_size,
+ sizeof(page_ext_size), "page_ext_size", FAULT_ON_ERROR);
+
+ if (IS_SPARSEMEM()) {
+ section_nr = pfn_to_section_nr(pfn);
+ if (!(section = valid_section_nr(section_nr)))
+ return FALSE;
+
+ readmem(section + OFFSET(mem_section_page_ext), KVADDR, &node_page_ext,
+ sizeof(ulong), "mem_section page_ext", FAULT_ON_ERROR);
+ if (!node_page_ext)
+ return FALSE;
+
+ *page_ext = node_page_ext + pfn * page_ext_size;
+ return TRUE;
+ }
+
+ if ((node = page_to_nid(pp) >= 0)) {
+ pgdat = vt->node_table[node].pgdat;
+ if (!VALID_MEMBER(pglist_data_node_page_ext) ||
+ !VALID_MEMBER(pglist_data_node_start_pfn))
+ return FALSE;
+
+ readmem(pgdat + OFFSET(pglist_data_node_page_ext), KVADDR, &node_page_ext,
+ sizeof(ulong), "pglist node_page_ext", FAULT_ON_ERROR);
+ if (!node_page_ext)
+ return FALSE;
+
+ readmem(pgdat + OFFSET(pglist_data_node_start_pfn), KVADDR, &node_start_pfn,
+ sizeof(ulong), "pglist node_start_pfn", FAULT_ON_ERROR);
+ if (!node_start_pfn)
+ return FALSE;
+
+ page_ext_idx = pfn - rounddown(node_start_pfn, MAX_ORDER_NR_PAGES);
+ *page_ext = node_page_ext + pfn * page_ext_size;
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+static ulong get_page_owner(ulong page_ext)
+{
+ struct syment *sp;
+ ulong page_owner_ops_offset;
+
+ sp = symbol_search("page_owner_ops");
+ if (!sp)
+ return FALSE;
+
+ readmem(sp->value, KVADDR, &page_owner_ops_offset, sizeof(ulong),
+ "page_owner_ops_offset", RETURN_ON_ERROR);
+
+ return page_ext + page_owner_ops_offset;
+}
+
+static int page_owner_enabled(void)
+{
+ struct syment *sp;
+ int enabled;
+
+ if ((sp = symbol_search("page_owner_enabled")) &&
+ readmem(sp->value, KVADDR, &enabled, sizeof(int),
+ "page_owner_enabled", RETURN_ON_ERROR))
+ return enabled;
+
+ if ((sp = symbol_search("page_owner_inited")) &&
+ readmem(sp->value, KVADDR, &enabled, sizeof(int),
+ "page_owner_inited", RETURN_ON_ERROR))
+ return enabled;
+
+ return FALSE;
+}
+
+static void page_owner_init(void)
+{
+ if (page_owner_enabled()) {
+ STRUCT_SIZE_INIT(page_ext, "page_ext");
+ STRUCT_SIZE_INIT(page_owner, "page_owner");
+ MEMBER_OFFSET_INIT(mem_section_page_ext, "mem_section", "page_ext");
+ MEMBER_OFFSET_INIT(page_owner_handle, "page_owner", "handle");
+ MEMBER_OFFSET_INIT(page_owner_free_handle, "page_owner", "free_handle");
+ MEMBER_OFFSET_INIT(page_owner_order, "page_owner", "order");
+ MEMBER_OFFSET_INIT(page_owner_gfp_mask, "page_owner", "gfp_mask");
+ MEMBER_OFFSET_INIT(page_owner_ts_nsec, "page_owner", "ts_nsec");
+ MEMBER_OFFSET_INIT(page_owner_free_ts_nsec, "page_owner", "free_ts_nsec");
+ MEMBER_OFFSET_INIT(page_owner_pid, "page_owner", "pid");
+ }
+}
+
+static void dump_page_owner(struct meminfo *mi, ulong pp, physaddr_t phys)
+{
+ ulong pfn, page_ext_addr, page_owner_addr, page_ext;
+ long page_ext_owner, page_ext_owner_allocated;
+ char *page_owner;
+
+ pfn = BTOP(phys);
+ if (!lookup_page_ext(pfn, pp, &page_ext_addr))
+ return;
+
+ page_owner_addr = get_page_owner(page_ext_addr);
+ if (!page_owner_addr)
+ return;
+
+ page_owner = (char *)GETBUF(SIZE(page_owner));
+ readmem(page_owner_addr, KVADDR, page_owner, SIZE(page_owner),
+ "page_owner", FAULT_ON_ERROR);
+
+ enumerator_value("PAGE_EXT_OWNER", &page_ext_owner);
+ readmem(page_ext_addr, KVADDR, &page_ext, sizeof(ulong),
+ "page_ext", FAULT_ON_ERROR);
+ if (!(page_ext & (1 << page_ext_owner)))
+ goto exit;
+
+ enumerator_value("PAGE_EXT_OWNER_ALLOCATED", &page_ext_owner_allocated);
+ if (mi->flags == GET_PAGE_OWNER) {
+ if (!(page_ext & (1 << page_ext_owner_allocated)) ||
+ !IS_ALIGNED(pfn, 1 << USHORT(page_owner + OFFSET(page_owner_order))))
+ goto exit;
+
+ /* dump allocated page owner for current memory usage */
+ print_page_owner(pfn, pp, page_owner, TRACK_ALLOC);
+ } else {
+ if (page_ext & (1 << page_ext_owner_allocated))
+ fprintf(fp, "page_owner tracks the page 0x%lx as allocated\n", pp);
+ else
+ fprintf(fp, "page_owner tracks the page 0x%lx as freed\n", pp);
+ print_page_owner(pfn, pp, page_owner, TRACK_ALL);
+ }
+
+exit:
+ FREEBUF(page_owner);
+}
+
static void
dump_mem_map(struct meminfo *mi)
{
@@ -6161,6 +6533,19 @@ dump_mem_map(struct meminfo *mi)
char style3[100];
char style4[100];
+ if (mi->flags & GET_PAGE_OWNER) {
+ if (!page_owner_enabled()) {
+ error(INFO, "page_owner is disabled\n");
+ return;
+ }
+
+ /* TODO: support page owner for early kernels without stack depot */
+ if (!stack_depot_enabled()) {
+ error(INFO, "stack_depot is disabled\n");
+ return;
+ }
+ }
+
if (IS_SPARSEMEM()) {
dump_mem_map_SPARSEMEM(mi);
return;
@@ -6238,7 +6623,8 @@ dump_mem_map(struct meminfo *mi)
switch (mi->flags)
{
- case ADDRESS_SPECIFIED:
+ case ADDRESS_SPECIFIED:
+ case ADDRESS_SPECIFIED|GET_PAGE_OWNER:
switch (mi->memtype)
{
case KVADDR:
@@ -6263,6 +6649,10 @@ dump_mem_map(struct meminfo *mi)
print_hdr = TRUE;
break;
+ case GET_PAGE_OWNER:
+ print_hdr = FALSE;
+ break;
+
case GET_ALL:
shared = 0;
reserved = 0;
@@ -6376,6 +6766,10 @@ dump_mem_map(struct meminfo *mi)
/* FALLTHROUGH */
+ case GET_PAGE_OWNER:
+ dump_page_owner(mi, pp, phys);
+ break;
+
case GET_SLAB_PAGES:
if (v22) {
if ((flags >> v22_PG_Slab) & 1)
@@ -6570,7 +6964,10 @@ display_members:
break;
case ADDRESS_SPECIFIED:
+ case ADDRESS_SPECIFIED|GET_PAGE_OWNER:
mi->retval = done;
+ if (mi->flags & GET_PAGE_OWNER)
+ dump_page_owner(mi, pp, phys);
break;
}
@@ -19776,7 +20173,6 @@ do_slab_slub(struct meminfo *si, int verbose)
if (is_free && (cpu_slab >= 0))
fprintf(fp, "(cpu %d cache)", cpu_slab);
fprintf(fp, "\n");
-
}
return TRUE;
--
2.25.1
2 months, 2 weeks
[PATCH v2] kmem: fix the determination for slab page
by qiwu.chen@transsion.com
The determination of a slab page has changed because PG_slab
was converted from a page flag to a page type in kernel commit
46df8e73a4a3.
Before applying this patch:
crash> kmem -s ffff000002aa4100
kmem: address is not allocated in slab subsystem: ffff000002aa4100
After applying this patch:
crash> kmem -s ffff000002aa4100
CACHE OBJSIZE ALLOCATED TOTAL SLABS SSIZE NAME
ffff00000140f900 4096 94 126 18 32k task_struct
SLAB MEMORY NODE TOTAL ALLOCATED FREE
fffffdffc00aa800 ffff000002aa0000 0 7 5 2
FREE / [ALLOCATED]
[ffff000002aa4100]
Signed-off-by: qiwu.chen <qiwu.chen(a)transsion.com>
---
defs.h | 7 ++++++
memory.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++---------
2 files changed, 62 insertions(+), 10 deletions(-)
diff --git a/defs.h b/defs.h
index 2231cb6..e2a9278 100644
--- a/defs.h
+++ b/defs.h
@@ -2243,6 +2243,7 @@ struct offset_table { /* stash of commonly-used offsets */
long vmap_node_busy;
long rb_list_head;
long file_f_inode;
+ long page_page_type;
};
struct size_table { /* stash of commonly-used sizes */
@@ -2651,6 +2652,7 @@ struct vm_table { /* kernel VM-related data */
ulong max_mem_section_nr;
ulong zero_paddr;
ulong huge_zero_paddr;
+ uint page_type_base;
};
#define NODES (0x1)
@@ -2684,6 +2686,11 @@ struct vm_table { /* kernel VM-related data */
#define SLAB_CPU_CACHE (0x10000000)
#define SLAB_ROOT_CACHES (0x20000000)
#define USE_VMAP_NODES (0x40000000)
+/*
+ * The SLAB_PAGEFLAGS flag is introduced to detect the change of
+ * PG_slab's type from a page flag to a page type.
+ */
+#define SLAB_PAGEFLAGS (0x80000000)
#define IS_FLATMEM() (vt->flags & FLATMEM)
#define IS_DISCONTIGMEM() (vt->flags & DISCONTIGMEM)
diff --git a/memory.c b/memory.c
index 967a9cf..48ac627 100644
--- a/memory.c
+++ b/memory.c
@@ -351,6 +351,43 @@ static ulong handle_each_vm_area(struct handle_each_vm_area_args *);
static ulong DISPLAY_DEFAULT;
+/*
+ * Before kernel commit ff202303c398e, the value is defined as a macro, so copy it here;
+ * After this commit, the value is defined as an enum, which can be evaluated at runtime.
+ */
+#define PAGE_TYPE_BASE 0xf0000000
+#define PageType(page_type, flag) \
+ ((page_type & (vt->page_type_base | flag)) == vt->page_type_base)
+
+static void page_type_init(void)
+{
+ if (!enumerator_value("PAGE_TYPE_BASE", (long *)&vt->page_type_base))
+ vt->page_type_base = PAGE_TYPE_BASE;
+}
+
+/*
+ * The PG_slab's type has changed from a page flag to a page type
+ * since kernel commit 46df8e73a4a3.
+ */
+static bool page_slab(ulong page, ulong flags)
+{
+ if (vt->flags & SLAB_PAGEFLAGS) {
+ if ((flags >> vt->PG_slab) & 1)
+ return TRUE;
+ }
+
+ if (VALID_MEMBER(page_page_type)) {
+ uint page_type;
+
+ readmem(page+OFFSET(page_page_type), KVADDR, &page_type,
+ sizeof(page_type), "page_type", FAULT_ON_ERROR);
+ if (PageType(page_type, (uint)vt->PG_slab))
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
/*
* Verify that the sizeof the primitive types are reasonable.
*/
@@ -504,6 +541,10 @@ vm_init(void)
ANON_MEMBER_OFFSET_INIT(page_compound_head, "page", "compound_head");
MEMBER_OFFSET_INIT(page_private, "page", "private");
MEMBER_OFFSET_INIT(page_freelist, "page", "freelist");
+ if (MEMBER_EXISTS("page", "page_type")) {
+ MEMBER_OFFSET_INIT(page_page_type, "page", "page_type");
+ page_type_init();
+ }
MEMBER_OFFSET_INIT(mm_struct_pgd, "mm_struct", "pgd");
@@ -5931,7 +5972,7 @@ dump_mem_map_SPARSEMEM(struct meminfo *mi)
if ((flags >> v22_PG_Slab) & 1)
slabs++;
} else if (vt->PG_slab) {
- if ((flags >> vt->PG_slab) & 1)
+ if (page_slab(pp, flags))
slabs++;
} else {
if ((flags >> v24_PG_slab) & 1)
@@ -6381,7 +6422,7 @@ dump_mem_map(struct meminfo *mi)
if ((flags >> v22_PG_Slab) & 1)
slabs++;
} else if (vt->PG_slab) {
- if ((flags >> vt->PG_slab) & 1)
+ if (page_slab(pp, flags))
slabs++;
} else {
if ((flags >> v24_PG_slab) & 1)
@@ -6775,6 +6816,9 @@ page_flags_init_from_pageflag_names(void)
vt->pageflags_data[i].name = nameptr;
vt->pageflags_data[i].mask = mask;
+ if (!strncmp(nameptr, "slab", 4))
+ vt->flags |= SLAB_PAGEFLAGS;
+
if (CRASHDEBUG(1)) {
fprintf(fp, " %08lx %s\n",
vt->pageflags_data[i].mask,
@@ -6836,7 +6880,8 @@ page_flags_init_from_pageflags_enum(void)
strcpy(nameptr, arglist[0] + strlen("PG_"));
vt->pageflags_data[p].name = nameptr;
vt->pageflags_data[p].mask = 1 << atoi(arglist[2]);
-
+ if (!strncmp(nameptr, "slab", 4))
+ vt->flags |= SLAB_PAGEFLAGS;
p++;
}
} else
@@ -9736,14 +9781,14 @@ vaddr_to_kmem_cache(ulong vaddr, char *buf, int verbose)
readmem(page+OFFSET(page_flags), KVADDR,
&page_flags, sizeof(ulong), "page.flags",
FAULT_ON_ERROR);
- if (!(page_flags & (1 << vt->PG_slab))) {
+ if (!page_slab(page, page_flags)) {
if (((vt->flags & KMALLOC_SLUB) || VALID_MEMBER(page_compound_head)) ||
((vt->flags & KMALLOC_COMMON) &&
VALID_MEMBER(page_slab) && VALID_MEMBER(page_first_page))) {
readmem(compound_head(page)+OFFSET(page_flags), KVADDR,
&page_flags, sizeof(ulong), "page.flags",
FAULT_ON_ERROR);
- if (!(page_flags & (1 << vt->PG_slab)))
+ if (!page_slab(compound_head(page), page_flags))
return NULL;
} else
return NULL;
@@ -20195,7 +20240,7 @@ char *
is_slab_page(struct meminfo *si, char *buf)
{
int i, cnt;
- ulong page_slab, page_flags, name;
+ ulong pg_slab, page_flags, name;
ulong *cache_list;
char *retval;
@@ -20210,11 +20255,11 @@ is_slab_page(struct meminfo *si, char *buf)
RETURN_ON_ERROR|QUIET))
return NULL;
- if (!(page_flags & (1 << vt->PG_slab)))
+ if (!page_slab(si->spec_addr, page_flags))
return NULL;
- if (!readmem(si->spec_addr + OFFSET(page_slab), KVADDR,
- &page_slab, sizeof(ulong), "page.slab",
+ if (!readmem(si->spec_addr + OFFSET(page_slab), KVADDR,
+ &pg_slab, sizeof(ulong), "page.slab",
RETURN_ON_ERROR|QUIET))
return NULL;
@@ -20222,7 +20267,7 @@ is_slab_page(struct meminfo *si, char *buf)
cnt = get_kmem_cache_list(&cache_list);
for (i = 0; i < cnt; i++) {
- if (page_slab == cache_list[i]) {
+ if (pg_slab == cache_list[i]) {
if (!readmem(cache_list[i] + OFFSET(kmem_cache_name),
KVADDR, &name, sizeof(char *),
"kmem_cache.name", QUIET|RETURN_ON_ERROR))
--
2.25.1
2 months, 2 weeks
[PATCH] kmem: fix the determination for slab page
by qiwu.chen@transsion.com
The determination of a slab page has changed since kernel commit
8db00ad564617, which migrated the PG_slab flag from the page flags
to the lower 16 bits of the page type.
Before applying this patch:
crash> kmem -s ffff000002aa4100
kmem: address is not allocated in slab subsystem: ffff000002aa4100
After applying this patch:
crash> kmem -s ffff000002aa4100
CACHE OBJSIZE ALLOCATED TOTAL SLABS SSIZE NAME
ffff00000140f900 4096 94 126 18 32k task_struct
SLAB MEMORY NODE TOTAL ALLOCATED FREE
fffffdffc00aa800 ffff000002aa0000 0 7 5 2
FREE / [ALLOCATED]
[ffff000002aa4100]
Signed-off-by: qiwu.chen <qiwu.chen(a)transsion.com>
---
defs.h | 1 +
memory.c | 23 +++++++++++++++++++----
2 files changed, 20 insertions(+), 4 deletions(-)
diff --git a/defs.h b/defs.h
index e9eb9e3..9ef7f97 100644
--- a/defs.h
+++ b/defs.h
@@ -2289,6 +2289,7 @@ struct offset_table { /* stash of commonly-used offsets */
long track_cpu;
long track_when;
long track_handle;
+ long page_page_type;
};
struct size_table { /* stash of commonly-used sizes */
diff --git a/memory.c b/memory.c
index 7bf8f86..f9ad6e4 100644
--- a/memory.c
+++ b/memory.c
@@ -509,6 +509,8 @@ vm_init(void)
ANON_MEMBER_OFFSET_INIT(page_compound_head, "page", "compound_head");
MEMBER_OFFSET_INIT(page_private, "page", "private");
MEMBER_OFFSET_INIT(page_freelist, "page", "freelist");
+ if (MEMBER_EXISTS("page", "page_type"))
+ MEMBER_OFFSET_INIT(page_page_type, "page", "page_type");
MEMBER_OFFSET_INIT(mm_struct_pgd, "mm_struct", "pgd");
@@ -10114,7 +10116,7 @@ static char *
vaddr_to_kmem_cache(ulong vaddr, char *buf, int verbose)
{
physaddr_t paddr;
- ulong page, cache, page_flags;
+ ulong page, cache, page_flags, page_type;
if (!kvtop(NULL, vaddr, &paddr, 0)) {
if (verbose)
@@ -10143,7 +10145,10 @@ vaddr_to_kmem_cache(ulong vaddr, char *buf, int verbose)
readmem(compound_head(page)+OFFSET(page_flags), KVADDR,
&page_flags, sizeof(ulong), "page.flags",
FAULT_ON_ERROR);
- if (!(page_flags & (1 << vt->PG_slab)))
+ readmem(page + OFFSET(page_page_type), KVADDR, &page_type,
+ sizeof(ulong), "page type", FAULT_ON_ERROR);
+ if (!(page_flags & (1 << vt->PG_slab)) &&
+ !(page_type & (1 << vt->PG_slab)))
return NULL;
} else
return NULL;
@@ -20688,7 +20693,7 @@ char *
is_slab_page(struct meminfo *si, char *buf)
{
int i, cnt;
- ulong page_slab, page_flags, name;
+ ulong page_slab, page_flags, page_type, name;
ulong *cache_list;
char *retval;
@@ -20703,7 +20708,17 @@ is_slab_page(struct meminfo *si, char *buf)
RETURN_ON_ERROR|QUIET))
return NULL;
- if (!(page_flags & (1 << vt->PG_slab)))
+ if (!readmem(si->spec_addr + OFFSET(page_page_type), KVADDR,
+ &page_type, sizeof(ulong), "page.page_type",
+ RETURN_ON_ERROR|QUIET))
+ return NULL;
+
+ /*
+ * PG_slab is migrated from the page flags to the lower 16 bit
+ * of the page type since linux commit 8db00ad564617.
+ */
+ if (!(page_flags & (1 << vt->PG_slab)) &&
+ !(page_type & (1 << vt->PG_slab)))
return NULL;
if (!readmem(si->spec_addr + OFFSET(page_slab), KVADDR,
--
2.25.1
2 months, 2 weeks
[PATCH] kmem: introduce -t flag to get page owner or slab debug trace
by qiwu.chen@transsion.com
Introduce the -t flag for the kmem command to support getting the page
owner and slab debug traces. Here is the user help manual:
1. Dump the page_owner allocated stack trace for each allocated page in
the buddy system when "kmem -pt" is used:
crash> kmem -pt
Page allocated via order 0, mask 0x1112c4a, pid 1, ts 16155269152 ns
PFN 0x40000, type Movable, Flags 0xffff00000020836
set_page_owner+84
post_alloc_hook+308
prep_new_page+48
get_page_from_freelist+736
__alloc_pages+348
alloc_pages+280
__page_cache_alloc+120
page_cache_ra_unbounded+272
do_page_cache_ra+172
do_sync_mmap_readahead+492
filemap_fault+340
__do_fault+64
__handle_mm_fault+528
handle_mm_fault+208
__do_page_fault+232
do_page_fault+1264
......
2. Dump the page_owner allocated/freed trace for an allocated page when
"kmem -pt" is used with a page address.
3. Dump the slab debug trace when "-st" is used with an allocated slab object address:
crash> kmem -st ffff000007e79d00
CACHE OBJSIZE ALLOCATED TOTAL SLABS SSIZE NAME
ffff000001c0ed00 3392 93 104 13 32k task_struct
SLAB MEMORY NODE TOTAL ALLOCATED FREE
fffffc00001f9e00 ffff000007e78000 0 8 6 2
FREE / [ALLOCATED]
[ffff000007e79d00]
object ffff000007e79d00 allocated in alloc_task_struct_node+36 age=8431 cpu=2 pid=415
__slab_alloc+60
kmem_cache_alloc_node+528
alloc_task_struct_node+36
dup_task_struct+56
copy_process+724
kernel_clone+276
__do_sys_clone+152
__se_sys_clone+60
__arm64_sys_clone+88
__invoke_syscall+36
invoke_syscall+284
el0_svc_common+248
do_el0_svc+56
el0_svc+248
el0t_64_sync_handler+92
el0t_64_sync+344
object ffff000007e79d00 freed in free_task_struct+32 age=12132 cpu=1 pid=0
kmem_cache_free+780
free_task_struct+32
free_task+164
__put_task_struct+328
put_task_struct+44
delayed_put_task_struct+64
rcu_do_batch+972
rcu_core+592
rcu_core_si+24
__softirqentry_text_start+388
do_softirq_own_stack+12
invoke_softirq+216
__irq_exit_rcu+164
irq_exit+20
handle_domain_irq+120
4. Dump the slab debug trace for each allocated object that belongs to this
slab when "-st" is used with a slab page address.
5. Dump the slab debug trace for each allocated object that belongs to a
slab cache when "-S -t" is used with a slab cache address.
With this patch, the page allocation times can be sorted easily by the
page_owner_sort tool, and the slab allocation/free times can be sorted by a script.
Signed-off-by: qiwu.chen <qiwu.chen(a)transsion.com>
---
defs.h | 50 ++++++
help.c | 8 +-
memory.c | 538 ++++++++++++++++++++++++++++++++++++++++++++++++++++---
3 files changed, 572 insertions(+), 24 deletions(-)
diff --git a/defs.h b/defs.h
index dfbd241..e9eb9e3 100644
--- a/defs.h
+++ b/defs.h
@@ -206,6 +206,34 @@ static inline int string_exists(char *s) { return (s ? TRUE : FALSE); }
#undef roundup
#endif
#define roundup(x, y) ((((x)+((y)-1))/(y))*(y))
+#define __round_mask(x, y) ((__typeof__(x))((y)-1))
+#define rounddown(x, y) ((x) & ~__round_mask(x, y))
+#define IS_ALIGNED(x, y) (((x) & ((typeof(x))(y) - 1)) == 0)
+
+/* stack depot macros before kernel commit 8151c7a35d8bd */
+#define STACK_ALLOC_ALIGN 4
+#define STACK_ALLOC_NULL_PROTECTION_BITS 1
+#define STACK_ALLOC_ORDER 2 /* 'Slab' size order for stack depot, 4 pages */
+#define STACK_ALLOC_OFFSET_BITS (STACK_ALLOC_ORDER + PAGESHIFT() - STACK_ALLOC_ALIGN)
+#define DEPOT_STACK_BITS (sizeof(uint) * 8)
+#define STACK_ALLOC_INDEX_BITS (DEPOT_STACK_BITS - \
+ STACK_ALLOC_NULL_PROTECTION_BITS - STACK_ALLOC_OFFSET_BITS)
+
+/* stack depot macros since kernel commit 8151c7a35d8bd */
+#define STACK_DEPOT_EXTRA_BITS 5
+#define DEPOT_HANDLE_BITS (sizeof(uint) * 8)
+#define DEPOT_POOL_ORDER 2 /* Pool size order, 4 pages */
+#define DEPOT_POOL_SIZE (1LL << (PAGESHIFT() + DEPOT_POOL_ORDER))
+#define DEPOT_STACK_ALIGN 4
+#define DEPOT_OFFSET_BITS (DEPOT_POOL_ORDER + PAGESHIFT() - DEPOT_STACK_ALIGN)
+#define DEPOT_POOL_INDEX_BITS (DEPOT_HANDLE_BITS - DEPOT_OFFSET_BITS - \
+ STACK_DEPOT_EXTRA_BITS)
+
+/* GFP flags */
+#define __GFP_RECLAIMABLE 0x10u
+#define __GFP_MOVABLE 0x08u
+#define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE)
+#define GFP_MOVABLE_SHIFT 3
typedef uint64_t physaddr_t;
@@ -1626,6 +1654,7 @@ struct offset_table { /* stash of commonly-used offsets */
long pglist_data_node_present_pages;
long pglist_data_node_spanned_pages;
long pglist_data_bdata;
+ long pglist_data_node_page_ext;
long page_cache_bucket_chain;
long zone_struct_free_pages;
long zone_struct_free_area;
@@ -2243,6 +2272,23 @@ struct offset_table { /* stash of commonly-used offsets */
long vmap_node_busy;
long rb_list_head;
long file_f_inode;
+ long stack_record_size;
+ long stack_record_entries;
+ long stack_record_count;
+ long page_owner_order;
+ long page_owner_gfp_mask;
+ long page_owner_ts_nsec;
+ long page_owner_free_ts_nsec;
+ long page_owner_pid;
+ long page_owner_handle;
+ long page_owner_free_handle;
+ long mem_section_page_ext;
+ long track_addr;
+ long track_addrs;
+ long track_pid;
+ long track_cpu;
+ long track_when;
+ long track_handle;
};
struct size_table { /* stash of commonly-used sizes */
@@ -2419,6 +2465,10 @@ struct size_table { /* stash of commonly-used sizes */
long module_memory;
long fred_frame;
long vmap_node;
+ long page_ext;
+ long page_owner;
+ long stack_record;
+ long track;
};
struct array_table {
diff --git a/help.c b/help.c
index e95ac1d..6a59064 100644
--- a/help.c
+++ b/help.c
@@ -6815,8 +6815,8 @@ NULL
char *help_kmem[] = {
"kmem",
"kernel memory",
-"[-f|-F|-c|-C|-i|-v|-V|-n|-z|-o|-h] [-p | -m member[,member]]\n"
-" [[-s|-S|-S=cpu[s]|-r] [slab] [-I slab[,slab]]] [-g [flags]] [[-P] address]]",
+"[-f|-F|-c|-C|-i|-v|-V|-n|-z|-o|-h] [-p|-t | -m member[,member]]\n"
+" [[-s|-S|-S=cpu[s]|-r|-t] [slab] [-I slab[,slab]]] [-g [flags]] [[-P] address]]",
" This command displays information about the use of kernel memory.\n",
" -f displays the contents of the system free memory headers.",
" also verifies that the page count equals nr_free_pages.",
@@ -6845,6 +6845,7 @@ char *help_kmem[] = {
" mem_map[] array, made up of the page struct address, its associated",
" physical address, the page.mapping, page.index, page._count and",
" page.flags fields.",
+" -t displays page_owner allocated stack trace for each allocated page in buddy system.",
" -m member similar to -p, but displays page structure contents specified by",
" a comma-separated list of one or more struct page members. The",
" \"flags\" member will always be expressed in hexadecimal format, and",
@@ -6893,12 +6894,15 @@ char *help_kmem[] = {
" address when used with -s or -S, searches the kmalloc() slab subsystem",
" for the slab containing of this virtual address, showing whether",
" it is in use or free.",
+" when added extra -t, displays the slab debug trace for the allocated",
+" object belongs to this slab",
" address when used with -f, the address can be either a page pointer,",
" a physical address, or a kernel virtual address; the free_area",
" header containing the page (if any) is displayed.",
" address when used with -p, the address can be either a page pointer, a",
" physical address, or a kernel virtual address; its basic mem_map",
" page information is displayed.",
+" when added extra -t, display the page_owner traces for this page",
" address when used with -m, the address can be either a page pointer, a",
" physical address, or a kernel virtual address; the specified",
" members of the associated page struct are displayed.",
diff --git a/memory.c b/memory.c
index a74ebaf..7bf8f86 100644
--- a/memory.c
+++ b/memory.c
@@ -323,6 +323,11 @@ static ulong kmem_cache_nodelists(ulong);
static void dump_hstates(void);
static ulong freelist_ptr(struct meminfo *, ulong, ulong);
static ulong handle_each_vm_area(struct handle_each_vm_area_args *);
+static void page_owner_init(void);
+static int page_owner_enabled(void);
+static void stack_depot_init(void);
+static void dump_page_owner(struct meminfo *, ulong, physaddr_t);
+enum track_item { TRACK_ALLOC, TRACK_FREE, TRACK_ALL };
/*
* Memory display modes specific to this file.
@@ -860,6 +865,16 @@ vm_init(void)
"kmem_cache_node", "partial");
MEMBER_OFFSET_INIT(kmem_cache_node_full,
"kmem_cache_node", "full");
+
+ STRUCT_SIZE_INIT(track, "track");
+ MEMBER_OFFSET_INIT(track_addr, "track", "addr");
+ if (MEMBER_EXISTS("track", "addrs"))
+ MEMBER_OFFSET_INIT(track_addrs, "track", "addrs");
+ if (MEMBER_EXISTS("track", "handle"))
+ MEMBER_OFFSET_INIT(track_handle, "track", "handle");
+ MEMBER_OFFSET_INIT(track_when, "track", "when");
+ MEMBER_OFFSET_INIT(track_cpu, "track", "cpu");
+ MEMBER_OFFSET_INIT(track_pid, "track", "pid");
} else {
MEMBER_OFFSET_INIT(kmem_cache_s_c_nextp,
"kmem_cache_s", "c_nextp");
@@ -983,6 +998,8 @@ vm_init(void)
vt->flags |= DISCONTIGMEM;
sparse_mem_init();
+ page_owner_init();
+ stack_depot_init();
vt->vmalloc_start = machdep->vmalloc_start();
if (IS_VMALLOC_ADDR(vt->mem_map))
@@ -1099,6 +1116,8 @@ vm_init(void)
MEMBER_OFFSET_INIT(pglist_data_bdata, "pglist_data", "bdata");
MEMBER_OFFSET_INIT(pglist_data_nr_zones, "pglist_data",
"nr_zones");
+ MEMBER_OFFSET_INIT(pglist_data_node_page_ext, "pglist_data",
+ "node_page_ext");
MEMBER_OFFSET_INIT(pglist_data_node_start_pfn, "pglist_data",
"node_start_pfn");
MEMBER_OFFSET_INIT(pglist_data_pgdat_next, "pglist_data",
@@ -5037,6 +5056,8 @@ get_task_mem_usage(ulong task, struct task_mem_usage *tm)
#define SLAB_BITFIELD (ADDRESS_SPECIFIED << 25)
#define SLAB_GATHER_FAILURE (ADDRESS_SPECIFIED << 26)
#define GET_SLAB_ROOT_CACHES (ADDRESS_SPECIFIED << 27)
+#define GET_SLAB_DEBUG_TRACE (ADDRESS_SPECIFIED << 28)
+#define GET_PAGE_OWNER (ADDRESS_SPECIFIED << 29)
#define GET_ALL \
(GET_SHARED_PAGES|GET_TOTALRAM_PAGES|GET_BUFFERS_PAGES|GET_SLAB_PAGES)
@@ -5048,7 +5069,7 @@ cmd_kmem(void)
int c;
int sflag, Sflag, pflag, fflag, Fflag, vflag, zflag, oflag, gflag;
int nflag, cflag, Cflag, iflag, lflag, Lflag, Pflag, Vflag, hflag;
- int rflag;
+ int rflag, tflag;
struct meminfo meminfo;
ulonglong value[MAXARGS];
char buf[BUFSIZE];
@@ -5061,13 +5082,13 @@ cmd_kmem(void)
spec_addr = choose_cpu = 0;
sflag = Sflag = pflag = fflag = Fflag = Pflag = zflag = oflag = 0;
vflag = Cflag = cflag = iflag = nflag = lflag = Lflag = Vflag = 0;
- gflag = hflag = rflag = 0;
+ gflag = hflag = rflag = tflag = 0;
escape = FALSE;
BZERO(&meminfo, sizeof(struct meminfo));
BZERO(&value[0], sizeof(ulonglong)*MAXARGS);
pc->curcmd_flags &= ~HEADER_PRINTED;
- while ((c = getopt(argcnt, args, "gI:sS::rFfm:pvczCinl:L:PVoh")) != EOF) {
+ while ((c = getopt(argcnt, args, "gI:sS::rFfm:pvczCinl:L:PVoht")) != EOF) {
switch(c)
{
case 'V':
@@ -5204,6 +5225,10 @@ cmd_kmem(void)
gflag = 1;
break;
+ case 't':
+ tflag = 1;
+ break;
+
default:
argerrs++;
break;
@@ -5213,7 +5238,7 @@ cmd_kmem(void)
if (argerrs)
cmd_usage(pc->curcmd, SYNOPSIS);
- if ((sflag + Sflag + pflag + fflag + Fflag + Vflag + oflag +
+ if ((fflag + Fflag + Vflag + oflag +
vflag + Cflag + cflag + iflag + lflag + Lflag + gflag +
hflag + rflag) > 1) {
error(INFO, "only one flag allowed!\n");
@@ -5264,10 +5289,13 @@ cmd_kmem(void)
if (pflag) {
meminfo.spec_addr = value[i];
meminfo.flags = ADDRESS_SPECIFIED;
+ if (tflag)
+ meminfo.flags |= GET_PAGE_OWNER;
dump_mem_map(&meminfo);
pflag++;
}
+
if (sflag || Sflag) {
if (vt->flags & KMEM_CACHE_UNAVAIL)
error(FATAL,
@@ -5292,6 +5320,8 @@ cmd_kmem(void)
meminfo.reqname = p1;
meminfo.cache = value[i];
meminfo.flags |= CACHE_SET;
+ if (tflag)
+ meminfo.flags |= GET_SLAB_DEBUG_TRACE;
if ((i+1) == spec_addr) { /* done? */
if (meminfo.calls++)
fprintf(fp, "\n");
@@ -5301,6 +5331,8 @@ cmd_kmem(void)
} else {
meminfo.spec_addr = value[i];
meminfo.flags = ADDRESS_SPECIFIED;
+ if (tflag)
+ meminfo.flags |= GET_SLAB_DEBUG_TRACE;
if (Sflag && (vt->flags & KMALLOC_SLUB))
meminfo.flags |= VERBOSE;
if (meminfo.calls++)
@@ -5346,25 +5378,28 @@ cmd_kmem(void)
gflag++;
}
- /*
- * no value arguments allowed!
- */
- if (zflag || nflag || iflag || Fflag || Cflag || Lflag ||
+ if (tflag)
+ tflag++;
+
+ /*
+ * no value arguments allowed!
+ */
+ if (zflag || nflag || iflag || Fflag || Cflag || Lflag ||
Vflag || oflag || hflag || rflag) {
error(INFO,
"no address arguments allowed with this option\n");
cmd_usage(pc->curcmd, SYNOPSIS);
}
- if (!(sflag + Sflag + pflag + fflag + vflag + cflag +
- lflag + Lflag + gflag)) {
+ if (!(sflag + Sflag + pflag + fflag + vflag + cflag +
+ lflag + Lflag + gflag + tflag)) {
meminfo.spec_addr = value[i];
- meminfo.flags = ADDRESS_SPECIFIED;
- if (meminfo.calls++)
- fprintf(fp, "\n");
+ meminfo.flags = ADDRESS_SPECIFIED;
+ if (meminfo.calls++)
+ fprintf(fp, "\n");
else
kmem_cache_init();
- kmem_search(&meminfo);
+ kmem_search(&meminfo);
}
}
@@ -5372,8 +5407,11 @@ cmd_kmem(void)
if (iflag == 1)
dump_kmeminfo();
- if (pflag == 1)
+ if (pflag == 1) {
+ if (tflag)
+ meminfo.flags = GET_PAGE_OWNER;
dump_mem_map(&meminfo);
+ }
if (fflag == 1)
vt->dump_free_pages(&meminfo);
@@ -5457,7 +5495,7 @@ cmd_kmem(void)
if (!(sflag + Sflag + pflag + fflag + Fflag + vflag +
Vflag + zflag + oflag + cflag + Cflag + iflag +
nflag + lflag + Lflag + gflag + hflag + rflag +
- meminfo.calls))
+ tflag + meminfo.calls))
cmd_usage(pc->curcmd, SYNOPSIS);
}
@@ -5749,7 +5787,8 @@ dump_mem_map_SPARSEMEM(struct meminfo *mi)
switch (mi->flags)
{
- case ADDRESS_SPECIFIED:
+ case ADDRESS_SPECIFIED:
+ case ADDRESS_SPECIFIED|GET_PAGE_OWNER:
switch (mi->memtype)
{
case KVADDR:
@@ -5774,6 +5813,10 @@ dump_mem_map_SPARSEMEM(struct meminfo *mi)
print_hdr = TRUE;
break;
+ case GET_PAGE_OWNER:
+ print_hdr = FALSE;
+ break;
+
case GET_ALL:
shared = 0;
reserved = 0;
@@ -5926,6 +5969,10 @@ dump_mem_map_SPARSEMEM(struct meminfo *mi)
/* FALLTHROUGH */
+ case GET_PAGE_OWNER:
+ dump_page_owner(mi, pp, phys);
+ break;
+
case GET_SLAB_PAGES:
if (v22) {
if ((flags >> v22_PG_Slab) & 1)
@@ -6083,6 +6130,7 @@ display_members:
if (done)
break;
+
}
if (done)
@@ -6119,7 +6167,10 @@ display_members:
break;
case ADDRESS_SPECIFIED:
+ case ADDRESS_SPECIFIED|GET_PAGE_OWNER:
mi->retval = done;
+ if (mi->flags & GET_PAGE_OWNER)
+ dump_page_owner(mi, pp, phys);
break;
}
@@ -6129,6 +6180,331 @@ display_members:
FREEBUF(page_cache);
}
+static int stack_depot_enabled(void)
+{
+ struct syment *sp;
+ int disable = TRUE;
+
+ if ((sp = symbol_search("stack_depot_disable")))
+ readmem(sp->value, KVADDR, &disable, sizeof(int),
+ "stack_depot_disable", RETURN_ON_ERROR);
+ else if ((sp = symbol_search("stack_depot_disabled")))
+ readmem(sp->value, KVADDR, &disable, sizeof(int),
+ "stack_depot_disabled", RETURN_ON_ERROR);
+ else if ((sp = symbol_search("stack_slabs")))
+ return sp->value ? FALSE : TRUE;
+
+ return !disable;
+}
+
+static void stack_depot_init(void)
+{
+ if (stack_depot_enabled()) {
+ STRUCT_SIZE_INIT(stack_record, "stack_record");
+ MEMBER_OFFSET_INIT(stack_record_size, "stack_record", "size");
+ MEMBER_OFFSET_INIT(stack_record_entries, "stack_record", "entries");
+ if (MEMBER_EXISTS("stack_record", "count"))
+ MEMBER_OFFSET_INIT(stack_record_count, "stack_record", "count");
+ }
+}
+
+/* Fetch stack entries from a depot. */
+static unsigned int stack_depot_fetch(uint handle, ulong *entries)
+{
+ struct syment *sp;
+ uint valid, offset, slabindex, poolindex, pools_num, stack_record_count, stack_size = 0;
+ ulong stack_record_addr, sym_value;
+
+ if (!handle)
+ return 0;
+
+ if ((sp = symbol_search("stack_slabs"))) {
+ valid = (handle >> (STACK_ALLOC_INDEX_BITS + STACK_ALLOC_OFFSET_BITS)) & STACK_ALLOC_NULL_PROTECTION_BITS;
+ if (!valid)
+ return 0;
+
+ slabindex = handle & ((1 << STACK_ALLOC_INDEX_BITS) - 1);
+ if (!readmem(sp->value + slabindex * sizeof(void *), KVADDR, &stack_record_addr,
+ sizeof(void *), "stack_record_addr", FAULT_ON_ERROR))
+ return 0;
+
+ offset = (handle >> STACK_ALLOC_INDEX_BITS) & ((1 << STACK_ALLOC_OFFSET_BITS) - 1);
+ stack_record_addr += (offset << STACK_ALLOC_ALIGN);
+ *entries = stack_record_addr + OFFSET(stack_record_entries);
+ if (!readmem(stack_record_addr + OFFSET(stack_record_size), KVADDR, &stack_size, sizeof(stack_size),
+ "stack_record_entries", FAULT_ON_ERROR))
+ return 0;
+ } else if ((sp = symbol_search("stack_pools")) && (sym_value = symbol_value("pools_num"))) {
+ poolindex = handle & ((1 << DEPOT_POOL_INDEX_BITS) - 1);
+ readmem(sym_value, KVADDR, &pools_num, sizeof(int), "pools_num", RETURN_ON_ERROR);
+ if (poolindex >= pools_num) {
+ error(INFO, "pool index %d out of bounds (%d) for stack id %08x\n", poolindex, pools_num, handle);
+ return 0;
+ }
+
+ readmem(sp->value + (poolindex-1) * sizeof(void *), KVADDR, &stack_record_addr,
+ sizeof(void *), "stack_record_addr", FAULT_ON_ERROR);
+ if (!stack_record_addr)
+ return 0;
+
+ offset = (handle >> DEPOT_POOL_INDEX_BITS) & ((1 << DEPOT_OFFSET_BITS) - 1);
+ stack_record_addr += (offset << DEPOT_STACK_ALIGN);
+ if (!readmem(stack_record_addr + OFFSET(stack_record_count), KVADDR, &stack_record_count,
+ sizeof(stack_record_count), "stack_record_count", FAULT_ON_ERROR) || !stack_record_count)
+ return 0;
+
+ *entries = stack_record_addr + OFFSET(stack_record_entries);
+ if (!readmem(stack_record_addr + OFFSET(stack_record_size), KVADDR, &stack_size, sizeof(stack_size),
+ "stack_record_entries", FAULT_ON_ERROR))
+ return 0;
+ }
+
+ return stack_size;
+}
+
+static void stack_trace_print(ulong entries, uint nr_entries)
+{
+ int i;
+ struct syment *sp;
+ ulong value, offset;
+ char buf[BUFSIZE];
+
+ if (!nr_entries)
+ return;
+
+ for (i = 0; i < nr_entries; i++) {
+ if (!readmem(entries, KVADDR, &value, sizeof(value), "stack_trace", FAULT_ON_ERROR))
+ break;
+
+ entries += sizeof(ulong);
+ sp = value_search(value, &offset);
+ if (!sp)
+ break;
+
+ fprintf(fp, "%s\n", value_to_symstr(sp->value+offset, buf, 0));
+ }
+ fprintf(fp, "\n");
+}
+
+static ulong gfp_migratetype(ulong gfp_flags)
+{
+ struct syment *sp;
+ int page_group_by_mobility_disabled;
+
+ if ((sp = symbol_search("page_group_by_mobility_disabled"))) {
+ readmem(sp->value, KVADDR, &page_group_by_mobility_disabled, sizeof(int),
+ "page_group_by_mobility_disabled", RETURN_ON_ERROR);
+ if (page_group_by_mobility_disabled) {
+ ulong migrate_unmovable;
+
+ enumerator_value("MIGRATE_UNMOVABLE", &migrate_unmovable);
+ return migrate_unmovable;
+ }
+ }
+
+ return (gfp_flags & GFP_MOVABLE_MASK) >> GFP_MOVABLE_SHIFT;
+}
+
+static void migratetype_name(ulong migratetype, char *buf)
+{
+ struct syment *sp;
+ ulong migratetype_name_addr;
+
+ sp = symbol_search("migratetype_names");
+ if (!sp)
+ return;
+
+ readmem(sp->value + migratetype * sizeof(ulong), KVADDR, &migratetype_name_addr,
+ sizeof(ulong), "migratetype_name", RETURN_ON_ERROR);
+ read_string(migratetype_name_addr, buf, BUFSIZE-1);
+}
+
+static void print_page_owner(ulong pfn, ulong page, char *page_owner, enum track_item alloc)
+{
+ int i, pid;
+ ushort order;
+ uint handle, free_handle, gfp_mask, nr_entries;
+ u64 ts_nsec, free_ts_nsec;
+ ulong entries, offset, page_flags;
+ struct syment *sp;
+ char buf[BUFSIZE];
+
+ order = USHORT(page_owner + OFFSET(page_owner_order));
+ gfp_mask = UINT(page_owner + OFFSET(page_owner_gfp_mask));
+ handle = UINT(page_owner + OFFSET(page_owner_handle));
+ free_handle = UINT(page_owner + OFFSET(page_owner_free_handle));
+ ts_nsec = ULONGLONG(page_owner + OFFSET(page_owner_ts_nsec));
+ free_ts_nsec = ULONGLONG(page_owner + OFFSET(page_owner_free_ts_nsec));
+ pid = INT(page_owner + OFFSET(page_owner_pid));
+
+ if (handle && (alloc != TRACK_FREE)) {
+ fprintf(fp, "Page allocated via order %u, mask %#x, pid %d, ts %llu ns\n",
+ order, gfp_mask, pid, ts_nsec);
+ migratetype_name(gfp_migratetype(gfp_mask), buf);
+ readmem(page+OFFSET(page_flags), KVADDR, &page_flags, sizeof(ulong),
+ "page.flags", FAULT_ON_ERROR);
+ fprintf(fp, "PFN %#lx, type %s, Flags %#lx\n", pfn, buf, page_flags);
+ nr_entries = stack_depot_fetch(handle, &entries);
+ stack_trace_print(entries, nr_entries);
+ }
+
+ if (alloc != TRACK_ALLOC &&
+ (free_handle = UINT(page_owner + OFFSET(page_owner_free_handle)))) {
+ nr_entries = stack_depot_fetch(handle, &entries);
+ fprintf(fp, "page last free ts %llu ns, stack trace:\n", free_ts_nsec);
+ stack_trace_print(entries, nr_entries);
+ }
+}
+
+/* Get the max order for zoned buddy allocator */
+static inline ulong get_max_order(void)
+{
+ char *string;
+
+ if ((kt->ikconfig_flags & IKCONFIG_AVAIL) &&
+ get_kernel_config("CONFIG_FORCE_MAX_ZONEORDER", &string) == IKCONFIG_STR)
+ return atol(string);
+
+ return 11;
+}
+
+#define MAX_ORDER get_max_order()
+#define MAX_ORDER_NR_PAGES (1 << (MAX_ORDER - 1))
+
+static int lookup_page_ext(ulong pfn, ulong pp, ulong *page_ext)
+{
+ int node;
+ ulong page_ext_size, section, section_nr, pgdat, node_page_ext, node_start_pfn, page_ext_idx;
+
+ if (!kernel_symbol_exists("page_ext_size") ||
+ !readmem(symbol_value("page_ext_size"), KVADDR, &page_ext_size, sizeof(page_ext_size),
+ "page_ext_size", QUIET|RETURN_ON_ERROR))
+ return FALSE;
+
+ if (IS_SPARSEMEM()) {
+ section_nr = pfn_to_section_nr(pfn);
+ if (!(section = valid_section_nr(section_nr)))
+ return FALSE;
+
+ if (!readmem(section + OFFSET(mem_section_page_ext), KVADDR, &node_page_ext, sizeof(ulong),
+ "mem_section page_ext", FAULT_ON_ERROR) || !node_page_ext)
+ return FALSE;
+
+ *page_ext = node_page_ext + pfn * page_ext_size;
+ return TRUE;
+ }
+
+ if ((node = page_to_nid(pp) >= 0)) {
+ pgdat = vt->node_table[node].pgdat;
+ if (!VALID_MEMBER(pglist_data_node_page_ext) ||
+ !readmem(pgdat + OFFSET(pglist_data_node_page_ext), KVADDR, &node_page_ext,
+ sizeof(ulong), "pglist node_page_ext", FAULT_ON_ERROR) || !node_page_ext)
+ return FALSE;
+
+ if (!VALID_MEMBER(pglist_data_node_start_pfn) ||
+ !readmem(pgdat + OFFSET(pglist_data_node_start_pfn), KVADDR, &node_start_pfn,
+ sizeof(ulong), "pglist node_start_pfn", FAULT_ON_ERROR) || !node_start_pfn)
+ return FALSE;
+
+ page_ext_idx = pfn - rounddown(node_start_pfn, MAX_ORDER_NR_PAGES);
+ *page_ext = node_page_ext + pfn * page_ext_size;
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+static ulong get_page_owner(ulong page_ext)
+{
+ struct syment *sp;
+ ulong page_owner_ops_offset;
+
+ sp = symbol_search("page_owner_ops");
+ if (!sp)
+ return FALSE;
+
+ if (!readmem(sp->value, KVADDR, &page_owner_ops_offset, sizeof(ulong),
+ "page_owner_ops_offset", RETURN_ON_ERROR))
+ return FALSE;
+
+ return page_ext + page_owner_ops_offset;
+}
+
+static int page_owner_enabled(void)
+{
+ struct syment *sp;
+ int enabled;
+
+ if ((sp = symbol_search("page_owner_enabled")) &&
+ readmem(sp->value, KVADDR, &enabled, sizeof(int), "page_owner_enabled", RETURN_ON_ERROR))
+ return enabled;
+
+ if ((sp = symbol_search("page_owner_inited")) &&
+ readmem(sp->value, KVADDR, &enabled, sizeof(int), "page_owner_inited", RETURN_ON_ERROR))
+ return enabled;
+
+ return FALSE;
+}
+
+static void page_owner_init(void)
+{
+ if (page_owner_enabled()) {
+ STRUCT_SIZE_INIT(page_ext, "page_ext");
+ STRUCT_SIZE_INIT(page_owner, "page_owner");
+ MEMBER_OFFSET_INIT(mem_section_page_ext, "mem_section", "page_ext");
+ MEMBER_OFFSET_INIT(page_owner_handle, "page_owner", "handle");
+ MEMBER_OFFSET_INIT(page_owner_free_handle, "page_owner", "free_handle");
+ MEMBER_OFFSET_INIT(page_owner_order, "page_owner", "order");
+ MEMBER_OFFSET_INIT(page_owner_gfp_mask, "page_owner", "gfp_mask");
+ MEMBER_OFFSET_INIT(page_owner_ts_nsec, "page_owner", "ts_nsec");
+ MEMBER_OFFSET_INIT(page_owner_free_ts_nsec, "page_owner", "free_ts_nsec");
+ MEMBER_OFFSET_INIT(page_owner_pid, "page_owner", "pid");
+ }
+}
+
+static void dump_page_owner(struct meminfo *mi, ulong pp, physaddr_t phys)
+{
+ ulong pfn, page_ext_addr, page_owner_addr, page_ext;
+ long page_ext_owner, page_ext_owner_allocated;
+ char *page_owner;
+
+ pfn = BTOP(phys);
+ if (!lookup_page_ext(pfn, pp, &page_ext_addr))
+ return;
+
+ page_owner_addr = get_page_owner(page_ext_addr);
+ if (!page_owner_addr)
+ return;
+
+ page_owner = (char *)GETBUF(SIZE(page_owner));
+ if (!readmem(page_owner_addr, KVADDR, page_owner, SIZE(page_owner), "page_owner", FAULT_ON_ERROR))
+ goto exit;
+
+ enumerator_value("PAGE_EXT_OWNER", &page_ext_owner);
+ if (!readmem(page_ext_addr, KVADDR, &page_ext, sizeof(ulong), "page_ext", FAULT_ON_ERROR)
+ || !(page_ext & (1 << page_ext_owner)))
+ goto exit;
+
+ enumerator_value("PAGE_EXT_OWNER_ALLOCATED", &page_ext_owner_allocated);
+ if (mi->flags == GET_PAGE_OWNER) {
+ if (!(page_ext & (1 << page_ext_owner_allocated)) ||
+ !IS_ALIGNED(pfn, 1 << USHORT(page_owner + OFFSET(page_owner_order))))
+ goto exit;
+
+ /* dump allocated page owner for current memory usage */
+ print_page_owner(pfn, pp, page_owner, TRACK_ALLOC);
+ } else {
+ if (page_ext & (1 << page_ext_owner_allocated))
+ fprintf(fp, "page_owner tracks the page 0x%lx as allocated\n", pp);
+ else
+ fprintf(fp, "page_owner tracks the page 0x%lx as freed\n", pp);
+ print_page_owner(pfn, pp, page_owner, TRACK_ALL);
+ }
+
+exit:
+ FREEBUF(page_owner);
+}
+
static void
dump_mem_map(struct meminfo *mi)
{
@@ -6161,6 +6537,18 @@ dump_mem_map(struct meminfo *mi)
char style3[100];
char style4[100];
+ if (mi->flags & GET_PAGE_OWNER) {
+ if (!page_owner_enabled()) {
+ error(INFO, "page_owner is disabled\n");
+ return;
+ }
+
+ if (!stack_depot_enabled()) {
+ error(INFO, "stack_depot is disabled\n");
+ return;
+ }
+ }
+
if (IS_SPARSEMEM()) {
dump_mem_map_SPARSEMEM(mi);
return;
@@ -6238,7 +6626,8 @@ dump_mem_map(struct meminfo *mi)
switch (mi->flags)
{
- case ADDRESS_SPECIFIED:
+ case ADDRESS_SPECIFIED:
+ case ADDRESS_SPECIFIED|GET_PAGE_OWNER:
switch (mi->memtype)
{
case KVADDR:
@@ -6263,6 +6652,10 @@ dump_mem_map(struct meminfo *mi)
print_hdr = TRUE;
break;
+ case GET_PAGE_OWNER:
+ print_hdr = FALSE;
+ break;
+
case GET_ALL:
shared = 0;
reserved = 0;
@@ -6376,6 +6769,10 @@ dump_mem_map(struct meminfo *mi)
/* FALLTHROUGH */
+ case GET_PAGE_OWNER:
+ dump_page_owner(mi, pp, phys);
+ break;
+
case GET_SLAB_PAGES:
if (v22) {
if ((flags >> v22_PG_Slab) & 1)
@@ -6570,7 +6967,10 @@ display_members:
break;
case ADDRESS_SPECIFIED:
+ case ADDRESS_SPECIFIED|GET_PAGE_OWNER:
mi->retval = done;
+ if (mi->flags & GET_PAGE_OWNER)
+ dump_page_owner(mi, pp, phys);
break;
}
@@ -19618,6 +20018,99 @@ do_kmem_cache_slub(struct meminfo *si)
FREEBUF(per_cpu);
}
+/*
+ * Return offset of the end of info block which is inuse + free pointer if
+ * not overlapping with object.
+ */
+static inline uint get_info_end(struct meminfo *si)
+{
+ uint obj_inuse, fp_offset;
+
+ obj_inuse = UINT(si->cache_buf + OFFSET(kmem_cache_inuse));
+ fp_offset = UINT(si->cache_buf + OFFSET(kmem_cache_offset));
+
+ /* free pointer lies outside the object payload: count it in */
+ return (fp_offset >= obj_inuse) ? obj_inuse + sizeof(void *) : obj_inuse;
+}
+
+/* Read the kernel's tick counter, preferring the 64-bit symbol when present. */
+static inline u64 get_jiffies(void)
+{
+ u64 val64;
+ ulong val;
+
+ if (!symbol_exists("jiffies_64")) {
+ /* 64-bit counter not exported: fall back to native-width jiffies */
+ get_symbol_data("jiffies", sizeof(ulong), &val);
+ return (u64)val;
+ }
+
+ get_symbol_data("jiffies_64", sizeof(u64), &val64);
+ return val64;
+}
+
+#define TRACK_ADDRS_COUNT 16
+/*
+ * Print one slub_debug track record (allocation or free) for an object.
+ *   si     - meminfo context for the owning kmem_cache
+ *   track  - scratch buffer of SIZE(track) bytes, supplied by the caller
+ *   object - object address the record belongs to
+ *   alloc  - TRACK_ALLOC (0) or TRACK_FREE (1): which record to print
+ *
+ * A record with a zero "addr" has never been written; print nothing.
+ */
+void print_track(struct meminfo *si, char *track, ulong object, enum track_item alloc)
+{
+ ulong track_addr, addr, addrs, when, entries, nr_entries;
+ uint cpu, pid, handle;
+ char buf[BUFSIZE];
+
+ /* track records follow the object's info block: [0]=alloc, [1]=free */
+ track_addr = object + get_info_end(si) + alloc * STRUCT_SIZE("track");
+ if (!readmem(track_addr, KVADDR, track, SIZE(track), "track", FAULT_ON_ERROR))
+ return;
+
+ addr = ULONG(track + OFFSET(track_addr));
+ if (!addr)
+ return;
+
+ when = ULONG(track + OFFSET(track_when));
+ cpu = UINT(track + OFFSET(track_cpu));
+ pid = UINT(track + OFFSET(track_pid));
+ fprintf(fp, "object %lx %s in %s age=%llu cpu=%u pid=%u\n",
+ object, alloc ? "freed" : "allocated", value_to_symstr(addr, buf, 0),
+ get_jiffies() - (u64)when, cpu, pid);
+
+ if (VALID_MEMBER(track_addrs)) {
+ /* older kernels: inline array of return addresses in the track */
+ addrs = track_addr + OFFSET(track_addrs);
+ stack_trace_print(addrs, TRACK_ADDRS_COUNT);
+ } else if (VALID_MEMBER(track_handle)) {
+ /* newer kernels: stack stored in the stack depot by handle */
+ handle = UINT(track + OFFSET(track_handle));
+ nr_entries = stack_depot_fetch(handle, &entries);
+ stack_trace_print(entries, nr_entries);
+ } else {
+ /*
+ * Neither layout is known for this kernel; do NOT attempt a
+ * depot fetch here — OFFSET(track_handle) is invalid (-1) in
+ * this branch, so any derived "handle" would be garbage.
+ */
+ fprintf(fp, "stack trace missing\n");
+ }
+}
+
+#define SLAB_STORE_USER (0x00010000UL)
+/*
+ * Return the SLAB_STORE_USER cache-flag bit for this kernel.
+ * Newer kernels define the bit position as enum _SLAB_STORE_USER;
+ * otherwise fall back to the historical hard-coded value.
+ */
+static ulong get_slab_store_user_flag(void)
+{
+ ulong slab_store_user_flag;
+
+ if (enumerator_value("_SLAB_STORE_USER", &slab_store_user_flag))
+ /* shift as unsigned long: an int shift would be too narrow
+ * (and UB) if the bit position ever reaches 31+ */
+ return (1UL << slab_store_user_flag);
+ else
+ return SLAB_STORE_USER;
+}
+
+/*
+ * Print the slub_debug alloc and free track records for one object,
+ * when "kmem -st" was requested and the cache stores user tracking.
+ */
+static void slab_debug_trace_show(struct meminfo *si, ulong object)
+{
+ ulong cache_flags;
+ char *track_buf;
+
+ if (!(si->flags & GET_SLAB_DEBUG_TRACE))
+ return;
+
+ /* tracks exist only for caches created with SLAB_STORE_USER set */
+ cache_flags = ULONG(si->cache_buf + OFFSET(kmem_cache_flags));
+ if (!(cache_flags & get_slab_store_user_flag()))
+ return;
+
+ track_buf = (char *)GETBUF(SIZE(track));
+ print_track(si, track_buf, object, TRACK_ALLOC);
+ print_track(si, track_buf, object, TRACK_FREE);
+ FREEBUF(track_buf);
+}
+
#define DUMP_SLAB_INFO_SLUB() \
{ \
char b1[BUFSIZE], b2[BUFSIZE]; \
@@ -19672,7 +20165,8 @@ do_slab_slub(struct meminfo *si, int verbose)
if (!verbose) {
DUMP_SLAB_INFO_SLUB();
- return TRUE;
+ if (!(si->flags & GET_SLAB_DEBUG_TRACE))
+ return TRUE;
}
cpu_freelist = 0;
@@ -19775,7 +20269,8 @@ do_slab_slub(struct meminfo *si, int verbose)
if (is_free && (cpu_slab >= 0))
fprintf(fp, "(cpu %d cache)", cpu_slab);
fprintf(fp, "\n");
-
+ if (!is_free)
+ slab_debug_trace_show(si, p + red_left_pad);
}
return TRUE;
@@ -19886,11 +20381,10 @@ do_node_lists_slub(struct meminfo *si, ulong node_ptr, int node)
}
-#define SLAB_STORE_USER (0x00010000UL)
flags = ULONG(si->cache_buf + OFFSET(kmem_cache_flags));
if (INVALID_MEMBER(kmem_cache_node_full) ||
- !(flags & SLAB_STORE_USER)) {
+ !(flags & get_slab_store_user_flag())) {
fprintf(fp, "NODE %d FULL:\n (not tracked)\n", node);
return;
}
--
2.25.1
2 months, 2 weeks
Re: [PATCH v2 0/4] arm64: Introduction of support 16K page with 4-level table
by lijiang
On Tue, Sep 3, 2024 at 3:53 PM <devel-request(a)lists.crash-utility.osci.io>
wrote:
> Date: Tue, 3 Sep 2024 15:51:25 +0800
> From: Kuan-Ying Lee <kuan-ying.lee(a)canonical.com>
> Subject: [Crash-utility] [PATCH v2 0/4] arm64: Introduction of support
> 16K page with 4-level table
> To: kuan-ying.lee(a)canonical.com,
> devel(a)lists.crash-utility.osci.io
> Message-ID: <20240903075140.37728-1-kuan-ying.lee(a)canonical.com>
>
> 1. Add support to 16K page size and 4-level page table with 48 VA bits.
>
> 2. Fix 64K page size with 52 VA bits issue.
> Because we cannot use idmap_ptrs_per_pgd to know the size of
> ptrs_per_pgd.
>
> 3. Refactor the translation of PTE to physical address and fix
> indent issue.
>
>
Thank you for the update, Kuan-Ying.
For the v2: Ack
Lianbo
> V1->V2:
> - Simplify the PTE_TO_PHYS macro. Thanks Lianbo.
>
> Kuan-Ying Lee (4):
> arm64: fix indent issue and refactor PTE_TO_PHYS
> arm64: use the same expression to indicate ptrs_per_pgd
> arm64: fix 64K page and 52-bits VA support
> arm64: Support 16K page, 48 VA bits and 4 level page table
>
> arm64.c | 274 +++++++++++++++++++++++++++++++++++++++++---------------
> defs.h | 33 ++++++-
> 2 files changed, 231 insertions(+), 76 deletions(-)
>
> --
> 2.43.0
>
2 months, 3 weeks
Re: [PATCH] “kmem address” not working properly when redzone is enabled
by lijiang
On Fri, Aug 30, 2024 at 10:08 AM <devel-request(a)lists.crash-utility.osci.io>
wrote:
> Date: Fri, 30 Aug 2024 10:59:44 +1200
> From: Tao Liu <ltao(a)redhat.com>
> Subject: [Crash-utility] Re: [Crash-utility][PATCH] “kmem address” not
> working properly when redzone is enabled
> To: "Aureau, Georges (Kernel Tools ERT)" <georges.aureau(a)hpe.com>
> Cc: "devel(a)lists.crash-utility.osci.io"
> <devel(a)lists.crash-utility.osci.io>
> Message-ID:
> <
> CAO7dBbV96Ys8sQ1u00b2x1L9SCyEuAGqKaN6Gb4UDX-XbnZXdQ(a)mail.gmail.com>
> Content-Type: text/plain; charset="UTF-8"
>
> Hi Georges
>
> On Thu, Aug 29, 2024 at 9:55 PM Aureau, Georges (Kernel Tools ERT)
> <georges.aureau(a)hpe.com> wrote:
> >
> > Crash “kmem address” not working properly when redzone is enabled.
> >
> > When "slub_debug" is enabled with redzoning, "kmem address" does not
> work properly.
> > The "red_left_pad" member within "struct kmem_cache" is currently an
> "unsigned int",
> > it used to be an "int", but it never was a "long", hence "red_left_pad"
> in do_slab_slub()
> > was not initialized properly. This "red_left_pad" issue resulted in
> reporting free objects
> > as "[ALLOCATED]", and in reporting bogus object addresses when using
> "set redzone off".
> >
> Thanks for the fix, LGTM, so ack.
>
>
Applied:
https://github.com/crash-utility/crash/commit/f20a94016148dce397cded5b4ac...
Thanks
Lianbo
> Thanks,
> Tao Liu
>
> > Signed-off-by: Georges Aureau <georges.aureau(a)hpe.com>
> > --
> > diff --git a/memory.c b/memory.c
> > index a74ebaf..967a9cf 100644
> > --- a/memory.c
> > +++ b/memory.c
> > @@ -19637,7 +19637,8 @@ do_slab_slub(struct meminfo *si, int verbose)
> > int i, free_objects, cpu_slab, is_free, node;
> > ulong p, q;
> > #define SLAB_RED_ZONE 0x00000400UL
> > - ulong flags, red_left_pad;
> > + ulong flags;
> > + uint red_left_pad;
> >
> > if (!si->slab) {
> > if (CRASHDEBUG(1))
> > @@ -19727,7 +19728,7 @@ do_slab_slub(struct meminfo *si, int verbose)
> > if (VALID_MEMBER(kmem_cache_red_left_pad)) {
> > flags = ULONG(si->cache_buf + OFFSET(kmem_cache_flags));
> > if (flags & SLAB_RED_ZONE)
> > - red_left_pad = ULONG(si->cache_buf +
> OFFSET(kmem_cache_red_left_pad));
> > + red_left_pad = UINT(si->cache_buf +
> OFFSET(kmem_cache_red_left_pad));
> > }
> >
> > for (p = vaddr; p < vaddr + objects * si->size; p += si->size) {
> > --
>
2 months, 3 weeks
Re: [PATCH] “kmem address” not working properly when redzone is enabled
by lijiang
Hi, Aureau
Thank you for the fix.
On Thu, Aug 29, 2024 at 5:56 PM <devel-request(a)lists.crash-utility.osci.io>
wrote:
> Date: Thu, 29 Aug 2024 09:15:36 +0000
> From: "Aureau, Georges (Kernel Tools ERT)" <georges.aureau(a)hpe.com>
> Subject: [Crash-utility][PATCH] “kmem address” not working properly
> when redzone is enabled
> To: "devel(a)lists.crash-utility.osci.io"
> <devel(a)lists.crash-utility.osci.io>
> Message-ID: <SJ0PR84MB1482E72F9E168B3B0CE885C89F962(a)SJ0PR84MB1482.NAMP
> RD84.PROD.OUTLOOK.COM>
> Content-Type: text/plain; charset="Windows-1252"
>
> Crash “kmem address” not working properly when redzone is enabled.
>
> When "slub_debug" is enabled with redzoning, "kmem address" does not work
> properly.
> The "red_left_pad" member within "struct kmem_cache" is currently an
> "unsigned int",
> it used to be an "int", but it never was a "long", hence "red_left_pad" in
> do_slab_slub()
> was not initialized properly. This "red_left_pad" issue resulted in
> reporting free objects
> as "[ALLOCATED]", and in reporting bogus object addresses when using "set
> redzone off".
>
>
Can you help add the result of the 'kmem address' command here? We can
clearly see what error it is.
And also please add the kernel commit to patch log:
kernel commit 2ca6d39b3102 ("slub: make ->red_left_pad unsigned int")
Signed-off-by: Georges Aureau <georges.aureau(a)hpe.com>
> --
> diff --git a/memory.c b/memory.c
> index a74ebaf..967a9cf 100644
> --- a/memory.c
> +++ b/memory.c
> @@ -19637,7 +19637,8 @@ do_slab_slub(struct meminfo *si, int verbose)
> int i, free_objects, cpu_slab, is_free, node;
> ulong p, q;
> #define SLAB_RED_ZONE 0x00000400UL
> - ulong flags, red_left_pad;
> + ulong flags;
> + uint red_left_pad;
>
> if (!si->slab) {
> if (CRASHDEBUG(1))
> @@ -19727,7 +19728,7 @@ do_slab_slub(struct meminfo *si, int verbose)
> if (VALID_MEMBER(kmem_cache_red_left_pad)) {
> flags = ULONG(si->cache_buf + OFFSET(kmem_cache_flags));
> if (flags & SLAB_RED_ZONE)
> - red_left_pad = ULONG(si->cache_buf +
> OFFSET(kmem_cache_red_left_pad));
> + red_left_pad = UINT(si->cache_buf +
> OFFSET(kmem_cache_red_left_pad));
> }
>
>
This change looks good to me, but I still have a question:
I can not reproduce the current issue, how did you reproduce this one? Can
you help list the steps to reproduce?
Thanks
Lianbo
> for (p = vaddr; p < vaddr + objects * si->size; p += si->size) {
>
> ------------------------------
>
2 months, 3 weeks
Re: [PATCH] kmem: fix the determination for slab page
by lijiang
On Fri, Aug 30, 2024 at 7:39 PM <devel-request(a)lists.crash-utility.osci.io>
wrote:
> Date: Fri, 30 Aug 2024 10:33:03 -0000
> From: qiwu.chen(a)transsion.com
> Subject: [Crash-utility] [PATCH] kmem: fix the determination for slab
> page
> To: devel(a)lists.crash-utility.osci.io
> Message-ID: <20240830103303.2824.23557(a)lists.crash-utility.osci.io>
> Content-Type: text/plain; charset="utf-8"
>
> The determination for a slab page is changed since kernel commit
> 8db00ad564617 which migrates PG_slab flag from page flags to the
> lower 16 bit of the page type.
>
>
Thank you for the fix, qiwu.
Can you help double check if the code related to PG_* flags in the crash
tool needs to be changed accordingly?
Thanks
Lianbo
> Before apply this patch:
> crash> kmem -s ffff000002aa4100
> kmem: address is not allocated in slab subsystem: ffff000002aa4100
>
> After apply this patch:
> crash> kmem -s ffff000002aa4100
> CACHE OBJSIZE ALLOCATED TOTAL SLABS SSIZE NAME
> ffff00000140f900 4096 94 126 18 32k task_struct
> SLAB MEMORY NODE TOTAL ALLOCATED FREE
> fffffdffc00aa800 ffff000002aa0000 0 7 5 2
> FREE / [ALLOCATED]
> [ffff000002aa4100]
>
> Signed-off-by: qiwu.chen <qiwu.chen(a)transsion.com>
> ---
> defs.h | 1 +
> memory.c | 23 +++++++++++++++++++----
> 2 files changed, 20 insertions(+), 4 deletions(-)
>
> diff --git a/defs.h b/defs.h
> index e9eb9e3..9ef7f97 100644
> --- a/defs.h
> +++ b/defs.h
> @@ -2289,6 +2289,7 @@ struct offset_table { /* stash of
> commonly-used offsets */
> long track_cpu;
> long track_when;
> long track_handle;
> + long page_page_type;
> };
>
> struct size_table { /* stash of commonly-used sizes */
> diff --git a/memory.c b/memory.c
> index 7bf8f86..f9ad6e4 100644
> --- a/memory.c
> +++ b/memory.c
> @@ -509,6 +509,8 @@ vm_init(void)
> ANON_MEMBER_OFFSET_INIT(page_compound_head, "page",
> "compound_head");
> MEMBER_OFFSET_INIT(page_private, "page", "private");
> MEMBER_OFFSET_INIT(page_freelist, "page", "freelist");
> + if (MEMBER_EXISTS("page", "page_type"))
> + MEMBER_OFFSET_INIT(page_page_type, "page", "page_type");
>
> MEMBER_OFFSET_INIT(mm_struct_pgd, "mm_struct", "pgd");
>
> @@ -10114,7 +10116,7 @@ static char *
> vaddr_to_kmem_cache(ulong vaddr, char *buf, int verbose)
> {
> physaddr_t paddr;
> - ulong page, cache, page_flags;
> + ulong page, cache, page_flags, page_type;
>
> if (!kvtop(NULL, vaddr, &paddr, 0)) {
> if (verbose)
> @@ -10143,7 +10145,10 @@ vaddr_to_kmem_cache(ulong vaddr, char *buf, int
> verbose)
>
> readmem(compound_head(page)+OFFSET(page_flags), KVADDR,
> &page_flags, sizeof(ulong),
> "page.flags",
> FAULT_ON_ERROR);
> - if (!(page_flags & (1 << vt->PG_slab)))
> + readmem(page + OFFSET(page_page_type),
> KVADDR, &page_type,
> + sizeof(ulong), "page type",
> FAULT_ON_ERROR);
> + if (!(page_flags & (1 << vt->PG_slab)) &&
> + !(page_type & (1 << vt->PG_slab)))
> return NULL;
> } else
> return NULL;
> @@ -20688,7 +20693,7 @@ char *
> is_slab_page(struct meminfo *si, char *buf)
> {
> int i, cnt;
> - ulong page_slab, page_flags, name;
> + ulong page_slab, page_flags, page_type, name;
> ulong *cache_list;
> char *retval;
>
> @@ -20703,7 +20708,17 @@ is_slab_page(struct meminfo *si, char *buf)
> RETURN_ON_ERROR|QUIET))
> return NULL;
>
> - if (!(page_flags & (1 << vt->PG_slab)))
> + if (!readmem(si->spec_addr + OFFSET(page_page_type), KVADDR,
> + &page_type, sizeof(ulong), "page.page_type",
> + RETURN_ON_ERROR|QUIET))
> + return NULL;
> +
> + /*
> + * PG_slab is migrated from the page flags to the lower 16 bit
> + * of the page type since linux commit 8db00ad564617.
> + */
> + if (!(page_flags & (1 << vt->PG_slab)) &&
> + !(page_type & (1 << vt->PG_slab)))
> return NULL;
>
> if (!readmem(si->spec_addr + OFFSET(page_slab), KVADDR,
> --
> 2.25.1
>
2 months, 3 weeks
[PATCH v2 0/4] arm64: Introduction of support 16K page with 4-level table
by Kuan-Ying Lee
1. Add support to 16K page size and 4-level page table with 48 VA bits.
2. Fix 64K page size with 52 VA bits issue.
Because we cannot use idmap_ptrs_per_pgd to know the size of
ptrs_per_pgd.
3. Refactor the translation of PTE to physical address and fix
indent issue.
V1->V2:
- Simplify the PTE_TO_PHYS macro. Thanks Lianbo.
Kuan-Ying Lee (4):
arm64: fix indent issue and refactor PTE_TO_PHYS
arm64: use the same expression to indicate ptrs_per_pgd
arm64: fix 64K page and 52-bits VA support
arm64: Support 16K page, 48 VA bits and 4 level page table
arm64.c | 274 +++++++++++++++++++++++++++++++++++++++++---------------
defs.h | 33 ++++++-
2 files changed, 231 insertions(+), 76 deletions(-)
--
2.43.0
2 months, 3 weeks