[PATCH v2] files: support dump file pages from its address space
by yangoliver
Dave,
Sorry, I can't reply to the original thread, as my VPN got
blocked and I can't access my Gmail account.
This is v2 of the files -M and -m support. The following issues are
fixed in this version:
1. Patch is based on current git HEAD
2. Fixed warnings during make warn
3. files -M show full file path
4. Change PGCACHE-PGS to PAGE-COUNT
Some background information: Each Linux fd points to a file struct,
and each file struct or inode has an address space. If the file is
a regular file, the address space defines a page tree, which holds the
page cache pages for this file. Some file types don't use the page cache.
Page tree support dates from the 2.6 kernel; I couldn't access the early Linux
commit log, but at least 2.6.8 already had the definition.
5. Fixed page tree dump bugs; make sure the page dump matches the page count
This was a bug in my radix tree dump code.
I will share a method to validate page dumps later.
6. Use the common dump_mem_map API instead of the SPARSEMEM version
The original dump_mem_map had a bug, so I used the SPARSEMEM version as a
workaround, but forgot to switch it back.
7. Fixed the page tree dump bugs on 32-bit kernels
This was a bug in my radix tree dump code.
I have fixed it and verified the fix on a Fedora 20 32-bit kernel.
8. Check for the address space page tree member; print an error if the kernel is too old
I check for the struct address_space.page_tree member.
9. Reused the existing radix tree dump API, and extended it.
RADIX_TREE_DUMP has no users in crash; this patch is the first user.
I added a callback to make it more flexible.
10. The page count is now read from address_space.nrpages
The original way (looping over the radix tree) was not efficient.
Let me know your comments, thanks.
files: support dump file pages from its address space
Added two options in files command,
1. -M option, which dumps the address space and page count for each file
2. -m option, which dumps each page in the given address mapping
The foreach command also could work with -M, so that we can easily
find which processes/files hold most page cache pages within the system.
Signed-off-by: Yong Yang <yangoliver(a)gmail.com>
---
defs.h | 11 ++++++-
filesys.c | 111 +++++++++++++++++++++++++++++++++++++++++++++++++++-----------
kernel.c | 4 +--
memory.c | 73 +++++++++++++++++++++++++++++++++++++++++
task.c | 25 +++++++++++---
5 files changed, 197 insertions(+), 27 deletions(-)
diff --git a/defs.h b/defs.h
index b25b505..608e09f 100644
--- a/defs.h
+++ b/defs.h
@@ -1111,6 +1111,7 @@ extern struct machdep_table *machdep;
#define FOREACH_a_FLAG (0x4000000)
#define FOREACH_G_FLAG (0x8000000)
#define FOREACH_F_FLAG2 (0x10000000)
+#define FOREACH_M_FLAG (0x20000000)
#define FOREACH_PS_EXCLUSIVE \
(FOREACH_g_FLAG|FOREACH_a_FLAG|FOREACH_t_FLAG|FOREACH_c_FLAG|FOREACH_p_FLAG|FOREACH_l_FLAG|FOREACH_r_FLAG|FOREACH_m_FLAG)
@@ -1416,6 +1417,7 @@ struct offset_table { /* stash of commonly-used offsets */
long inode_i_flock;
long inode_i_fop;
long inode_i_mapping;
+ long address_space_page_tree;
long address_space_nrpages;
long vfsmount_mnt_next;
long vfsmount_mnt_devname;
@@ -2286,11 +2288,13 @@ struct vm_table { /* kernel VM-related data */
#define PAGEFLAGS (0x4000000)
#define SLAB_OVERLOAD_PAGE (0x8000000)
#define SLAB_CPU_CACHE (0x10000000)
+#define AS_PAGE_TREE (0x20000000)
#define IS_FLATMEM() (vt->flags & FLATMEM)
#define IS_DISCONTIGMEM() (vt->flags & DISCONTIGMEM)
#define IS_SPARSEMEM() (vt->flags & SPARSEMEM)
#define IS_SPARSEMEM_EX() (vt->flags & SPARSEMEM_EX)
+#define IS_AS_PAGE_TREE() (vt->flags & AS_PAGE_TREE)
#define COMMON_VADDR_SPACE() (vt->flags & COMMON_VADDR)
#define PADDR_PRLEN (vt->paddr_prlen)
@@ -2598,6 +2602,7 @@ struct load_module {
#define PRINT_SINGLE_VMA (0x80)
#define PRINT_RADIX_10 (0x100)
#define PRINT_RADIX_16 (0x200)
+#define PRINT_PAGES (0x400)
#define MIN_PAGE_SIZE (4096)
@@ -4707,6 +4712,9 @@ void alter_stackbuf(struct bt_info *);
int vaddr_type(ulong, struct task_context *);
char *format_stack_entry(struct bt_info *bt, char *, ulong, ulong);
int in_user_stack(ulong, ulong);
+void dump_file_address_mappings(ulong);
+long get_page_tree_count(ulong i_mapping);
+int is_as_page_tree_supported(void);
/*
* filesys.c
@@ -4747,12 +4755,13 @@ struct radix_tree_pair {
ulong index;
void *value;
};
-ulong do_radix_tree(ulong, int, struct radix_tree_pair *);
+ulong do_radix_tree(ulong, int, struct radix_tree_pair *, int (*)(ulong));
int file_dump(ulong, ulong, ulong, int, int);
#define DUMP_FULL_NAME 1
#define DUMP_INODE_ONLY 2
#define DUMP_DENTRY_ONLY 4
#define DUMP_EMPTY_FILE 8
+#define DUMP_FILE_PAGE 16
#endif /* !GDB_COMMON */
int same_file(char *, char *);
#ifndef GDB_COMMON
diff --git a/filesys.c b/filesys.c
index 0573fe6..f0ec78b 100644
--- a/filesys.c
+++ b/filesys.c
@@ -2187,11 +2187,12 @@ cmd_files(void)
int subsequent;
struct reference reference, *ref;
char *refarg;
+ int open_flags = 0;
ref = NULL;
refarg = NULL;
- while ((c = getopt(argcnt, args, "d:R:")) != EOF) {
+ while ((c = getopt(argcnt, args, "d:R:m:M")) != EOF) {
switch(c)
{
case 'R':
@@ -2209,7 +2210,20 @@ cmd_files(void)
value = htol(optarg, FAULT_ON_ERROR, NULL);
display_dentry_info(value);
return;
-
+ case 'm':
+ if (is_as_page_tree_supported()) {
+ value = htol(optarg, FAULT_ON_ERROR, NULL);
+ dump_file_address_mappings(value);
+ } else {
+ option_not_supported('m');
+ }
+ return;
+ case 'M':
+ if (is_as_page_tree_supported())
+ open_flags |= PRINT_PAGES;
+ else
+ option_not_supported('M');
+ break;
default:
argerrs++;
break;
@@ -2222,7 +2236,9 @@ cmd_files(void)
if (!args[optind]) {
if (!ref)
print_task_header(fp, CURRENT_CONTEXT(), 0);
- open_files_dump(CURRENT_TASK(), 0, ref);
+
+ open_files_dump(CURRENT_TASK(), open_flags, ref);
+
return;
}
@@ -2241,7 +2257,7 @@ cmd_files(void)
for (tc = pid_to_context(value); tc; tc = tc->tc_next) {
if (!ref)
print_task_header(fp, tc, subsequent);
- open_files_dump(tc->task, 0, ref);
+ open_files_dump(tc->task, open_flags, ref);
fprintf(fp, "\n");
}
break;
@@ -2249,7 +2265,7 @@ cmd_files(void)
case STR_TASK:
if (!ref)
print_task_header(fp, tc, subsequent);
- open_files_dump(tc->task, 0, ref);
+ open_files_dump(tc->task, open_flags, ref);
break;
case STR_INVALID:
@@ -2321,6 +2337,7 @@ open_files_dump(ulong task, int flags, struct reference *ref)
char buf4[BUFSIZE];
char root_pwd[BUFSIZE];
int root_pwd_printed = 0;
+ int file_dump_flags = 0;
BZERO(root_pathname, BUFSIZE);
BZERO(pwd_pathname, BUFSIZE);
@@ -2329,15 +2346,27 @@ open_files_dump(ulong task, int flags, struct reference *ref)
fdtable_buf = GETBUF(SIZE(fdtable));
fill_task_struct(task);
- sprintf(files_header, " FD%s%s%s%s%s%s%sTYPE%sPATH\n",
- space(MINSPACE),
- mkstring(buf1, VADDR_PRLEN, CENTER|LJUST, "FILE"),
- space(MINSPACE),
- mkstring(buf2, VADDR_PRLEN, CENTER|LJUST, "DENTRY"),
- space(MINSPACE),
- mkstring(buf3, VADDR_PRLEN, CENTER|LJUST, "INODE"),
- space(MINSPACE),
- space(MINSPACE));
+ if (flags & PRINT_PAGES) {
+ sprintf(files_header, " FD%s%s%s%s%s%s%sTYPE%sPATH\n",
+ space(MINSPACE),
+ mkstring(buf1, VADDR_PRLEN, CENTER|LJUST, "ADDR-SPACE"),
+ space(MINSPACE),
+ mkstring(buf2, VADDR_PRLEN, CENTER|LJUST, "PAGE-COUNT"),
+ space(MINSPACE),
+ mkstring(buf3, VADDR_PRLEN, CENTER|LJUST, "INODE"),
+ space(MINSPACE),
+ space(MINSPACE));
+ } else {
+ sprintf(files_header, " FD%s%s%s%s%s%s%sTYPE%sPATH\n",
+ space(MINSPACE),
+ mkstring(buf1, VADDR_PRLEN, CENTER|LJUST, "FILE"),
+ space(MINSPACE),
+ mkstring(buf2, VADDR_PRLEN, CENTER|LJUST, "DENTRY"),
+ space(MINSPACE),
+ mkstring(buf3, VADDR_PRLEN, CENTER|LJUST, "INODE"),
+ space(MINSPACE),
+ space(MINSPACE));
+ }
tc = task_to_context(task);
@@ -2523,6 +2552,10 @@ open_files_dump(ulong task, int flags, struct reference *ref)
return;
}
+ file_dump_flags = DUMP_FULL_NAME | DUMP_EMPTY_FILE;
+ if (flags & PRINT_PAGES)
+ file_dump_flags |= DUMP_FILE_PAGE;
+
j = 0;
for (;;) {
unsigned long set;
@@ -2539,8 +2572,7 @@ open_files_dump(ulong task, int flags, struct reference *ref)
if (ref && file) {
open_tmpfile();
- if (file_dump(file, 0, 0, i,
- DUMP_FULL_NAME|DUMP_EMPTY_FILE)) {
+ if (file_dump(file, 0, 0, i, file_dump_flags)) {
BZERO(buf4, BUFSIZE);
rewind(pc->tmpfile);
ret = fgets(buf4, BUFSIZE,
@@ -2558,8 +2590,7 @@ open_files_dump(ulong task, int flags, struct reference *ref)
fprintf(fp, "%s", files_header);
header_printed = 1;
}
- file_dump(file, 0, 0, i,
- DUMP_FULL_NAME|DUMP_EMPTY_FILE);
+ file_dump(file, 0, 0, i, file_dump_flags);
}
}
i++;
@@ -2754,6 +2785,8 @@ file_dump(ulong file, ulong dentry, ulong inode, int fd, int flags)
char buf1[BUFSIZE];
char buf2[BUFSIZE];
char buf3[BUFSIZE];
+ ulong i_mapping = 0;
+ ulong count = 0;
file_buf = NULL;
@@ -2863,6 +2896,28 @@ file_dump(ulong file, ulong dentry, ulong inode, int fd, int flags)
type,
space(MINSPACE),
pathname+1);
+ } else if (flags & DUMP_FILE_PAGE) {
+ i_mapping = ULONG(inode_buf + OFFSET(inode_i_mapping));
+ count = get_page_tree_count(i_mapping);
+
+ fprintf(fp, "%3d%s%s%s%s%s%s%s%s%s%s\n",
+ fd,
+ space(MINSPACE),
+ mkstring(buf1, VADDR_PRLEN,
+ CENTER|RJUST|LONG_HEX,
+ MKSTR(i_mapping)),
+ space(MINSPACE),
+ mkstring(buf2, VADDR_PRLEN,
+ CENTER|RJUST|LONG_DEC,
+ MKSTR(count)),
+ space(MINSPACE),
+ mkstring(buf3, VADDR_PRLEN,
+ CENTER|RJUST|LONG_HEX,
+ MKSTR(inode)),
+ space(MINSPACE),
+ type,
+ space(MINSPACE),
+ pathname);
} else {
fprintf(fp, "%3d%s%s%s%s%s%s%s%s%s%s\n",
fd,
@@ -3877,9 +3932,13 @@ ulong RADIX_TREE_MAP_MASK = UNINITIALIZED;
* RADIX_TREE_GATHER; the dimension (max count) of the array may
* be stored in the index field of the first structure to avoid
* any chance of an overrun.
+ *
+ * entry_ops: The operation against each of returned entry value.
+ * Only used by RADIX_TREE_DUMP.
*/
ulong
-do_radix_tree(ulong root, int flag, struct radix_tree_pair *rtp)
+do_radix_tree(ulong root, int flag, struct radix_tree_pair *rtp,
+ int (*entry_ops)(ulong))
{
int i, ilen, height;
long nlen;
@@ -3970,7 +4029,19 @@ do_radix_tree(ulong root, int flag, struct radix_tree_pair *rtp)
for (index = count = 0; index <= maxindex; index++) {
if ((ret =
radix_tree_lookup(root_rnode, index, height))) {
- fprintf(fp, "[%ld] %lx\n", index, (ulong)ret);
+ if (entry_ops == NULL) {
+ /* Default operation */
+ fprintf(fp, "[%ld] %lx\n",
+ index, (ulong)ret);
+ } else {
+ /* Caller defined operation */
+ if (entry_ops((ulong)ret) != 0) {
+ error(FATAL, "do_radix_tree: "
+ "dump operation failed, "
+ "count: %ld\n", count);
+ return -EIO;
+ }
+ }
count++;
}
}
diff --git a/kernel.c b/kernel.c
index cb8084a..53d2e1d 100644
--- a/kernel.c
+++ b/kernel.c
@@ -5746,12 +5746,12 @@ get_irq_desc_addr(int irq)
return addr;
cnt = do_radix_tree(symbol_value("irq_desc_tree"),
- RADIX_TREE_COUNT, NULL);
+ RADIX_TREE_COUNT, NULL, NULL);
len = sizeof(struct radix_tree_pair) * (cnt+1);
rtp = (struct radix_tree_pair *)GETBUF(len);
rtp[0].index = cnt;
cnt = do_radix_tree(symbol_value("irq_desc_tree"),
- RADIX_TREE_GATHER, rtp);
+ RADIX_TREE_GATHER, rtp, NULL);
if (kt->highest_irq == 0)
kt->highest_irq = rtp[cnt-1].index;
diff --git a/memory.c b/memory.c
index 765732b..0102dbc 100644
--- a/memory.c
+++ b/memory.c
@@ -292,6 +292,7 @@ static void dump_per_cpu_offsets(void);
static void dump_page_flags(ulonglong);
static ulong kmem_cache_nodelists(ulong);
static void dump_hstates(void);
+static int dump_file_page(ulong);
/*
* Memory display modes specific to this file.
@@ -476,6 +477,9 @@ vm_init(void)
MEMBER_OFFSET_INIT(block_device_bd_list, "block_device", "bd_list");
MEMBER_OFFSET_INIT(block_device_bd_disk, "block_device", "bd_disk");
MEMBER_OFFSET_INIT(inode_i_mapping, "inode", "i_mapping");
+ MEMBER_OFFSET_INIT(address_space_page_tree, "address_space", "page_tree");
+ if (VALID_MEMBER(address_space_page_tree))
+ vt->flags |= AS_PAGE_TREE;
MEMBER_OFFSET_INIT(address_space_nrpages, "address_space", "nrpages");
if (INVALID_MEMBER(address_space_nrpages))
MEMBER_OFFSET_INIT(address_space_nrpages, "address_space", "__nrpages");
@@ -6465,6 +6469,75 @@ translate_page_flags(char *buffer, ulong flags)
}
/*
+ * Page tree dump ops.
+ */
+static int
+dump_file_page(ulong page)
+{
+ struct meminfo meminfo;
+
+ BZERO(&meminfo, sizeof(struct meminfo));
+ meminfo.spec_addr = page;
+ meminfo.memtype = KVADDR;
+ meminfo.flags = ADDRESS_SPECIFIED;
+ dump_mem_map(&meminfo);
+
+ return 0;
+}
+
+/*
+ * The address space file mapping radix tree walker.
+ */
+void
+dump_file_address_mappings(ulong i_mapping)
+{
+ ulong root_rnode;
+ ulong count;
+
+ root_rnode = i_mapping + OFFSET(address_space_page_tree);
+ count = get_page_tree_count(i_mapping);
+ fprintf(fp, "Address Space %lx, page tree %lx, %ld pages\n\n",
+ i_mapping, root_rnode, count);
+
+ /* Dump each pages in radix tree */
+ (void) do_radix_tree(root_rnode, RADIX_TREE_DUMP,
+ NULL, &dump_file_page);
+
+ return;
+}
+
+/*
+ * Get the page count for the specific mapping
+ */
+long
+get_page_tree_count(ulong i_mapping)
+{
+ ulong address_space = i_mapping;
+ char *address_space_buf;
+ ulong nrpages = 0;
+
+ address_space_buf = GETBUF(SIZE(address_space));
+
+ readmem(address_space, KVADDR, address_space_buf,
+ SIZE(address_space), "address_space buffer",
+ FAULT_ON_ERROR);
+ nrpages = ULONG(address_space_buf + OFFSET(address_space_nrpages));
+
+ FREEBUF(address_space_buf);
+
+ return nrpages;
+}
+
+/*
+ * Check the availability of address space page tree
+ */
+int
+is_as_page_tree_supported(void)
+{
+ return (IS_AS_PAGE_TREE() ? TRUE : FALSE);
+}
+
+/*
* dump_page_hash_table() displays the entries in each page_hash_table.
*/
diff --git a/task.c b/task.c
index 45be68c..4c95259 100644
--- a/task.c
+++ b/task.c
@@ -5612,7 +5612,7 @@ cmd_foreach(void)
BZERO(&foreach_data, sizeof(struct foreach_data));
fd = &foreach_data;
- while ((c = getopt(argcnt, args, "R:vomlgersStTpukcfFxhdaG")) != EOF) {
+ while ((c = getopt(argcnt, args, "R:vomMlgersStTpukcfFxhdaG")) != EOF) {
switch(c)
{
case 'R':
@@ -5636,6 +5636,10 @@ cmd_foreach(void)
fd->flags |= FOREACH_m_FLAG;
break;
+ case 'M':
+ fd->flags |= FOREACH_M_FLAG;
+ break;
+
case 'l':
fd->flags |= FOREACH_l_FLAG;
break;
@@ -6140,6 +6144,13 @@ foreach(struct foreach_data *fd)
print_header = FALSE;
break;
+ case FOREACH_FILES:
+ if (fd->flags & FOREACH_m_FLAG)
+ error(FATAL,
+ "foreach files command does not "
+ "support -m option\n");
+ break;
+
case FOREACH_TEST:
break;
}
@@ -6366,9 +6377,15 @@ foreach(struct foreach_data *fd)
case FOREACH_FILES:
pc->curcmd = "files";
- open_files_dump(tc->task,
- fd->flags & FOREACH_i_FLAG ?
- PRINT_INODES : 0,
+ cmdflags = 0;
+
+ if (fd->flags & FOREACH_i_FLAG)
+ cmdflags |= PRINT_INODES;
+ if (fd->flags & FOREACH_M_FLAG)
+ cmdflags |= PRINT_PAGES;
+
+ open_files_dump(tc->task,
+ cmdflags,
fd->reference ? ref : NULL);
break;
--
1.9.1
9 years, 5 months
[PATCH v3] files: support dump file memory mapping
by yangoliver
Dave,
This is v3 of the patch for memory mapping dump support in the files command.
This version of the patch fixes the following problems:
1. Rebased to latest crash upstream.
2. Cleanup unnecessary code in defs.h.
3. Added RADIX_TREE_DUMP_CB, leaving the do_radix_tree API unchanged for other flags.
4. Fixed line up problems on 32bit kernel
ADDR-SPACE was replaced by MAPPING.
PAGE-COUNT was replaced by PAGE-CNT.
5. New files CLI options
-m replaces -M, and -a replaces the original -m.
files -a takes an inode address instead of an address space address.
This change makes files -a more independent of files -m.
I have seen debugging scenarios that use files -a on its own.
6. Added basic help information.
I will update the example info later, after the review is done.
Below is the v3 patch, let me know your comments.
Added two options in files command,
1. -m option, which allows dump file mapping and
page count for each files
2. -a option, which could dump each pages within
the mapping for given inode address
The foreach command also could work with -m, so
that we can easily find which processes/files hold
biggest page cache within the system.
Signed-off-by: Yong Yang <yangoliver(a)gmail.com>
---
defs.h | 6 +++
filesys.c | 142 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
help.c | 6 ++-
memory.c | 64 ++++++++++++++++++++++++++++
symbols.c | 2 +
task.c | 22 +++++++---
6 files changed, 218 insertions(+), 24 deletions(-)
diff --git a/defs.h b/defs.h
index b25b505..48dc59f 100644
--- a/defs.h
+++ b/defs.h
@@ -1940,6 +1940,7 @@ struct offset_table { /* stash of commonly-used offsets */
long task_struct_thread_reg31;
long pt_regs_regs;
long pt_regs_cp0_badvaddr;
+ long address_space_page_tree;
};
struct size_table { /* stash of commonly-used sizes */
@@ -2598,6 +2599,7 @@ struct load_module {
#define PRINT_SINGLE_VMA (0x80)
#define PRINT_RADIX_10 (0x100)
#define PRINT_RADIX_16 (0x200)
+#define PRINT_PAGES (0x400)
#define MIN_PAGE_SIZE (4096)
@@ -4707,6 +4709,8 @@ void alter_stackbuf(struct bt_info *);
int vaddr_type(ulong, struct task_context *);
char *format_stack_entry(struct bt_info *bt, char *, ulong, ulong);
int in_user_stack(ulong, ulong);
+void dump_file_addr_mappings(ulong);
+long get_page_tree_count(ulong);
/*
* filesys.c
@@ -4743,6 +4747,7 @@ int is_readable(char *);
#define RADIX_TREE_SEARCH (2)
#define RADIX_TREE_DUMP (3)
#define RADIX_TREE_GATHER (4)
+#define RADIX_TREE_DUMP_CB (5)
struct radix_tree_pair {
ulong index;
void *value;
@@ -4753,6 +4758,7 @@ int file_dump(ulong, ulong, ulong, int, int);
#define DUMP_INODE_ONLY 2
#define DUMP_DENTRY_ONLY 4
#define DUMP_EMPTY_FILE 8
+#define DUMP_FILE_PAGE 16
#endif /* !GDB_COMMON */
int same_file(char *, char *);
#ifndef GDB_COMMON
diff --git a/filesys.c b/filesys.c
index 0573fe6..a83224f 100644
--- a/filesys.c
+++ b/filesys.c
@@ -49,6 +49,7 @@ static void *radix_tree_lookup(ulong, ulong, int);
static int match_file_string(char *, char *, char *);
static ulong get_root_vfsmount(char *);
static void check_live_arch_mismatch(void);
+static void dump_file_addr_space(ulong);
#define DENTRY_CACHE (20)
@@ -2167,6 +2168,23 @@ show_hit_rates:
}
}
+static void
+dump_file_addr_space(ulong inode)
+{
+ ulong address_space;
+ char *inode_buf;
+
+ inode_buf = GETBUF(SIZE(inode));
+ readmem(inode, KVADDR, inode_buf, SIZE(inode), "inode buffer",
+ FAULT_ON_ERROR);
+
+ address_space = ULONG(inode_buf + OFFSET(inode_i_mapping));
+
+ dump_file_addr_mappings(address_space);
+
+ FREEBUF(inode_buf);
+}
+
/*
* This command displays information about the open files of a context.
* For each open file descriptor the file descriptor number, a pointer
@@ -2187,11 +2205,12 @@ cmd_files(void)
int subsequent;
struct reference reference, *ref;
char *refarg;
+ int open_flags = 0;
ref = NULL;
refarg = NULL;
- while ((c = getopt(argcnt, args, "d:R:")) != EOF) {
+ while ((c = getopt(argcnt, args, "d:R:a:m")) != EOF) {
switch(c)
{
case 'R':
@@ -2210,6 +2229,24 @@ cmd_files(void)
display_dentry_info(value);
return;
+ case 'a':
+ if (VALID_MEMBER(address_space_page_tree) &&
+ VALID_MEMBER(inode_i_mapping)) {
+ value = htol(optarg, FAULT_ON_ERROR, NULL);
+ dump_file_addr_space(value);
+ } else {
+ option_not_supported('a');
+ }
+ return;
+
+ case 'm':
+ if (VALID_MEMBER(address_space_page_tree) &&
+ VALID_MEMBER(inode_i_mapping))
+ open_flags |= PRINT_PAGES;
+ else
+ option_not_supported('m');
+ break;
+
default:
argerrs++;
break;
@@ -2222,7 +2259,9 @@ cmd_files(void)
if (!args[optind]) {
if (!ref)
print_task_header(fp, CURRENT_CONTEXT(), 0);
- open_files_dump(CURRENT_TASK(), 0, ref);
+
+ open_files_dump(CURRENT_TASK(), open_flags, ref);
+
return;
}
@@ -2241,7 +2280,7 @@ cmd_files(void)
for (tc = pid_to_context(value); tc; tc = tc->tc_next) {
if (!ref)
print_task_header(fp, tc, subsequent);
- open_files_dump(tc->task, 0, ref);
+ open_files_dump(tc->task, open_flags, ref);
fprintf(fp, "\n");
}
break;
@@ -2249,7 +2288,7 @@ cmd_files(void)
case STR_TASK:
if (!ref)
print_task_header(fp, tc, subsequent);
- open_files_dump(tc->task, 0, ref);
+ open_files_dump(tc->task, open_flags, ref);
break;
case STR_INVALID:
@@ -2321,6 +2360,7 @@ open_files_dump(ulong task, int flags, struct reference *ref)
char buf4[BUFSIZE];
char root_pwd[BUFSIZE];
int root_pwd_printed = 0;
+ int file_dump_flags = 0;
BZERO(root_pathname, BUFSIZE);
BZERO(pwd_pathname, BUFSIZE);
@@ -2329,15 +2369,27 @@ open_files_dump(ulong task, int flags, struct reference *ref)
fdtable_buf = GETBUF(SIZE(fdtable));
fill_task_struct(task);
- sprintf(files_header, " FD%s%s%s%s%s%s%sTYPE%sPATH\n",
- space(MINSPACE),
- mkstring(buf1, VADDR_PRLEN, CENTER|LJUST, "FILE"),
- space(MINSPACE),
- mkstring(buf2, VADDR_PRLEN, CENTER|LJUST, "DENTRY"),
- space(MINSPACE),
- mkstring(buf3, VADDR_PRLEN, CENTER|LJUST, "INODE"),
- space(MINSPACE),
- space(MINSPACE));
+ if (flags & PRINT_PAGES) {
+ sprintf(files_header, " FD%s%s%s%s%s%s%sTYPE%sPATH\n",
+ space(MINSPACE),
+ mkstring(buf1, VADDR_PRLEN, CENTER|LJUST, "INODE"),
+ space(MINSPACE),
+ mkstring(buf2, VADDR_PRLEN, CENTER|LJUST, "MAPPING"),
+ space(MINSPACE),
+ mkstring(buf3, LONG_PRLEN, CENTER|LJUST, "PAGE-CNT"),
+ space(MINSPACE),
+ space(MINSPACE));
+ } else {
+ sprintf(files_header, " FD%s%s%s%s%s%s%sTYPE%sPATH\n",
+ space(MINSPACE),
+ mkstring(buf1, VADDR_PRLEN, CENTER|LJUST, "FILE"),
+ space(MINSPACE),
+ mkstring(buf2, VADDR_PRLEN, CENTER|LJUST, "DENTRY"),
+ space(MINSPACE),
+ mkstring(buf3, VADDR_PRLEN, CENTER|LJUST, "INODE"),
+ space(MINSPACE),
+ space(MINSPACE));
+ }
tc = task_to_context(task);
@@ -2420,9 +2472,10 @@ open_files_dump(ulong task, int flags, struct reference *ref)
root_pwd_printed = TRUE;
ref->cmdflags |= FILES_REF_FOUND;
}
- } else
+ } else {
fprintf(fp, "ROOT: %s CWD: %s\n",
root_pathname, pwd_pathname);
+ }
FREEBUF(fs_struct_buf);
}
@@ -2523,6 +2576,10 @@ open_files_dump(ulong task, int flags, struct reference *ref)
return;
}
+ file_dump_flags = DUMP_FULL_NAME | DUMP_EMPTY_FILE;
+ if (flags & PRINT_PAGES)
+ file_dump_flags |= DUMP_FILE_PAGE;
+
j = 0;
for (;;) {
unsigned long set;
@@ -2539,8 +2596,7 @@ open_files_dump(ulong task, int flags, struct reference *ref)
if (ref && file) {
open_tmpfile();
- if (file_dump(file, 0, 0, i,
- DUMP_FULL_NAME|DUMP_EMPTY_FILE)) {
+ if (file_dump(file, 0, 0, i, file_dump_flags)) {
BZERO(buf4, BUFSIZE);
rewind(pc->tmpfile);
ret = fgets(buf4, BUFSIZE,
@@ -2558,8 +2614,7 @@ open_files_dump(ulong task, int flags, struct reference *ref)
fprintf(fp, "%s", files_header);
header_printed = 1;
}
- file_dump(file, 0, 0, i,
- DUMP_FULL_NAME|DUMP_EMPTY_FILE);
+ file_dump(file, 0, 0, i, file_dump_flags);
}
}
i++;
@@ -2754,6 +2809,8 @@ file_dump(ulong file, ulong dentry, ulong inode, int fd, int flags)
char buf1[BUFSIZE];
char buf2[BUFSIZE];
char buf3[BUFSIZE];
+ ulong i_mapping = 0;
+ ulong count = 0;
file_buf = NULL;
@@ -2863,6 +2920,28 @@ file_dump(ulong file, ulong dentry, ulong inode, int fd, int flags)
type,
space(MINSPACE),
pathname+1);
+ } else if (flags & DUMP_FILE_PAGE) {
+ i_mapping = ULONG(inode_buf + OFFSET(inode_i_mapping));
+ count = get_page_tree_count(i_mapping);
+
+ fprintf(fp, "%3d%s%s%s%s%s%s%s%s%s%s\n",
+ fd,
+ space(MINSPACE),
+ mkstring(buf1, VADDR_PRLEN,
+ CENTER|RJUST|LONG_HEX,
+ MKSTR(inode)),
+ space(MINSPACE),
+ mkstring(buf2, VADDR_PRLEN,
+ CENTER|RJUST|LONG_HEX,
+ MKSTR(i_mapping)),
+ space(MINSPACE),
+ mkstring(buf3, LONG_PRLEN,
+ CENTER|RJUST|LONG_DEC,
+ MKSTR(count)),
+ space(MINSPACE),
+ type,
+ space(MINSPACE),
+ pathname);
} else {
fprintf(fp, "%3d%s%s%s%s%s%s%s%s%s%s\n",
fd,
@@ -3870,6 +3949,9 @@ ulong RADIX_TREE_MAP_MASK = UNINITIALIZED;
* limit the number of returned entries by putting the array size
* (max count) in the rtp->index field of the first structure
* in the passed-in array.
+ * RADIX_TREE_DUMP_CB - Similar with RADIX_TREE_DUMP, but for each
+ * radix tree entry, a user defined callback at rtp->value will
+ * be invoked.
*
* rtp: Unused by RADIX_TREE_COUNT and RADIX_TREE_DUMP.
* A pointer to a radix_tree_pair structure for RADIX_TREE_SEARCH.
@@ -3877,6 +3959,8 @@ ulong RADIX_TREE_MAP_MASK = UNINITIALIZED;
* RADIX_TREE_GATHER; the dimension (max count) of the array may
* be stored in the index field of the first structure to avoid
* any chance of an overrun.
+ * For RADIX_TREE_DUMP_CB, the rtp->value need to be initialized as
+ * callback function. The callback prototype must be int (*)(ulong);
*/
ulong
do_radix_tree(ulong root, int flag, struct radix_tree_pair *rtp)
@@ -3889,6 +3973,7 @@ do_radix_tree(ulong root, int flag, struct radix_tree_pair *rtp)
struct radix_tree_pair *r;
ulong root_rnode;
void *ret;
+ int (*cb)(ulong) = NULL;
count = 0;
@@ -3993,6 +4078,27 @@ do_radix_tree(ulong root, int flag, struct radix_tree_pair *rtp)
}
break;
+ case RADIX_TREE_DUMP_CB:
+ if (rtp->value == NULL) {
+ error(FATAL, "do_radix_tree: need set callback function");
+ return -EINVAL;
+ }
+ cb = (int (*)(ulong))rtp->value;
+ for (index = count = 0; index <= maxindex; index++) {
+ if ((ret =
+ radix_tree_lookup(root_rnode, index, height))) {
+ /* Caller defined operation */
+ if (cb((ulong)ret) != 0) {
+ error(FATAL, "do_radix_tree: dump "
+ "operation failed, count: %ld\n",
+ count);
+ return -EIO;
+ }
+ count++;
+ }
+ }
+ break;
+
default:
error(FATAL, "do_radix_tree: invalid flag: %lx\n", flag);
}
diff --git a/help.c b/help.c
index f36316f..1f7035d 100644
--- a/help.c
+++ b/help.c
@@ -6488,7 +6488,7 @@ NULL
char *help_files[] = {
"files",
"open files",
-"[-d dentry] | [-R reference] [pid | taskp] ... ",
+"[-d dentry] | [-a inode] | [-m] [-R reference] [pid | taskp] ... ",
" This command displays information about open files of a context.",
" It prints the context's current root directory and current working",
" directory, and then for each open file descriptor it prints a pointer",
@@ -6501,6 +6501,10 @@ char *help_files[] = {
" specific, and only shows the data requested.\n",
" -d dentry given a hexadecimal dentry address, display its inode,",
" super block, file type, and full pathname.",
+" -a inode given a hexadecimal inode address, dump all memory pages in",
+" its address space.",
+" -m show inode memory mapping information, including mapping",
+" address, page counts within the mapping.",
" -R reference search for references to this file descriptor number,",
" filename, or dentry, inode, or file structure address.",
" pid a process PID.",
diff --git a/memory.c b/memory.c
index 765732b..f3ebdcb 100644
--- a/memory.c
+++ b/memory.c
@@ -292,6 +292,7 @@ static void dump_per_cpu_offsets(void);
static void dump_page_flags(ulonglong);
static ulong kmem_cache_nodelists(ulong);
static void dump_hstates(void);
+static int dump_file_page(ulong);
/*
* Memory display modes specific to this file.
@@ -476,6 +477,7 @@ vm_init(void)
MEMBER_OFFSET_INIT(block_device_bd_list, "block_device", "bd_list");
MEMBER_OFFSET_INIT(block_device_bd_disk, "block_device", "bd_disk");
MEMBER_OFFSET_INIT(inode_i_mapping, "inode", "i_mapping");
+ MEMBER_OFFSET_INIT(address_space_page_tree, "address_space", "page_tree");
MEMBER_OFFSET_INIT(address_space_nrpages, "address_space", "nrpages");
if (INVALID_MEMBER(address_space_nrpages))
MEMBER_OFFSET_INIT(address_space_nrpages, "address_space", "__nrpages");
@@ -6465,6 +6467,68 @@ translate_page_flags(char *buffer, ulong flags)
}
/*
+ * Page tree dump ops.
+ */
+static int
+dump_file_page(ulong page)
+{
+ struct meminfo meminfo;
+
+ BZERO(&meminfo, sizeof(struct meminfo));
+ meminfo.spec_addr = page;
+ meminfo.memtype = KVADDR;
+ meminfo.flags = ADDRESS_SPECIFIED;
+ dump_mem_map(&meminfo);
+
+ return 0;
+}
+
+/*
+ * The address space file mapping radix tree walker.
+ */
+void
+dump_file_addr_mappings(ulong i_mapping)
+{
+ ulong root_rnode;
+ ulong count;
+ struct radix_tree_pair rtp;
+
+ root_rnode = i_mapping + OFFSET(address_space_page_tree);
+ count = get_page_tree_count(i_mapping);
+ fprintf(fp, "Address space %lx, %ld pages\n\n", i_mapping, count);
+
+ rtp.index = 0;
+ rtp.value = (void *)&dump_file_page;
+
+ /* Dump each pages in radix tree */
+ (void) do_radix_tree(root_rnode, RADIX_TREE_DUMP_CB, &rtp);
+
+ return;
+}
+
+/*
+ * Get the page count for the specific mapping
+ */
+long
+get_page_tree_count(ulong i_mapping)
+{
+ ulong address_space = i_mapping;
+ char *address_space_buf;
+ ulong nrpages = 0;
+
+ address_space_buf = GETBUF(SIZE(address_space));
+
+ readmem(address_space, KVADDR, address_space_buf,
+ SIZE(address_space), "address_space buffer",
+ FAULT_ON_ERROR);
+ nrpages = ULONG(address_space_buf + OFFSET(address_space_nrpages));
+
+ FREEBUF(address_space_buf);
+
+ return nrpages;
+}
+
+/*
* dump_page_hash_table() displays the entries in each page_hash_table.
*/
diff --git a/symbols.c b/symbols.c
index 6acfcae..984cb55 100644
--- a/symbols.c
+++ b/symbols.c
@@ -8634,6 +8634,8 @@ dump_offset_table(char *spec, ulong makestruct)
OFFSET(block_device_bd_disk));
fprintf(fp, " address_space_nrpages: %ld\n",
OFFSET(address_space_nrpages));
+ fprintf(fp, " address_space_page_tree: %ld\n",
+ OFFSET(address_space_page_tree));
fprintf(fp, " gendisk_major: %ld\n",
OFFSET(gendisk_major));
fprintf(fp, " gendisk_fops: %ld\n",
diff --git a/task.c b/task.c
index 3a88d68..11679c8 100644
--- a/task.c
+++ b/task.c
@@ -5685,7 +5685,7 @@ cmd_foreach(void)
BZERO(&foreach_data, sizeof(struct foreach_data));
fd = &foreach_data;
- while ((c = getopt(argcnt, args, "R:vomlgersStTpukcfFxhdaG")) != EOF) {
+ while ((c = getopt(argcnt, args, "R:vomMlgersStTpukcfFxhdaG")) != EOF) {
switch(c)
{
case 'R':
@@ -5708,7 +5708,6 @@ cmd_foreach(void)
case 'm':
fd->flags |= FOREACH_m_FLAG;
break;
-
case 'l':
fd->flags |= FOREACH_l_FLAG;
break;
@@ -6234,6 +6233,13 @@ foreach(struct foreach_data *fd)
print_header = FALSE;
break;
+ case FOREACH_FILES:
+ if (fd->flags & FOREACH_a_FLAG)
+ error(FATAL,
+ "foreach files command does not "
+ "support -a option\n");
+ break;
+
case FOREACH_TEST:
break;
}
@@ -6460,9 +6466,15 @@ foreach(struct foreach_data *fd)
case FOREACH_FILES:
pc->curcmd = "files";
- open_files_dump(tc->task,
- fd->flags & FOREACH_i_FLAG ?
- PRINT_INODES : 0,
+ cmdflags = 0;
+
+ if (fd->flags & FOREACH_i_FLAG)
+ cmdflags |= PRINT_INODES;
+ if (fd->flags & FOREACH_m_FLAG)
+ cmdflags |= PRINT_PAGES;
+
+ open_files_dump(tc->task,
+ cmdflags,
fd->reference ? ref : NULL);
break;
--
1.9.1
9 years, 5 months
ps: issue with "waking" and "parked" task states?
by Michael Holzheu
Hi Dave,
I recently looked into a linux 4.0 dump where the "ps" command prints "??"
for the state field of several tasks:
crash> ps
741 660 1 7b408000 ?? 0.0 2152 2592 chcpu
748 2 24 20c284f00 IN 0.0 0 0 [migration/24]
752 2 22 20b1ccf00 ?? 0.0 0 0 [migration/25]
Looking at the "task_struct->state" I get the following:
crash> task_struct 7b408000 | grep state
state = 0x100,
crash> task_struct 20b1ccf00 | grep state
state = 0x200
Looking into include/linux/sched.h states are defined as follows:
#define TASK_RUNNING 0
...
#define TASK_WAKING 256
#define TASK_PARKED 512
When I issue "help -t" I get the following:
RUNNING: 0 (0x0)
INTERRUPTIBLE: 1 (0x1)
UNINTERRUPTIBLE: 2 (0x2)
STOPPED: 4 (0x4)
TRACING_STOPPED: 8 (0x8)
ZOMBIE: 32 (0x20)
DEAD: 16 and 32 (0x10 and 0x20)
WAKEKILL: 64 (0x40)
WAKING: 128 (0x80) <- Should be 256?
I have not dug deeper, but at least wanted to report this issue.
Perhaps you will see the problem at once.
Michael
9 years, 5 months
[PATCH] Fix memory leaks in dump_mem_map when SPARSEMEM is enabled
by yangoliver
Signed-off-by: Yong Yang <yangoliver(a)gmail.com>
---
memory.c | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/memory.c b/memory.c
index 32427ea..765732b 100644
--- a/memory.c
+++ b/memory.c
@@ -5734,10 +5734,6 @@ dump_mem_map(struct meminfo *mi)
long buffersize;
char *outputbuffer;
int bufferindex;
-
- buffersize = 1024 * 1024;
- outputbuffer = GETBUF(buffersize + 512);
-
char style1[100];
char style2[100];
char style3[100];
@@ -5748,6 +5744,9 @@ dump_mem_map(struct meminfo *mi)
return;
}
+ buffersize = 1024 * 1024;
+ outputbuffer = GETBUF(buffersize + 512);
+
sprintf((char *)&style1, "%%lx%s%%%dllx%s%%%dlx%s%%8lx %%2d%s",
space(MINSPACE),
(int)MAX(PADDR_PRLEN, strlen("PHYSICAL")),
--
1.9.1
9 years, 5 months
[PATCH] files: support dump file page cache
by oliver yang
Dave,
This patch adds -M and -m options to the files command, which allow dumping
the page cache for a file.
Please review and let me know your comments. Thanks!
Here is the usage,
1. Dump the page cache page count for each open file of a process; the default is the current context (crash here), and it also works with a given pid,
crash> files -M
PID: 22710 TASK: ffff8801077153e0 CPU: 1 COMMAND: "crash"
ROOT: / CWD: /auto/home2/yango/workspace/crash
FD ADDR-SPACE PGCACHE-PGS INODE TYPE PATH
0 ffff8801031edbe8 0 ffff8801031edaa0 CHR /2
1 ffff8801031edbe8 0 ffff8801031edaa0 CHR /2
2 ffff8801031edbe8 0 ffff8801031edaa0 CHR /2
3 ffff880139bf8950 0 ffff880139bf8808 CHR /null
4 ffff88011e561390 0 ffff88011e561248 CHR /crash
5 ffff88012f8345f0 37910 ffff88012f8344a8 REG
/usr/lib/debug/lib/modules/3.11.10-301.fc20.x86_64/vmlinux
[snipped..........................]
2. Dump the pages in a given address space; this example uses ffff88012f8345f0
from the above output.
The page flags can indicate dirty pages, which helps with fsync stress debugging,
crash> files -m ffff88012f8345f0
Address Space ffff88012f8345f0 : 37910 pages in page cache
PAGE PHYSICAL MAPPING INDEX CNT FLAGS
ffffea0001f5bc40 7d6f1000 ffff88012f8345f0 0 2 3ff0000000086c
referenced,uptodate,lru,active,private
ffffea0001f5bc80 7d6f2000 ffff88012f8345f0 1 2 3ff0000000082c
referenced,uptodate,lru,private
..............................[snipped...].........................................................................
ffffea00016226c0 5889b000 ffff88012f8345f0 9414 2 3ff0000000086c
referenced,uptodate,lru,active,private
ffffea000224f480 893d2000 ffff88012f8345f0 9415 2 3ff0000000086c
referenced,uptodate,lru,active,private
3. The foreach files command doesn't work with -m, but it works with -M
crash> foreach files -m
foreach: foreach files command does not support -m option
So we can use foreach to find which processes or files have the most page
cache pages,
crash> foreach files -M | grep REG | sort -k3 -n | tail -10
20 ffff880137a70be0 2 ffff880137a70a98 REG /ffinLFoAy
4 ffff880037630de0 131 ffff880037630c98 REG
/var/log/audit/audit.log
4 ffff880037630de0 131 ffff880037630c98 REG
/var/log/audit/audit.log
36 ffff8801352e91d8 574 ffff8801352e9090 REG
/var/log/journal/2d6f0d3073ff4a60b1e52a8e38e48feb/user-530.journal
34 ffff8801352e81f8 590 ffff8801352e80b0 REG
/var/log/journal/2d6f0d3073ff4a60b1e52a8e38e48feb/user-42.journal
5 ffff8800a90219c8 9816 ffff8800a9021880 REG
/usr/lib/debug/lib/modules/3.11.10-301.fc20.x86_64/vmlinux
13 ffff880135267198 14051 ffff880135267050 REG
/var/log/journal/2d6f0d3073ff4a60b1e52a8e38e48feb/system.journal
5 ffff88012f8345f0 37910 ffff88012f8344a8 REG
/usr/lib/debug/lib/modules/3.11.10-301.fc20.x86_64/vmlinux
1 ffff8800704f3d80 59468 ffff8800704f3c38 REG
/ws/irqstat/nohup.out
2 ffff8800704f3d80 59468 ffff8800704f3c38 REG
/ws/irqstat/nohup.out
With these commands, we can easily debug page cache flush
stress issues and find out which process or file has the problem.
--
------------------
Oliver Yang
9 years, 5 months
[PATCH] s390x: fix interrupt stack address calculation
by Michael Holzheu
The kernel commit 2f859d0dad8 ("s390/smp: reduce size of struct pcpu")
for linux-4.0 removed the "async_stack" and "panic_stack" members from
"struct pcpu".
So now the only option to find out the stack address is using "struct
lowcore" (again). Unfortunately we don't get the exact address because
since commit dc7ee00d477 ("s390: lowcore stack pointer offsets") the stack
frame overhead is already subtracted from the addresses. Therefore we
have to round up the stack address to PAGE_SIZE.
Signed-off-by: Michael Holzheu <holzheu(a)linux.vnet.ibm.com>
---
s390x.c | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
--- a/s390x.c
+++ b/s390x.c
@@ -1027,8 +1027,6 @@ static unsigned long get_int_stack_pcpu(
{
unsigned long addr;
- if (!MEMBER_EXISTS("pcpu", stack_name))
- return 0;
addr = symbol_value("pcpu_devices") +
cpu * STRUCT_SIZE("pcpu") + MEMBER_OFFSET("pcpu", stack_name);
return readmem_ul(addr) + INT_STACK_SIZE;
@@ -1041,7 +1039,8 @@ static unsigned long get_int_stack_lc(ch
{
if (!MEMBER_EXISTS(lc_struct, stack_name))
return 0;
- return ULONG(lc + MEMBER_OFFSET(lc_struct, stack_name));
+ return roundup(ULONG(lc + MEMBER_OFFSET(lc_struct, stack_name)),
+ PAGESIZE());
}
/*
@@ -1057,7 +1056,7 @@ static void get_int_stack(char *stack_na
stack_addr = symbol_value("restart_stack");
stack_addr = readmem_ul(stack_addr);
} else {
- if (symbol_exists("pcpu_devices"))
+ if (symbol_exists("pcpu_devices") && MEMBER_EXISTS("pcpu", stack_name))
stack_addr = get_int_stack_pcpu(stack_name, cpu);
else
stack_addr = get_int_stack_lc(stack_name, lc);
9 years, 5 months