Dave,


Some detailed information about validate the results.


This implementation relies on the inode struct and address_space struct definitions.

The struct inode must have i_mapping member, this definition could be found 2.4.37.

The address_space must have page_tree member, this could be found 2.6.8.

 

I used code check the page_tree member to make sure we could detect the old kernel and exit with error.

 

We also can compare the output between files ¨Cm and tree command against page_tree defined in address_space.

If you found any issues, you could use this way to validate it is the bug introduced by this patch or the original radix tree

code issues.

 

Below are the commands, just FYI,

 

crash> files -M

PID: 9408   TASK: ffff88003ec33d00  CPU: 3   COMMAND: "crash"

ROOT: /    CWD: /ws/crash

 FD    ADDR-SPACE       PAGE-COUNT         INODE       TYPE PATH

  0 ffff8801031edbe8         0        ffff8801031edaa0 CHR  /dev/pts/2

  1 ffff8801031edbe8         0        ffff8801031edaa0 CHR  /dev/pts/2

  2 ffff8801031edbe8         0        ffff8801031edaa0 CHR  /dev/pts/2

  3 ffff880139bf8950         0        ffff880139bf8808 CHR  /dev/null

  4 ffff88012cb27770         0        ffff88012cb27628 CHR  /dev/crash

  5 ffff88012f8345f0       36127      ffff88012f8344a8 REG  /usr/lib/debug/lib/modules/3.11.10-301.fc20.x86_64/vmlinux

  6 ffff880103236290         0        ffff880103236148 FIFO

  7 ffff880103236290         0        ffff880103236148 FIFO

  8 ffff8801375cdb40         0        ffff8801375cd9f8 REG  /tmp/tmpfK2wC4I

 10 ffff8801032359b0         0        ffff880103235868 FIFO

crash>

crash> files -m ffff88012f8345f0 | head -n1

Address Space ffff88012f8345f0, page tree ffff88012f8345f8, 36127 pages

crash> files -m ffff88012f8345f0 | wc -l

36130

crash> files -m ffff88012f8345f0 | awk '{print $1}' > file_out.txt

crash> address_space.page_tree ffff88012f8345f0

  page_tree = {

    height = 3,

    gfp_mask = 32,

    rnode = 0xffff880112783309

  }

crash> tree -t radix -N 0xffff880112783309 | wc -l

36127

crash> tree -t radix -N 0xffff880112783309 > tree_out.txt

 

 

You can see I logged two outputs and compared them by diff. They are same.

 

$ diff -u tree_out.txt file_out.txt

--- tree_out.txt        2015-06-17 12:02:04.306812703 +0800

+++ file_out.txt        2015-06-17 12:01:19.036815634 +0800

@@ -1,3 +1,6 @@

+Address

+

+PAGE

 ffffea0001f5bc40

 ffffea0002ad3240

 ffffea0002ad3280


> From: yang_oliver@hotmail.com
> To: crash-utility@redhat.com
> Date: Wed, 17 Jun 2015 23:49:02 +0800
> Subject: [Crash-utility] [PATCH v2] files: support dump file pages from its address space
>
> Dave,
>
> Sorry, I can't reply the mail to orignial thread, as my vpn got
> blocked and I can't access my gmail account.
>
> This is the v2 version of files -M and -m support. Following issues got
> fixed in this version,
>
> 1. Patch is based on current git HEAD
>
> 2. Fixed warnings during make warn
>
> 3. files -M show full file path
>
> 4. Change PGCACHE-PGS to PAGE-COUNT
>
> Some background informtion: Each Linux fd points to a file struct.
> And each file struct or inode had a address space. If the file is
> a regular file, the address space defined a page tree, which are the
> page caches for this file. Some of file types don't use page cache.
> Page tree support was from 2.6 kernel, I couldn't access early Linux
> commit log. But at least 2.6.8 already had the definition.
>
> 5. Fixied page tree dump bugs, make sure page dump matched page number
>
> This is a bug in my radix tree dump code.
> I will share a method to valid page dumps later.
>
> 6. Use common dump_mem_map api instead of SPARSEMEM version
>
> Original dump_mem_map had a bug, I use SPARSEMEM version as workaround.
> But forgot switch it back.
>
> 7. Fixed the page tree dump bugs on 32bit kernel
>
> This is a bug in my radix tree dump code.
> I had fixed and verified it on Fedora 20 32bit kernel.
>
> 8. Check address space page tree number, will print error kernel is old
>
> I checked struct address_space.page_tree member.
>
> 9. Reused existing radix tree dump api, and extended it.
>
> RADIX_TREE_DUMP has no users in crash, this patch is first user.
> I add a call back to make it more flexible.
>
> 10. Page count is gotten from address_space.nrpages
>
> Original way(loop in radix tree) is not efficient.
>
> Let me know your comments, thanks.
>
>
> files: support dump file pages from its address space
>
> Added two options in files command,
>
> 1. -M option, which allows dump address space and page number for each files
> 2. -m option, which could dump each pages in given address mapping
>
> The foreach command also could work with -M, so that we can easily
> find which processes/files hold most page cache pages within the system.
>
> Signed-off-by: Yong Yang <yangoliver@gmail.com>
> ---
> defs.h | 11 ++++++-
> filesys.c | 111 +++++++++++++++++++++++++++++++++++++++++++++++++++-----------
> kernel.c | 4 +--
> memory.c | 73 +++++++++++++++++++++++++++++++++++++++++
> task.c | 25 +++++++++++---
> 5 files changed, 197 insertions(+), 27 deletions(-)
>
> diff --git a/defs.h b/defs.h
> index b25b505..608e09f 100644
> --- a/defs.h
> +++ b/defs.h
> @@ -1111,6 +1111,7 @@ extern struct machdep_table *machdep;
> #define FOREACH_a_FLAG (0x4000000)
> #define FOREACH_G_FLAG (0x8000000)
> #define FOREACH_F_FLAG2 (0x10000000)
> +#define FOREACH_M_FLAG (0x20000000)
>
> #define FOREACH_PS_EXCLUSIVE \
> (FOREACH_g_FLAG|FOREACH_a_FLAG|FOREACH_t_FLAG|FOREACH_c_FLAG|FOREACH_p_FLAG|FOREACH_l_FLAG|FOREACH_r_FLAG|FOREACH_m_FLAG)
> @@ -1416,6 +1417,7 @@ struct offset_table { /* stash of commonly-used offsets */
> long inode_i_flock;
> long inode_i_fop;
> long inode_i_mapping;
> + long address_space_page_tree;
> long address_space_nrpages;
> long vfsmount_mnt_next;
> long vfsmount_mnt_devname;
> @@ -2286,11 +2288,13 @@ struct vm_table { /* kernel VM-related data */
> #define PAGEFLAGS (0x4000000)
> #define SLAB_OVERLOAD_PAGE (0x8000000)
> #define SLAB_CPU_CACHE (0x10000000)
> +#define AS_PAGE_TREE (0x20000000)
>
> #define IS_FLATMEM() (vt->flags & FLATMEM)
> #define IS_DISCONTIGMEM() (vt->flags & DISCONTIGMEM)
> #define IS_SPARSEMEM() (vt->flags & SPARSEMEM)
> #define IS_SPARSEMEM_EX() (vt->flags & SPARSEMEM_EX)
> +#define IS_AS_PAGE_TREE() (vt->flags & AS_PAGE_TREE)
>
> #define COMMON_VADDR_SPACE() (vt->flags & COMMON_VADDR)
> #define PADDR_PRLEN (vt->paddr_prlen)
> @@ -2598,6 +2602,7 @@ struct load_module {
> #define PRINT_SINGLE_VMA (0x80)
> #define PRINT_RADIX_10 (0x100)
> #define PRINT_RADIX_16 (0x200)
> +#define PRINT_PAGES (0x400)
>
> #define MIN_PAGE_SIZE (4096)
>
> @@ -4707,6 +4712,9 @@ void alter_stackbuf(struct bt_info *);
> int vaddr_type(ulong, struct task_context *);
> char *format_stack_entry(struct bt_info *bt, char *, ulong, ulong);
> int in_user_stack(ulong, ulong);
> +void dump_file_address_mappings(ulong);
> +long get_page_tree_count(ulong i_mapping);
> +int is_as_page_tree_supported(void);
>
> /*
> * filesys.c
> @@ -4747,12 +4755,13 @@ struct radix_tree_pair {
> ulong index;
> void *value;
> };
> -ulong do_radix_tree(ulong, int, struct radix_tree_pair *);
> +ulong do_radix_tree(ulong, int, struct radix_tree_pair *, int (*)(ulong));
> int file_dump(ulong, ulong, ulong, int, int);
> #define DUMP_FULL_NAME 1
> #define DUMP_INODE_ONLY 2
> #define DUMP_DENTRY_ONLY 4
> #define DUMP_EMPTY_FILE 8
> +#define DUMP_FILE_PAGE 16
> #endif /* !GDB_COMMON */
> int same_file(char *, char *);
> #ifndef GDB_COMMON
> diff --git a/filesys.c b/filesys.c
> index 0573fe6..f0ec78b 100644
> --- a/filesys.c
> +++ b/filesys.c
> @@ -2187,11 +2187,12 @@ cmd_files(void)
> int subsequent;
> struct reference reference, *ref;
> char *refarg;
> + int open_flags = 0;
>
> ref = NULL;
> refarg = NULL;
>
> - while ((c = getopt(argcnt, args, "d:R:")) != EOF) {
> + while ((c = getopt(argcnt, args, "d:R:m:M")) != EOF) {
> switch(c)
> {
> case 'R':
> @@ -2209,7 +2210,20 @@ cmd_files(void)
> value = htol(optarg, FAULT_ON_ERROR, NULL);
> display_dentry_info(value);
> return;
> -
> + case 'm':
> + if (is_as_page_tree_supported()) {
> + value = htol(optarg, FAULT_ON_ERROR, NULL);
> + dump_file_address_mappings(value);
> + } else {
> + option_not_supported('m');
> + }
> + return;
> + case 'M':
> + if (is_as_page_tree_supported())
> + open_flags |= PRINT_PAGES;
> + else
> + option_not_supported('M');
> + break;
> default:
> argerrs++;
> break;
> @@ -2222,7 +2236,9 @@ cmd_files(void)
> if (!args[optind]) {
> if (!ref)
> print_task_header(fp, CURRENT_CONTEXT(), 0);
> - open_files_dump(CURRENT_TASK(), 0, ref);
> +
> + open_files_dump(CURRENT_TASK(), open_flags, ref);
> +
> return;
> }
>
> @@ -2241,7 +2257,7 @@ cmd_files(void)
> for (tc = pid_to_context(value); tc; tc = tc->tc_next) {
> if (!ref)
> print_task_header(fp, tc, subsequent);
> - open_files_dump(tc->task, 0, ref);
> + open_files_dump(tc->task, open_flags, ref);
> fprintf(fp, "\n");
> }
> break;
> @@ -2249,7 +2265,7 @@ cmd_files(void)
> case STR_TASK:
> if (!ref)
> print_task_header(fp, tc, subsequent);
> - open_files_dump(tc->task, 0, ref);
> + open_files_dump(tc->task, open_flags, ref);
> break;
>
> case STR_INVALID:
> @@ -2321,6 +2337,7 @@ open_files_dump(ulong task, int flags, struct reference *ref)
> char buf4[BUFSIZE];
> char root_pwd[BUFSIZE];
> int root_pwd_printed = 0;
> + int file_dump_flags = 0;
>
> BZERO(root_pathname, BUFSIZE);
> BZERO(pwd_pathname, BUFSIZE);
> @@ -2329,15 +2346,27 @@ open_files_dump(ulong task, int flags, struct reference *ref)
> fdtable_buf = GETBUF(SIZE(fdtable));
> fill_task_struct(task);
>
> - sprintf(files_header, " FD%s%s%s%s%s%s%sTYPE%sPATH\n",
> - space(MINSPACE),
> - mkstring(buf1, VADDR_PRLEN, CENTER|LJUST, "FILE"),
> - space(MINSPACE),
> - mkstring(buf2, VADDR_PRLEN, CENTER|LJUST, "DENTRY"),
> - space(MINSPACE),
> - mkstring(buf3, VADDR_PRLEN, CENTER|LJUST, "INODE"),
> - space(MINSPACE),
> - space(MINSPACE));
> + if (flags & PRINT_PAGES) {
> + sprintf(files_header, " FD%s%s%s%s%s%s%sTYPE%sPATH\n",
> + space(MINSPACE),
> + mkstring(buf1, VADDR_PRLEN, CENTER|LJUST, "ADDR-SPACE"),
> + space(MINSPACE),
> + mkstring(buf2, VADDR_PRLEN, CENTER|LJUST, "PAGE-COUNT"),
> + space(MINSPACE),
> + mkstring(buf3, VADDR_PRLEN, CENTER|LJUST, "INODE"),
> + space(MINSPACE),
> + space(MINSPACE));
> + } else {
> + sprintf(files_header, " FD%s%s%s%s%s%s%sTYPE%sPATH\n",
> + space(MINSPACE),
> + mkstring(buf1, VADDR_PRLEN, CENTER|LJUST, "FILE"),
> + space(MINSPACE),
> + mkstring(buf2, VADDR_PRLEN, CENTER|LJUST, "DENTRY"),
> + space(MINSPACE),
> + mkstring(buf3, VADDR_PRLEN, CENTER|LJUST, "INODE"),
> + space(MINSPACE),
> + space(MINSPACE));
> + }
>
> tc = task_to_context(task);
>
> @@ -2523,6 +2552,10 @@ open_files_dump(ulong task, int flags, struct reference *ref)
> return;
> }
>
> + file_dump_flags = DUMP_FULL_NAME | DUMP_EMPTY_FILE;
> + if (flags & PRINT_PAGES)
> + file_dump_flags |= DUMP_FILE_PAGE;
> +
> j = 0;
> for (;;) {
> unsigned long set;
> @@ -2539,8 +2572,7 @@ open_files_dump(ulong task, int flags, struct reference *ref)
>
> if (ref && file) {
> open_tmpfile();
> - if (file_dump(file, 0, 0, i,
> - DUMP_FULL_NAME|DUMP_EMPTY_FILE)) {
> + if (file_dump(file, 0, 0, i, file_dump_flags)) {
> BZERO(buf4, BUFSIZE);
> rewind(pc->tmpfile);
> ret = fgets(buf4, BUFSIZE,
> @@ -2558,8 +2590,7 @@ open_files_dump(ulong task, int flags, struct reference *ref)
> fprintf(fp, "%s", files_header);
> header_printed = 1;
> }
> - file_dump(file, 0, 0, i,
> - DUMP_FULL_NAME|DUMP_EMPTY_FILE);
> + file_dump(file, 0, 0, i, file_dump_flags);
> }
> }
> i++;
> @@ -2754,6 +2785,8 @@ file_dump(ulong file, ulong dentry, ulong inode, int fd, int flags)
> char buf1[BUFSIZE];
> char buf2[BUFSIZE];
> char buf3[BUFSIZE];
> + ulong i_mapping = 0;
> + ulong count = 0;
>
> file_buf = NULL;
>
> @@ -2863,6 +2896,28 @@ file_dump(ulong file, ulong dentry, ulong inode, int fd, int flags)
> type,
> space(MINSPACE),
> pathname+1);
> + } else if (flags & DUMP_FILE_PAGE) {
> + i_mapping = ULONG(inode_buf + OFFSET(inode_i_mapping));
> + count = get_page_tree_count(i_mapping);
> +
> + fprintf(fp, "%3d%s%s%s%s%s%s%s%s%s%s\n",
> + fd,
> + space(MINSPACE),
> + mkstring(buf1, VADDR_PRLEN,
> + CENTER|RJUST|LONG_HEX,
> + MKSTR(i_mapping)),
> + space(MINSPACE),
> + mkstring(buf2, VADDR_PRLEN,
> + CENTER|RJUST|LONG_DEC,
> + MKSTR(count)),
> + space(MINSPACE),
> + mkstring(buf3, VADDR_PRLEN,
> + CENTER|RJUST|LONG_HEX,
> + MKSTR(inode)),
> + space(MINSPACE),
> + type,
> + space(MINSPACE),
> + pathname);
> } else {
> fprintf(fp, "%3d%s%s%s%s%s%s%s%s%s%s\n",
> fd,
> @@ -3877,9 +3932,13 @@ ulong RADIX_TREE_MAP_MASK = UNINITIALIZED;
> * RADIX_TREE_GATHER; the dimension (max count) of the array may
> * be stored in the index field of the first structure to avoid
> * any chance of an overrun.
> + *
> + * entry_ops: The operation against each of returned entry value.
> + * Only used by RADIX_TREE_DUMP.
> */
> ulong
> -do_radix_tree(ulong root, int flag, struct radix_tree_pair *rtp)
> +do_radix_tree(ulong root, int flag, struct radix_tree_pair *rtp,
> + int (*entry_ops)(ulong))
> {
> int i, ilen, height;
> long nlen;
> @@ -3970,7 +4029,19 @@ do_radix_tree(ulong root, int flag, struct radix_tree_pair *rtp)
> for (index = count = 0; index <= maxindex; index++) {
> if ((ret =
> radix_tree_lookup(root_rnode, index, height))) {
> - fprintf(fp, "[%ld] %lx\n", index, (ulong)ret);
> + if (entry_ops == NULL) {
> + /* Default operation */
> + fprintf(fp, "[%ld] %lx\n",
> + index, (ulong)ret);
> + } else {
> + /* Caller defined operation */
> + if (entry_ops((ulong)ret) != 0) {
> + error(FATAL, "do_radix_tree: "
> + "dump operation failed, "
> + "count: %ld\n", count);
> + return -EIO;
> + }
> + }
> count++;
> }
> }
> diff --git a/kernel.c b/kernel.c
> index cb8084a..53d2e1d 100644
> --- a/kernel.c
> +++ b/kernel.c
> @@ -5746,12 +5746,12 @@ get_irq_desc_addr(int irq)
> return addr;
>
> cnt = do_radix_tree(symbol_value("irq_desc_tree"),
> - RADIX_TREE_COUNT, NULL);
> + RADIX_TREE_COUNT, NULL, NULL);
> len = sizeof(struct radix_tree_pair) * (cnt+1);
> rtp = (struct radix_tree_pair *)GETBUF(len);
> rtp[0].index = cnt;
> cnt = do_radix_tree(symbol_value("irq_desc_tree"),
> - RADIX_TREE_GATHER, rtp);
> + RADIX_TREE_GATHER, rtp, NULL);
>
> if (kt->highest_irq == 0)
> kt->highest_irq = rtp[cnt-1].index;
> diff --git a/memory.c b/memory.c
> index 765732b..0102dbc 100644
> --- a/memory.c
> +++ b/memory.c
> @@ -292,6 +292,7 @@ static void dump_per_cpu_offsets(void);
> static void dump_page_flags(ulonglong);
> static ulong kmem_cache_nodelists(ulong);
> static void dump_hstates(void);
> +static int dump_file_page(ulong);
>
> /*
> * Memory display modes specific to this file.
> @@ -476,6 +477,9 @@ vm_init(void)
> MEMBER_OFFSET_INIT(block_device_bd_list, "block_device", "bd_list");
> MEMBER_OFFSET_INIT(block_device_bd_disk, "block_device", "bd_disk");
> MEMBER_OFFSET_INIT(inode_i_mapping, "inode", "i_mapping");
> + MEMBER_OFFSET_INIT(address_space_page_tree, "address_space", "page_tree");
> + if (VALID_MEMBER(address_space_page_tree))
> + vt->flags |= AS_PAGE_TREE;
> MEMBER_OFFSET_INIT(address_space_nrpages, "address_space", "nrpages");
> if (INVALID_MEMBER(address_space_nrpages))
> MEMBER_OFFSET_INIT(address_space_nrpages, "address_space", "__nrpages");
> @@ -6465,6 +6469,75 @@ translate_page_flags(char *buffer, ulong flags)
> }
>
> /*
> + * Page tree dump ops.
> + */
> +static int
> +dump_file_page(ulong page)
> +{
> + struct meminfo meminfo;
> +
> + BZERO(&meminfo, sizeof(struct meminfo));
> + meminfo.spec_addr = page;
> + meminfo.memtype = KVADDR;
> + meminfo.flags = ADDRESS_SPECIFIED;
> + dump_mem_map(&meminfo);
> +
> + return 0;
> +}
> +
> +/*
> + * The address space file mapping radix tree walker.
> + */
> +void
> +dump_file_address_mappings(ulong i_mapping)
> +{
> + ulong root_rnode;
> + ulong count;
> +
> + root_rnode = i_mapping + OFFSET(address_space_page_tree);
> + count = get_page_tree_count(i_mapping);
> + fprintf(fp, "Address Space %lx, page tree %lx, %ld pages\n\n",
> + i_mapping, root_rnode, count);
> +
> + /* Dump each pages in radix tree */
> + (void) do_radix_tree(root_rnode, RADIX_TREE_DUMP,
> + NULL, &dump_file_page);
> +
> + return;
> +}
> +
> +/*
> + * Get the page count for the specific mapping
> + */
> +long
> +get_page_tree_count(ulong i_mapping)
> +{
> + ulong address_space = i_mapping;
> + char *address_space_buf;
> + ulong nrpages = 0;
> +
> + address_space_buf = GETBUF(SIZE(address_space));
> +
> + readmem(address_space, KVADDR, address_space_buf,
> + SIZE(address_space), "address_space buffer",
> + FAULT_ON_ERROR);
> + nrpages = ULONG(address_space_buf + OFFSET(address_space_nrpages));
> +
> + FREEBUF(address_space_buf);
> +
> + return nrpages;
> +}
> +
> +/*
> + * Check the availability of address space page tree
> + */
> +int
> +is_as_page_tree_supported(void)
> +{
> + return (IS_AS_PAGE_TREE() ? TRUE : FALSE);
> +}
> +
> +/*
> * dump_page_hash_table() displays the entries in each page_hash_table.
> */
>
> diff --git a/task.c b/task.c
> index 45be68c..4c95259 100644
> --- a/task.c
> +++ b/task.c
> @@ -5612,7 +5612,7 @@ cmd_foreach(void)
> BZERO(&foreach_data, sizeof(struct foreach_data));
> fd = &foreach_data;
>
> - while ((c = getopt(argcnt, args, "R:vomlgersStTpukcfFxhdaG")) != EOF) {
> + while ((c = getopt(argcnt, args, "R:vomMlgersStTpukcfFxhdaG")) != EOF) {
> switch(c)
> {
> case 'R':
> @@ -5636,6 +5636,10 @@ cmd_foreach(void)
> fd->flags |= FOREACH_m_FLAG;
> break;
>
> + case 'M':
> + fd->flags |= FOREACH_M_FLAG;
> + break;
> +
> case 'l':
> fd->flags |= FOREACH_l_FLAG;
> break;
> @@ -6140,6 +6144,13 @@ foreach(struct foreach_data *fd)
> print_header = FALSE;
> break;
>
> + case FOREACH_FILES:
> + if (fd->flags & FOREACH_m_FLAG)
> + error(FATAL,
> + "foreach files command does not "
> + "support -m option\n");
> + break;
> +
> case FOREACH_TEST:
> break;
> }
> @@ -6366,9 +6377,15 @@ foreach(struct foreach_data *fd)
>
> case FOREACH_FILES:
> pc->curcmd = "files";
> - open_files_dump(tc->task,
> - fd->flags & FOREACH_i_FLAG ?
> - PRINT_INODES : 0,
> + cmdflags = 0;
> +
> + if (fd->flags & FOREACH_i_FLAG)
> + cmdflags |= PRINT_INODES;
> + if (fd->flags & FOREACH_M_FLAG)
> + cmdflags |= PRINT_PAGES;
> +
> + open_files_dump(tc->task,
> + cmdflags,
> fd->reference ? ref : NULL);
> break;
>
> --
> 1.9.1
>
> --
> Crash-utility mailing list
> Crash-utility@redhat.com
> https://www.redhat.com/mailman/listinfo/crash-utility