On Thu, Nov 2, 2023 at 9:35 AM Huang Shijie <shijie@os.amperecomputing.com> wrote:
On a NUMA machine, it is useful to know the memory distribution of
an inode's page cache:
        How many pages are on node 0?
        How many pages are on node 1?

Add "files -n" command to get the memory distribution information:
        1.) Add new argument for dump_inode_page_cache_info()
        2.) Make page_to_nid() a global function.
        3.) Add summary_inode_page() to check each page's node
            information.
        4.) Use print_inode_summary_info() to print the
            memory distribution information of an inode.

Signed-off-by: Huang Shijie <shijie@os.amperecomputing.com>
---
v2 --> v3:
        1.) Always return 1 for summary_inode_page().
        2.) Add more comments for help_files.


Thank you for the update, Shijie.
 
---
 defs.h    |  1 +
 filesys.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++-----
 help.c    | 14 +++++++++++++-
 memory.c  |  3 +--
 4 files changed, 63 insertions(+), 8 deletions(-)

diff --git a/defs.h b/defs.h
index 788f63a..1fe2d0b 100644
--- a/defs.h
+++ b/defs.h
@@ -5750,6 +5750,7 @@ int dump_inode_page(ulong);
 ulong valid_section_nr(ulong);
 void display_memory_from_file_offset(ulonglong, long, void *);
 void swap_info_init(void);
+int page_to_nid(ulong);

 /*
  *  filesys.c
diff --git a/filesys.c b/filesys.c
index 1d0ee7f..2c7cc74 100644
--- a/filesys.c
+++ b/filesys.c
@@ -49,7 +49,7 @@ static int match_file_string(char *, char *, char *);
 static ulong get_root_vfsmount(char *);
 static void check_live_arch_mismatch(void);
 static long get_inode_nrpages(ulong);
-static void dump_inode_page_cache_info(ulong);
+static void dump_inode_page_cache_info(ulong, void *callback);

 #define DENTRY_CACHE (20)
 #define INODE_CACHE  (20)
@@ -2192,8 +2192,31 @@ get_inode_nrpages(ulong i_mapping)
        return nrpages;
 }

+/* Used to collect the numa information for an inode */
+static ulong *numa_node;
+
+static void
+print_inode_summary_info(void)
+{
+       int i;
+
+       fprintf(fp, "     NODE           PAGES\n");
+       for (i = 0; i < vt->numnodes; i++)
+               fprintf(fp, "     %2d          %8ld\n", i, numa_node[i]);
+}
+
+static int
+summary_inode_page(ulong page)
+{
+       int node = page_to_nid(page);
+
+       if (0 <= node && node < vt->numnodes)
+               numa_node[node]++;
+       return 1;
+}

A clear error message would be nice when the "files -n" command fails. What's your opinion on the following changes?

+static int
+summary_inode_page(ulong page)
+{
+       int node;
+
+       if (!is_page_ptr(page, NULL))
+               error(FATAL, "Invalid inode page(0x%lx)\n", page);
+
+       node = page_to_nid(page);
+       if (node < 0 || node >= vt->numnodes)
+               error(FATAL, "Invalid node(%d) for page(0x%lx)\n", node, page);
+
+       numa_node[node]++;
+
+       return 1;
+}
 

Without the above changes, the command prints a long run of "invalid page" failures whenever an inode page is invalid, unless that is the expected behavior.

crash> files -n ffff8ea84c130938
     INODE        NRPAGES
ffff8ea84c130938    62527

files: page_to_nid: invalid page: 0
files: page_to_nid: invalid page: 0
files: page_to_nid: invalid page: 0
files: page_to_nid: invalid page: 10
files: page_to_nid: invalid page: 10
files: page_to_nid: invalid page: 10
files: page_to_nid: invalid page: 20
files: page_to_nid: invalid page: 20
files: page_to_nid: invalid page: 20
files: page_to_nid: invalid page: 30
files: page_to_nid: invalid page: 30
files: page_to_nid: invalid page: 30
files: page_to_nid: invalid page: 40
files: page_to_nid: invalid page: 40
files: page_to_nid: invalid page: 40
files: page_to_nid: invalid page: 50
files: page_to_nid: invalid page: 50
files: page_to_nid: invalid page: 50
files: page_to_nid: invalid page: 60
files: page_to_nid: invalid page: 60
...


The help text in help.c should also say so:
+"     -n inode   given a hexadecimal inode address, check all the pages",
+"                in the page cache, and display a NUMA node distribution",
+"                if the inode page is valid, otherwise it will fail.",

Just my suggestions, any thoughts?

Thanks.
Lianbo

+
 static void
-dump_inode_page_cache_info(ulong inode)
+dump_inode_page_cache_info(ulong inode, void *callback)
 {
        char *inode_buf;
        ulong i_mapping, nrpages, root_rnode, xarray, count;
@@ -2236,7 +2259,7 @@ dump_inode_page_cache_info(ulong inode)
                root_rnode = i_mapping + OFFSET(address_space_page_tree);

        lp.index = 0;
-       lp.value = (void *)&dump_inode_page;
+       lp.value = callback;

        if (root_rnode)
                count = do_radix_tree(root_rnode, RADIX_TREE_DUMP_CB, &lp);
@@ -2276,7 +2299,7 @@ cmd_files(void)
         ref = NULL;
         refarg = NULL;

-        while ((c = getopt(argcnt, args, "d:R:p:c")) != EOF) {
+        while ((c = getopt(argcnt, args, "d:n:R:p:c")) != EOF) {
                 switch(c)
                {
                case 'R':
@@ -2295,11 +2318,31 @@ cmd_files(void)
                        display_dentry_info(value);
                        return;

+               case 'n':
+                       if (VALID_MEMBER(address_space_page_tree) &&
+                           VALID_MEMBER(inode_i_mapping)) {
+                               value = htol(optarg, FAULT_ON_ERROR, NULL);
+
+                               /* Allocate the array for this inode */
+                               numa_node = malloc(sizeof(ulong) * vt->numnodes);
+                               BZERO(numa_node, sizeof(ulong) * vt->numnodes);
+
+                               dump_inode_page_cache_info(value, (void *)&summary_inode_page);
+
+                               /* Print out the NUMA node information for this inode */
+                               print_inode_summary_info();
+
+                               free(numa_node);
+                               numa_node = NULL;
+                       } else
+                               option_not_supported('n');
+                       return;
+
                case 'p':
                        if (VALID_MEMBER(address_space_page_tree) &&
                            VALID_MEMBER(inode_i_mapping)) {
                                value = htol(optarg, FAULT_ON_ERROR, NULL);
-                               dump_inode_page_cache_info(value);
+                               dump_inode_page_cache_info(value, (void *)&dump_inode_page);
                        } else
                                option_not_supported('p');
                        return;
diff --git a/help.c b/help.c
index cc7ab20..e9e28b7 100644
--- a/help.c
+++ b/help.c
@@ -7850,7 +7850,7 @@ NULL
 char *help_files[] = {
 "files",
 "open files",
-"[-d dentry] | [-p inode] | [-c] [-R reference] [pid | taskp] ... ",
+"[-d dentry] | [-p inode] | [-n inode] | [-c] [-R reference] [pid | taskp] ... ",
 "  This command displays information about open files of a context.",
 "  It prints the context's current root directory and current working",
 "  directory, and then for each open file descriptor it prints a pointer",
@@ -7863,6 +7863,8 @@ char *help_files[] = {
 "  specific, and only shows the data requested.\n",
 "     -d dentry  given a hexadecimal dentry address, display its inode,",
 "                super block, file type, and full pathname.",
+"     -n inode   given a hexadecimal inode address, check all the pages",
+"                in the page cache, and display a NUMA node distribution.",
 "     -p inode   given a hexadecimal inode address, dump all of its pages",
 "                that are in the page cache.",
 "     -c         for each open file descriptor, prints a pointer to its",
@@ -7974,6 +7976,16 @@ char *help_files[] = {
 "    ca1ddde0  2eeef000  f59b91ac        3  2 82c referenced,uptodate,lru,private",
 "    ca36b300  3b598000  f59b91ac        4  2 82c referenced,uptodate,lru,private",
 "    ca202680  30134000  f59b91ac        5  2 82c referenced,uptodate,lru,private",
+"    ",
+"  For the inode at address ffff07ff8c6f97f8, display the NUMA node",
+"  distribution of its pages that are in the page cache:",
+"    %s> files -n ffff07ff8c6f97f8",
+"      INODE        NRPAGES",
+" ffff07ff8c6f97f8    25240",
+"    ",
+"      NODE           PAGES",
+"       0             25240",
+"       1                 0",
 " ",
 NULL               
 };
diff --git a/memory.c b/memory.c
index 86ccec5..ed1a4fb 100644
--- a/memory.c
+++ b/memory.c
@@ -300,7 +300,6 @@ static int dump_vm_event_state(void);
 static int dump_page_states(void);
 static int generic_read_dumpfile(ulonglong, void *, long, char *, ulong);
 static int generic_write_dumpfile(ulonglong, void *, long, char *, ulong);
-static int page_to_nid(ulong);
 static int get_kmem_cache_list(ulong **);
 static int get_kmem_cache_root_list(ulong **);
 static int get_kmem_cache_child_list(ulong **, ulong);
@@ -19846,7 +19845,7 @@ is_kmem_cache_addr_common(ulong vaddr, char *kbuf)
 /*
  *  Kernel-config-neutral page-to-node evaluator.
  */
-static int
+int
 page_to_nid(ulong page)
 {
         int i;
--
2.40.1