Hi Huang,
On Thu, Mar 19, 2026 at 1:24 AM Huang Shijie <huangsj(a)hygon.cn> wrote:
We may meet a large folio in two cases:
1.) when height is 1, the real data layout may look like this:
--------------------------------------------
crash> p *(struct xa_node*)0xffff889883a6a910
$5 = {
shift = 0 '\000',
offset = 0 '\000',
count = 64 '@',
nr_values = 0 '\000',
.................
slots = { 0xffffea0061d21000, 0x2, 0x2, 0x2,
0xffffea0121d85c00, 0x12, 0x12, 0x12,
0xffffea0121d9f400, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22,
0x22,
0xffffea0121583800, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42,
0x42,
0xffffea0121833e00, 0x62, 0x62, 0x62, 0x62, 0x62, 0x62,
0x62,
0xffffea0121eb3800, 0x82, 0x82, 0x82, 0x82, 0x82, 0x82,
0x82,
0x82, 0x82, 0x82, 0x82, 0x82, 0x82,
0x82,
0x82, 0x82, 0x82, 0x82, 0x82, 0x82,
0x82,
0x82, 0x82, 0x82, 0x82, 0x82, 0x82,
0x82,
0x82, 0x82, 0x82
},
.................
}
--------------------------------------------
The page orders for above folios:
0xffffea0061d21000 --> order 2
0xffffea0121d85c00 --> order 2
0xffffea0121d9f400 --> order 3
0xffffea0121583800 --> order 3
0xffffea0121833e00 --> order 3
0xffffea0121eb3800 --> order 5
2.) when height is not 1, the real data layout may look like this:
--------------------------------------------
slot: { 0xffff8a2c866b745a, 0xffff8a2c866b16d2, 0xfffff9fcc4ea4000,
0xfffff9fcc4eb9000, ..}
--------------------------------------------
0xffff8a2c866b745a and 0xffff8a2c866b16d2 are internal nodes, not folios.
The page orders for above folios:
0xfffff9fcc4ea4000 --> order 6
The current code does not work correctly with a large folio page cache.
This patch adds large folio support as follows:
1.) change do_xarray_iter() to detect the order of a folio,
and skip the corresponding dummy slots for a large folio.
2.) update do_xarray_count/do_xarray_dump/do_xarray_dump_cb
to use the correct page count when we meet a large folio.
Signed-off-by: Huang Shijie <huangsj(a)hygon.cn>
---
filesys.c | 25 +++++++++++++++++++++----
tools.c | 16 +++++++++++++---
2 files changed, 34 insertions(+), 7 deletions(-)
diff --git a/filesys.c b/filesys.c
index 2d3b272..f0ac1bb 100644
--- a/filesys.c
+++ b/filesys.c
@@ -4223,7 +4223,11 @@ static void do_xarray_count(ulong node, ulong slot, const char
*path,
ulong index, void *private)
{
struct do_xarray_info *info = private;
- info->count++;
+
+ if (info->type == XARRAY_TYPE_PAGE_CACHE)
+ info->count += 1 << folio_order(slot);
+ else
+ info->count++;
}
Do you think it would be better to decouple xarray from folio? xarray
is a common tool, and folio is just one special case/user of xarray,
so to me it is better not to integrate folio-specific info into
xarray.
How about the following? (This is just to illustrate my idea; there
must be a better approach.)
struct do_xarray_info {
ulong maxcount;
ulong count;
void *data;
ulong (*update_count)(ulong);
};
void do_xarray_count() {
struct do_xarray_info *info = private;
if (info->update_count) // special case
info->count += info->update_count(slot);
else // default case
info->count++;
}
ulong folio_update_count(ulong slot) {
return 1 << folio_order(slot);
}
and let:
info->update_count = folio_update_count;
during folio initialization.
The same applies to the following functions.
static void do_xarray_search(ulong node, ulong slot, const char
*path,
ulong index, void *private)
@@ -4240,8 +4244,16 @@ static void do_xarray_dump(ulong node, ulong slot, const char
*path,
ulong index, void *private)
{
struct do_xarray_info *info = private;
- fprintf(fp, "[%ld] %lx\n", index, slot);
- info->count++;
+
+ if (info->type == XARRAY_TYPE_PAGE_CACHE) {
+ int order = folio_order(slot);
+
+ fprintf(fp, "[%ld] %lx, order:%d\n", index, slot, order);
+ info->count += 1 << order;
+ } else {
+ fprintf(fp, "[%ld] %lx\n", index, slot);
+ info->count++;
+ }
}
static void do_xarray_gather(ulong node, ulong slot, const char *path,
ulong index, void *private)
@@ -4275,7 +4287,12 @@ static void do_xarray_dump_cb(ulong node, ulong slot, const char
*path,
"operation failed: entry: %ld item: %lx\n",
info->count, slot);
}
- info->count++;
+
+ if (info->type == XARRAY_TYPE_PAGE_CACHE) {
+ info->count += 1 << folio_order(slot);
+ } else {
+ info->count++;
+ }
}
/*
diff --git a/tools.c b/tools.c
index 69250c4..6b51423 100644
--- a/tools.c
+++ b/tools.c
@@ -4737,15 +4737,19 @@ do_xarray_iter(ulong node, uint height, char *path,
ulong index, struct xarray_ops *ops)
{
uint off;
+ uint order, is_folio;
if (!hq_enter(node))
error(FATAL,
"\nduplicate tree node: %lx\n", node);
- for (off = 0; off < XA_CHUNK_SIZE; off++) {
+ for (off = 0; off < XA_CHUNK_SIZE; off += 1 << order) {
ulong slot;
ulong shift = (height - 1) * XA_CHUNK_SHIFT;
+ order = 0;
+ is_folio = 0;
+
readmem(node + OFFSET(xa_node_slots) +
sizeof(void *) * off, KVADDR, &slot, sizeof(void *),
"xa_node.slots[off]", FAULT_ON_ERROR);
@@ -4754,10 +4758,16 @@ do_xarray_iter(ulong node, uint height, char *path,
if ((slot & XARRAY_TAG_MASK) == XARRAY_TAG_INTERNAL)
slot &= ~XARRAY_TAG_INTERNAL;
+ else if ((slot & XARRAY_TAG_MASK) == 0) {
+ if (ops->type == XARRAY_TYPE_PAGE_CACHE)
+ is_folio = 1;
+ }
- if (height == 1)
+ if (height == 1 || is_folio) {
ops->entry(node, slot, path, index | off, ops->private);
- else {
+ if (ops->type == XARRAY_TYPE_PAGE_CACHE)
+ order = folio_order(slot) % XA_CHUNK_SHIFT;
+ } else {
ulong child_index = index | (off << shift);
char child_path[BUFSIZE];
sprintf(child_path, "%s/%d", path, off);
--
2.43.0