There are two ways to iterate over vm_area_struct: 1) by rbtree,
aka vma.vm_rb; 2) by linked list, aka vma.vm_prev/vm_next.
However, the Linux maple tree patches [1][2] remove vm_rb and
vm_prev/vm_next from vm_area_struct. crash's memory.c:vm_area_dump()
mainly iterates vmas via the linked list, which no longer works in
that case, so maple tree iteration needs to be ported to crash.
crash currently only reads the maple tree iteratively; no RCU-safe
access or tree-modification features are needed, so we port only a
subset of the kernel maple tree code. In addition, the ported kernel
source needs to be modified to make it compatible with crash.
The formal crash way of resolving a vmcore struct member is:

    readmem(node, KVADDR, buf, SIZE(buf), "", flag);
    return buf + OFFSET(member);

which reimplements the kernel way of member resolving:

    return node->member;

The first is arch-independent: it uses gdb to resolve the OFFSET of
members, so crash doesn't need to know the internal layout of the
struct, even when the struct changes in a new kernel version. The
second is arch-dependent: the struct would have to be ported to crash,
and the OFFSET of members may differ between crash and the kernel due
to padding/alignment or compiler optimization.
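
As a concrete illustration, here is how the ported code locates the
pivot array of a maple_range_64 node, first the kernel way, then the
crash way used throughout maple_tree.c below (a condensed sketch drawn
from the ported ma_pivots(), not literal patch code):

    /* kernel: direct member access */
    pivots = node->mr64.pivot;

    /* crash: copy the node out of the vmcore, then apply gdb-resolved offsets */
    char tmp_node[MAPLE_BUFSIZE];
    readmem((ulonglong)node, KVADDR, tmp_node, SIZE(maple_node_struct),
            "read maple_node", FAULT_ON_ERROR);
    pivots = (unsigned long *)(tmp_node + OFFSET(maple_node_mr64)
                               + OFFSET(maple_range_64_pivot));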
This patch addresses the two issues: 1) porting the for_each_vma()
macro and all of its dependencies from the kernel source [3] to crash,
enabling maple tree vma iteration in crash, and 2) adapting the ported
code to crash. A hypothetical usage sketch follows.
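
With this patch applied, a crash function could walk all vmas of a
task roughly as below (hypothetical caller sketch, not part of the
patch; assumes mm holds the kernel address of the task's mm_struct,
maple_init() has already run, and vm_area_struct_vm_start is crash's
existing offset_table member):

    ulong vma, vm_start;
    VMA_ITERATOR(vmi, (struct maple_tree *)(mm + OFFSET(mm_struct_mm_mt)), 0);

    for_each_vma(vmi, vma) {
            /* vma holds the kernel address of a vm_area_struct */
            readmem(vma + OFFSET(vm_area_struct_vm_start), KVADDR, &vm_start,
                    sizeof(ulong), "vm_area_struct vm_start", FAULT_ON_ERROR);
            fprintf(fp, "vma: %lx  start: %lx\n", vma, vm_start);
    }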
[1]:
https://github.com/oracle/linux-uek/commit/d19703645b80abe35dff1a88449d07...
[2]:
https://github.com/oracle/linux-uek/commit/91dee01f1ebb6b6587463b6ee6f7bb...
[3]:
https://github.com/oracle/linux-uek, maple/mainline branch
Signed-off-by: Tao Liu <ltao(a)redhat.com>
---
Makefile | 10 +-
defs.h | 19 ++
maple_tree.c | 833 +++++++++++++++++++++++++++++++++++++++++++++++++++
maple_tree.h | 214 +++++++++++++
4 files changed, 1073 insertions(+), 3 deletions(-)
create mode 100644 maple_tree.c
create mode 100644 maple_tree.h
diff --git a/Makefile b/Makefile
index 79aef17..6f19b77 100644
--- a/Makefile
+++ b/Makefile
@@ -59,6 +59,7 @@ IBM_HFILES=ibm_common.h
SADUMP_HFILES=sadump.h
UNWIND_HFILES=unwind.h unwind_i.h rse.h unwind_x86.h unwind_x86_64.h
VMWARE_HFILES=vmware_vmss.h
+MAPLE_TREE_HFILES=maple_tree.h
CFILES=main.c tools.c global_data.c memory.c filesys.c help.c task.c \
kernel.c test.c gdb_interface.c configure.c net.c dev.c bpf.c \
@@ -73,12 +74,12 @@ CFILES=main.c tools.c global_data.c memory.c filesys.c help.c task.c \
xen_hyper.c xen_hyper_command.c xen_hyper_global_data.c \
xen_hyper_dump_tables.c kvmdump.c qemu.c qemu-load.c sadump.c ipcs.c \
ramdump.c vmware_vmss.c vmware_guestdump.c \
- xen_dom0.c kaslr_helper.c sbitmap.c
+ xen_dom0.c kaslr_helper.c sbitmap.c maple_tree.c
SOURCE_FILES=${CFILES} ${GENERIC_HFILES} ${MCORE_HFILES} \
${REDHAT_CFILES} ${REDHAT_HFILES} ${UNWIND_HFILES} \
${LKCD_DUMP_HFILES} ${LKCD_TRACE_HFILES} ${LKCD_OBSOLETE_HFILES}\
- ${IBM_HFILES} ${SADUMP_HFILES} ${VMWARE_HFILES}
+ ${IBM_HFILES} ${SADUMP_HFILES} ${VMWARE_HFILES} ${MAPLE_TREE_HFILES}
OBJECT_FILES=main.o tools.o global_data.o memory.o filesys.o help.o task.o \
build_data.o kernel.o test.o gdb_interface.o net.o dev.o bpf.o \
@@ -93,7 +94,7 @@ OBJECT_FILES=main.o tools.o global_data.o memory.o filesys.o help.o task.o \
xen_hyper.o xen_hyper_command.o xen_hyper_global_data.o \
xen_hyper_dump_tables.o kvmdump.o qemu.o qemu-load.o sadump.o ipcs.o \
ramdump.o vmware_vmss.o vmware_guestdump.o \
- xen_dom0.o kaslr_helper.o sbitmap.o
+ xen_dom0.o kaslr_helper.o sbitmap.o maple_tree.o
MEMORY_DRIVER_FILES=memory_driver/Makefile memory_driver/crash.c memory_driver/README
@@ -536,6 +537,9 @@ kaslr_helper.o: ${GENERIC_HFILES} kaslr_helper.c
bpf.o: ${GENERIC_HFILES} bpf.c
${CC} -c ${CRASH_CFLAGS} bpf.c ${WARNING_OPTIONS} ${WARNING_ERROR}
+maple_tree.o: ${GENERIC_HFILES} ${MAPLE_TREE_HFILES} maple_tree.c
+ ${CC} -c ${CRASH_CFLAGS} maple_tree.c ${WARNING_OPTIONS} ${WARNING_ERROR}
+
${PROGRAM}: force
@$(MAKE) all
diff --git a/defs.h b/defs.h
index afdcf6c..2978cec 100644
--- a/defs.h
+++ b/defs.h
@@ -2181,6 +2181,22 @@ struct offset_table { /* stash of commonly-used offsets */
long blk_mq_tags_nr_reserved_tags;
long blk_mq_tags_rqs;
long request_queue_hctx_table;
+ long mm_struct_mm_mt;
+ long maple_tree_ma_root;
+ long maple_tree_ma_flags;
+ long maple_node_parent;
+ long maple_node_ma64;
+ long maple_node_mr64;
+ long maple_node_slot;
+ long maple_arange_64_parent;
+ long maple_arange_64_pivot;
+ long maple_arange_64_slot;
+ long maple_arange_64_meta;
+ long maple_range_64_parent;
+ long maple_range_64_pivot;
+ long maple_range_64_slot;
+ long maple_range_64_meta;
+ long maple_metadata_end;
};
struct size_table { /* stash of commonly-used sizes */
@@ -2351,6 +2367,8 @@ struct size_table { /* stash of commonly-used sizes */
long sbitmap_queue;
long sbq_wait_state;
long blk_mq_tags;
+ long maple_tree_struct;
+ long maple_node_struct;
};
struct array_table {
@@ -5557,6 +5575,7 @@ int file_dump(ulong, ulong, ulong, int, int);
int same_file(char *, char *);
int cleanup_memory_driver(void);
+void maple_init(void);
/*
* help.c
diff --git a/maple_tree.c b/maple_tree.c
new file mode 100644
index 0000000..058b769
--- /dev/null
+++ b/maple_tree.c
@@ -0,0 +1,833 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Maple Tree implementation
+ * Copyright (c) 2018-2022 Oracle Corporation
+ * Authors: Liam R. Howlett <Liam.Howlett(a)oracle.com>
+ * Matthew Wilcox <willy(a)infradead.org>
+ *
+ * The following are copied and modified from lib/maple_tree.c
+ */
+
+#include "maple_tree.h"
+#include "defs.h"
+
+/* Bit 1 indicates the root is a node */
+#define MAPLE_ROOT_NODE 0x02
+/* maple_type stored bit 3-6 */
+#define MAPLE_ENODE_TYPE_SHIFT 0x03
+/* Bit 2 means a NULL somewhere below */
+#define MAPLE_ENODE_NULL 0x04
+
+#define MA_ROOT_PARENT 1
+
+#define MAPLE_PARENT_ROOT 0x01
+
+#define MAPLE_PARENT_SLOT_SHIFT 0x03
+#define MAPLE_PARENT_SLOT_MASK 0xF8
+
+#define MAPLE_PARENT_16B_SLOT_SHIFT 0x02
+#define MAPLE_PARENT_16B_SLOT_MASK 0xFC
+
+#define MAPLE_PARENT_RANGE64 0x06
+#define MAPLE_PARENT_RANGE32 0x04
+#define MAPLE_PARENT_NOT_RANGE16 0x02
+
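+/* Per-node-type slot/pivot counts, read from the kernel's mt_slots and
+ * mt_pivots arrays by maple_init(). */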
+unsigned char *mt_slots = NULL;
+unsigned char *mt_pivots = NULL;
+
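+/* Scratch-buffer size for local copies of maple_tree/maple_node; every
+ * use asserts that the actual struct size fits. */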
+#define MAPLE_BUFSIZE 512
+
+#define ma_parent_ptr(x) ((struct maple_pnode *)(x))
+#define ma_enode_ptr(x) ((struct maple_enode *)(x))
+
+static inline bool mas_is_ptr(struct ma_state *mas)
+{
+ return mas->node == MAS_ROOT;
+}
+
+static inline bool mas_is_start(struct ma_state *mas)
+{
+ return mas->node == MAS_START;
+}
+
+static inline void *mte_safe_root(const struct maple_enode *node)
+{
+ return (void *)((unsigned long)node & ~MAPLE_ROOT_NODE);
+}
+
+static inline struct maple_node *mte_to_node(const struct maple_enode *entry)
+{
+ return (struct maple_node *)((unsigned long)entry & ~MAPLE_NODE_MASK);
+}
+
+static inline enum maple_type mte_node_type(const struct maple_enode *entry)
+{
+ return ((unsigned long)entry >> MAPLE_NODE_TYPE_SHIFT) &
+ MAPLE_NODE_TYPE_MASK;
+}
+
+static inline void *mas_root(struct ma_state *mas)
+{
+ char tree[MAPLE_BUFSIZE];
+ assert(SIZE(maple_tree_struct) <= MAPLE_BUFSIZE);
+
+ readmem((ulonglong)(mas->tree), KVADDR, tree, SIZE(maple_tree_struct),
+ "mas_root read maple_tree", FAULT_ON_ERROR);
+ return *(void **)(tree + OFFSET(maple_tree_ma_root));
+}
+
+static inline struct maple_enode *mas_start(struct ma_state *mas)
+{
+ if (mas_is_start(mas)) {
+ struct maple_enode *root;
+
+ mas->node = MAS_NONE;
+ mas->min = 0;
+ mas->max = ULONG_MAX;
+ mas->depth = 0;
+ mas->offset = 0;
+
+ root = mas_root(mas);
+ /* Tree with nodes */
+ if (xa_is_node(root)) {
+ mas->node = mte_safe_root(root);
+ return NULL;
+ }
+
+ /* empty tree */
+ if (!root) {
+ // mas->offset = MAPLE_NODE_SLOTS;
+ mas->offset = mt_slots[maple_dense];
+ return NULL;
+ }
+
+ /* Single entry tree */
+ mas->node = MAS_ROOT;
+ // mas->offset = MAPLE_NODE_SLOTS;
+ mas->offset = mt_slots[maple_dense];
+
+ /* Single entry tree. */
+ if (mas->index > 0)
+ return NULL;
+
+ return root;
+ }
+
+ return NULL;
+}
+
+static inline unsigned long *ma_pivots(struct maple_node *node,
+ enum maple_type type)
+{
+ switch (type) {
+ case maple_arange_64:
+ return (unsigned long *)((char *)node + OFFSET(maple_node_ma64)
+ + OFFSET(maple_arange_64_pivot));
+ case maple_range_64:
+ case maple_leaf_64:
+ return (unsigned long *)((char *)node + OFFSET(maple_node_mr64)
+ + OFFSET(maple_range_64_pivot));
+ case maple_dense:
+ return NULL;
+ }
+ return NULL;
+}
+
+static inline struct maple_metadata *ma_meta(struct maple_node *mn,
+ enum maple_type mt)
+{
+ switch (mt) {
+ case maple_arange_64:
+ return (struct maple_metadata *)(
+ (char *)mn + OFFSET(maple_node_ma64)
+ + OFFSET(maple_arange_64_meta));
+ default:
+ return (struct maple_metadata *)(
+ (char *)mn + OFFSET(maple_node_mr64)
+ + OFFSET(maple_range_64_meta));
+ }
+}
+
+static inline unsigned char ma_meta_end(struct maple_node *mn,
+ enum maple_type mt)
+{
+ struct maple_metadata *meta = ma_meta(mn, mt);
+
+ return *((char *)meta + OFFSET(maple_metadata_end));
+}
+
+static inline unsigned char ma_data_end(struct maple_node *node,
+ enum maple_type type,
+ unsigned long *pivots,
+ unsigned long max)
+{
+ unsigned char offset;
+
+ if (type == maple_arange_64)
+ return ma_meta_end(node, type);
+
+ offset = mt_pivots[type] - 1;
+ if (!pivots[offset])
+ return ma_meta_end(node, type);
+
+ if (pivots[offset] == max)
+ return offset;
+
+ return mt_pivots[type];
+}
+
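+/* Unlike the kernel version, 'node' here is a local buffer copy, so its
+ * parent pointer is compared against the original kernel address: a dead
+ * maple node has a parent pointer that points back to the node itself. */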
+static inline bool ma_dead_node(const struct maple_node *node,
+ const struct maple_node *orig_node)
+{
+ struct maple_node *parent = (struct maple_node *)
+ (*(unsigned long *)((char *)node
+ + OFFSET(maple_node_parent))
+ & ~MAPLE_NODE_MASK);
+ return (parent == orig_node);
+}
+
+static inline void **ma_slots(struct maple_node *mn, enum maple_type mt)
+{
+ switch (mt) {
+ default:
+ case maple_arange_64:
+ return (void **)((char *)mn + OFFSET(maple_node_ma64)
+ + OFFSET(maple_arange_64_slot));
+ case maple_range_64:
+ case maple_leaf_64:
+ return (void **)((char *)mn + OFFSET(maple_node_mr64)
+ + OFFSET(maple_range_64_slot));
+ case maple_dense:
+ return (void **)((char *)mn + OFFSET(maple_node_slot));
+ }
+}
+
+static inline void *mt_slot(const struct maple_tree *mt,
+ void **slots, unsigned char offset)
+{
+ return slots[offset];
+}
+
+static inline bool ma_is_leaf(const enum maple_type type)
+{
+ return type < maple_range_64;
+}
+
+static inline void *mtree_range_walk(struct ma_state *mas)
+{
+ unsigned long *pivots;
+ unsigned char offset;
+ struct maple_node *node;
+ struct maple_enode *next, *last;
+ enum maple_type type;
+ void **slots;
+ unsigned char end;
+ unsigned long max, min;
+ unsigned long prev_max, prev_min;
+
+ char tmp_node[MAPLE_BUFSIZE];
+ assert(SIZE(maple_node_struct) <= MAPLE_BUFSIZE);
+
+ last = next = mas->node;
+ prev_min = min = mas->min;
+ max = mas->max;
+ do {
+ offset = 0;
+ last = next;
+ node = mte_to_node(next);
+ type = mte_node_type(next);
+ readmem((ulonglong)node, KVADDR, tmp_node, SIZE(maple_node_struct),
+ "mtree_range_walk read maple_node", FAULT_ON_ERROR);
+ pivots = ma_pivots((struct maple_node *)tmp_node, type);
+ end = ma_data_end((struct maple_node *)tmp_node, type, pivots, max);
+ if (ma_dead_node((struct maple_node *)tmp_node, node))
+ goto dead_node;
+
+ if (pivots[offset] >= mas->index) {
+ prev_max = max;
+ prev_min = min;
+ max = pivots[offset];
+ goto next;
+ }
+
+ do {
+ offset++;
+ } while ((offset < end) && (pivots[offset] < mas->index));
+
+ prev_min = min;
+ min = pivots[offset - 1] + 1;
+ prev_max = max;
+ if (offset < end && pivots[offset])
+ max = pivots[offset];
+
+next:
+ slots = ma_slots((struct maple_node *)tmp_node, type);
+ next = mt_slot(mas->tree, slots, offset);
+ if (ma_dead_node((struct maple_node *)tmp_node, node))
+ goto dead_node;
+ } while (!ma_is_leaf(type));
+
+ mas->offset = offset;
+ mas->index = min;
+ mas->last = max;
+ mas->min = prev_min;
+ mas->max = prev_max;
+ mas->node = last;
+ return (void *) next;
+
+dead_node:
+ mas_reset(mas);
+ return NULL;
+}
+
+static inline void *mas_state_walk(struct ma_state *mas)
+{
+ void *entry;
+
+ entry = mas_start(mas);
+ if (mas_is_none(mas))
+ return NULL;
+
+ if (mas_is_ptr(mas))
+ return entry;
+
+ return mtree_range_walk(mas);
+}
+
+static inline bool mas_searchable(struct ma_state *mas)
+{
+ if (mas_is_none(mas))
+ return false;
+
+ if (mas_is_ptr(mas))
+ return false;
+
+ return true;
+}
+
+static inline struct maple_node *mas_mn(const struct ma_state *mas)
+{
+ return mte_to_node(mas->node);
+}
+
+static inline unsigned long
+mas_safe_min(struct ma_state *mas, unsigned long *pivots, unsigned char offset)
+{
+ if (offset)
+ return pivots[offset - 1] + 1;
+
+ return mas->min;
+}
+
+static inline void *mas_slot(struct ma_state *mas, void **slots,
+ unsigned char offset)
+{
+ return mt_slot(mas->tree, slots, offset);
+}
+
+static inline unsigned long
+mas_safe_pivot(const struct ma_state *mas, unsigned long *pivots,
+ unsigned char piv, enum maple_type type)
+{
+ if (piv >= mt_pivots[type])
+ return mas->max;
+
+ return pivots[piv];
+}
+
+static inline void *mas_next_nentry(struct ma_state *mas,
+ struct maple_node *node, unsigned long max,
+ enum maple_type type, struct maple_node *orig_node)
+{
+ unsigned char count;
+ unsigned long pivot;
+ unsigned long *pivots;
+ void **slots;
+ void *entry;
+
+ if (mas->last == mas->max) {
+ mas->index = mas->max;
+ return NULL;
+ }
+
+ pivots = ma_pivots(node, type);
+ slots = ma_slots(node, type);
+ mas->index = mas_safe_min(mas, pivots, mas->offset);
+ if (ma_dead_node(node, orig_node))
+ return NULL;
+
+ if (mas->index > max)
+ return NULL;
+
+ count = ma_data_end(node, type, pivots, mas->max);
+ if (mas->offset > count)
+ return NULL;
+
+ while (mas->offset < count) {
+ pivot = pivots[mas->offset];
+ entry = mas_slot(mas, slots, mas->offset);
+ if (ma_dead_node(node, orig_node))
+ return NULL;
+
+ if (entry)
+ goto found;
+
+ if (pivot >= max)
+ return NULL;
+
+ mas->index = pivot + 1;
+ mas->offset++;
+ }
+
+ if (mas->index > mas->max) {
+ mas->index = mas->last;
+ return NULL;
+ }
+
+ pivot = mas_safe_pivot(mas, pivots, mas->offset, type);
+ entry = mas_slot(mas, slots, mas->offset);
+ if (ma_dead_node(node, orig_node))
+ return NULL;
+
+ if (!pivot)
+ return NULL;
+
+ if (!entry)
+ return NULL;
+
+found:
+ mas->last = pivot;
+ return entry;
+}
+
+static inline void mas_rewalk(struct ma_state *mas, unsigned long index)
+{
+
+retry:
+ mas_set(mas, index);
+ mas_state_walk(mas);
+ if (mas_is_start(mas))
+ goto retry;
+
+ return;
+}
+
+static inline bool ma_is_root(struct maple_node *node)
+{
+ return (*(unsigned long *)((char *)node
+ + OFFSET(maple_node_parent))
+ & MA_ROOT_PARENT);
+}
+
+static inline struct maple_node *mte_parent(const struct maple_node *node)
+{
+ return (void *)(*(unsigned long *)((char *)node
+ + OFFSET(maple_node_parent))
+ & ~MAPLE_NODE_MASK);
+}
+
+static inline unsigned long mte_parent_slot_mask(unsigned long parent)
+{
+ /* Note bit 1 == 0 means 16B */
+ if (parent & MAPLE_PARENT_NOT_RANGE16)
+ return MAPLE_PARENT_SLOT_MASK;
+
+ return MAPLE_PARENT_16B_SLOT_MASK;
+}
+
+static inline bool mt_is_alloc(struct maple_tree *mt)
+{
+ return (*(unsigned int *)((char *)mt
+ + OFFSET(maple_tree_ma_flags))
+ & MT_FLAGS_ALLOC_RANGE);
+}
+
+static inline
+enum maple_type mte_parent_enum(struct maple_enode *p_enode,
+ struct maple_tree *mt)
+{
+ unsigned long p_type;
+ char tmp_tree[MAPLE_BUFSIZE];
+ assert(SIZE(maple_tree_struct) <= MAPLE_BUFSIZE);
+
+ p_type = (unsigned long)p_enode;
+ if (p_type & MAPLE_PARENT_ROOT)
+ return 0; /* Validated in the caller. */
+
+ p_type &= MAPLE_NODE_MASK;
+ p_type = p_type & ~(MAPLE_PARENT_ROOT | mte_parent_slot_mask(p_type));
+
+ switch (p_type) {
+ case MAPLE_PARENT_RANGE64: /* or MAPLE_PARENT_ARANGE64 */
+ readmem((ulonglong)mt, KVADDR, tmp_tree, SIZE(maple_tree_struct),
+ "mte_parent_enum read maple_tree", FAULT_ON_ERROR);
+ if (mt_is_alloc((struct maple_tree *)tmp_tree))
+ return maple_arange_64;
+ return maple_range_64;
+ }
+
+ return 0;
+}
+
+static inline
+enum maple_type mas_parent_enum(struct ma_state *mas, struct maple_node *node)
+{
+ return mte_parent_enum(ma_enode_ptr(*(struct maple_pnode **)
+ ((char *)node + OFFSET(maple_node_parent))),
+ mas->tree);
+}
+
+static inline unsigned long mte_parent_shift(unsigned long parent)
+{
+ /* Note bit 1 == 0 means 16B */
+ if (parent & MAPLE_PARENT_NOT_RANGE16)
+ return MAPLE_PARENT_SLOT_SHIFT;
+
+ return MAPLE_PARENT_16B_SLOT_SHIFT;
+}
+
+static inline unsigned int mte_parent_slot(const struct maple_node *node)
+{
+ unsigned long val = *(unsigned long *)((char *)node
+ + OFFSET(maple_node_parent));
+
+ /* Root. */
+ if (val & 1)
+ return 0;
+
+ /*
+ * Okay to use MAPLE_PARENT_16B_SLOT_MASK as the last bit will be lost
+ * by shift if the parent shift is MAPLE_PARENT_SLOT_SHIFT
+ */
+ return (val & MAPLE_PARENT_16B_SLOT_MASK) >> mte_parent_shift(val);
+}
+
+static inline struct maple_enode *mt_mk_node(const struct maple_node *node,
+ enum maple_type type)
+{
+ return (void *)((unsigned long)node |
+ (type << MAPLE_ENODE_TYPE_SHIFT) | MAPLE_ENODE_NULL);
+}
+
+static inline bool mte_is_root(struct maple_node *node)
+{
+ return ma_is_root(node);
+}
+
+static int mas_ascend(struct ma_state *mas)
+{
+ struct maple_enode *p_enode; /* parent enode. */
+ struct maple_enode *a_enode; /* ancestor enode. */
+ struct maple_node *a_node; /* ancestor node. */
+ struct maple_node *p_node; /* parent node. */
+ unsigned char a_slot;
+ enum maple_type a_type;
+ unsigned long min, max;
+ unsigned long *pivots;
+ unsigned char offset;
+ bool set_max = false, set_min = false;
+
+ char tmp_node[MAPLE_BUFSIZE];
+ assert(SIZE(maple_node_struct) <= MAPLE_BUFSIZE);
+
+ a_node = mas_mn(mas);
+ readmem((ulonglong)a_node, KVADDR, tmp_node, SIZE(maple_node_struct),
+ "mas_ascend read maple_node", FAULT_ON_ERROR);
+ if (ma_is_root((struct maple_node *)tmp_node)) {
+ mas->offset = 0;
+ return 0;
+ }
+
+ readmem((ulonglong)(mte_to_node(mas->node)), KVADDR, tmp_node,
+ SIZE(maple_node_struct), "mas_ascend read maple_node", FAULT_ON_ERROR);
+ p_node = mte_parent((struct maple_node *)tmp_node);
+ if (a_node == p_node)
+ return 1;
+ a_type = mas_parent_enum(mas, (struct maple_node *)tmp_node);
+ offset = mte_parent_slot((struct maple_node *)tmp_node);
+ a_enode = mt_mk_node(p_node, a_type);
+
+ /* Check to make sure all parent information is still accurate */
+ if (p_node != mte_parent((struct maple_node *)tmp_node))
+ return 1;
+
+ mas->node = a_enode;
+ mas->offset = offset;
+
+ readmem((ulonglong)(mte_to_node(a_enode)), KVADDR, tmp_node,
+ SIZE(maple_node_struct), "mas_ascend read maple_node", FAULT_ON_ERROR);
+ if (mte_is_root((struct maple_node *)tmp_node)) {
+ mas->max = ULONG_MAX;
+ mas->min = 0;
+ return 0;
+ }
+
+ min = 0;
+ max = ULONG_MAX;
+ do {
+ p_enode = a_enode;
+ readmem((ulonglong)(mte_to_node(p_enode)), KVADDR, tmp_node,
+ SIZE(maple_node_struct),
+ "mas_ascend read maple_node", FAULT_ON_ERROR);
+ a_type = mas_parent_enum(mas, (struct maple_node *)tmp_node);
+ a_node = mte_parent((struct maple_node *)tmp_node);
+ a_slot = mte_parent_slot((struct maple_node *)tmp_node);
+ readmem((ulonglong)a_node, KVADDR, tmp_node, SIZE(maple_node_struct),
+ "mas_ascend read maple_node", FAULT_ON_ERROR);
+ pivots = ma_pivots((struct maple_node *)tmp_node, a_type);
+ a_enode = mt_mk_node((struct maple_node *)tmp_node, a_type);
+
+ if (!set_min && a_slot) {
+ set_min = true;
+ min = pivots[a_slot - 1] + 1;
+ }
+
+ if (!set_max && a_slot < mt_pivots[a_type]) {
+ set_max = true;
+ max = pivots[a_slot];
+ }
+
+ if (ma_dead_node((struct maple_node *)tmp_node, a_node))
+ return 1;
+
+ if (ma_is_root((struct maple_node *)tmp_node))
+ break;
+
+ } while (!set_min || !set_max);
+
+ mas->max = max;
+ mas->min = min;
+ return 0;
+}
+
+static inline int mas_next_node(struct ma_state *mas, struct maple_node *node,
+ unsigned long max)
+{
+ unsigned long min, pivot;
+ unsigned long *pivots;
+ struct maple_enode *enode;
+ int level = 0;
+ unsigned char offset;
+ enum maple_type mt;
+ void **slots;
+
+ char tmp_node[MAPLE_BUFSIZE];
+ assert(SIZE(maple_node_struct) <= MAPLE_BUFSIZE);
+
+ if (mas->max >= max)
+ goto no_entry;
+
+ level = 0;
+ readmem((ulonglong)node, KVADDR, tmp_node, SIZE(maple_node_struct),
+ "mas_next_node read maple_node", FAULT_ON_ERROR);
+ do {
+ if (ma_is_root((struct maple_node *)tmp_node))
+ goto no_entry;
+
+ min = mas->max + 1;
+ if (min > max)
+ goto no_entry;
+
+ if (mas_ascend(mas))
+ return 1;
+
+ offset = mas->offset;
+ level++;
+ node = mas_mn(mas);
+ mt = mte_node_type(mas->node);
+ readmem((ulonglong)node, KVADDR, tmp_node, SIZE(maple_node_struct),
+ "mas_next_node read maple_node", FAULT_ON_ERROR);
+ pivots = ma_pivots((struct maple_node *)tmp_node, mt);
+ } while (offset == ma_data_end((struct maple_node *)tmp_node, mt, pivots, mas->max));
+
+ slots = ma_slots((struct maple_node *)tmp_node, mt);
+ pivot = mas_safe_pivot(mas, pivots, ++offset, mt);
+ while (level > 1) {
+ /* Descend, if necessary */
+ enode = mas_slot(mas, slots, offset);
+ if (ma_dead_node((struct maple_node *)tmp_node, node))
+ return 1;
+
+ mas->node = enode;
+ level--;
+ node = mas_mn(mas);
+ mt = mte_node_type(mas->node);
+ readmem((ulonglong)node, KVADDR, tmp_node, SIZE(maple_node_struct),
+ "mas_next_node read maple_node", FAULT_ON_ERROR);
+ slots = ma_slots((struct maple_node *)tmp_node, mt);
+ pivots = ma_pivots((struct maple_node *)tmp_node, mt);
+ offset = 0;
+ pivot = pivots[0];
+ }
+
+ enode = mas_slot(mas, slots, offset);
+ if (ma_dead_node((struct maple_node *)tmp_node, node))
+ return 1;
+
+ mas->node = enode;
+ mas->min = min;
+ mas->max = pivot;
+ return 0;
+
+no_entry:
+ if (ma_dead_node((struct maple_node *)tmp_node, node))
+ return 1;
+
+ mas->node = MAS_NONE;
+ return 0;
+}
+
+static inline void *mas_next_entry(struct ma_state *mas, unsigned long limit)
+{
+ void *entry = NULL;
+ struct maple_enode *prev_node;
+ struct maple_node *node;
+ unsigned char offset;
+ unsigned long last;
+ enum maple_type mt;
+
+ char tmp_node[MAPLE_BUFSIZE];
+ assert(SIZE(maple_node_struct) <= MAPLE_BUFSIZE);
+
+ last = mas->last;
+retry:
+ offset = mas->offset;
+ prev_node = mas->node;
+ node = mas_mn(mas);
+ mt = mte_node_type(mas->node);
+ mas->offset++;
+ if (mas->offset >= mt_slots[mt]) {
+ mas->offset = mt_slots[mt] - 1;
+ goto next_node;
+ }
+
+ while (!mas_is_none(mas)) {
+ readmem((ulonglong)node, KVADDR, tmp_node, SIZE(maple_node_struct),
+ "mas_next_entry read maple_node", FAULT_ON_ERROR);
+ entry = mas_next_nentry(mas, (struct maple_node *)tmp_node, limit, mt, node);
+ if (ma_dead_node((struct maple_node *)tmp_node, node)) {
+ mas_rewalk(mas, last);
+ goto retry;
+ }
+
+ if (entry)
+ return entry;
+
+ if ((mas->index > limit))
+ break;
+
+next_node:
+ prev_node = mas->node;
+ offset = mas->offset;
+ if (mas_next_node(mas, node, limit)) {
+ mas_rewalk(mas, last);
+ goto retry;
+ }
+ mas->offset = 0;
+ node = mas_mn(mas);
+ mt = mte_node_type(mas->node);
+ }
+
+ mas->index = mas->last = limit;
+ mas->offset = offset;
+ mas->node = prev_node;
+ return NULL;
+}
+
+static void *mas_walk(struct ma_state *mas)
+{
+ void *entry;
+
+retry:
+ entry = mas_state_walk(mas);
+ if (mas_is_start(mas))
+ goto retry;
+
+ if (mas_is_ptr(mas)) {
+ if (!mas->index) {
+ mas->last = 0;
+ } else {
+ mas->index = 1;
+ mas->last = ULONG_MAX;
+ }
+ return entry;
+ }
+
+ if (mas_is_none(mas)) {
+ mas->index = 0;
+ mas->last = ULONG_MAX;
+ }
+
+ return entry;
+}
+
+void *mas_find(struct ma_state *mas, unsigned long max)
+{
+ if (mas_is_paused(mas)) {
+ if (mas->last == ULONG_MAX) {
+ mas->node = MAS_NONE;
+ return NULL;
+ }
+ mas->node = MAS_START;
+ mas->index = ++mas->last;
+ }
+
+ if (mas_is_start(mas)) {
+ /* First run or continue */
+ void *entry;
+
+ if (mas->index > max)
+ return NULL;
+
+ entry = mas_walk(mas);
+ if (entry)
+ return entry;
+ }
+
+ if (!mas_searchable(mas))
+ return NULL;
+
+ /* Retries on dead nodes handled by mas_next_entry */
+ return mas_next_entry(mas, max);
+}
+
+/***********************************************/
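+/* Resolve the maple tree struct sizes and member offsets, and read the
+ * kernel's slot/pivot limit arrays; must run before any tree iteration. */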
+void maple_init(void)
+{
+ int array_len;
+
+ STRUCT_SIZE_INIT(maple_tree_struct, "maple_tree");
+ STRUCT_SIZE_INIT(maple_node_struct, "maple_node");
+
+ MEMBER_OFFSET_INIT(maple_tree_ma_root, "maple_tree", "ma_root");
+ MEMBER_OFFSET_INIT(maple_tree_ma_flags, "maple_tree", "ma_flags");
+
+ MEMBER_OFFSET_INIT(maple_node_parent, "maple_node", "parent");
+ MEMBER_OFFSET_INIT(maple_node_ma64, "maple_node", "ma64");
+ MEMBER_OFFSET_INIT(maple_node_mr64, "maple_node", "mr64");
+ MEMBER_OFFSET_INIT(maple_node_slot, "maple_node", "slot");
+
+ MEMBER_OFFSET_INIT(maple_arange_64_parent, "maple_arange_64", "parent");
+ MEMBER_OFFSET_INIT(maple_arange_64_pivot, "maple_arange_64", "pivot");
+ MEMBER_OFFSET_INIT(maple_arange_64_slot, "maple_arange_64", "slot");
+ MEMBER_OFFSET_INIT(maple_arange_64_meta, "maple_arange_64", "meta");
+
+ MEMBER_OFFSET_INIT(maple_range_64_parent, "maple_range_64", "parent");
+ MEMBER_OFFSET_INIT(maple_range_64_pivot, "maple_range_64", "pivot");
+ MEMBER_OFFSET_INIT(maple_range_64_slot, "maple_range_64", "slot");
+ MEMBER_OFFSET_INIT(maple_range_64_meta, "maple_range_64", "meta");
+
+ MEMBER_OFFSET_INIT(maple_metadata_end, "maple_metadata", "end");
+
+ array_len = get_array_length("mt_slots", NULL, sizeof(char));
+ mt_slots = calloc(array_len, sizeof(char));
+ readmem(symbol_value("mt_slots"), KVADDR, mt_slots,
+ array_len * sizeof(char), "maple_init read mt_slots",
+ FAULT_ON_ERROR);
+
+ array_len = get_array_length("mt_pivots", NULL, sizeof(char));
+ mt_pivots = calloc(array_len, sizeof(char));
+ readmem(symbol_value("mt_pivots"), KVADDR, mt_pivots,
+ array_len * sizeof(char), "maple_init read mt_pivots",
+ FAULT_ON_ERROR);
+}
\ No newline at end of file
diff --git a/maple_tree.h b/maple_tree.h
new file mode 100644
index 0000000..8b36c29
--- /dev/null
+++ b/maple_tree.h
@@ -0,0 +1,214 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+#ifndef _MAPLE_TREE_H
+#define _MAPLE_TREE_H
+/*
+ * Maple Tree - An RCU-safe adaptive tree for storing ranges
+ * Copyright (c) 2018-2022 Oracle
+ * Authors: Liam R. Howlett <Liam.Howlett(a)Oracle.com>
+ * Matthew Wilcox <willy(a)infradead.org>
+ *
+ * eXtensible Arrays
+ * Copyright (c) 2017 Microsoft Corporation
+ * Author: Matthew Wilcox <willy(a)infradead.org>
+ *
+ * See Documentation/core-api/xarray.rst for how to use the XArray.
+ */
+#include <stdbool.h>
+#include <limits.h>
+
+/*
+ * The following are copied and modified from include/linux/maple_tree.h
+ *
+ * The structs are kept as empty placeholders so the compiler can still
+ * emit type warnings for debugging, and so the function prototypes stay
+ * consistent with the kernel's corresponding functions.
+ */
+struct maple_tree { };
+struct maple_metadata { };
+struct maple_range_64 { };
+struct maple_arange_64 { };
+struct maple_alloc { };
+struct maple_node { };
+
+struct ma_state {
+ struct maple_tree *tree; /* The tree we're operating in */
+ unsigned long index; /* The index we're operating on - range start */
+ unsigned long last; /* The last index we're operating on - range end */
+ struct maple_enode *node; /* The node containing this entry */
+ unsigned long min; /* The minimum index of this node - implied pivot min */
+ unsigned long max; /* The maximum index of this node - implied pivot max */
+ struct maple_alloc *alloc; /* Allocated nodes for this operation */
+ unsigned char depth; /* depth of tree descent during write */
+ unsigned char offset;
+ unsigned char mas_flags;
+};
+
+enum maple_type {
+ maple_dense,
+ maple_leaf_64,
+ maple_range_64,
+ maple_arange_64,
+};
+
+#define MAS_START ((struct maple_enode *)1UL)
+#define MAS_ROOT ((struct maple_enode *)5UL)
+#define MAS_NONE ((struct maple_enode *)9UL)
+#define MAS_PAUSE ((struct maple_enode *)17UL)
+#define MA_ERROR(err) \
+ ((struct maple_enode *)(((unsigned long)err << 2) | 2UL))
+
+#define MA_STATE(name, mt, first, end) \
+ struct ma_state name = { \
+ .tree = mt, \
+ .index = first, \
+ .last = end, \
+ .node = MAS_START, \
+ .min = 0, \
+ .max = ULONG_MAX, \
+ }
+
+#define MAPLE_NODE_MASK 255UL
+
+#define MT_FLAGS_ALLOC_RANGE 0x01
+#define MT_FLAGS_USE_RCU 0x02
+#define MT_FLAGS_HEIGHT_OFFSET 0x02
+#define MT_FLAGS_HEIGHT_MASK 0x7C
+#define MT_FLAGS_LOCK_MASK 0x300
+#define MT_FLAGS_LOCK_IRQ 0x100
+#define MT_FLAGS_LOCK_BH 0x200
+#define MT_FLAGS_LOCK_EXTERN 0x300
+
+#define MAPLE_HEIGHT_MAX 31
+
+#define MAPLE_NODE_TYPE_MASK 0x0F
+#define MAPLE_NODE_TYPE_SHIFT 0x03
+
+#define MAPLE_RESERVED_RANGE 4096
+
+static inline void mas_reset(struct ma_state *mas)
+{
+ mas->node = MAS_START;
+}
+
+static inline
+void mas_set_range(struct ma_state *mas, unsigned long start, unsigned long last)
+{
+ mas->index = start;
+ mas->last = last;
+ mas->node = MAS_START;
+}
+
+static inline void mas_set(struct ma_state *mas, unsigned long index)
+{
+ mas_set_range(mas, index, index);
+}
+
+static inline bool mas_is_none(struct ma_state *mas)
+{
+ return mas->node == MAS_NONE;
+}
+
+static inline bool mas_is_paused(struct ma_state *mas)
+{
+ return mas->node == MAS_PAUSE;
+}
+
+/*
+ * The following are copied and modified from include/linux/xarray.h
+ */
+
+#define MAX_ERRNO 4095
+#define XA_ZERO_ENTRY xa_mk_internal(257)
+#define XA_RETRY_ENTRY xa_mk_internal(256)
+
+static inline void *xa_mk_internal(unsigned long v)
+{
+ return (void *)((v << 2) | 2);
+}
+
+static inline bool xa_is_internal(const void *entry)
+{
+ return ((unsigned long)entry & 3) == 2;
+}
+
+static inline bool xa_is_err(const void *entry)
+{
+ return xa_is_internal(entry) &&
+ entry >= xa_mk_internal(-MAX_ERRNO);
+}
+
+static inline int xa_err(void *entry)
+{
+ /* xa_to_internal() would not do sign extension. */
+ if (xa_is_err(entry))
+ return (long)entry >> 2;
+ return 0;
+}
+
+static inline bool xa_is_node(const void *entry)
+{
+ return xa_is_internal(entry) && (unsigned long)entry > 4096;
+}
+
+static inline bool xa_is_value(const void *entry)
+{
+ return (unsigned long)entry & 1;
+}
+
+static inline bool xa_is_zero(const void *entry)
+{
+ return entry == XA_ZERO_ENTRY;
+}
+
+static inline unsigned long xa_to_internal(const void *entry)
+{
+ return (unsigned long)entry >> 2;
+}
+
+static inline unsigned long xa_to_value(const void *entry)
+{
+ return (unsigned long)entry >> 1;
+}
+
+static inline bool xa_is_advanced(const void *entry)
+{
+ return xa_is_internal(entry) && (entry <= XA_RETRY_ENTRY);
+}
+
+/*
+ * The following are copied and modified from include/linux/mm.h
+ */
+
+struct vma_iterator {
+ struct ma_state mas;
+};
+
+#define VMA_ITERATOR(name, mm_mt, addr) \
+ struct vma_iterator name = { \
+ .mas = { \
+ .tree = mm_mt, \
+ .index = addr, \
+ .node = MAS_START, \
+ }, \
+ }
+
+void *mas_find(struct ma_state *, unsigned long);
+
+static void *vma_find(struct vma_iterator *vmi, unsigned long max)
+{
+ return mas_find(&vmi->mas, max);
+}
+
+__attribute__((unused)) static void *vma_next(struct vma_iterator *vmi)
+{
+ /*
+ * Uses vma_find() to get the first VMA when the iterator starts.
+ * Calling mas_next() could skip the first entry.
+ */
+ return vma_find(vmi, ULONG_MAX);
+}
+
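+/* In crash, the 'vma' loop variable is a ulong holding the kernel address
+ * of a vm_area_struct, hence the cast in the loop condition. */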
+#define for_each_vma(vmi, vma) \
+ while ((vma = (ulong)vma_next(&(vmi))) != 0)
+
+#endif /* _MAPLE_TREE_H */
\ No newline at end of file
--
2.33.1