Hi Lijiang,
Sorry for the late reply...
On Tue, Nov 1, 2022 at 2:51 PM lijiang <lijiang(a)redhat.com> wrote:
On Tue, Oct 25, 2022 at 8:39 PM <crash-utility-request(a)redhat.com> wrote:
> Date: Tue, 25 Oct 2022 20:38:20 +0800
> From: Tao Liu <ltao(a)redhat.com>
> To: crash-utility(a)redhat.com
> Subject: [Crash-utility] [PATCH v2 1/6] Port the maple tree data
> structures and main functions
> Message-ID: <20221025123825.36421-2-ltao(a)redhat.com>
> Content-Type: text/plain; charset="US-ASCII"; x-default=true
>
> There are 2 ways to iterate vm_area_struct: 1) by rbtree,
> aka vma.vm_rb; 2) by linked list, aka vma.vm_prev/next.
> However for linux maple tree patch[1][2], vm_rb and vm_prev/next
> are removed from vm_area_struct. For memory.c:vm_area_dump
> of crash, it mainly uses linked list as a way of vma iteration,
> which will not work for this case. So maple tree iteration
> need to be ported to crash.
>
> For crash, currently it only iteratively read the maple tree,
> no more rcu safe or maple tree modification features
> needed. So we only port a subset of kernel maple tree
> features. In addition, we need to modify the ported kernel
> source code, making it compatible with crash.
>
> The formal crash way of vmcore struct member resolving is:
>
> readmem(node, KVADDR, buf, SIZE(buf), "", flag);
> return buf + OFFSET(member);
>
> which is the reimplementation of kernel way of member resolving:
>
> return node->member;
>
> The 1st one is arch independent, it uses gdb to resolve the OFFSET
> of members, so crash don't need to know what the inside of the
> struct is, even if the struct changes for new kernel version. The 2nd
> one is arch dependent, the struct need to be ported to crash, and the
> OFFSET of members may differ between crash and kernel due to padding/
> alignment or optimization reasons.
>
> This patch deals with the 2 issues: 1) Poring for_each_vma() macro, and
> all its dependencies from kernel source[3] to crash, to enable crash
> maple tree vma iteration, 2) adapting the ported code with crash.
>
> [1]:
https://github.com/oracle/linux-uek/commit/d19703645b80abe35dff1a88449d07...
> [2]:
https://github.com/oracle/linux-uek/commit/91dee01f1ebb6b6587463b6ee6f7bb...
> [3]:
https://github.com/oracle/linux-uek, maple/mainline branch
>
> Signed-off-by: Tao Liu <ltao(a)redhat.com>
> ---
> Makefile | 10 +-
> defs.h | 19 ++
> maple_tree.c | 833 +++++++++++++++++++++++++++++++++++++++++++++++++++
> maple_tree.h | 214 +++++++++++++
> 4 files changed, 1073 insertions(+), 3 deletions(-)
> create mode 100644 maple_tree.c
> create mode 100644 maple_tree.h
>
> diff --git a/Makefile b/Makefile
> index 79aef17..6f19b77 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -59,6 +59,7 @@ IBM_HFILES=ibm_common.h
> SADUMP_HFILES=sadump.h
> UNWIND_HFILES=unwind.h unwind_i.h rse.h unwind_x86.h unwind_x86_64.h
> VMWARE_HFILES=vmware_vmss.h
> +MAPLE_TREE_HFILES=maple_tree.h
>
> CFILES=main.c tools.c global_data.c memory.c filesys.c help.c task.c \
> kernel.c test.c gdb_interface.c configure.c net.c dev.c bpf.c \
> @@ -73,12 +74,12 @@ CFILES=main.c tools.c global_data.c memory.c filesys.c help.c
task.c \
> xen_hyper.c xen_hyper_command.c xen_hyper_global_data.c \
> xen_hyper_dump_tables.c kvmdump.c qemu.c qemu-load.c sadump.c ipcs.c \
> ramdump.c vmware_vmss.c vmware_guestdump.c \
> - xen_dom0.c kaslr_helper.c sbitmap.c
> + xen_dom0.c kaslr_helper.c sbitmap.c maple_tree.c
>
> SOURCE_FILES=${CFILES} ${GENERIC_HFILES} ${MCORE_HFILES} \
> ${REDHAT_CFILES} ${REDHAT_HFILES} ${UNWIND_HFILES} \
> ${LKCD_DUMP_HFILES} ${LKCD_TRACE_HFILES} ${LKCD_OBSOLETE_HFILES}\
> - ${IBM_HFILES} ${SADUMP_HFILES} ${VMWARE_HFILES}
> + ${IBM_HFILES} ${SADUMP_HFILES} ${VMWARE_HFILES} ${MAPLE_TREE_HFILES}
>
> OBJECT_FILES=main.o tools.o global_data.o memory.o filesys.o help.o task.o \
> build_data.o kernel.o test.o gdb_interface.o net.o dev.o bpf.o \
> @@ -93,7 +94,7 @@ OBJECT_FILES=main.o tools.o global_data.o memory.o filesys.o
help.o task.o \
> xen_hyper.o xen_hyper_command.o xen_hyper_global_data.o \
> xen_hyper_dump_tables.o kvmdump.o qemu.o qemu-load.o sadump.o ipcs.o \
> ramdump.o vmware_vmss.o vmware_guestdump.o \
> - xen_dom0.o kaslr_helper.o sbitmap.o
> + xen_dom0.o kaslr_helper.o sbitmap.o maple_tree.o
>
> MEMORY_DRIVER_FILES=memory_driver/Makefile memory_driver/crash.c
memory_driver/README
>
> @@ -536,6 +537,9 @@ kaslr_helper.o: ${GENERIC_HFILES} kaslr_helper.c
> bpf.o: ${GENERIC_HFILES} bpf.c
> ${CC} -c ${CRASH_CFLAGS} bpf.c ${WARNING_OPTIONS} ${WARNING_ERROR}
>
> +maple_tree.o: ${GENERIC_HFILES} ${MAPLE_TREE_HFILES} maple_tree.c
> + ${CC} -c ${CRASH_CFLAGS} maple_tree.c ${WARNING_OPTIONS} ${WARNING_ERROR}
> +
> ${PROGRAM}: force
> @$(MAKE) all
>
> diff --git a/defs.h b/defs.h
> index afdcf6c..2978cec 100644
> --- a/defs.h
> +++ b/defs.h
> @@ -2181,6 +2181,22 @@ struct offset_table { /* stash of
commonly-used offsets */
> long blk_mq_tags_nr_reserved_tags;
> long blk_mq_tags_rqs;
> long request_queue_hctx_table;
> + long mm_struct_mm_mt;
> + long maple_tree_ma_root;
> + long maple_tree_ma_flags;
> + long maple_node_parent;
> + long maple_node_ma64;
> + long maple_node_mr64;
> + long maple_node_slot;
> + long maple_arange_64_parent;
> + long maple_arange_64_pivot;
> + long maple_arange_64_slot;
> + long maple_arange_64_meta;
> + long maple_range_64_parent;
> + long maple_range_64_pivot;
> + long maple_range_64_slot;
> + long maple_range_64_meta;
> + long maple_metadata_end;
> };
>
> struct size_table { /* stash of commonly-used sizes */
> @@ -2351,6 +2367,8 @@ struct size_table { /* stash of commonly-used sizes
*/
> long sbitmap_queue;
> long sbq_wait_state;
> long blk_mq_tags;
> + long maple_tree_struct;
> + long maple_node_struct;
> };
>
> struct array_table {
> @@ -5557,6 +5575,7 @@ int file_dump(ulong, ulong, ulong, int, int);
> int same_file(char *, char *);
> int cleanup_memory_driver(void);
>
> +void maple_init(void);
>
> /*
> * help.c
> diff --git a/maple_tree.c b/maple_tree.c
> new file mode 100644
> index 0000000..058b769
> --- /dev/null
> +++ b/maple_tree.c
> @@ -0,0 +1,833 @@
> +// SPDX-License-Identifier: GPL-2.0+
> +/*
> + * Maple Tree implementation
> + * Copyright (c) 2018-2022 Oracle Corporation
> + * Authors: Liam R. Howlett <Liam.Howlett(a)oracle.com>
> + * Matthew Wilcox <willy(a)infradead.org>
> + *
> + * The following are copied and modified from lib/maple_tree.c
> + */
> +
> +#include "maple_tree.h"
> +#include "defs.h"
> +
> +/* Bit 1 indicates the root is a node */
> +#define MAPLE_ROOT_NODE 0x02
> +/* maple_type stored bit 3-6 */
> +#define MAPLE_ENODE_TYPE_SHIFT 0x03
> +/* Bit 2 means a NULL somewhere below */
> +#define MAPLE_ENODE_NULL 0x04
> +
> +#define MA_ROOT_PARENT 1
> +
> +#define MAPLE_PARENT_ROOT 0x01
> +
> +#define MAPLE_PARENT_SLOT_SHIFT 0x03
> +#define MAPLE_PARENT_SLOT_MASK 0xF8
> +
> +#define MAPLE_PARENT_16B_SLOT_SHIFT 0x02
> +#define MAPLE_PARENT_16B_SLOT_MASK 0xFC
> +
> +#define MAPLE_PARENT_RANGE64 0x06
> +#define MAPLE_PARENT_RANGE32 0x04
> +#define MAPLE_PARENT_NOT_RANGE16 0x02
> +
> +unsigned char *mt_slots = NULL;
> +unsigned char *mt_pivots = NULL;
> +
> +#define MAPLE_BUFSIZE 512
> +
> +#define ma_parent_ptr(x) ((struct maple_pnode *)(x))
Unused.
Thanks, removed the unused variable/macros in v3.
> +#define ma_enode_ptr(x) ((struct maple_enode *)(x))
> +
> +static inline bool mas_is_ptr(struct ma_state *mas)
> +{
> + return mas->node == MAS_ROOT;
> +}
> +
> +static inline bool mas_is_start(struct ma_state *mas)
> +{
> + return mas->node == MAS_START;
> +}
> +
> +static inline void *mte_safe_root(const struct maple_enode *node)
> +{
> + return (void *)((unsigned long)node & ~MAPLE_ROOT_NODE);
> +}
> +
> +static inline struct maple_node *mte_to_node(const struct maple_enode *entry)
> +{
> + return (struct maple_node *)((unsigned long)entry & ~MAPLE_NODE_MASK);
> +}
> +
> +static inline enum maple_type mte_node_type(const struct maple_enode *entry)
> +{
> + return ((unsigned long)entry >> MAPLE_NODE_TYPE_SHIFT) &
> + MAPLE_NODE_TYPE_MASK;
> +}
> +
> +static inline void *mas_root(struct ma_state *mas)
> +{
> + char tree[MAPLE_BUFSIZE];
> + assert(SIZE(maple_tree_struct) <= MAPLE_BUFSIZE);
The assert() may cause the crash coredump, could you please replace
the assert() with the similar code block as below?
if (SIZE(maple_tree_struct) <= MAPLE_BUFSIZE)
error(FATAL, "");
Agreed, all assert() been removed.
> +
> + readmem((ulonglong)(mas->tree), KVADDR, tree, SIZE(maple_tree_struct),
> + "mas_root read maple_tree", FAULT_ON_ERROR);
> + return *(void **)(tree + OFFSET(maple_tree_ma_root));
> +}
> +
> +static inline struct maple_enode *mas_start(struct ma_state *mas)
> +{
> + if (mas_is_start(mas)) {
> + struct maple_enode *root;
> +
> + mas->node = MAS_NONE;
> + mas->min = 0;
> + mas->max = ULONG_MAX;
> + mas->depth = 0;
> + mas->offset = 0;
> +
> + root = mas_root(mas);
> + /* Tree with nodes */
> + if (xa_is_node(root)) {
> + mas->node = mte_safe_root(root);
> + return NULL;
> + }
> +
> + /* empty tree */
> + if (!root) {
> + // mas->offset = MAPLE_NODE_SLOTS;
> + mas->offset = mt_slots[maple_dense];
> + return NULL;
> + }
> +
> + /* Single entry tree */
> + mas->node = MAS_ROOT;
> + // mas->offset = MAPLE_NODE_SLOTS;
> + mas->offset = mt_slots[maple_dense];
> +
> + /* Single entry tree. */
> + if (mas->index > 0)
> + return NULL;
> +
> + return root;
> + }
> +
> + return NULL;
> +}
> +
> +static inline unsigned long *ma_pivots(struct maple_node *node,
> + enum maple_type type)
> +{
> + switch (type) {
> + case maple_arange_64:
> + return (unsigned long *)((char *)node + OFFSET(maple_node_ma64)
> + + OFFSET(maple_arange_64_pivot));
> + case maple_range_64:
> + case maple_leaf_64:
> + return (unsigned long *)((char *)node + OFFSET(maple_node_mr64)
> + + OFFSET(maple_range_64_pivot));
> + case maple_dense:
> + return NULL;
> + }
> + return NULL;
> +}
> +
> +static inline struct maple_metadata *ma_meta(struct maple_node *mn,
> + enum maple_type mt)
> +{
> + switch (mt) {
> + case maple_arange_64:
> + return (struct maple_metadata *)(
> + (char *)mn + OFFSET(maple_node_ma64)
> + + OFFSET(maple_arange_64_meta));
> + default:
> + return (struct maple_metadata *)(
> + (char *)mn + OFFSET(maple_node_mr64)
> + + OFFSET(maple_range_64_meta));
> + }
> +}
> +
> +static inline unsigned char ma_meta_end(struct maple_node *mn,
> + enum maple_type mt)
> +{
> + struct maple_metadata *meta = ma_meta(mn, mt);
> +
> + return *((char *)meta + OFFSET(maple_metadata_end));
> +}
> +
> +static inline unsigned char ma_data_end(struct maple_node *node,
> + enum maple_type type,
> + unsigned long *pivots,
> + unsigned long max)
> +{
> + unsigned char offset;
> +
> + if (type == maple_arange_64)
> + return ma_meta_end(node, type);
> +
> + offset = mt_pivots[type] - 1;
> + if (!pivots[offset])
> + return ma_meta_end(node, type);
> +
> + if (pivots[offset] == max)
> + return offset;
> +
> + return mt_pivots[type];
> +}
> +
> +static inline bool ma_dead_node(const struct maple_node *node,
> + const struct maple_node *orig_node)
> +{
> + struct maple_node *parent = (struct maple_node *)
> + (*(unsigned long *)((char *)node
> + +
OFFSET(maple_node_parent))
> + & ~MAPLE_NODE_MASK);
> + return (parent == orig_node);
> +}
> +
> +static inline void **ma_slots(struct maple_node *mn, enum maple_type mt)
> +{
> + switch (mt) {
> + default:
> + case maple_arange_64:
> + return (void **)((char *)mn + OFFSET(maple_node_ma64)
> + + OFFSET(maple_arange_64_slot));
> + case maple_range_64:
> + case maple_leaf_64:
> + return (void **)((char *)mn + OFFSET(maple_node_mr64)
> + + OFFSET(maple_range_64_slot));
> + case maple_dense:
> + return (void **)((char *)mn + OFFSET(maple_node_slot));
> + }
> +}
> +
> +static inline void *mt_slot(const struct maple_tree *mt,
> + void **slots, unsigned char offset)
> +{
> + return slots[offset];
> +}
> +
> +static inline bool ma_is_leaf(const enum maple_type type)
> +{
> + return type < maple_range_64;
> +}
> +
> +static inline void *mtree_range_walk(struct ma_state *mas)
> +{
> + unsigned long *pivots;
> + unsigned char offset;
> + struct maple_node *node;
> + struct maple_enode *next, *last;
> + enum maple_type type;
> + void **slots;
> + unsigned char end;
> + unsigned long max, min;
> + unsigned long prev_max, prev_min;
> +
> + char tmp_node[MAPLE_BUFSIZE];
> + assert(SIZE(maple_node_struct) <= MAPLE_BUFSIZE);
Ditto.
> +
> + last = next = mas->node;
> + prev_min = min = mas->min;
> + max = mas->max;
> + do {
> + offset = 0;
> + last = next;
> + node = mte_to_node(next);
> + type = mte_node_type(next);
> + readmem((ulonglong)node, KVADDR, tmp_node, SIZE(maple_node_struct),
> + "mtree_range_walk read maple_node",
FAULT_ON_ERROR);
> + pivots = ma_pivots((struct maple_node *)tmp_node, type);
> + end = ma_data_end((struct maple_node *)tmp_node, type, pivots,
max);
> + if (ma_dead_node((struct maple_node *)tmp_node, node))
> + goto dead_node;
> +
> + if (pivots[offset] >= mas->index) {
> + prev_max = max;
> + prev_min = min;
> + max = pivots[offset];
> + goto next;
> + }
> +
> + do {
> + offset++;
> + } while ((offset < end) && (pivots[offset] <
mas->index));
> +
> + prev_min = min;
> + min = pivots[offset - 1] + 1;
> + prev_max = max;
> + if (offset < end && pivots[offset])
> + max = pivots[offset];
> +
> +next:
> + slots = ma_slots((struct maple_node *)tmp_node, type);
> + next = mt_slot(mas->tree, slots, offset);
> + if (ma_dead_node((struct maple_node *)tmp_node, node))
> + goto dead_node;
> + } while (!ma_is_leaf(type));
> +
> + mas->offset = offset;
> + mas->index = min;
> + mas->last = max;
> + mas->min = prev_min;
> + mas->max = prev_max;
> + mas->node = last;
> + return (void *) next;
> +
> +dead_node:
> + mas_reset(mas);
> + return NULL;
> +}
> +
> +static inline void *mas_state_walk(struct ma_state *mas)
> +{
> + void *entry;
> +
> + entry = mas_start(mas);
> + if (mas_is_none(mas))
> + return NULL;
> +
> + if (mas_is_ptr(mas))
> + return entry;
> +
> + return mtree_range_walk(mas);
> +}
> +
> +static inline bool mas_searchable(struct ma_state *mas)
> +{
> + if (mas_is_none(mas))
> + return false;
> +
> + if (mas_is_ptr(mas))
> + return false;
> +
> + return true;
> +}
> +
> +static inline struct maple_node *mas_mn(const struct ma_state *mas)
> +{
> + return mte_to_node(mas->node);
> +}
> +
> +static inline unsigned long
> +mas_safe_min(struct ma_state *mas, unsigned long *pivots, unsigned char offset)
> +{
> + if (offset)
> + return pivots[offset - 1] + 1;
> +
> + return mas->min;
> +}
> +
> +static inline void *mas_slot(struct ma_state *mas, void **slots,
> + unsigned char offset)
> +{
> + return mt_slot(mas->tree, slots, offset);
> +}
> +
> +static inline unsigned long
> +mas_safe_pivot(const struct ma_state *mas, unsigned long *pivots,
> + unsigned char piv, enum maple_type type)
> +{
> + if (piv >= mt_pivots[type])
> + return mas->max;
> +
> + return pivots[piv];
> +}
> +
> +static inline void *mas_next_nentry(struct ma_state *mas,
> + struct maple_node *node, unsigned long max,
> + enum maple_type type, struct maple_node *orig_node)
> +{
> + unsigned char count;
> + unsigned long pivot;
> + unsigned long *pivots;
> + void **slots;
> + void *entry;
> +
> + if (mas->last == mas->max) {
> + mas->index = mas->max;
> + return NULL;
> + }
> +
> + pivots = ma_pivots(node, type);
> + slots = ma_slots(node, type);
> + mas->index = mas_safe_min(mas, pivots, mas->offset);
> + if (ma_dead_node(node, orig_node))
> + return NULL;
> +
> + if (mas->index > max)
> + return NULL;
> +
> + count = ma_data_end(node, type, pivots, mas->max);
> + if (mas->offset > count)
> + return NULL;
> +
> + while (mas->offset < count) {
> + pivot = pivots[mas->offset];
> + entry = mas_slot(mas, slots, mas->offset);
> + if (ma_dead_node(node, orig_node))
> + return NULL;
> +
> + if (entry)
> + goto found;
> +
> + if (pivot >= max)
> + return NULL;
> +
> + mas->index = pivot + 1;
> + mas->offset++;
> + }
> +
> + if (mas->index > mas->max) {
> + mas->index = mas->last;
> + return NULL;
> + }
> +
> + pivot = mas_safe_pivot(mas, pivots, mas->offset, type);
> + entry = mas_slot(mas, slots, mas->offset);
> + if (ma_dead_node(node, orig_node))
> + return NULL;
> +
> + if (!pivot)
> + return NULL;
> +
> + if (!entry)
> + return NULL;
> +
> +found:
> + mas->last = pivot;
> + return entry;
> +}
> +
> +static inline void mas_rewalk(struct ma_state *mas, unsigned long index)
> +{
> +
> +retry:
> + mas_set(mas, index);
> + mas_state_walk(mas);
> + if (mas_is_start(mas))
> + goto retry;
> +
> + return;
The "return" is redundant?
> +}
> +
> +static inline bool ma_is_root(struct maple_node *node)
> +{
> + return (*(unsigned long *)((char *)node
> + + OFFSET(maple_node_parent))
> + & MA_ROOT_PARENT);
> +}
> +
> +static inline struct maple_node *mte_parent(const struct maple_node *node)
> +{
> + return (void *)(*(unsigned long *)((char *)node
> + + OFFSET(maple_node_parent))
> + & ~MAPLE_NODE_MASK);
> +}
> +
> +static inline unsigned long mte_parent_slot_mask(unsigned long parent)
> +{
> + /* Note bit 1 == 0 means 16B */
> + if (parent & MAPLE_PARENT_NOT_RANGE16)
> + return MAPLE_PARENT_SLOT_MASK;
> +
> + return MAPLE_PARENT_16B_SLOT_MASK;
> +}
> +
> +static inline bool mt_is_alloc(struct maple_tree *mt)
> +{
> + return (*(unsigned int *)((char *)mt
> + + OFFSET(maple_tree_ma_flags))
> + & MT_FLAGS_ALLOC_RANGE);
> +}
> +
> +static inline
> +enum maple_type mte_parent_enum(struct maple_enode *p_enode,
> + struct maple_tree *mt)
> +{
> + unsigned long p_type;
> + char tmp_tree[MAPLE_BUFSIZE];
> + assert(SIZE(maple_tree_struct) <= MAPLE_BUFSIZE);
Ditto.
> +
> + p_type = (unsigned long)p_enode;
> + if (p_type & MAPLE_PARENT_ROOT)
> + return 0; /* Validated in the caller. */
Does this indicate that it is the enum maple_type: maple_dense?
> +
> + p_type &= MAPLE_NODE_MASK;
> + p_type = p_type & ~(MAPLE_PARENT_ROOT | mte_parent_slot_mask(p_type));
> +
> + switch (p_type) {
> + case MAPLE_PARENT_RANGE64: /* or MAPLE_PARENT_ARANGE64 */
> + readmem((ulonglong)mt, KVADDR, tmp_tree, SIZE(maple_tree_struct),
> + "mte_parent_enum read maple_tree",
FAULT_ON_ERROR);
> + if (mt_is_alloc((struct maple_tree *)tmp_tree))
> + return maple_arange_64;
> + return maple_range_64;
> + }
> +
> + return 0;
> +}
> +
> +static inline
> +enum maple_type mas_parent_enum(struct ma_state *mas, struct maple_node *node)
> +{
> + return mte_parent_enum(ma_enode_ptr(*(struct maple_pnode **)
> + ((char *)node +
OFFSET(maple_node_parent))),
> + mas->tree);
> +}
> +
> +static inline unsigned long mte_parent_shift(unsigned long parent)
> +{
> + /* Note bit 1 == 0 means 16B */
> + if (parent & MAPLE_PARENT_NOT_RANGE16)
> + return MAPLE_PARENT_SLOT_SHIFT;
> +
> + return MAPLE_PARENT_16B_SLOT_SHIFT;
> +}
> +
> +static inline unsigned int mte_parent_slot(const struct maple_node *node)
> +{
> + unsigned long val = *(unsigned long *)((char *)node
> + + OFFSET(maple_node_parent));
> +
> + /* Root. */
> + if (val & 1)
> + return 0;
> +
> + /*
> + * Okay to use MAPLE_PARENT_16B_SLOT_MASK as the last bit will be lost
> + * by shift if the parent shift is MAPLE_PARENT_SLOT_SHIFT
> + */
> + return (val & MAPLE_PARENT_16B_SLOT_MASK) >>
mte_parent_shift(val);
> +}
> +
> +static inline struct maple_enode *mt_mk_node(const struct maple_node *node,
> + enum maple_type type)
> +{
> + return (void *)((unsigned long)node |
> + (type << MAPLE_ENODE_TYPE_SHIFT) | MAPLE_ENODE_NULL);
> +}
> +
> +static inline bool mte_is_root(struct maple_node *node)
> +{
> + return ma_is_root(node);
> +}
> +
> +static int mas_ascend(struct ma_state *mas)
> +{
> + struct maple_enode *p_enode; /* parent enode. */
> + struct maple_enode *a_enode; /* ancestor enode. */
> + struct maple_node *a_node; /* ancestor node. */
> + struct maple_node *p_node; /* parent node. */
> + unsigned char a_slot;
> + enum maple_type a_type;
> + unsigned long min, max;
> + unsigned long *pivots;
> + unsigned char offset;
> + bool set_max = false, set_min = false;
> +
> + char tmp_node[MAPLE_BUFSIZE];
> + assert(SIZE(maple_node_struct) <= MAPLE_BUFSIZE);
Ditto.
> +
> + a_node = mas_mn(mas);
> + readmem((ulonglong)a_node, KVADDR, tmp_node, SIZE(maple_node_struct),
> + "mas_ascend read maple_node", FAULT_ON_ERROR);
> + if (ma_is_root((struct maple_node *)tmp_node)) {
> + mas->offset = 0;
> + return 0;
> + }
> +
> + readmem((ulonglong)(mte_to_node(mas->node)), KVADDR, tmp_node,
> + SIZE(maple_node_struct), "mas_ascend read maple_node",
FAULT_ON_ERROR);
> + p_node = mte_parent((struct maple_node *)tmp_node);
> + if (a_node == p_node)
> + return 1;
> + a_type = mas_parent_enum(mas, (struct maple_node *)tmp_node);
> + offset = mte_parent_slot((struct maple_node *)tmp_node);
> + a_enode = mt_mk_node(p_node, a_type);
> +
> + /* Check to make sure all parent information is still accurate */
> + if (p_node != mte_parent((struct maple_node *)tmp_node))
> + return 1;
> +
> + mas->node = a_enode;
> + mas->offset = offset;
> +
> + readmem((ulonglong)(mte_to_node(a_enode)), KVADDR, tmp_node,
> + SIZE(maple_node_struct), "mas_ascend read maple_node",
FAULT_ON_ERROR);
> + if (mte_is_root((struct maple_node *)tmp_node)) {
> + mas->max = ULONG_MAX;
> + mas->min = 0;
> + return 0;
> + }
> +
> + min = 0;
> + max = ULONG_MAX;
> + do {
> + p_enode = a_enode;
> + readmem((ulonglong)(mte_to_node(p_enode)), KVADDR, tmp_node,
> + SIZE(maple_node_struct),
> + "mas_ascend read maple_node", FAULT_ON_ERROR);
> + a_type = mas_parent_enum(mas, (struct maple_node *)tmp_node);
> + a_node = mte_parent((struct maple_node *)tmp_node);
> + a_slot = mte_parent_slot((struct maple_node *)tmp_node);
> + readmem((ulonglong)a_node, KVADDR, tmp_node,
SIZE(maple_node_struct),
> + "mas_ascend read maple_node", FAULT_ON_ERROR);
> + pivots = ma_pivots((struct maple_node *)tmp_node, a_type);
> + a_enode = mt_mk_node((struct maple_node *)tmp_node, a_type);
> +
> + if (!set_min && a_slot) {
> + set_min = true;
> + min = pivots[a_slot - 1] + 1;
> + }
> +
> + if (!set_max && a_slot < mt_pivots[a_type]) {
> + set_max = true;
> + max = pivots[a_slot];
> + }
> +
> + if (ma_dead_node((struct maple_node *)tmp_node, a_node))
> + return 1;
> +
> + if (ma_is_root((struct maple_node *)tmp_node))
> + break;
> +
> + } while (!set_min || !set_max);
> +
> + mas->max = max;
> + mas->min = min;
> + return 0;
> +}
> +
> +static inline int mas_next_node(struct ma_state *mas, struct maple_node *node,
> + unsigned long max)
> +{
> + unsigned long min, pivot;
> + unsigned long *pivots;
> + struct maple_enode *enode;
> + int level = 0;
> + unsigned char offset;
> + enum maple_type mt;
> + void **slots;
> +
> + char tmp_node[MAPLE_BUFSIZE];
> + assert(SIZE(maple_node_struct) <= MAPLE_BUFSIZE);
Ditto.
> +
> + if (mas->max >= max)
> + goto no_entry;
> +
> + level = 0;
> + readmem((ulonglong)node, KVADDR, tmp_node, SIZE(maple_node_struct),
> + "mas_next_node read maple_node", FAULT_ON_ERROR);
> + do {
> + if (ma_is_root((struct maple_node *)tmp_node))
> + goto no_entry;
> +
> + min = mas->max + 1;
> + if (min > max)
> + goto no_entry;
> +
> + if (mas_ascend(mas))
> + return 1;
> +
> + offset = mas->offset;
> + level++;
> + node = mas_mn(mas);
> + mt = mte_node_type(mas->node);
> + readmem((ulonglong)node, KVADDR, tmp_node, SIZE(maple_node_struct),
> + "mas_next_node read maple_node", FAULT_ON_ERROR);
> + pivots = ma_pivots((struct maple_node *)tmp_node, mt);
> + } while (offset == ma_data_end((struct maple_node *)tmp_node, mt, pivots,
mas->max));
> +
> + slots = ma_slots((struct maple_node *)tmp_node, mt);
> + pivot = mas_safe_pivot(mas, pivots, ++offset, mt);
> + while (level > 1) {
> + /* Descend, if necessary */
> + enode = mas_slot(mas, slots, offset);
> + if (ma_dead_node((struct maple_node *)tmp_node, node))
> + return 1;
> +
> + mas->node = enode;
> + level--;
> + node = mas_mn(mas);
> + mt = mte_node_type(mas->node);
> + readmem((ulonglong)node, KVADDR, tmp_node, SIZE(maple_node_struct),
> + "mas_next_node read maple_node", FAULT_ON_ERROR);
> + slots = ma_slots((struct maple_node *)tmp_node, mt);
> + pivots = ma_pivots((struct maple_node *)tmp_node, mt);
> + offset = 0;
> + pivot = pivots[0];
> + }
> +
> + enode = mas_slot(mas, slots, offset);
> + if (ma_dead_node((struct maple_node *)tmp_node, node))
> + return 1;
> +
> + mas->node = enode;
> + mas->min = min;
> + mas->max = pivot;
> + return 0;
> +
> +no_entry:
> + if (ma_dead_node((struct maple_node *)tmp_node, node))
> + return 1;
> +
> + mas->node = MAS_NONE;
> + return 0;
> +}
> +
> +static inline void *mas_next_entry(struct ma_state *mas, unsigned long limit)
> +{
> + void *entry = NULL;
> + struct maple_enode *prev_node;
> + struct maple_node *node;
> + unsigned char offset;
> + unsigned long last;
> + enum maple_type mt;
> +
> + char tmp_node[MAPLE_BUFSIZE];
> + assert(SIZE(maple_node_struct) <= MAPLE_BUFSIZE);
Ditto.
> +
> + last = mas->last;
> +retry:
> + offset = mas->offset;
> + prev_node = mas->node;
> + node = mas_mn(mas);
> + mt = mte_node_type(mas->node);
> + mas->offset++;
> + if (mas->offset >= mt_slots[mt]) {
> + mas->offset = mt_slots[mt] - 1;
> + goto next_node;
> + }
> +
> + while (!mas_is_none(mas)) {
> + readmem((ulonglong)node, KVADDR, tmp_node, SIZE(maple_node_struct),
> + "mas_next_entry read maple_node",
FAULT_ON_ERROR);
> + entry = mas_next_nentry(mas, (struct maple_node *)tmp_node, limit,
mt, node);
> + if (ma_dead_node((struct maple_node *)tmp_node, node)) {
> + mas_rewalk(mas, last);
> + goto retry;
> + }
> +
> + if (entry)
> + return entry;
> +
> + if ((mas->index > limit))
> + break;
> +
> +next_node:
> + prev_node = mas->node;
> + offset = mas->offset;
> + if (mas_next_node(mas, node, limit)) {
> + mas_rewalk(mas, last);
> + goto retry;
> + }
> + mas->offset = 0;
> + node = mas_mn(mas);
> + mt = mte_node_type(mas->node);
> + }
> +
> + mas->index = mas->last = limit;
> + mas->offset = offset;
> + mas->node = prev_node;
> + return NULL;
> +}
> +
> +static void *mas_walk(struct ma_state *mas)
> +{
> + void *entry;
> +
> +retry:
> + entry = mas_state_walk(mas);
> + if (mas_is_start(mas))
> + goto retry;
> +
> + if (mas_is_ptr(mas)) {
> + if (!mas->index) {
> + mas->last = 0;
> + } else {
> + mas->index = 1;
> + mas->last = ULONG_MAX;
> + }
> + return entry;
> + }
> +
> + if (mas_is_none(mas)) {
> + mas->index = 0;
> + mas->last = ULONG_MAX;
> + }
> +
> + return entry;
> +}
> +
> +void *mas_find(struct ma_state *mas, unsigned long max)
> +{
> + if (mas_is_paused(mas)) {
> + if (mas->last == ULONG_MAX) {
> + mas->node = MAS_NONE;
> + return NULL;
> + }
> + mas->node = MAS_START;
> + mas->index = ++mas->last;
> + }
> +
> + if (mas_is_start(mas)) {
> + /* First run or continue */
> + void *entry;
> +
> + if (mas->index > max)
> + return NULL;
> +
> + entry = mas_walk(mas);
> + if (entry)
> + return entry;
> + }
> +
> + if (!mas_searchable(mas))
> + return NULL;
> +
> + /* Retries on dead nodes handled by mas_next_entry */
> + return mas_next_entry(mas, max);
> +}
> +
> +/***********************************************/
> +void maple_init(void)
> +{
> + int array_len;
> +
> + STRUCT_SIZE_INIT(maple_tree_struct, "maple_tree");
> + STRUCT_SIZE_INIT(maple_node_struct, "maple_node");
> +
> + MEMBER_OFFSET_INIT(maple_tree_ma_root, "maple_tree",
"ma_root");
> + MEMBER_OFFSET_INIT(maple_tree_ma_flags, "maple_tree",
"ma_flags");
> +
> + MEMBER_OFFSET_INIT(maple_node_parent, "maple_node",
"parent");
> + MEMBER_OFFSET_INIT(maple_node_ma64, "maple_node",
"ma64");
> + MEMBER_OFFSET_INIT(maple_node_mr64, "maple_node",
"mr64");
> + MEMBER_OFFSET_INIT(maple_node_slot, "maple_node",
"slot");
> +
> + MEMBER_OFFSET_INIT(maple_arange_64_parent, "maple_arange_64",
"parent");
> + MEMBER_OFFSET_INIT(maple_arange_64_pivot, "maple_arange_64",
"pivot");
> + MEMBER_OFFSET_INIT(maple_arange_64_slot, "maple_arange_64",
"slot");
> + MEMBER_OFFSET_INIT(maple_arange_64_meta, "maple_arange_64",
"meta");
> +
> + MEMBER_OFFSET_INIT(maple_range_64_parent, "maple_range_64",
"parent");
> + MEMBER_OFFSET_INIT(maple_range_64_pivot, "maple_range_64",
"pivot");
> + MEMBER_OFFSET_INIT(maple_range_64_slot, "maple_range_64",
"slot");
> + MEMBER_OFFSET_INIT(maple_range_64_meta, "maple_range_64",
"meta");
> +
> + MEMBER_OFFSET_INIT(maple_metadata_end, "maple_metadata",
"end");
> +
> + array_len = get_array_length("mt_slots", NULL, sizeof(char));
> + mt_slots = calloc(array_len, sizeof(char));
> + readmem(symbol_value("mt_slots"), KVADDR, mt_slots,
> + array_len * sizeof(char), "maple_init read mt_slots",
> + FAULT_ON_ERROR);
> +
> + array_len = get_array_length("mt_pivots", NULL, sizeof(char));
> + mt_pivots = calloc(array_len, sizeof(char));
> + readmem(symbol_value("mt_pivots"), KVADDR, mt_pivots,
> + array_len * sizeof(char), "maple_init read mt_pivots",
> + FAULT_ON_ERROR);
> +}
> \ No newline at end of file
> diff --git a/maple_tree.h b/maple_tree.h
> new file mode 100644
> index 0000000..8b36c29
> --- /dev/null
> +++ b/maple_tree.h
> @@ -0,0 +1,214 @@
> +/* SPDX-License-Identifier: GPL-2.0+ */
> +#ifndef _MAPLE_TREE_H
> +#define _MAPLE_TREE_H
> +/*
> + * Maple Tree - An RCU-safe adaptive tree for storing ranges
> + * Copyright (c) 2018-2022 Oracle
> + * Authors: Liam R. Howlett <Liam.Howlett(a)Oracle.com>
> + * Matthew Wilcox <willy(a)infradead.org>
> + *
> + * eXtensible Arrays
> + * Copyright (c) 2017 Microsoft Corporation
> + * Author: Matthew Wilcox <willy(a)infradead.org>
> + *
> + * See Documentation/core-api/xarray.rst for how to use the XArray.
> + */
> +#include <stdbool.h>
> +#include <limits.h>
> +
> +/*
> + * The following are copied and modified from include/linux/maple_tree.h
> + *
> + * Keep the empty struct names for debug purpose of compiler warning.
> + * And can also keep the function prototypes consistent with kernel's
> + * corresponding functions.
> + */
> +struct maple_tree { };
> +struct maple_metadata { };
> +struct maple_range_64 { };
> +struct maple_arange_64 { };
> +struct maple_alloc { };
> +struct maple_node { };
> +
Is it possible to define them with the "void *" ? For example:
struct ma_state {
void *tree;
unsigned long index;
...
void *node;
...
};
Kernel may change them or move them out of the structure in the
future, in this situation crash won't have to change accordingly.
Good suggestion! I have removed the empty structures in v3, and used
void * instead. In addition, since mas_find() no longer used in v3, a
lot of functions have been removed for this patch.
Thanks,
Tao Liu
> +struct ma_state {
> + struct maple_tree *tree; /* The tree we're operating in */
> + unsigned long index; /* The index we're operating on - range
start */
> + unsigned long last; /* The last index we're operating on -
range end */
> + struct maple_enode *node; /* The node containing this entry */
> + unsigned long min; /* The minimum index of this node - implied
pivot min */
> + unsigned long max; /* The maximum index of this node - implied
pivot max */
> + struct maple_alloc *alloc; /* Allocated nodes for this operation */
> + unsigned char depth; /* depth of tree descent during write */
> + unsigned char offset;
> + unsigned char mas_flags;
> +};
> +
> +enum maple_type {
> + maple_dense,
> + maple_leaf_64,
> + maple_range_64,
> + maple_arange_64,
> +};
> +
> +#define MAS_START ((struct maple_enode *)1UL)
> +#define MAS_ROOT ((struct maple_enode *)5UL)
> +#define MAS_NONE ((struct maple_enode *)9UL)
> +#define MAS_PAUSE ((struct maple_enode *)17UL)
> +#define MA_ERROR(err) \
> + ((struct maple_enode *)(((unsigned long)err << 2) | 2UL))
> +
> +#define MA_STATE(name, mt, first, end) \
> + struct ma_state name = { \
> + .tree = mt, \
> + .index = first, \
> + .last = end, \
> + .node = MAS_START, \
> + .min = 0, \
> + .max = ULONG_MAX, \
> + }
The above two macros are unused.
> +
> +#define MAPLE_NODE_MASK 255UL
> +
> +#define MT_FLAGS_ALLOC_RANGE 0x01
> +#define MT_FLAGS_USE_RCU 0x02
Unused.
> +#define MT_FLAGS_HEIGHT_OFFSET 0x02
> +#define MT_FLAGS_HEIGHT_MASK 0x7C
> +#define MT_FLAGS_LOCK_MASK 0x300
> +#define MT_FLAGS_LOCK_IRQ 0x100
> +#define MT_FLAGS_LOCK_BH 0x200
> +#define MT_FLAGS_LOCK_EXTERN 0x300
The above four macros are unused.
> +
> +#define MAPLE_HEIGHT_MAX 31
Unused.
> +
> +#define MAPLE_NODE_TYPE_MASK 0x0F
> +#define MAPLE_NODE_TYPE_SHIFT 0x03
> +
> +#define MAPLE_RESERVED_RANGE 4096
> +
> +static inline void mas_reset(struct ma_state *mas)
> +{
> + mas->node = MAS_START;
> +}
> +
> +static inline
> +void mas_set_range(struct ma_state *mas, unsigned long start, unsigned long last)
> +{
> + mas->index = start;
> + mas->last = last;
> + mas->node = MAS_START;
> +}
> +
> +static inline void mas_set(struct ma_state *mas, unsigned long index)
> +{
> + mas_set_range(mas, index, index);
> +}
> +
> +static inline bool mas_is_none(struct ma_state *mas)
> +{
()> + return mas->node == MAS_NONE;
> +}
> +
> +static inline bool mas_is_paused(struct ma_state *mas)
> +{
> + return mas->node == MAS_PAUSE;
> +}
> +
> +/*
> + * The following are copied and modified from include/linux/xarray.h
> + */
> +
> +#define MAX_ERRNO 4095
The macro MAX_ERRNO is unused because the xa_is_err() is not called.
> +#define XA_ZERO_ENTRY xa_mk_internal(257)
> +#define XA_RETRY_ENTRY xa_mk_internal(256)
The macro XA_RETRY_ENTRY is unused because the xa_is_advanced() is not
called by other functions.
> +
> +static inline void *xa_mk_internal(unsigned long v)
> +{
> + return (void *)((v << 2) | 2);
> +}
> +
> +static inline bool xa_is_internal(const void *entry)
> +{
> + return ((unsigned long)entry & 3) == 2;
> +}
> +
> +static inline bool xa_is_err(const void *entry)
> +{
> + return xa_is_internal(entry) &&
> + entry >= xa_mk_internal(-MAX_ERRNO);
> +}
The xa_is_err() is unused.
> +
> +static inline int xa_err(void *entry)
> +{
> + /* xa_to_internal() would not do sign extension. */
> + if (xa_is_err(entry))
> + return (long)entry >> 2;
> + return 0;
> +}
Ditto.
> +
> +static inline bool xa_is_node(const void *entry)
> +{
> + return xa_is_internal(entry) && (unsigned long)entry > 4096;
> +}
> +
> +static inline bool xa_is_value(const void *entry)
> +{
> + return (unsigned long)entry & 1;
> +}
> +
> +static inline bool xa_is_zero(const void *entry)
> +{
> + return entry == XA_ZERO_ENTRY;
> +}
> +
> +static inline unsigned long xa_to_internal(const void *entry)
> +{
> + return (unsigned long)entry >> 2;
> +}
> +
> +static inline unsigned long xa_to_value(const void *entry)
> +{
> + return (unsigned long)entry >> 1;
> +}
> +
> +static inline bool xa_is_advanced(const void *entry)
> +{
> + return xa_is_internal(entry) && (entry <= XA_RETRY_ENTRY);
> +}
Ditto.
Thanks.
Lianbo
> +
> +/*
> + * The following are copied and modified from include/linux/mm.h
> + */
> +
> +struct vma_iterator {
> + struct ma_state mas;
> +};
> +
> +#define VMA_ITERATOR(name, mm_mt, addr) \
> + struct vma_iterator name = { \
> + .mas = { \
> + .tree = mm_mt, \
> + .index = addr, \
> + .node = MAS_START, \
> + }, \
> + }
> +
> +void *mas_find(struct ma_state *, unsigned long);
> +
> +static void *vma_find(struct vma_iterator *vmi, unsigned long max)
> +{
> + return mas_find(&vmi->mas, max);
> +}
> +
> + __attribute__((unused)) static void *vma_next(struct vma_iterator *vmi)
> +{
> + /*
> + * Uses vma_find() to get the first VMA when the iterator starts.
> + * Calling mas_next() could skip the first entry.
> + */
> + return vma_find(vmi, ULONG_MAX);
> +}
> +
> +#define for_each_vma(vmi, vma) \
> + while ((vma = (ulong)vma_next(&(vmi))) != 0)
> +
> +#endif /* _MAPLE_TREE_H */
> \ No newline at end of file
> --
> 2.33.1