On 2023/05/23 23:03, lijiang wrote:
On Thu, May 11, 2023 at 12:35 PM HAGIO KAZUHITO(萩尾 一仁)
<k-hagio-ab(a)nec.com>
wrote:
> Support:
> - sym -l
> - sym -M
> - sym -m module
>
> Signed-off-by: Kazuhito Hagio <k-hagio-ab(a)nec.com>
> ---
> symbols.c | 234 +++++++++++++++++++++++++++++++++++++++++++++++++++---
> 1 file changed, 225 insertions(+), 9 deletions(-)
>
> diff --git a/symbols.c b/symbols.c
> index 88849833bada..669fa2e2f3da 100644
> --- a/symbols.c
> +++ b/symbols.c
> @@ -103,9 +103,16 @@ static void free_structure(struct struct_elem *);
> static unsigned char is_right_brace(const char *);
> static struct struct_elem *find_node(struct struct_elem *, char *);
> static void dump_node(struct struct_elem *, char *, unsigned char,
> unsigned char);
> -static int _in_module(ulong, struct load_module *, int);
> +
> +static int module_mem_type(ulong, struct load_module *);
> static ulong module_mem_end(ulong, struct load_module *);
> +static int _in_module(ulong, struct load_module *, int);
> +struct syment *value_search_module_v2(ulong, ulong *);
>
> +static const char *module_start_tags[];
> +static const char *module_start_strs[];
> +static const char *module_end_tags[];
> +static const char *module_end_strs[];
>
> /*
> * structure/union printing stuff
> @@ -1270,10 +1277,14 @@ symname_hash_search(struct syment *table[], char
> *name)
> * Output for sym -[lL] command.
> */
>
> +#define MODULE_PSEUDO_SYMBOL(sp) (STRNEQ((sp)->name,
"_MODULE_"))
> +
>
Good understanding. But I'm concerned if the name "_MODULE_" is too short
to distinguish kernel symbols from (pseudo)module symbols, although I do
not see any kernel symbols with the prefix "_MODULE_".
I see, but there has been no problem for a long time and its change
needs to change the old pseudo symbols too, so I would like to leave
this as it is for now.
# objdump -t vmlinux|grep _MODULE_
And in crash-utility, most of the time it uses the strncmp()/strcmp() to
compare pseudo symbols, but sometimes it also uses strstr() to search for
the pseudo symbols. Maybe the following symbols may not be loaded by
crash-utility, only some strings.
# objdump -s vmlinux |grep _MODULE_
015a20 52494659 494e475f 4d4f4455 4c455f53 RIFYING_MODULE_S
03ee70 574e5f4d 4f44554c 455f5349 474e4154 WN_MODULE_SIGNAT
03f180 444f574e 5f4d4f44 554c455f 50415241 DOWN_MODULE_PARA
1bb530 45544854 4f4f4c5f 4d4f4455 4c455f50 ETHTOOL_MODULE_P
1bba20 4f4f4c5f 4d4f4455 4c455f50 4f574552 OOL_MODULE_POWER
1bba80 54455f4d 4f44554c 455f434d 49535f4e TE_MODULE_CMIS_N
...
+/*
> #define MODULE_PSEUDO_SYMBOL(sp) \
> ((STRNEQ((sp)->name, "_MODULE_START_") || STRNEQ((sp)->name,
> "_MODULE_END_")) || \
> (STRNEQ((sp)->name, "_MODULE_INIT_START_") ||
STRNEQ((sp)->name,
> "_MODULE_INIT_END_")) || \
> (STRNEQ((sp)->name, "_MODULE_SECTION_")))
> +*/
>
> #define MODULE_START(sp) (STRNEQ((sp)->name, "_MODULE_START_"))
> #define MODULE_END(sp) (STRNEQ((sp)->name, "_MODULE_END_"))
> @@ -1282,6 +1293,93 @@ symname_hash_search(struct syment *table[], char
> *name)
> #define MODULE_SECTION_START(sp) (STRNEQ((sp)->name,
> "_MODULE_SECTION_START"))
> #define MODULE_SECTION_END(sp) (STRNEQ((sp)->name,
> "_MODULE_SECTION_END"))
>
> +#define MODULE_MEM_START(sp,i) (STRNEQ((sp)->name, module_start_tags[i]))
> +#define MODULE_MEM_END(sp,i) (STRNEQ((sp)->name, module_end_tags[i]))
> +
> +/* For 6.4 and later */
> +static void
> +module_symbol_dump(char *module)
> +{
> + int i, j, start, percpu_syms;
> + struct syment *sp, *sp_end;
>
Indent issues.
Oh thanks, just copied from symbol_dump().
> + struct load_module *lm;
> + const char *p1, *p2;
> +
> +#define TBD 1
> +#define DISPLAYED 2
> +
> + for (i = 0; i < st->mods_installed; i++) {
> +
> + lm = &st->load_modules[i];
> + if (module && !STREQ(module, lm->mod_name))
> + continue;
> +
> + if (received_SIGINT() || output_closed())
> + return;
> +
>
The variable "j" should be defined as enum mod_mem_type.
hmm, so can we change the enums to macros for now?
#define MOD_TEXT 0
#define MOD_DATA 1
...
I don't see enum's good point here in crash.
> + for (j = MOD_TEXT; j < MOD_MEM_NUM_TYPES; j++) {
> +
> + if (!lm->symtable[j])
> + continue;
> +
> + sp = lm->symtable[j];
> + sp_end = lm->symend[j];
> + percpu_syms = 0;
> +
>
The variable start is set to FALSE, looks strange, why not just set it to
zero?
copied from symbol_dump(), and removed in 07/15.
https://listman.redhat.com/archives/crash-utility/2023-May/010649.html
> + for (start = FALSE; sp <= sp_end; sp++) {
> + /* TODO
> + if (IN_MODULE_PERCPU(sp->value, lm)) {
> + if (percpu_syms == DISPLAYED)
> + continue;
> + if (!start) {
> + percpu_syms = TBD;
> + continue;
> + }
> + dump_percpu_symbols(lm);
> + percpu_syms = DISPLAYED;
> + }
> + */
> + if (MODULE_PSEUDO_SYMBOL(sp)) {
> + if (MODULE_SECTION_START(sp)) {
> + p1 = sp->name +
> strlen("_MODULE_SECTION_START ");
> + p2 = "section start";
> + } else if (MODULE_SECTION_END(sp))
> {
> + p1 = sp->name +
> strlen("_MODULE_SECTION_END ");
> + p2 = "section end";
> + } else if (STRNEQ(sp->name,
> module_start_tags[j])) {
>
MODULE_MEM_START(sp, i)
+ p1 = module_start_strs[j];
> + p2 = sp->name +
> strlen(module_start_tags[j]);
> + start = TRUE;
> + } else if (STRNEQ(sp->name,
> module_end_tags[j])) {
>
MODULE_MEM_END(sp, i)
Indeed.
> + p1 = module_end_strs[j];
> + p2 = sp->name +
> strlen(module_end_tags[j]);
> + /* TODO
> + if
> (MODULE_PERCPU_SYMS_LOADED(lm) &&
> + !percpu_syms) {
> +
> dump_percpu_symbols(lm);
> + percpu_syms =
> DISPLAYED;
> + }
> + */
> + } else {
> + p1 = "unknown tag";
> + p2 = sp->name;
> + }
> +
> + fprintf(fp, "%lx %s: %s\n",
> sp->value, p1, p2);
> +
> + /* TODO
> + if (percpu_syms == TBD) {
> + dump_percpu_symbols(lm);
> + percpu_syms = DISPLAYED;
> + }
> + */
> + } else
> + show_symbol(sp, 0, SHOW_RADIX());
> + }
> + }
> + }
> +}
> +
> static void
> symbol_dump(ulong flags, char *module)
> {
> @@ -1304,6 +1402,11 @@ symbol_dump(ulong flags, char *module)
> if (!(flags & MODULE_SYMS))
> return;
>
> + if (MODULE_MEMORY()) { /* 6.4 and later */
> + module_symbol_dump(module);
> + return;
> + }
> +
> for (i = 0; i < st->mods_installed; i++) {
>
> lm = &st->load_modules[i];
> @@ -1808,6 +1911,24 @@ static const char *module_end_tags[] = {
> "_MODULE_INIT_DATA_END_",
> "_MODULE_INIT_RODATA_END_"
> };
> +static const char *module_start_strs[] = {
> + "MODULE TEXT START",
> + "MODULE DATA START",
> + "MODULE RODATA START",
> + "MODULE RO_AFTER_INIT START",
> + "MODULE INIT_TEXT START",
> + "MODULE INIT_DATA START",
> + "MODULE INIT_RODATA START"
> +};
> +static const char *module_end_strs[] = {
> + "MODULE TEXT END",
> + "MODULE DATA END",
> + "MODULE RODATA END",
> + "MODULE RO_AFTER_INIT END",
> + "MODULE INIT_TEXT END",
> + "MODULE INIT_DATA END",
> + "MODULE INIT_RODATA END"
> +};
>
>
As I mentioned in the [PATCH 01/15], similarly the above strings are in
pairs, so they can be defined as one array or macro substitution.
Yes, how about this?
struct module_tag {
char *start;
char *end;
char *start_str;
char *end_str;
};
#define MODULE_TAG(type, suffix) ("_MODULE_" #type "_"
#suffix)
#define MODULE_STR(type, suffix) ( "MODULE " #type " "
#suffix)
#define MODULE_TAGS(type) { \
.start = MODULE_TAG(type, START), \
.end = MODULE_TAG(type, END), \
.start_str = MODULE_STR(type, START), \
.end_str = MODULE_STR(type, END) \
}
static const struct module_tag module_tag[] = {
MODULE_TAGS(TEXT),
MODULE_TAGS(DATA),
MODULE_TAGS(RODATA),
MODULE_TAGS(RO_AFTER_INIT),
MODULE_TAGS(INIT_TEXT),
MODULE_TAGS(INIT_DATA),
MODULE_TAGS(INIT_RODATA),
};
/*
> * Linux 6.4 introduced module.mem memory layout
> @@ -5278,6 +5399,85 @@ module_symbol(ulong value,
> return FALSE;
> }
>
> +/* Linux 6.4 and later */
> +struct syment *
> +value_search_module_v2(ulong value, ulong *offset)
> +{
> + int i, j;
> + struct syment *sp, *sp_end, *spnext, *splast;
>
Indent issues.
will fix.
> + struct load_module *lm;
> +
> + for (i = 0; i < st->mods_installed; i++) {
> + lm = &st->load_modules[i];
> +
> + if (!IN_MODULE(value, lm) && !IN_MODULE_INIT(value, lm))
> + continue;
> +
>
The variable "j" should be defined as enum mod_mem_type.
> + for (j = MOD_TEXT; j < MOD_MEM_NUM_TYPES; j++) {
> + sp = lm->symtable[j];
> + sp_end = lm->symend[j];
> +
> + if (value < sp->value)
> + continue;
> +
> + splast = NULL;
> + for ( ; sp <= sp_end; sp++) {
> + /* TODO
> + if (machine_type("ARM64") &&
> + IN_MODULE_PERCPU(sp->value, lm) &&
> + !IN_MODULE_PERCPU(value, lm))
> + continue;
> + */
> +
>
Currently, I'm still building the kernel for aarch64, will check it next
time.
+ if (value == sp->value) {
> + if (MODULE_MEM_END(sp, j))
> + break;
> +
> + if (MODULE_PSEUDO_SYMBOL(sp)) {
> + spnext = sp + 1;
> + if
> (MODULE_PSEUDO_SYMBOL(spnext))
> + continue;
> + if (spnext->value == value)
> + sp = spnext;
> + }
> + /* TODO: probably not needed
> anymore? */
>
It's hard to track the change log for an old kernel and crash-utility. But
for the kernel 6.4 and later, it could be safe to drop it, crash won't go
into this code path.
Yes, will remove the insmod related codes.
> + if (is_insmod_builtin(lm, sp)) {
> + spnext = sp+1;
> + if ((spnext < sp_end) &&
> + (value ==
> spnext->value))
> + sp = spnext;
> + }
> + if (sp->name[0] == '.') {
> + spnext = sp+1;
> + if (spnext->value == value)
> + sp = spnext;
> + }
> + if (offset)
> + *offset = 0;
> + return sp;
> + }
> +
> + if (sp->value > value) {
> + sp = splast ? splast : sp - 1;
> + if (offset)
> + *offset = value -
> sp->value;
> + return sp;
> + }
> +
>
Why is it needed? The splast will also store the "__insmod_"-type symbols?
To return the previous sp and offset, when value is lower than the
current sp.
+ if (!MODULE_PSEUDO_SYMBOL(sp)) {
> + if (is_insmod_builtin(lm, sp)) {
> + if (!splast || (sp->value
>> splast->value))
> + splast = sp;
> + } else
> + splast = sp;
> + }
> + }
> + }
> + }
> +
> + return NULL;
> +}
> +
> struct syment *
> value_search_module(ulong value, ulong *offset)
> {
> @@ -5286,6 +5486,9 @@ value_search_module(ulong value, ulong *offset)
> struct load_module *lm;
> int search_init_sections, search_init;
>
> + if (MODULE_MEMORY()) /* Linux 6.4 and later */
> + return value_search_module_v2(value, offset);
> +
>
Here, the suffix _v2 also looks fine to me, but if it can be aligned with
the kernel version suffix, that will keep the same code style as the [patch
01/15].
Sounds good.
> search_init = FALSE;
> search_init_sections = 0;
>
> @@ -13958,19 +14161,32 @@ _in_module(ulong addr, struct load_module *lm,
> int type)
> return ((addr >= base) && (addr < (base + size)));
> }
>
> -/* Returns the end address of the module memory region. */
> -static ulong
> -module_mem_end(ulong addr, struct load_module *lm)
> +/* Returns module memory type, otherwise MOD_INVALID(-1) */
> +static int
> +module_mem_type(ulong addr, struct load_module *lm)
> {
> - ulong base, end;
> + ulong base;
> int i;
>
The variable "i" should be defined as enum mod_mem_type, the compiler will
be helpful to check for any potential errors(warnings).
+
> for (i = MOD_TEXT; i < MOD_MEM_NUM_TYPES; i++) {
> base = lm->mem[i].base;
> if (!base)
> continue;
>
Kernel usually tends to check the lm->mem[i].size instead of
lm->mem[i].base, for example:
ulong size;
...
size = lm->mem[i].size;
if (!size)
continue;
ok, will try.
> - end = base + lm->mem[i].size;
> - if ((addr >= base) && (addr < end))
> - return end;
> + if ((addr >= base) && (addr < base +
lm->mem[i].size))
> + return i;
>
base = lm->mem[i].base;
if ((addr >= base) && (addr < base + size))
> }
> - return 0;
> +
> + return MOD_INVALID;
> +}
> +
> +/* Returns the end address of the module memory region. */
> +static ulong
> +module_mem_end(ulong addr, struct load_module *lm)
> +{
> + int type = module_mem_type(addr, lm);
> +
>
The module_mem_type() will ensure that the type does not exceed the range
of type, just like the code comment. So no need to check for the condition
"type >= MOD_MEM_NUM_TYPES" as below.
ok, will remove it.
Thanks,
Kazu