Hi Philipp,
Thanks for reviewing the patch and the comments!
On Mon, Aug 30, 2021 at 05:49:32PM +0200, Philipp Rudo wrote:
Hi Tao,
great patch. I have some comments and questions though.
On Sun, 22 Aug 2021 09:51:12 +0800
Tao Liu <ltao(a)redhat.com> wrote:
> Currently the sequence for crash searching a symbol is: 1) kernel symname
> hash table, 2) iterate all kernel symbols, 3) iterate all kernel modules
> and their symbols. In the worst case, if a non-exist symbol been searched,
> all 3 stages will be went through. The time consuming status for each stage
> is like:
>
> stage 1 stage 2 stage 3
> 0.007000(ms) 0.593000(ms) 2.421000(ms)
>
> stage 3 takes too much time when comparing to stage 1. So let's introduce a
> symname hash table for kernel modules to improve the performance of symbol
> searching.
>
> Signed-off-by: Tao Liu <ltao(a)redhat.com>
> ---
> v1 -> v2:
> - Removed unused variables within the modified functions.
>
> ---
> defs.h | 1 +
> kernel.c | 1 +
> symbols.c | 189 +++++++++++++++++++++++++++++++-----------------------
> 3 files changed, 111 insertions(+), 80 deletions(-)
>
> diff --git a/defs.h b/defs.h
> index eb1c71b..58b8546 100644
> --- a/defs.h
> +++ b/defs.h
> @@ -2751,6 +2751,7 @@ struct symbol_table_data {
> double val_hash_searches;
> double val_hash_iterations;
> struct syment *symname_hash[SYMNAME_HASH];
> + struct syment *mod_symname_hash[SYMNAME_HASH];
^^^^^^^^
there are quite some whitespace damages throughout the patch. Most
of them seem to origin from old code that gets copy & pasted. It
would be great if you could fix them on the lines you are touching.
OK, I will replace the whitespace with tabs.
> struct symbol_namespace kernel_namespace;
> struct syment *ext_module_symtable;
> struct syment *ext_module_symend;
> diff --git a/kernel.c b/kernel.c
> index 36fdea2..c56cc34 100644
> --- a/kernel.c
> +++ b/kernel.c
> @@ -4663,6 +4663,7 @@ reinit_modules(void)
> kt->mods_installed = 0;
> clear_text_value_cache();
>
> + memset(st->mod_symname_hash, 0, sizeof(st->mod_symname_hash));
> module_init();
> }
>
> diff --git a/symbols.c b/symbols.c
> index bf6d94d..9b64734 100644
> --- a/symbols.c
> +++ b/symbols.c
> @@ -64,8 +64,9 @@ static int namespace_ctl(int, struct symbol_namespace *, void *,
void *);
> static void symval_hash_init(void);
> static struct syment *symval_hash_search(ulong);
> static void symname_hash_init(void);
> -static void symname_hash_install(struct syment *);
> -static struct syment *symname_hash_search(char *);
> +static void symname_hash_install(struct syment *[], struct syment *);
> +static void symname_hash_remove(struct syment *[], struct syment *);
> +static struct syment *symname_hash_search(struct syment *[], char *, int (*)(struct
syment *, char *));
> static void gnu_qsort(bfd *, void *, long, unsigned int, asymbol *, asymbol *);
> static int check_gnu_debuglink(bfd *);
> static int separate_debug_file_exists(const char *, unsigned long, int *);
> @@ -1119,7 +1120,7 @@ symname_hash_init(void)
> struct syment *sp;
>
> for (sp = st->symtable; sp < st->symend; sp++)
> - symname_hash_install(sp);
> + symname_hash_install(st->symname_hash, sp);
>
> if ((sp = symbol_search("__per_cpu_start")))
> st->__per_cpu_start = sp->value;
> @@ -1127,21 +1128,48 @@ symname_hash_init(void)
> st->__per_cpu_end = sp->value;
> }
>
> +static void
> +mod_symtable_hash_install_range(struct syment *from, struct syment *to)
> +{
> + struct syment *sp;
> +
> + for (sp = from; sp <= to; sp++) {
> + if (sp != NULL) {
> + symname_hash_install(st->mod_symname_hash, sp);
> + }
> + }
> +}
> +
> +static void
> +mod_symtable_hash_remove_range(struct syment *from, struct syment *to)
> +{
> + struct syment *sp;
> +
> + for (sp = from; sp <= to; sp++) {
> + if (sp != NULL) {
> + symname_hash_remove(st->mod_symname_hash, sp);
> + }
> + }
> +}
1. 'if (sp)' is the same like 'if (sp != NULL)' but shorter.
That's why personally I prefer the first version :)
Agreed.
2. Wouldn't it make sense to move this check to the beginning of
symname_hash_{install,remove}? Then also the other users like
symname_hash_init would benefit.
Good suggestion, agreed.
> /*
> * Install a single static kernel symbol into the symname_hash.
> */
> static void
> -symname_hash_install(struct syment *spn)
> +symname_hash_install(struct syment *table[], struct syment *spn)
> {
> struct syment *sp;
> int index;
>
> index = SYMNAME_HASH_INDEX(spn->name);
> + index = index > 0 ? index : -index;
true, in theory index could be negative which would be really bad. But
isn't that an independent problem from the rest of the patch? If so
this change should go in an extra patch.
Furthermore the fix should be done in SYMNAME_HASH_INDEX so that all of
its users are fixed. The way I see it this should be enough (using abs
from stdlib.h)
#define SYMNAME_HASH_INDEX(name) \
- ((name[0] ^ (name[strlen(name)-1] * name[strlen(name)/2])) % SYMNAME_HASH)
+ (abs((name[0] ^ (name[strlen(name)-1] * name[strlen(name)/2])) % SYMNAME_HASH))
The index can be negative, though rare, but it is essential to make this patch run
properly. I have encountered a few cases, though I couldn't remenber clearly, which
contains
symbols like: "\x77mod". As a result, I got a negative index. I prefer to keep
it in
this patch, and I agree with your abs version.
> spn->cnt = 1;
>
> - if ((sp = st->symname_hash[index]) == NULL)
> - st->symname_hash[index] = spn;
> - else {
> + if ((sp = table[index]) == NULL) {
> + table[index] = spn;
> + spn->name_hash_next = NULL;
similar to above, isn't explicitly setting spn->name_hash_next = NULL
an independent problem from the rest of the changes and thus should go
to a separate patch?
Agreed.
> + } else {
> while (sp) {
> if (STREQ(sp->name, spn->name)) {
> sp->cnt++;
> @@ -1151,23 +1179,67 @@ symname_hash_install(struct syment *spn)
> sp = sp->name_hash_next;
> else {
> sp->name_hash_next = spn;
> + spn->name_hash_next = NULL;
> break;
> }
> }
> }
> }
>
> +static void
> +symname_hash_remove(struct syment *table[], struct syment *spn)
> +{
> + struct syment *sp, **tmp;
> + int index, first_encounter = 1;
> +
> + index = SYMNAME_HASH_INDEX(spn->name);
> + index = index > 0 ? index : -index;
> +
> + if ((sp = table[index]) == NULL)
> + return;
> +
> + for (tmp = &table[index], sp = table[index]; sp; ) {
> + if (STREQ(sp->name, spn->name)) {
> + if (sp != spn) {
> + sp->cnt--;
> + spn->cnt--;
> + } else if (!first_encounter) {
> + sp->cnt--;
> + } else {
> + *tmp = sp->name_hash_next;
> + first_encounter = 0;
> +
> + tmp = &(*tmp)->name_hash_next;
> + sp = sp->name_hash_next;
> + spn->name_hash_next = NULL;
> + continue;
> + }
> + }
> + tmp = &sp->name_hash_next;
> + sp = sp->name_hash_next;
What do you need tmp for? The way I see it you only assign to it but
never really use it.
Since the elements arranged by the hash table are singly linked list.
If we want to remove a specific element out of the list, we need to update
the field which the previous element used to point to the next element. To
do that, I keep the address of the previous-element-pointing-to-the-next field
into tmp variable,
You can see it is used in code: *tmp = sp->name_hash_next;
> + }
> +}
> +
> /*
> * Static kernel symbol value search
> */
> static struct syment *
> -symname_hash_search(char *name)
> +symname_hash_search(struct syment *table[], char *name,
> + int (*skip_condition)(struct syment *, char *))
this line should be indented to match the open parentheses after the
function name.
OK.
> {
> struct syment *sp;
> + int index;
>
> - sp = st->symname_hash[SYMNAME_HASH_INDEX(name)];
> + index = SYMNAME_HASH_INDEX(name);
> + index = index > 0 ? index : -index;
> + sp = table[index];
>
> while (sp) {
> + if (skip_condition && skip_condition(sp, name)) {
> + sp = sp->name_hash_next;
> + continue;
> + }
> +
> if (STREQ(sp->name, name))
> return sp;
> sp = sp->name_hash_next;
> @@ -1595,6 +1667,7 @@ store_module_symbols_v1(ulong total, int mods_installed)
> lm->mod_symend = sp;
> }
> }
> + mod_symtable_hash_install_range(lm->mod_symtable, lm->mod_symend);
> }
>
> st->flags |= MODULE_SYMS;
> @@ -2075,6 +2148,8 @@ store_module_symbols_v2(ulong total, int mods_installed)
> lm->mod_init_symend = sp;
> }
> }
> + mod_symtable_hash_install_range(lm->mod_symtable, lm->mod_symend);
> + mod_symtable_hash_install_range(lm->mod_init_symtable,
lm->mod_init_symend);
> }
>
> st->flags |= MODULE_SYMS;
> @@ -4482,6 +4557,16 @@ symbol_query(char *s, char *print_pad, struct syment **spp)
> return(cnt);
> }
>
> +static int
> +skip_symbols(struct syment *sp, char *s)
> +{
> + int pseudos, skip = 0;
> + pseudos = (strstr(s, "_MODULE_START_") || strstr(s,
"_MODULE_END_") ||
> + strstr(s, "_MODULE_INIT_START_") || strstr(s,
"_MODULE_INIT_END_"));
> + if (!pseudos && MODULE_PSEUDO_SYMBOL(sp))
> + skip = 1;
> + return skip;
> +}
It took really long for me to wrap my head around what is happening
here but in the end I'm pretty sure that the extra filtering is
unnecessary and can simply be dropped without problem. To be fair
what you are doing seems correct it's just by cleaning up the code the
problem became more obvious...
Me too, hard for me to figure out what's going on here. My thought was don't
go too far at one step, for now I just tried to keep it as it was. When
the code is stable enough, then get this part optimized...
Let's see what is happening here:
1) strstr returns a pointer to the start of the second string if is is
contained in the first one and NULL otherwise. This means 'pseudos'
is true if 's' contains any of the _MODULE_* strings, i.e. if s is a
pseudo symbol.
2) MODULE_PSEUDO_SYMBOL does basically the same only that it checks
'sp->name' instead of 's' and enforces that the
"_MODULE_*" strings
are at the beginning of the symbol name not just contained in it.
Let's look at the different cases
3.1) both 's' and 'sp' are proper, i.e. no pseudo, symbols
This means skip_symbols returns false so symname_hash_search
doesn't skip the symbol but compares 's' to 'sp->name' to
check if
'sp' is the symbol you are searching for. This is basically the
case you want.
3.2) both 's' and 'sp' are pseudo symbols
Again skip_symbols returns false and symname_hash_search compares
's' with 'sp->name' to check if 'sp' is the symbol you
are
searching for. I'm not entirely sure if this way
symname_hash_search is intended to be used but I also don't see a
reason why it shouldn't be done.
3.3) 's' is a pseudo and 'sp' a proper symbol
same like 3.2).
3.4) 's' is a proper symbol and 'sp' a psudo symbol
here skip_symbols returns true and symname_hash_search skips this
case.
So the only case that is filtered out is 3.4 in which 's' must not
contain any '_MODULES_*' while 'sp->name' has to start with one. But
that's something a simple STREQ can handle like in case 3.3. So what's
the point in having this extra filtering?
As you pointed out, the only case to skip is 3.4): A) s is not pseudo, and B) sp is
psedudo.
But the "pseudo" of s is different from the "psedudo" of sp.
Let's say "_MODULE_START_", "_MODULE_END_",
"_MODULE_INIT_START_", "_MODULE_INIT_END_"
are true pseudo symbols.
For s is not pseudo, s can be one of "proper symbol" and
"_MODULE_SECTION_" symbol.
For sp is pseudo, sp can be one of "true pseudo symbol" and
"_MODULE_SECTION_" symbol.
Since "proper symbol" and "true pseudo symbol" can never be the same,
so skip it or not doesn't
matter, it cannot pass the STREQ check later. The only case left is _MODULE_SECTION_
symbol.
If s and sp are both _MODULE_SECTION_ symbol, even they are equal string, it will be
skipped.
From my view it is the only use case for the skip. I agree the code
should be optimized.
> /*
> * Return the syment of a symbol.
> @@ -4489,58 +4574,16 @@ symbol_query(char *s, char *print_pad, struct syment **spp)
> struct syment *
> symbol_search(char *s)
> {
> - int i;
> - struct syment *sp_hashed, *sp, *sp_end;
> - struct load_module *lm;
> - int pseudos, search_init;
> + struct syment *sp_hashed, *sp;
>
> - sp_hashed = symname_hash_search(s);
> + sp_hashed = symname_hash_search(st->symname_hash, s, NULL);
>
> for (sp = sp_hashed ? sp_hashed : st->symtable; sp < st->symend;
sp++) {
> if (STREQ(s, sp->name))
> return(sp);
> }
>
> - pseudos = (strstr(s, "_MODULE_START_") || strstr(s,
"_MODULE_END_"));
> - search_init = FALSE;
> -
> - for (i = 0; i < st->mods_installed; i++) {
> - lm = &st->load_modules[i];
> - if (lm->mod_flags & MOD_INIT)
> - search_init = TRUE;
> - sp = lm->mod_symtable;
> - sp_end = lm->mod_symend;
> -
> - for ( ; sp <= sp_end; sp++) {
> - if (!pseudos && MODULE_PSEUDO_SYMBOL(sp))
> - continue;
> - if (STREQ(s, sp->name))
> - return(sp);
> - }
> - }
> -
> - if (!search_init)
> - return((struct syment *)NULL);
> -
> - pseudos = (strstr(s, "_MODULE_INIT_START_") || strstr(s,
"_MODULE_INIT_END_"));
> -
> - for (i = 0; i < st->mods_installed; i++) {
> - lm = &st->load_modules[i];
> - if (!lm->mod_init_symtable)
> - continue;
> - sp = lm->mod_init_symtable;
> - sp_end = lm->mod_init_symend;
> -
> - for ( ; sp < sp_end; sp++) {
> - if (!pseudos && MODULE_PSEUDO_SYMBOL(sp))
> - continue;
> -
> - if (STREQ(s, sp->name))
> - return(sp);
> - }
> - }
> -
> - return((struct syment *)NULL);
> + return symname_hash_search(st->mod_symname_hash, s, skip_symbols);
> }
>
> /*
> @@ -5432,33 +5475,13 @@ value_symbol(ulong value)
> int
> symbol_exists(char *symbol)
> {
> - int i;
> - struct syment *sp, *sp_end;
> - struct load_module *lm;
> + struct syment *sp;
>
> - if ((sp = symname_hash_search(symbol)))
> + if ((sp = symname_hash_search(st->symname_hash, symbol, NULL)))
> return TRUE;
>
> - for (i = 0; i < st->mods_installed; i++) {
> - lm = &st->load_modules[i];
> - sp = lm->mod_symtable;
> - sp_end = lm->mod_symend;
> -
> - for ( ; sp < sp_end; sp++) {
> - if (STREQ(symbol, sp->name))
> - return(TRUE);
> - }
> -
> - if (lm->mod_init_symtable) {
> - sp = lm->mod_init_symtable;
> - sp_end = lm->mod_init_symend;
> -
> - for ( ; sp < sp_end; sp++) {
> - if (STREQ(symbol, sp->name))
> - return(TRUE);
> - }
> - }
> - }
> + if ((sp = symname_hash_search(st->mod_symname_hash, symbol, NULL)))
> + return TRUE;
>
> return(FALSE);
> }
Isn't this function basically identical to symbol_search and thus can
be abbreviated to
return !!(symbol_search(symbol));
In the original symbol_search, there are 3 stages to find a symbol:
1) search in kernel symbols hash table.
2) iterate over all kernel symbols.
3) iterate over all kernel mods and their symbols.
As for symbol_exists, it only do 1) 3) stages. Personally I think stage 2) is
unnecessary, but I didn't have a strong reason to remove it. Thus I didn't
replace symbol_exists with symbol_search directly. If stage 2) can be removed,
then I'm OK with your modification.
> @@ -5515,7 +5538,7 @@ kernel_symbol_exists(char *symbol)
> {
> struct syment *sp;
>
> - if ((sp = symname_hash_search(symbol)))
> + if ((sp = symname_hash_search(st->symname_hash, symbol, NULL)))
> return TRUE;
> else
> return FALSE;
same like above. This could be abbreviated to
return !!(symname_hash_search(st->symname_hash, symbol, NULL));
Agreed, this one can be replaced this way.
> @@ -5527,7 +5550,7 @@ kernel_symbol_exists(char *symbol)
> struct syment *
> kernel_symbol_search(char *symbol)
> {
> - return symname_hash_search(symbol);
> + return symname_hash_search(st->symname_hash, symbol, NULL);
> }
>
> /*
> @@ -12464,8 +12487,10 @@ store_load_module_symbols(bfd *bfd, int dynamic, void
*minisyms,
> error(INFO, "%s: last symbol: %s is not _MODULE_END_%s?\n",
> lm->mod_name, lm->mod_load_symend->name, lm->mod_name);
>
> + mod_symtable_hash_remove_range(lm->mod_symtable, lm->mod_symend);
> lm->mod_symtable = lm->mod_load_symtable;
> lm->mod_symend = lm->mod_load_symend;
> + mod_symtable_hash_install_range(lm->mod_symtable, lm->mod_symend);
>
> lm->mod_flags &= ~MOD_EXT_SYMS;
> lm->mod_flags |= MOD_LOAD_SYMS;
> @@ -12495,6 +12520,7 @@ delete_load_module(ulong base_addr)
> req->name = lm->mod_namelist;
> gdb_interface(req);
> }
> + mod_symtable_hash_remove_range(lm->mod_symtable, lm->mod_symend);
> if (lm->mod_load_symtable) {
> free(lm->mod_load_symtable);
> namespace_ctl(NAMESPACE_FREE,
> @@ -12504,6 +12530,7 @@ delete_load_module(ulong base_addr)
> unlink_module(lm);
> lm->mod_symtable = lm->mod_ext_symtable;
> lm->mod_symend = lm->mod_ext_symend;
> + mod_symtable_hash_install_range(lm->mod_symtable, lm->mod_symend);
> lm->mod_flags &= ~(MOD_LOAD_SYMS|MOD_REMOTE|MOD_NOPATCH);
> lm->mod_flags |= MOD_EXT_SYMS;
> lm->mod_load_symtable = NULL;
> @@ -12532,6 +12559,7 @@ delete_load_module(ulong base_addr)
> req->name = lm->mod_namelist;
> gdb_interface(req);
> }
> + mod_symtable_hash_remove_range(lm->mod_symtable, lm->mod_symend);
> if (lm->mod_load_symtable) {
> free(lm->mod_load_symtable);
> namespace_ctl(NAMESPACE_FREE,
> @@ -12541,6 +12569,7 @@ delete_load_module(ulong base_addr)
> unlink_module(lm);
> lm->mod_symtable = lm->mod_ext_symtable;
> lm->mod_symend = lm->mod_ext_symend;
> + mod_symtable_hash_install_range(lm->mod_symtable, lm->mod_symend);
> lm->mod_flags &=
~(MOD_LOAD_SYMS|MOD_REMOTE|MOD_NOPATCH);
> lm->mod_flags |= MOD_EXT_SYMS;
> lm->mod_load_symtable = NULL;
I must admit I don't understand how the last two functions work, so I'm
relying on Kazu to comment on those.
The difference of mod symbols and kernel symbols, is that kernel symbols will not change
after loaded
into hash table, mod symbols can get modified by "mod" cmd. Whenever mod symbols
changed, it should
be synced to mod symbols hash table. The above changed lines are trying to do that.
Thanks,
Tao Liu
Thanks
Philipp