Hi Lijiang,
On Thu, Jun 27, 2024 at 9:27 PM lijiang <lijiang(a)redhat.com> wrote:
 On Fri, May 31, 2024 at 5:33 PM <devel-request(a)lists.crash-utility.osci.io> wrote:
>
> Date: Fri, 31 May 2024 17:19:34 +0800
> From: Tao Liu <ltao(a)redhat.com>
> Subject: [Crash-utility] [PATCH v4 11/16] Fix cpumask_t recursive
>         dependence issue
> To: devel(a)lists.crash-utility.osci.io
> Cc: Mahesh J Salgaonkar <mahesh(a)linux.ibm.com>, "Naveen N . Rao"
>         <naveen.n.rao(a)linux.vnet.ibm.com>, Lianbo Jiang
<lijiang(a)redhat.com>,
>         Alexey Makhalov <alexey.makhalov(a)broadcom.com>
> Message-ID: <20240531091939.97828-12-ltao(a)redhat.com>
> Content-Type: text/plain; charset=UTF-8
>
> There is recursive dependence for cpumask_t and will exhause the stack,
> see the following stack trace:
>
> (gdb) bt
>  ...snip...
>  #61965 0x00000000005de98c in datatype_info (name=name@entry=0xa5b1fd
"cpumask_t", member=member@entry=0x0, dm=dm@entry=0xfffffffffffffffc) at
symbols.c:6694
>  #61966 0x000000000057e4ea in cpu_map_size ...
>  #61967 0x000000000058e7bd in get_cpus_online ...
>  #61968 0x000000000061fa4b in diskdump_get_prstatus_percpu ...
>  #61969 0x0000000000616d74 in get_netdump_regs_x86_64 ...
>  #61970 0x0000000000585290 in get_dumpfile_regs ...
>  #61971 0x00000000005b7a3c in x86_64_get_current_task_reg ...
>  #61972 0x0000000000650389 in crash_target::fetch_registers ...
>  #61973 0x00000000008f385a in target_fetch_registers ...
>  #61974 0x000000000086ecda in regcache::raw_update ...
>  #61975 regcache::raw_update ...
>  #61976 0x000000000086ed7a in readable_regcache::raw_read ...
>  #61977 0x000000000086f063 in readable_regcache::cooked_read_value ...
>  #61978 0x000000000089c4ee in sentinel_frame_prev_register ...
>  #61979 0x0000000000786c76 in frame_unwind_register_value ...
>  #61980 0x0000000000786f18 in frame_register_unwind ...
>  #61981 0x0000000000787267 in frame_unwind_register ...
>  #61982 0x00000000007ad9b0 in i386_unwind_pc ...
>  #61983 0x00000000007866c0 in frame_unwind_pc ...
>  #61984 0x000000000078679c in get_frame_pc ...
>  #61985 get_frame_address_in_block ...
>  #61986 0x0000000000786849 in get_frame_address_in_block_if_available ...
>  #61987 0x0000000000691466 in get_frame_block ...
>  #61988 0x00000000008b9430 in get_selected_block ...
>  #61989 0x000000000084f8f2 in parse_exp_in_context ...
>  #61990 0x000000000084f9e5 in parse_exp_1 ...
>  #61991 parse_expression ...
>  #61992 0x00000000008d44da in gdb_get_datatype ...
>  #61993 gdb_command_funnel_1 ...
>  #61994 0x00000000008d48ae in gdb_command_funnel ...
>  #61995 0x000000000059cc42 in gdb_interface ...
>  #61996 0x00000000005de98c in datatype_info (name=name@entry=0xa5b1fd
"cpumask_t", member=member@entry=0x0, dm=dm@entry=0xfffffffffffffffc) at
symbols.c:6694
>  #61997 0x000000000057e4ea in cpu_map_size ...
>  #61998 0x000000000058e7bd in get_cpus_online () ...
>  #61999 0x000000000061fa4b in diskdump_get_prstatus_percpu ...
>  #62000 0x0000000000616d74 in get_netdump_regs_x86_64 ...
>  #62001 0x0000000000585290 in get_dumpfile_regs ...
>  #62002 0x00000000005b7a3c in x86_64_get_current_task_reg ...
>  #62003 0x0000000000650389 in crash_target::fetch_registers ...
>
> The cpumask_t will be recursively evaluated. This patch will
> fix the bug.
>
> Cc: Sourabh Jain <sourabhjain(a)linux.ibm.com>
> Cc: Hari Bathini <hbathini(a)linux.ibm.com>
> Cc: Mahesh J Salgaonkar <mahesh(a)linux.ibm.com>
> Cc: Naveen N. Rao <naveen.n.rao(a)linux.vnet.ibm.com>
> Cc: Lianbo Jiang <lijiang(a)redhat.com>
> Cc: HAGIO KAZUHITO(萩尾 一仁) <k-hagio-ab(a)nec.com>
> Cc: Tao Liu <ltao(a)redhat.com>
> Cc: Alexey Makhalov <alexey.makhalov(a)broadcom.com>
> Signed-off-by: Tao Liu <ltao(a)redhat.com>
> ---
>  defs.h   |  1 +
>  kernel.c | 17 ++++++++++-------
>  2 files changed, 11 insertions(+), 7 deletions(-)
>
> diff --git a/defs.h b/defs.h
> index ed52cc3..fd00462 100644
> --- a/defs.h
> +++ b/defs.h
> @@ -2429,6 +2429,7 @@ struct size_table {         /* stash of commonly-used sizes */
>         long maple_tree;
>         long maple_node;
>         long module_memory;
> +       long cpumask_t;
>  };
>
 Can you add the cpumask_t into the dump_offset_table()?
 
OK, agreed.
>  struct array_table {
> diff --git a/kernel.c b/kernel.c
> index 3730c55..2cae305 100644
> --- a/kernel.c
> +++ b/kernel.c
> @@ -157,6 +157,7 @@ kernel_init()
>          if (!(kt->cpu_flags = (ulong *)calloc(NR_CPUS, sizeof(ulong))))
>                  error(FATAL, "cannot malloc cpu_flags array");
>
> +       STRUCT_SIZE_INIT(cpumask_t, "cpumask_t");
>         cpu_maps_init();
>
>         kt->stext = symbol_value("_stext");
> @@ -913,9 +914,10 @@ cpu_map_size(const char *type)
>         struct gnu_request req;
>
>          if (LKCD_KERNTYPES()) {
> -                if ((len = STRUCT_SIZE("cpumask_t")) < 0)
> -                        error(FATAL, "cannot determine type
cpumask_t\n");
> -               return len;
> +               if (INVALID_SIZE(cpumask_t))
> +                       error(FATAL, "cannot determine type cpumask_t\n");
> +               else
> +                       return SIZE(cpumask_t);
 Let's remove the 'else', keep the original style: 
OK
 +               if (INVALID_SIZE(cpumask_t))
 +                       error(FATAL, "Invalid type cpumask_t\n");
 +
 +               return SIZE(cpumask_t);
>         }
>
>         sprintf(map_symbol, "cpu_%s_map", type);
> @@ -925,11 +927,10 @@ cpu_map_size(const char *type)
>                 return len;
>         }
>
> -       len = STRUCT_SIZE("cpumask_t");
> -       if (len < 0)
> +       if (INVALID_SIZE(cpumask_t))
>                 return sizeof(ulong);
>         else
> -               return len;
> +               return SIZE(cpumask_t);
 Ditto. 
OK
 +       if (INVALID_SIZE(cpumask_t))
              return sizeof(ulong);
 -        else
 -            return len;
 +
 +        return SIZE(cpumask_t);
>
>  }
>
>  /*
> @@ -952,8 +953,10 @@ cpu_maps_init(void)
>                 { ACTIVE_MAP, "active" },
>         };
>
> -       if ((len = STRUCT_SIZE("cpumask_t")) < 0)
> +       if (INVALID_SIZE(cpumask_t))
>                 len = sizeof(ulong);
> +       else
> +               len = SIZE(cpumask_t);
>
>         buf = GETBUF(len);
 In addition, I still found two similar calls as below. Can you try to make similar
changes in those two functions?
 1.  kernel.c:
 void
 generic_get_irq_affinity(int irq)
 {
 ...
         if ((len = STRUCT_SIZE("cpumask_t")) < 0)
                 len = DIV_ROUND_UP(kt->cpus, BITS_PER_LONG) * sizeof(ulong);
 ...
 }
 2. tools.
 ulong *
 get_cpumask_buf(void)
 {
         int cpulen;
         if ((cpulen = STRUCT_SIZE("cpumask_t")) < 0)
                 cpulen = DIV_ROUND_UP(kt->cpus, BITS_PER_LONG) * sizeof(ulong);
         return (ulong *)GETBUF(cpulen);
 } 
OK
Thanks,
Tao Liu
 Thanks
 Lianbo
>
> --
> 2.40.1