 
                                        
                                
                         
                        
                                
                                
                                        
                                                
                                        
                                        
                                        [PATCH] crash: Do not use bt -t flag in panic_search()
                                
                                
                                
                                    
                                        by Michael Holzheu
                                    
                                
                                
                                        Hi Dave,
I got a dump where a process "gmain" was incorrectly marked as running:
crash> ps | grep gmain
>   217      1   5      8bec23420     IN   0.0  463276  18240  gmain
The reason was that the "brute force" way of parsing the "bt -t -o"
output in panic_search() found the symbol "panic" on the stack:
crash> bt -t -o 8bec23420
PID: 217    TASK: 8bec23420         CPU: 5   COMMAND: "gmain"
              START: __schedule at 83f650
  [       8b662b900] (null) at 0
  [       8b662b950] (null) at 0
  [       8b662b978] __schedule at 83f650
  [       8b662b990] (null) at 0
...
  [       8b662bb18] (null) at 0
  [       8b662bb40] panic at 83679a  <<<<<--------------
  [       8b662bb58] _ehead at 280da
The real stack trace was as follows:
crash> bt  8bec23420
Detaching after fork from child process 15508.
PID: 217    TASK: 8bec23420         CPU: 5   COMMAND: "gmain"
 #0 [8b662b8f0] __schedule at 83f650
 #1 [8b662b958] schedule at 83fade
 #2 [8b662b970] schedule_hrtimeout_range_clock at 842fc8
 #3 [8b662ba10] poll_schedule_timeout at 2c6e8a
 #4 [8b662ba30] do_sys_poll at 2c8604
 #5 [8b662be40] sys_poll at 2c8852
 #6 [8b662bea8] system_call at 843a66
IMHO the "-t" method is quite risky (at least on s390). What about using
the "normal" stack backtrace without the "-t" bt option?
---
 task.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--- a/task.c
+++ b/task.c
@@ -6633,7 +6633,7 @@ panic_search(void)
         fd = &foreach_data;
 	fd->keys = 1;
 	fd->keyword_array[0] = FOREACH_BT; 
-	fd->flags |= (FOREACH_t_FLAG|FOREACH_o_FLAG);
+	fd->flags |= FOREACH_o_FLAG;
 
 	dietask = lasttask = NO_TASK;
 	
                                
                         
                        
                                
                                10 years, 2 months
                        
                        
                 
         
 
        
            
        
        
        
                
                        
                                
                                 
                                        
                                
                         
                        
                                
                                
                                        
                                                
                                        
                                        
                                        Re: [Crash-utility] [PATCH] crash: Do not use bt -t flag in panic_search()
                                
                                
                                
                                    
                                        by Dave Anderson
                                    
                                
                                
                                        
    
Ah, it must be because of the common user-kernel virtual address space on s390x?  I can accept a patch if it's s390 only.
Dave
Sent from my Verizon Wireless 4G LTE smartphone
-------- Original message --------
From: Dave Anderson <anderson(a)redhat.com> 
Date: 08/03/2015  11:18 AM  (GMT-05:00) 
To: Michael Holzheu <holzheu(a)linux.vnet.ibm.com> 
Cc: Heiko Carstens <heiko.carstens(a)de.ibm.com>, "Discussion list for crash utility usage,	maintenance and development" <crash-utility(a)redhat.com> 
Subject: Re: [Crash-utility] [PATCH] crash: Do not use bt -t flag in
 	panic_search() 
----- Original Message -----
> Hi Dave,
> 
> I got a dump where a process "gmain" was incorrectly marked as running:
> 
> crash> ps | grep gmain
> >   217      1   5      8bec23420     IN   0.0  463276  18240  gmain
> 
> The reason was that the "brute force" way parsing the "bt -t -o"
> output in panic_search() found the symbol "panic" on the stack:
> 
> crash> bt -t -o 8bec23420
> PID: 217    TASK: 8bec23420         CPU: 5   COMMAND: "gmain"
>               START: __schedule at 83f650
>   [       8b662b900] (null) at 0
>   [       8b662b950] (null) at 0
>   [       8b662b978] __schedule at 83f650
>   [       8b662b990] (null) at 0
> ...
>   [       8b662bb18] (null) at 0
>   [       8b662bb40] panic at 83679a  <<<<<--------------
>   [       8b662bb58] _ehead at 280da
I guess the obvious question is why "panic" was on the stack?
> 
> The real stack trace was as follows:
> 
> crash> bt  8bec23420
> Detaching after fork from child process 15508.
> PID: 217    TASK: 8bec23420         CPU: 5   COMMAND: "gmain"
>  #0 [8b662b8f0] __schedule at 83f650
>  #1 [8b662b958] schedule at 83fade
>  #2 [8b662b970] schedule_hrtimeout_range_clock at 842fc8
>  #3 [8b662ba10] poll_schedule_timeout at 2c6e8a
>  #4 [8b662ba30] do_sys_poll at 2c8604
>  #5 [8b662be40] sys_poll at 2c8852
>  #6 [8b662bea8] system_call at 843a66
> 
> IMHO the "-t" method is quite risky (at least on s390). What about using
> the "normal" stack backtrace without the "-t" bt option?
That really worries me -- introducing the usage of normal backtrace on all tasks
instead of simply walking the stack memory looking for text addresses is a huge
change.
Dave
 
> ---
>  task.c |    2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> --- a/task.c
> +++ b/task.c
> @@ -6633,7 +6633,7 @@ panic_search(void)
>          fd = &foreach_data;
>  	fd->keys = 1;
>  	fd->keyword_array[0] = FOREACH_BT;
> -	fd->flags |= (FOREACH_t_FLAG|FOREACH_o_FLAG);
> +	fd->flags |= FOREACH_o_FLAG;
>  
>  	dietask = lasttask = NO_TASK;
>  	
> 
--
Crash-utility mailing list
Crash-utility(a)redhat.com
https://www.redhat.com/mailman/listinfo/crash-utility
                                
                         
                        
                                
                                10 years, 2 months
                        
                        
                 
         
 
        
            
        
        
        
                
                        
                                
                                 
                                        
                                
                         
                        
                                
                                
                                        
                                                
                                        
                                        
                                        Re: [Crash-utility] Wrong RSS  field in ps
                                
                                
                                
                                    
                                        by Dave Anderson
                                    
                                
                                
                                        
----- Original Message -----
> On Tue, 4 Aug 2015 16:57:35 -0400 (EDT)
> Dave Anderson <anderson(a)redhat.com> wrote:
> 
> > 
> > 
> > ----- Original Message -----
> > 
> > > > Michael
> > > 
> > > OK, so it would seem to be somewhere in the trail from enumerator_value()
> > > into the gdb_command_funnel() via the GNU_GET_DATATYPE req->command in
> > > datatype_info().
> > > 
> > > I'm having a hard time provisioning an s390x machine internally at this time.
> > > When I get one, I'll see if the problem has been there all along.
> > > 
> > > Dave
> > > 
> > 
> > Michael,
> > 
> > I finally got access to an s390x and I see the same thing.  In fact it also
> > happens on a RHEL6 2.6.32-based kernel, so it appears it's always been an
> > issue.  I'll look into it tomorrow.
> 
> Great, thanks!
> 
> Michael
> 
The problem is in gdb-7.6/gdb/symtab.c, in the eval_enum() function that
gets created/added by the crash utility's gdb-7.6.patch: 
static void
eval_enum(struct type *type, struct gnu_request *req)
{
        register int i;
        int len;
        int lastval;
        len = TYPE_NFIELDS (type);
        lastval = 0;
        for (i = 0; i < len; i++) {
                if (lastval != TYPE_FIELD_BITPOS (type, i)) {
                        lastval = TYPE_FIELD_BITPOS (type, i);
                }
                if (STREQ(TYPE_FIELD_NAME(type, i), req->name)) {
                        req->tagname = "(unknown)";
                        req->value = lastval;
                        return;
                }
                lastval++;
        }
}
The function is only used for anonymous (nameless) enums like MM_ANONPAGES.
It simply walks through the enumerator fields and looks for a matching string,
having captured the value of each enumerator in "lastval".  This function hasn't
changed, and has simply been forward-ported with each upgrade of gdb.  What has
changed, though, is gdb itself between gdb-7.3.1 and gdb-7.6, which added a new
TYPE_FIELD_ENUMVAL() macro, which should replace the two instances of 
TYPE_FIELD_BITPOS() above.
I'm thinking that this is an endian issue, where the generic TYPE_FIELD_BITPOS()
macro works OK for little-endian machines, and probably has always failed for big-endian
machines.  I'm provisioning a big-endian ppc64 to verify that.  (Although I'm not sure
what macro should have been used in the older gdb versions that didn't have a
TYPE_FIELD_ENUMVAL()?)
Anyway, that's the resolution -- I'll be updating the gdb-7.6.patch upstream later today
or tomorrow.
Dave
                                
                         
                        
                                
                                10 years, 2 months
                        
                        
                 
         
 
        
            
        
        
        
                
                        
                        
                                
                                
                                        
                                                
                                        
                                        
                                        Wrong RSS  field in ps
                                
                                
                                
                                    
                                        by Michael Holzheu
                                    
                                
                                
                                        Hello Dave,
On s390 (kernel 4.2.0-rc2) the "RSS" field in "ps" is wrong.
The reason is that in rss_page_types_init() enumerator_value("MM_ANONPAGES",
&anonpages) returns zero for "anonpages" and therefore we add MM_FILEPAGES
twice instead of adding MM_ANONPAGES.
Example: Process that allocated 500 MB:
ps 2152
   PID    PPID  CPU       TASK        ST  %MEM     VSZ    RSS  COMM
   2152   1061   4      7aff0000      IN   0.0  514024   2236  eat_mem
crash> print/x ((struct task_struct *) 0x7aff0000)->mm->rss_stat
$1 = {
  count = {{
      counter = 0x113
    }, {
      counter = 0x1f414
    }, {
      counter = 0x0
    }}
}
Any idea why enumerator_value() is not working?
Michael
                                
                         
                        
                                
                                10 years, 3 months
                        
                        
                 
         
 
        
            
        
        
        
                
                        
                        
                                
                                
                                        
                                                
                                        
                                        
                                        Re: [Crash-utility] [PATCH]remind of using --zero_excluded
                                
                                
                                
                                    
                                        by Dave Anderson
                                    
                                
                                
                                        
    
Zhou,
This seems reasonable.  I may reword it slightly, but I won't be checking it in until I return from vacation.
Thanks,  Dave
Sent from my Verizon Wireless 4G LTE smartphone
-------- Original message --------
From: "Zhou, Wenjian/周文剑" <zhouwj-fnst(a)cn.fujitsu.com> 
Date: 07/21/2015  1:14 AM  (GMT-07:00) 
To: "Discussion list for crash utility usage, maintenance and development" <crash-utility(a)redhat.com> 
Subject: [Crash-utility] [PATCH]remind of using --zero_excluded 
The option --zero_excluded can be used to analyze an incomplete
dumpfile.
So users should be reminded to use it when they try to analyze an
incomplete dumpfile without --zero_excluded.
-- 
Thanks
Zhou Wenjian
--
Crash-utility mailing list
Crash-utility(a)redhat.com
https://www.redhat.com/mailman/listinfo/crash-utility
                                
                         
                        
                                
                                10 years, 3 months
                        
                        
                 
         
 
        
            
        
        
        
                
                        
                        
                                
                                
                                        
                                                
                                        
                                        
                                        Crash fails to analyse kernel dump from 4.2-rc4
                                
                                
                                
                                    
                                        by Qu Wenruo
                                    
                                
                                
                                        Hi all,
I updated my kernel to 4.2-rc4 but suddenly crash failed to analyse the 
kernel with the following error message:
------
crash: invalid kernel virtual address: 170000001d  type: "possible"
WARNING: cannot read cpu_possible_map
crash: invalid kernel virtual address: 3800000046  type: "present"
WARNING: cannot read cpu_present_map
crash: invalid kernel virtual address: 240000002d  type: "online"
WARNING: cannot read cpu_online_map
crash: invalid kernel virtual address: 570000006e  type: "active"
WARNING: cannot read cpu_active_map
crash: cannot determine base kernel version
crash: /home/adam/linux-btrfs/vmlinux and vmcore do not match!
------
Crash 7.1.0 and 7.1.2 both fail with the same error.
Previously I was testing a 4.2-rc1 kernel, and 7.1.0 worked like a charm.
So is there something wrong with the new kernel?
Thanks,
Qu
                                
                         
                        
                                
                                10 years, 3 months
                        
                        
                 
         
 
        
            
        
        
        
                
                        
                                
                                 
                                        
                                
                         
                        
                                
                                
                                        
                                                
                                        
                                        
                                        Re: [Crash-utility] crash extensions help
                                
                                
                                
                                    
                                        by Dave Anderson
                                    
                                
                                
                                        
----- Original Message -----
> Hi Dave,
> 
> I found you from https://www.redhat.com/mailman/listinfo/crash-utility.
> I was really impressed with your contribution to the crash utility.
> I tried subscribing to the crash utility mailing list. But no luck.
Sorry about that -- I was on vacation for a couple of weeks until today.
You're on the list now.
> 
> I started writing the crash extensions,
> I am unable to read all the members of the task_struct and mm_struct structures.
> I am trying to read mm->hiwater_vm, but the defs.h header file does not have a
> struct_mm_hiwater_mm variable as an offset_table member.
> 
> Could you please suggest me the ways that I could read all the members of
> the task_struct and mm_struct.
> Thanks in Advance.
> 
> --
>   Thanks & Regards
>   Rupesh P
For structure members that are not in the built-in offset_table, you can just 
use MEMBER_OFFSET() directly, for example:
  MEMBER_OFFSET("mm_struct", "hiwater_rss")
which returns a long offset value.  But be aware that it returns -1 on a failure,
so you should check for success/failure before using the return value.
The built-in OFFSET() macro verifies that the pre-initialized offset value
read by MEMBER_OFFSET() and stored in the offset_table is not a -1, and if it is, 
it will generate a FATAL error.  In an extension module, it's up to you to do the
verification.
Dave
 
                                
                         
                        
                                
                                10 years, 3 months
                        
                        
                 
         
 
        
            
        
        
        
                
                        
                                
                                 
                                        
                                
                         
                        
                                
                                
                                        
                                                
                                        
                                        
                                        [PATCH] extensions/trace: max_buffer is optional
                                
                                
                                
                                    
                                        by Rabin Vincent
                                    
                                
                                
                                        max_buffer is optional in the kernel (depends on the
CONFIG_TRACE_MAX_TRACE option).  Don't fail if it isn't available.
---
 extensions/trace.c |   14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)
diff --git a/extensions/trace.c b/extensions/trace.c
index 9f81568..c269f4c 100644
--- a/extensions/trace.c
+++ b/extensions/trace.c
@@ -34,6 +34,10 @@ static int encapsulated_current_trace;
  * trace_buffer is supported
  */
 static int trace_buffer_available;
+/*
+ * max_buffer is supported
+ */
+static int max_buffer_available;
 
 #define koffset(struct, member) struct##_##member##_offset
 
@@ -163,8 +167,10 @@ static int init_offsets(void)
 
 	if (trace_buffer_available) {
 		init_offset(trace_array, trace_buffer);
-		init_offset(trace_array, max_buffer);
 		init_offset(trace_buffer, buffer);
+
+		if (max_buffer_available)
+			init_offset(trace_array, max_buffer);
 	} else {
 		init_offset(trace_array, buffer);
 	}
@@ -448,6 +454,9 @@ out_fail:
 static int ftrace_int_max_tr_trace(void)
 {
 	if (trace_buffer_available) {
+		if (!max_buffer_available)
+			return 0;
+
 		global_max_buffer = global_trace + koffset(trace_array, max_buffer);
 		read_value(max_tr_ring_buffer, global_max_buffer, trace_buffer, buffer);
 	} else {
@@ -528,6 +537,9 @@ static int ftrace_init(void)
 
 	if (MEMBER_EXISTS("trace_array", "trace_buffer")) {
 		trace_buffer_available = 1;
+
+		if (MEMBER_EXISTS("trace_array", "max_buffer"))
+			max_buffer_available = 1;
 	} else {
 		sym_max_tr_trace = symbol_search("max_tr");
 		if (sym_max_tr_trace == NULL)
-- 
1.7.10.4
                                
                         
                        
                                
                                10 years, 3 months