----- "Michael Holzheu" <holzheu(a)linux.vnet.ibm.com> wrote:
 Hi Dave,
 
 I got an s390x dump of a Linux 2.6.36 system, where a task (kmcheck, pid=44) is
 missing in the ps output. I debugged the problem and I think that I found the
 reason:
 
 It looks like crash does not walk the linked list of the pid hash table
 to the end if it finds a NULL pointer in the pid.tasks[PIDTYPE_PID=0]
 array. Unfortunately, this condition is true for the struct pid that
 precedes our lost task in the linked list, so crash does not find our task.
That sounds similar to the fix Bob Montgomery made in 5.0.7:
     - Fix for the potential to miss one or more tasks in 2.6.23 and earlier
       kernels, presumably due to catching an entry in the kernel's pid_hash[]
       chain in transition.  Without the patch, the task will simply not be
       seen in the gathered task list.
       (bob.montgomery(a)hp.com)
where this was his patch posting -- which fixed refresh_hlist_task_table_v2():
  [Crash-utility] Missing PID 1 is crash problem with losing tasks
  
https://www.redhat.com/archives/crash-utility/2010-August/msg00049.html
and where your patch fixes refresh_hlist_task_table_v3().
I'll give it a test run...
Thanks,
  Dave
 
 The attached patch seems to fix this problem.
 
 Here is my crash debug log with the 2.6.36 dump:
 ---------------------------------------------
 Task "kmcheck" is in hash slot 2941 in the linked list at position 2:
 
 crash> print pid_hash[2941]
 $4 = {
   first = 0x3f5fb7f8
 }
 
 crash> upid
 struct upid {
     int nr;
     struct pid_namespace *ns;
     struct hlist_node pid_chain;
 }
 SIZE: 32
 
 crash> upid.pid_chain
 struct upid {
   [16] struct hlist_node pid_chain;
 }
 
 crash> eval 0x3f5fb7f8 - 16
 hexadecimal: 3f5fb7e8  
 
 crash> upid 3f5fb7e8   <<<<---- the first upid in the list
 struct upid {
   nr = 565, 
   ns = 0x81d8f8, 
   pid_chain = {
     next = 0x3edea2b0, 
     pprev = 0x96554e8
   }
 }
 
 crash> pid
 struct pid {
     atomic_t count;
     unsigned int level;
     struct hlist_head tasks[3];
     struct rcu_head rcu;
     struct upid numbers[1];
 }
 SIZE: 80
 
 crash> pid.numbers
 struct pid {
   [48] struct upid numbers[1];
 }
 
 crash> eval 3f5fb7e8 - 48
 hexadecimal: 3f5fb7b8  
 
 crash> pid 3f5fb7b8
 struct pid {
   count = {
     counter = 1
   }, 
   level = 0, 
   tasks = {{
       first = 0x0 <<<----------- tasks[0] is NULL
     }, {
       first = 0x3d488620
     }, {
       first = 0x0
     }}, 
   rcu = {
     next = 0x5a5a5a5a5a5a5a5a, 
     func = 0x5a5a5a5a5a5a5a5a
   }, 
   numbers = {{
       nr = 565, 
       ns = 0x81d8f8, 
       pid_chain = {
         next = 0x3edea2b0,  <<<--------- Pointer to second element in list
         pprev = 0x96554e8
       }
     }}
 }
 
 crash> eval 0x3edea2b0 - 16
 hexadecimal: 3edea2a0   <<<-- The second upid in the list
 
 crash> upid 0x3edea2a0
 struct upid {
   nr = 44,                 <<<--- Our missing pid=44 (kmcheck)
   ns = 0x81d8f8, 
   pid_chain = {
     next = 0x0, 
     pprev = 0x3f5fb7f8
   }
 }
 
 crash> eval 0x3edea2a0 - 48
 hexadecimal: 3edea270  
 
 crash> pid 3edea270
 struct pid {
   count = {
     counter = 5
   }, 
   level = 0, 
   tasks = {{
       first = 0x3e799908   <<<--- Pointer to our task_struct.pids
     }, {
       first = 0x0
     }, {
       first = 0x0
     }}, 
   rcu = {
     next = 0x5a5a5a5a5a5a5a5a, 
     func = 0x5a5a5a5a5a5a5a5a
   }, 
   numbers = {{
       nr = 44, 
       ns = 0x81d8f8, 
       pid_chain = {
         next = 0x0, 
         pprev = 0x3f5fb7f8
       }
     }}
 }
 
 crash> task_struct.pids
 struct task_struct {
    [712] struct pid_link pids[3];
 }
 
 crash> eval 0x3e799908 - 712
 hexadecimal: 3e799640  
 
 crash> task_struct 3e799640 | grep comm
   comm = "kmcheck\000\000\000\000\000\000\000\000", <<<--- here it is
 ---
  task.c |    4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)
 
 --- a/task.c
 +++ b/task.c
 @@ -2006,7 +2006,7 @@ do_chained:
                  }
  
  		if (pid_tasks_0 == 0)
 -			continue;
 +			goto chain_next;
  
  		next = pid_tasks_0 - OFFSET(task_struct_pids);
  
 @@ -2042,7 +2042,7 @@ do_chained:
  		}
  
  		cnt++;
 -
 +chain_next:
  		if (pnext) {
  			kpp = pnext;
  			upid = pnext - OFFSET(upid_pid_chain);
 
 
 --
 Crash-utility mailing list
 Crash-utility(a)redhat.com
 
https://www.redhat.com/mailman/listinfo/crash-utility