The RU/TASK_RUNNING state means the task is runnable.
It is either currently running or on a run queue waiting to run.
Currently, the crash tool uses the "rq_clock - sched_info->last_arrival" formula to
calculate how long a task has been in the RU state. This is correct for a task that is
running on a CPU.
But for a task that is waiting in the CPU run queue (for some reason, for example
cfs/rt queue throttling), this formula can be misleading.
For example, this task is reported as having been in the RU state for more than 220 days:
[ 220 10:36:38.026] [RU] PID: 12345 TASK: ffff8d674ab6b180 CPU: 1 COMMAND: "task"
Looking closer:
crash> rq.clock ffff8de438a5acc0
clock = 87029229985307234,
crash> task -R sched_info,se.exec_start
PID: 12345 TASK: ffff8d674ab6b180 CPU: 1 COMMAND: "task"
sched_info = {
pcount = 33,
run_delay = 0,
last_arrival = 67983031958439673,
last_queued = 87029224561119369
},
se.exec_start = 67983031958476937,
67983031             67983031    87029224               87029229
|<- running on CPU ->|<-  IN   ->|<- waiting in queue ->|
In this scenario, the "task" had been waiting in the run queue of the CPU for only
5 seconds, so we should use the "rq_clock - sched_info->last_queued" formula.
We can trust sched_info->last_queued because it is only set when the task enters the CPU
run queue.
Furthermore, when the task starts running on a CPU or is dequeued from the CPU run queue,
last_queued is reset to 0.
Therefore, my idea is simple:
If a task is in the RU state and sched_info->last_queued is non-zero,
the task is waiting in the run queue, so use "rq_clock - sched_info->last_queued".
Otherwise, if a task is in the RU state and sched_info->last_queued is 0
(in which case sched_info->last_arrival must be non-zero), the task is running on
the CPU, so use "rq_clock - sched_info->last_arrival".
Signed-off-by: Kenneth Yin <kyin(a)redhat.com>
---
defs.h | 1 +
symbols.c | 2 ++
task.c | 21 +++++++++++++++------
3 files changed, 18 insertions(+), 6 deletions(-)
diff --git a/defs.h b/defs.h
index 4cf169c..66f5ce4 100644
--- a/defs.h
+++ b/defs.h
@@ -1787,6 +1787,7 @@ struct offset_table { /* stash of commonly-used
offsets */
long vcpu_struct_rq;
long task_struct_sched_info;
long sched_info_last_arrival;
+ long sched_info_last_queued;
long page_objects;
long kmem_cache_oo;
long char_device_struct_cdev;
diff --git a/symbols.c b/symbols.c
index e30fafe..fb5035f 100644
--- a/symbols.c
+++ b/symbols.c
@@ -9930,6 +9930,8 @@ dump_offset_table(char *spec, ulong makestruct)
OFFSET(sched_rt_entity_run_list));
fprintf(fp, " sched_info_last_arrival: %ld\n",
OFFSET(sched_info_last_arrival));
+ fprintf(fp, " sched_info_last_queued: %ld\n",
+ OFFSET(sched_info_last_queued));
fprintf(fp, " task_struct_thread_info: %ld\n",
OFFSET(task_struct_thread_info));
fprintf(fp, " task_struct_stack: %ld\n",
diff --git a/task.c b/task.c
index 3bafe79..f5386ac 100644
--- a/task.c
+++ b/task.c
@@ -332,9 +332,12 @@ task_init(void)
MEMBER_OFFSET_INIT(task_struct_last_run, "task_struct",
"last_run");
MEMBER_OFFSET_INIT(task_struct_timestamp, "task_struct",
"timestamp");
MEMBER_OFFSET_INIT(task_struct_sched_info, "task_struct",
"sched_info");
- if (VALID_MEMBER(task_struct_sched_info))
+ if (VALID_MEMBER(task_struct_sched_info)) {
MEMBER_OFFSET_INIT(sched_info_last_arrival,
"sched_info", "last_arrival");
+ MEMBER_OFFSET_INIT(sched_info_last_queued,
+ "sched_info", "last_queued");
+ }
if (VALID_MEMBER(task_struct_last_run) ||
VALID_MEMBER(task_struct_timestamp) ||
VALID_MEMBER(sched_info_last_arrival)) {
@@ -6035,7 +6038,7 @@ ulonglong
task_last_run(ulong task)
{
ulong last_run;
- ulonglong timestamp;
+ ulonglong timestamp,last_queued;
timestamp = 0;
fill_task_struct(task);
@@ -6047,10 +6050,16 @@ task_last_run(ulong task)
} else if (VALID_MEMBER(task_struct_timestamp))
timestamp = tt->last_task_read ? ULONGLONG(tt->task_struct +
OFFSET(task_struct_timestamp)) : 0;
- else if (VALID_MEMBER(sched_info_last_arrival))
- timestamp = tt->last_task_read ? ULONGLONG(tt->task_struct +
- OFFSET(task_struct_sched_info) +
- OFFSET(sched_info_last_arrival)) : 0;
+ else if (VALID_MEMBER(sched_info_last_queued))
+ last_queued = ULONGLONG(tt->task_struct +
+ OFFSET(task_struct_sched_info) +
+ OFFSET(sched_info_last_queued));
+ if (last_queued != 0) {
+ timestamp = tt->last_task_read ? last_queued : 0;
+ } else if (VALID_MEMBER(sched_info_last_arrival))
+ timestamp = tt->last_task_read ? ULONGLONG(tt->task_struct +
+ OFFSET(task_struct_sched_info) +
+ OFFSET(sched_info_last_arrival)) : 0;
return timestamp;
}
--
2.31.1