For dumps with large task count, ps spends most its time in the
following stack:
task_to_context
task_to_pid
show_ps_data
show_ps
cmd_ps
exec_command
main_loop
captured_command_loop
catch_errors
captured_main
catch_errors
gdb_main_entry
gdb_main_loop
main
The above shows use of task_to_pid on each process. task_to_context is
O(N), thus ps is O(N^2).
Reduce task_to_context to O(N*log(N)) by using a binary search.
On a 1M task dump, this reduces ps time 45m => 17s.
Signed-off-by: Greg Thelen <gthelen(a)google.com>
---
defs.h | 2 ++
task.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++------
2 files changed, 60 insertions(+), 6 deletions(-)
diff --git a/defs.h b/defs.h
index 3a3b6f66573c..15346f01fd4e 100644
--- a/defs.h
+++ b/defs.h
@@ -811,6 +811,7 @@ struct tgid_context { /* tgid and task stored for each
task */
struct task_table { /* kernel/local task table data */
struct task_context *current;
struct task_context *context_array;
+ struct task_context **context_by_task; /* task_context sorted by task addr */
void (*refresh_task_table)(void);
ulong flags;
ulong task_start;
@@ -871,6 +872,7 @@ struct task_table { /* kernel/local task table
data */
#define START_TIME_NSECS (0x8000)
#define THREAD_INFO_IN_TASK (0x10000)
#define PID_RADIX_TREE (0x20000)
+#define INDEXED_CONTEXTS (0x40000)
#define TASK_SLUSH (20)
diff --git a/task.c b/task.c
index cddc1f5b651a..5ad77408530d 100644
--- a/task.c
+++ b/task.c
@@ -783,6 +783,10 @@ allocate_task_space(int cnt)
malloc(cnt * sizeof(struct task_context))))
error(FATAL, "cannot malloc context array (%d tasks)",
cnt);
+ if (!(tt->context_by_task = (struct task_context **)
+ malloc(cnt * sizeof(struct task_context*))))
+ error(FATAL, "cannot malloc context_by_task array (%d
tasks)",
+ cnt);
if (!(tt->tgid_array = (struct tgid_context *)
malloc(cnt * sizeof(struct tgid_context))))
error(FATAL, "cannot malloc tgid array (%d tasks)",
@@ -802,6 +806,13 @@ allocate_task_space(int cnt)
"%scannot realloc context array (%d tasks)",
(pc->flags & RUNTIME) ? "" : "\n", cnt);
+ if (!(tt->context_by_task = (struct task_context **)
+ realloc(tt->context_by_task,
+ cnt * sizeof(struct task_context*))))
+ error(FATAL,
+ "%scannot realloc context_by_task array (%d
tasks)",
+ (pc->flags & RUNTIME) ? "" :
"\n", cnt);
+
if (!(tt->tgid_array = (struct tgid_context *)
realloc(tt->tgid_array,
cnt * sizeof(struct tgid_context))))
@@ -2700,6 +2711,7 @@ add_context(ulong task, char *tp)
if (has_cpu && (tt->flags & POPULATE_PANIC))
tt->panic_threads[tc->processor] = tc->task;
+ tt->flags &= ~INDEXED_CONTEXTS;
tt->running_tasks++;
return tc;
}
@@ -2747,6 +2759,33 @@ refresh_context(ulong curtask, ulong curpid)
}
}
+static int
+sort_by_task(const void *arg1, const void *arg2)
+{
+ const struct task_context *t1, *t2;
+
+ t1 = *(const struct task_context **)arg1;
+ t2 = *(const struct task_context **)arg2;
+
+ if (t1->task == t2->task)
+ return 0;
+
+ return (t1->task < t2->task) ? -1 : 1;
+}
+
+/* sort context_by_task by task address */
+static void
+sort_context_by_task(void)
+{
+ int i;
+
+ for (i = 0; i < tt->running_tasks; i++)
+ tt->context_by_task[i] = &tt->context_array[i];
+ qsort(tt->context_by_task, tt->running_tasks,
+ sizeof(*tt->context_by_task), sort_by_task);
+ tt->flags |= INDEXED_CONTEXTS;
+}
+
/*
* Sort the task_context array by PID number; for PID 0, sort by processor.
*/
@@ -2759,6 +2798,8 @@ sort_context_array(void)
qsort((void *)tt->context_array, (size_t)tt->running_tasks,
sizeof(struct task_context), sort_by_pid);
set_context(curtask, NO_PID);
+
+ sort_context_by_task();
}
static int
@@ -2804,6 +2845,8 @@ sort_context_array_by_last_run(void)
qsort((void *)tt->context_array, (size_t)tt->running_tasks,
sizeof(struct task_context), sort_by_last_run);
set_context(curtask, NO_PID);
+
+ sort_context_by_task();
}
/*
@@ -4640,15 +4683,24 @@ task_exists(ulong task)
struct task_context *
task_to_context(ulong task)
{
- int i;
- struct task_context *tc;
+ struct task_context key, *tc, **found;
+ int i;
+
+ /* Binary search the context_by_task array. */
+ if (tt->flags & INDEXED_CONTEXTS) {
+ key.task = task;
+ tc = &key;
+ found = bsearch(&tc, tt->context_by_task, tt->running_tasks,
+ sizeof(*tt->context_by_task), sort_by_task);
+ return found ? *found : NULL;
+ }
tc = FIRST_CONTEXT();
- for (i = 0; i < RUNNING_TASKS(); i++, tc++)
+ for (i = 0; i < RUNNING_TASKS(); i++, tc++)
if (tc->task == task)
- return tc;
-
- return NULL;
+ return tc;
+
+ return NULL;
}
/*
--
2.17.0.484.g0c8726318c-goog