>From da7a8b539bc82cb3f455465718221c09b53ac256 Mon Sep 17 00:00:00 2001 From: panfengyun Date: Mon, 25 Aug 2014 15:36:27 +0800 Subject: [PATCH] improve ps performance When the ps command is used on a system that contains numerous tasks, especially on RHEL 7, it spends too much time getting the task data. We collect the output of the ps command as part of the summary information of a crash dump, and in a catastrophic case, the crash dump could contain a huge number of tasks due to the bug that caused the system panic. But it is a problem if the ps command needs several hours to finish. After investigation, we found that, because of the following kernel commit and a fix in crash 7.0.4, the ps command needs a very long time to calculate RSS. kernel commit: commit 34e55232e59f7b19050267a05ff1226e5cd122a5 Author: KAMEZAWA Hiroyuki Date: Fri Mar 5 13:41:40 2010 -0800 mm: avoid false sharing of mm_counter ... crash fix: 7.0.4 - Fix for the "ps" command's display of per-task RSS and %MEM values in Linux 2.6.34 and later kernels in which SPLIT_RSS_COUNTING is enabled. Without the patch, the values are only taken from each task's mm_struct.rss_stat structure, which may contain stale values because they may not be synchronized with the RSS values stored in each per-thread task_struct.rss_stat structure; this may lead to invalid or slightly low RSS values, and worst-case, the %MEM value may show garbage percentage values. (vinayakm.list@gmail.com) So: 1. We fix the RSS calculation in get_task_mem_usage() to improve the performance of the ps command. 2. We also fix the lookup of the enumerators MM_FILEPAGES and MM_ANONPAGES, which also speeds up the ps command. The 1st fix accounts for most of the ps performance improvement. We also ran some tests;
please check the following data: OS process number without fix fix all 3 parts --------------------------------------------------------------------- RHEL6 ~100k ~180s ~70s RHEL7 ~100k ~51870s(~14h) ~63s Signed-off-by: panfengyun --- crash-7.0.7/defs.h | 11 +++++++ crash-7.0.7/main.c | 1 + crash-7.0.7/memory.c | 65 ++++++++++++++++++++++++++++++++++++-------- crash-7.0.7/task.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 137 insertions(+), 12 deletions(-) diff --git a/crash-7.0.7/defs.h b/crash-7.0.7/defs.h index 44df6ae..5ff3bf7 100755 --- a/crash-7.0.7/defs.h +++ b/crash-7.0.7/defs.h @@ -758,13 +758,23 @@ struct task_context { /* context stored for each task */ int processor; ulong ptask; ulong mm_struct; + ulong tgid; + ulong tgid_task_context_index; struct task_context *tc_next; }; +struct tgid_task_context{ /* context and tgid stored for each task */ + ulong tgid; + struct task_context *tc; +}; + struct task_table { /* kernel/local task table data */ struct task_context *current; struct task_context *context_array; + struct tgid_task_context *ttc_array; void (*refresh_task_table)(void); + long filepages; + long anonpages; ulong flags; ulong task_start; ulong task_end; @@ -4834,6 +4844,7 @@ ulong generic_get_stackbase(ulong); ulong generic_get_stacktop(ulong); void dump_task_table(int); void sort_context_array(void); +void set_tgid_context_array(void); int in_irq_ctx(ulonglong, int, ulong); /* diff --git a/crash-7.0.7/main.c b/crash-7.0.7/main.c index b0524d2..0728d74 100755 --- a/crash-7.0.7/main.c +++ b/crash-7.0.7/main.c @@ -773,6 +773,7 @@ reattempt: } else if (!(pc->flags & MINIMAL_MODE)) { tt->refresh_task_table(); sort_context_array(); + set_tgid_context_array(); } } if (!STREQ(pc->curcmd, pc->program_name)) diff --git a/crash-7.0.7/memory.c b/crash-7.0.7/memory.c index c97dd39..0abaaad 100755 --- a/crash-7.0.7/memory.c +++ b/crash-7.0.7/memory.c @@ -225,6 +225,7 @@ static int next_module_vaddr(ulong, ulong *); static int 
next_identity_mapping(ulong, ulong *); static int vm_area_page_dump(ulong, ulong, ulong, ulong, ulong, struct reference *); +void set_pages(void); static int dump_swap_info(ulong, ulong *, ulong *); static void swap_info_init(void); static char *get_swapdev(ulong, char *); @@ -1066,6 +1067,7 @@ vm_init(void) page_flags_init(); + set_pages(); vt->flags |= VM_INIT; } @@ -4072,6 +4074,31 @@ in_user_stack(ulong task, ulong vaddr) } /* + * set the const value of filepages and anonpages + * according to MM_FILEPAGES and MM_ANONPAGES. + */ +void +set_pages(void) +{ + long anonpages, filepages; + if (VALID_MEMBER(mm_struct_rss)) + return; + if (VALID_MEMBER(mm_struct_rss_stat)) + { + if (!enumerator_value("MM_FILEPAGES", &filepages) || + !enumerator_value("MM_ANONPAGES", &anonpages)) + { + filepages = 0; + anonpages = 1; + } + tt->filepages = filepages; + tt->anonpages = anonpages; + } else { + return; + } +} + +/* * Fill in the task_mem_usage structure with the RSS, virtual memory size, * percent of physical memory being used, and the mm_struct address. 
*/ @@ -4108,11 +4135,8 @@ get_task_mem_usage(ulong task, struct task_mem_usage *tm) if (VALID_MEMBER(mm_struct_rss_stat)) { long anonpages, filepages; - if (!enumerator_value("MM_FILEPAGES", &filepages) || - !enumerator_value("MM_ANONPAGES", &anonpages)) { - filepages = 0; - anonpages = 1; - } + anonpages = tt->anonpages; + filepages = tt->filepages; rss += LONG(tt->mm_struct + OFFSET(mm_struct_rss_stat) + OFFSET(mm_rss_stat_count) + @@ -4125,16 +4149,30 @@ get_task_mem_usage(ulong task, struct task_mem_usage *tm) /* Check whether SPLIT_RSS_COUNTING is enabled */ if (VALID_MEMBER(task_struct_rss_stat)) { - int i, sync_rss; + int sync_rss; ulong tgid; struct task_context *tc1; + struct tgid_task_context *ttc_array, *ttc, *start, *end; - tgid = task_tgid(task); - - tc1 = FIRST_CONTEXT(); - for (i = 0; i < RUNNING_TASKS(); i++, tc1++) { - if (task_tgid(tc1->task) != tgid) - continue; + ttc_array = tt->ttc_array; + tgid = tc->tgid; + ttc = ttc_array + tc->tgid_task_context_index; + start = ttc; + if (tc->tgid_task_context_index > 0) + { + while ((start > ttc_array) && ((start -1 )->tgid == start->tgid)) + start--; + } + end = ttc; + if (tc->tgid_task_context_index < RUNNING_TASKS()) + { + while ((end < (ttc_array + (RUNNING_TASKS() - 1))) && + (end->tgid == (end + 1)->tgid)) + end++; + } + for (;start <= end;) + { + tc1 = start->tc; /* count 0 -> filepages */ if (!readmem(tc1->task + @@ -4160,6 +4198,9 @@ get_task_mem_usage(ulong task, struct task_mem_usage *tm) continue; rss += sync_rss; + if(start == (ttc_array + (RUNNING_TASKS() - 1))) + break; + start++; } } diff --git a/crash-7.0.7/task.c b/crash-7.0.7/task.c index 75b1964..5016afb 100755 --- a/crash-7.0.7/task.c +++ b/crash-7.0.7/task.c @@ -19,6 +19,7 @@ static ulong get_panic_context(void); static int sort_by_pid(const void *, const void *); +static int sort_by_tgid(const void *, const void *); static void show_ps(ulong, struct psinfo *); static struct task_context *panic_search(void); static void 
allocate_task_space(int); @@ -493,6 +494,7 @@ task_init(void) } sort_context_array(); + set_tgid_context_array(); if (pc->flags & SILENT) initialize_task_state(); @@ -639,6 +641,11 @@ allocate_task_space(int cnt) malloc(cnt * sizeof(struct task_context)))) error(FATAL, "cannot malloc context array (%d tasks)", cnt); + if (!(tt->ttc_array = (struct tgid_task_context *) + malloc(cnt * sizeof(struct tgid_task_context)))) + error(FATAL, "cannot malloc ttc array (%d tasks)", + cnt); + } else { if (!(tt->task_local = (void *) realloc(tt->task_local, cnt * sizeof(void *)))) @@ -652,6 +659,13 @@ allocate_task_space(int cnt) error(FATAL, "%scannot realloc context array (%d tasks)", (pc->flags & RUNTIME) ? "" : "\n", cnt); + + if (!(tt->ttc_array = (struct tgid_task_context *) + realloc(tt->ttc_array, + cnt * sizeof(struct tgid_task_context)))) + error(FATAL, + "%scannot realloc ttc array (%d tasks)", + (pc->flags & RUNTIME) ? "" : "\n", cnt); } } @@ -2281,6 +2295,7 @@ store_context(struct task_context *tc, ulong task, char *tp) int *processor_addr; ulong *parent_addr; ulong *mm_addr; + ulong *tgid; int has_cpu; int do_verify; @@ -2320,6 +2335,7 @@ store_context(struct task_context *tc, ulong task, char *tp) else parent_addr = (ulong *)(tp + OFFSET(task_struct_parent)); mm_addr = (ulong *)(tp + OFFSET(task_struct_mm)); + tgid = (ulong *)(tp + OFFSET(task_struct_tgid)); has_cpu = task_has_cpu(task, tp); tc->pid = (ulong)(*pid_addr); @@ -2327,6 +2343,7 @@ store_context(struct task_context *tc, ulong task, char *tp) tc->processor = *processor_addr; tc->ptask = *parent_addr; tc->mm_struct = *mm_addr; + tc->tgid = *tgid; tc->task = task; tc->tc_next = NULL; @@ -2445,6 +2462,54 @@ sort_context_array_by_last_run(void) } /* + * Set the tgid_task_context array by tgid number. 
+ */ +void +set_tgid_context_array(void) +{ + ulong i; + struct task_context *tc; + struct tgid_task_context *ttc; + + if (VALID_MEMBER(mm_struct_rss) || (!VALID_MEMBER(task_struct_rss_stat))) + return; + /* + * init tgid task context array by task context tc. + */ + tc = FIRST_CONTEXT(); + ttc = tt->ttc_array; + for (i = 0; i< tt->running_tasks; i++,tc++,ttc++) + { + ttc->tgid = tc->tgid; + ttc->tc = tc; + } + + qsort((void *)tt->ttc_array, (size_t)tt->running_tasks, + sizeof(struct tgid_task_context), sort_by_tgid); + + /* + * record the index of tc in tgid task context array + */ + ttc = tt->ttc_array; + for (i = 0; i < tt->running_tasks; i++, ttc++) + { + ttc->tc->tgid_task_context_index = i; + } +} + +static int +sort_by_tgid(const void *arg1, const void *arg2) +{ + struct tgid_task_context *t1, *t2; + + t1 = (struct tgid_task_context *)arg1; + t2 = (struct tgid_task_context *)arg2; + + return (t1->tgid < t2->tgid ? -1 : + t1->tgid == t2->tgid ? 0 : 1); +} + +/* * Keep a stash of the last task_struct accessed. Chances are it will * be hit several times before the next task is accessed. 
*/ @@ -2916,7 +2981,10 @@ cmd_ps(void) cmd_usage(pc->curcmd, SYNOPSIS); if (flag & (PS_LAST_RUN|PS_MSECS)) + { sort_context_array_by_last_run(); + set_tgid_context_array(); + } else if (psinfo.cpus) { error(INFO, "-C option is only applicable with -l and -m\n"); goto bailout; @@ -5966,7 +6034,10 @@ foreach(struct foreach_data *fd) } } if (fd->flags & (FOREACH_l_FLAG|FOREACH_m_FLAG)) + { sort_context_array_by_last_run(); + set_tgid_context_array(); + } if ((fd->flags & FOREACH_m_FLAG) && INVALID_MEMBER(rq_timestamp)) option_not_supported('m'); @@ -6655,6 +6726,7 @@ dump_task_table(int verbose) fprintf(fp, " task_start: %lx\n", tt->task_start); fprintf(fp, " task_end: %lx\n", tt->task_end); fprintf(fp, " task_local: %lx\n", (ulong)tt->task_local); + fprintf(fp, " ttc_array: %lx\n", (ulong)tt->ttc_array); fprintf(fp, " max_tasks: %d\n", tt->max_tasks); fprintf(fp, " nr_threads: %d\n", tt->nr_threads); fprintf(fp, " running_tasks: %ld\n", tt->running_tasks); -- 1.7.1