[PATCH] arm64: Set VA_BITS_ACTUAL to CONFIG_ARM64_VA_BITS
by Hong YANG 杨红
Load CONFIG_ARM64_VA_BITS from vmlinux and set to VA_BITS_ACTUAL.
arm64_calc_VA_BITS() can't find correct VA_BITS for a qemu arm64
ramdump file:
1. The 'vabits_actual' symbol doesn't exist
2. There is no VA_BITS in vmcoreinfo
3. No TCR info (from vmcoreinfo)
4. VA_BITS_ACTUAL is not specified in the command line arguments
5. The final fallback code tries to calculate it from symbol addresses such as
swapper_pg_dir, which yields a wrong VA_BITS that does not match
CONFIG_ARM64_VA_BITS
Thus we try to parse CONFIG_ARM64_VA_BITS from vmlinux before
symtab_init() which would mess with the _stext_vmlinux = UNINITIALIZED
condition.
Use the kernel_config_data_end symbol to load the config data when available
instead of searching for the magic end string, and restructure the related code.
Going further, we can always read the kernel config from vmlinux, and we
can verify whether the config in vmlinux matches the config in the
core file, which also indicates whether the vmlinux matches the core file.
Test:
1. Build an ARM64 kernel with CONFIG_ARM64_VA_BITS set to 39
2. Run the kernel in qemu and capture a ramdump
(enter the qemu console with ctrl+a c, then enter dump-guest-memory -z /home/abc/ramdump.elf)
3. Load the ramdump with crash
before:
crash can't load the dump because arm64_calc_VA_BITS() sets VA_BITS to 38
after:
crash sets VA_BITS_ACTUAL to CONFIG_ARM64_VA_BITS (39), so it works
fine
Signed-off-by: Hong YANG <hong.yang3(a)nio.com>
---
arm64.c | 7 +
defs.h | 5 +
kernel.c | 434 ++++++++++++++++++++++++++++++++++--------------------
main.c | 1 +
symbols.c | 2 +-
5 files changed, 291 insertions(+), 158 deletions(-)
diff --git a/arm64.c b/arm64.c
index ef4a2b8..4682d08 100644
--- a/arm64.c
+++ b/arm64.c
@@ -322,6 +322,13 @@ arm64_init(int when)
ms = machdep->machspec;
+ /* st->CONFIG_ARM64_VA_BITS loaded from read_in_kernel_config_from_vmlinux() */
+ ms->CONFIG_ARM64_VA_BITS = st->CONFIG_ARM64_VA_BITS;
+ if (ms->CONFIG_ARM64_VA_BITS && !ms->VA_BITS_ACTUAL) {
+ error(WARNING, "Set VA_BITS_ACTUAL to CONFIG_ARM64_VA_BITS from vmlinux\n");
+ ms->VA_BITS_ACTUAL = ms->CONFIG_ARM64_VA_BITS;
+ }
+
/*
* The st->_stext_vmlinux is needed in arm64_init(PRE_GDB) when a
* dumpfile does not have vmcoreinfo and we use -m vabits_actual
diff --git a/defs.h b/defs.h
index 4cf169c..412c52e 100644
--- a/defs.h
+++ b/defs.h
@@ -2932,6 +2932,9 @@ struct symbol_table_data {
int kernel_symbol_type;
ulong linux_banner_vmlinux;
struct syment *mod_symname_hash[SYMNAME_HASH];
+
+ /* Loaded from vmlinux */
+ ulong CONFIG_ARM64_VA_BITS;
};
/* flags for st */
@@ -6336,6 +6339,7 @@ struct remote_file {
ulonglong xen_m2p(ulonglong);
void read_in_kernel_config(int);
+void read_in_kernel_config_from_vmlinux(int);
#define IKCFG_INIT (0)
#define IKCFG_READ (1)
@@ -6353,6 +6357,7 @@ enum {
#define MAGIC_START "IKCFG_ST"
#define MAGIC_END "IKCFG_ED"
#define MAGIC_SIZE (sizeof(MAGIC_START) - 1)
+#define GZIP_HEADER_SIZE 10
/*
* dev.c
diff --git a/kernel.c b/kernel.c
index b8d3b79..ba5faf8 100644
--- a/kernel.c
+++ b/kernel.c
@@ -78,6 +78,8 @@ static int read_xc_p2m(ulonglong, void *, long);
static void read_p2m(ulong, int, void *);
static int search_mapping_page(ulong, ulong *, ulong *, ulong *);
static void read_in_kernel_config_err(int, char *);
+static void kernel_config_scan(int, char *);
+static int kernel_config_deflat(char *, int, char *, int);
static void BUG_bytes_init(void);
static int BUG_x86(void);
static int BUG_x86_64(void);
@@ -10627,13 +10629,14 @@ static char *ikconfig[] = {
"CONFIG_HZ",
"CONFIG_DEBUG_BUGVERBOSE",
"CONFIG_DEBUG_INFO_REDUCED",
+ "CONFIG_ARM64_VA_BITS",
NULL,
};
void
read_in_kernel_config(int command)
{
- struct syment *sp;
+ struct syment *sp, *sp_end;
int ii, jj, ret, end, found=0;
unsigned long size, bufsz;
uint64_t magic;
@@ -10654,108 +10657,94 @@ read_in_kernel_config(int command)
return;
}
- /* We don't know how large IKCONFIG is, so we start with
- * 32k, if we can't find MAGIC_END assume we didn't read
- * enough, double it and try again.
- */
- ii = 32;
+ if ((sp_end = symbol_search("kernel_config_data_end")) != NULL) {
+ size = sp_end->value - sp->value;
+ if ((buf = (char *)malloc(size)) == NULL)
+ {
+ error(WARNING, "cannot malloc IKCONFIG input buffer\n");
+ return;
+ }
+ if (!readmem(sp->value, KVADDR, buf, size,
+ "kernel_config_data", RETURN_ON_ERROR)) {
+ error(WARNING, "cannot read kernel_config_data\n");
+ goto out2;
+ }
+ bufsz = size;
+ head = buf + GZIP_HEADER_SIZE;
+ } else {
+ /* We don't know how large IKCONFIG is, so we start with
+ * 32k, if we can't find MAGIC_END assume we didn't read
+ * enough, double it and try again.
+ */
+ ii = 32;
again:
- size = ii * 1024;
+ size = ii * 1024;
- if ((buf = (char *)malloc(size)) == NULL) {
- error(WARNING, "cannot malloc IKCONFIG input buffer\n");
- return;
- }
-
- if (!readmem(sp->value, KVADDR, buf, size,
- "kernel_config_data", RETURN_ON_ERROR)) {
- error(WARNING, "cannot read kernel_config_data\n");
- goto out2;
- }
-
- /* Find the start */
- if (strstr(buf, MAGIC_START))
- head = buf + MAGIC_SIZE + 10; /* skip past MAGIC_START and gzip header */
- else {
- /*
- * Later versions put the magic number before the compressed data.
- */
- if (readmem(sp->value - 8, KVADDR, &magic, 8,
- "kernel_config_data MAGIC_START", RETURN_ON_ERROR) &&
- STRNEQ(&magic, MAGIC_START)) {
- head = buf + 10;
- } else {
- error(WARNING, "could not find MAGIC_START!\n");
+ if ((buf = (char *)malloc(size)) == NULL) {
+ error(WARNING, "cannot malloc IKCONFIG input buffer\n");
+ return;
+ }
+
+ if (!readmem(sp->value, KVADDR, buf, size,
+ "kernel_config_data", RETURN_ON_ERROR)) {
+ error(WARNING, "cannot read kernel_config_data\n");
goto out2;
}
- }
- tail = head;
+ /* Find the start */
+ if (strstr(buf, MAGIC_START))
+ head = buf + MAGIC_SIZE + GZIP_HEADER_SIZE; /* skip past MAGIC_START and gzip header */
+ else {
+ /*
+ * Later versions put the magic number before the compressed data.
+ */
+ if (readmem(sp->value - MAGIC_SIZE, KVADDR, &magic, MAGIC_SIZE,
+ "kernel_config_data MAGIC_START", RETURN_ON_ERROR) &&
+ STRNEQ(&magic, MAGIC_START)) {
+ head = buf + GZIP_HEADER_SIZE;
+ } else {
+ error(WARNING, "could not find MAGIC_START!\n");
+ goto out2;
+ }
+ }
- end = strlen(MAGIC_END);
+ tail = head;
+ end = strlen(MAGIC_END);
- /* Find the end*/
- while (tail < (buf + (size - 1))) {
-
- if (strncmp(tail, MAGIC_END, end)==0) {
- found = 1;
- break;
+ /* Find the end*/
+ while (tail < (buf + (size - 1))) {
+ if (strncmp(tail, MAGIC_END, end)==0) {
+ found = 1;
+ break;
+ }
+ tail++;
}
- tail++;
- }
- if (found) {
- bufsz = tail - head;
- size = 10 * bufsz;
- if ((uncomp = (char *)malloc(size)) == NULL) {
- error(WARNING, "cannot malloc IKCONFIG output buffer\n");
- goto out2;
- }
- } else {
- if (ii > 512) {
- error(WARNING, "could not find MAGIC_END!\n");
- goto out2;
+ if (found) {
+ bufsz = tail - head;
} else {
- free(buf);
- ii *= 2;
- goto again;
+ if (ii > 512) {
+ error(WARNING, "could not find MAGIC_END!\n");
+ goto out2;
+ } else {
+ free(buf);
+ ii *= 2;
+ goto again;
+ }
}
}
-
- /* initialize zlib */
- stream.next_in = (Bytef *)head;
- stream.avail_in = (uInt)bufsz;
-
- stream.next_out = (Bytef *)uncomp;
- stream.avail_out = (uInt)size;
-
- stream.zalloc = NULL;
- stream.zfree = NULL;
- stream.opaque = NULL;
-
- ret = inflateInit2(&stream, -MAX_WBITS);
- if (ret != Z_OK) {
- read_in_kernel_config_err(ret, "initialize");
- goto out1;
+ size = 10 * bufsz;
+ if ((uncomp = (char *)malloc(size)) == NULL) {
+ error(WARNING, "cannot malloc IKCONFIG output buffer\n");
+ goto out2;
}
- ret = inflate(&stream, Z_FINISH);
-
- if (ret != Z_STREAM_END) {
- inflateEnd(&stream);
- if (ret == Z_NEED_DICT ||
- (ret == Z_BUF_ERROR && stream.avail_in == 0)) {
- read_in_kernel_config_err(Z_DATA_ERROR, "uncompress");
- goto out1;
- }
- read_in_kernel_config_err(ret, "uncompress");
+ if (kernel_config_deflat(buf, bufsz, uncomp, size) != 0) {
+ error(WARNING, "kernel_config_deflat failed\n");
goto out1;
}
- size = stream.total_out;
-
- ret = inflateEnd(&stream);
pos = uncomp;
@@ -10785,88 +10774,92 @@ again:
goto out1;
}
- do {
- ret = sscanf(pos, "%511[^\n]\n%n", line, &ii);
- if (ret > 0) {
- if ((command == IKCFG_READ) || CRASHDEBUG(8))
- fprintf(fp, "%s\n", line);
+ kernel_config_scan(command, pos);
+out1:
+ free(uncomp);
+out2:
+ free(buf);
- pos += ii;
+ return;
+}
- ln = line;
-
- /* skip leading whitespace */
- while (whitespace(*ln))
- ln++;
-
- /* skip comments -- except when looking for "not set" */
- if (*ln == '#') {
- if (strstr(ln, "CONFIG_DEBUG_BUGVERBOSE") &&
- strstr(ln, "not set"))
- kt->flags |= BUGVERBOSE_OFF;
- if (strstr(ln, "CONFIG_DEBUG_INFO_REDUCED"))
- if (CRASHDEBUG(1))
- error(INFO, "%s\n", ln);
- continue;
- }
+void
+read_in_kernel_config_from_vmlinux(int command)
+{
+ int ret;
+ bfd_vma config_data_addr = 0;
+ bfd_vma config_data_end_addr = 0;
- /* Find '=' */
- if ((head = strchr(ln, '=')) != NULL) {
- *head = '\0';
- val = head + 1;
+ if (!st->bfd && (st->bfd = bfd_openr(pc->namelist, NULL)) == NULL)
+ error(FATAL, "cannot open object file: %s\n", pc->namelist);
- head--;
+ if (!bfd_check_format(st->bfd, bfd_object)) {
+ error(WARNING, "File '%s' is not a supported object file\n", pc->namelist);
+ bfd_close(st->bfd);
+ st->bfd = NULL;
+ return;
+ }
- /* skip trailing whitespace */
- while (whitespace(*head)) {
- *head = '\0';
- head--;
- }
+ asymbol *config_sym = NULL;
+ void *minisyms;
+ unsigned int symsize;
+ long symcount = bfd_read_minisymbols(st->bfd, FALSE, &minisyms, &symsize);
+
+ if (symcount > 0) {
+ for (long i = 0; i < symcount; i++) {
+ asymbol *sym;
+ sym = bfd_minisymbol_to_symbol(st->bfd, FALSE, minisyms + i * symsize, NULL);
+ const char *name = bfd_asymbol_name(sym);
+ if (strcmp(name, "kernel_config_data") == 0) {
+ config_data_addr = bfd_asymbol_value(sym);
+ config_sym = sym;
+ } else if (strcmp(name, "kernel_config_data_end") == 0) {
+ config_data_end_addr = bfd_asymbol_value(sym);
+ config_sym = sym;
+ break;
+ }
+ }
+ free(minisyms);
+ }
- /* skip whitespace */
- while (whitespace(*val))
- val++;
+ if (!config_data_addr || !config_data_end_addr) {
+ error(FATAL, "kernel_config_data/kernel_config_data_end symbol not found in %s\n", pc->namelist);
+ } else {
+ if (CRASHDEBUG(1))
+ error(INFO, "Found config data: (0x%lx - 0x%lx)\n",
+ config_data_addr, config_data_end_addr);
+ }
- } else /* Bad line, skip it */
- continue;
+ char *buf = NULL;
+ size_t size = config_data_end_addr - config_data_addr;
+ size_t bufsz = size;
+ if ((buf = (char *)malloc(size)) == NULL) {
+ error(WARNING, "cannot malloc IKCONFIG input buffer\n");
+ return;
+ }
- if (command != IKCFG_INIT)
- continue;
+ asection *section = config_sym->section;
+ bfd_vma offset = config_data_addr - section->vma;
+ if (CRASHDEBUG(1))
+ error(INFO, "Reading section %s vma 0x%lx, offset x%lx)\n",
+ section->name, section->vma, offset);
+ if (!bfd_get_section_contents(st->bfd, section, buf, offset, size)) {
+ error(FATAL, "Failed to read section contents: %s\n", bfd_errmsg(bfd_get_error()));
+ }
- for (jj = 0; ikconfig[jj]; jj++) {
- if (STREQ(ln, ikconfig[jj])) {
-
- if (STREQ(ln, "CONFIG_NR_CPUS")) {
- kt->kernel_NR_CPUS = atoi(val);
- if (CRASHDEBUG(1))
- error(INFO,
- "CONFIG_NR_CPUS: %d\n",
- kt->kernel_NR_CPUS);
-
- } else if (STREQ(ln, "CONFIG_PGTABLE_4")) {
- machdep->flags |= VM_4_LEVEL;
- if (CRASHDEBUG(1))
- error(INFO, "CONFIG_PGTABLE_4\n");
-
- } else if (STREQ(ln, "CONFIG_HZ")) {
- machdep->hz = atoi(val);
- if (CRASHDEBUG(1))
- error(INFO,
- "CONFIG_HZ: %d\n",
- machdep->hz);
-
- } else if (STREQ(ln, "CONFIG_DEBUG_INFO_REDUCED")) {
- if (STREQ(val, "y")) {
- error(WARNING,
- "CONFIG_DEBUG_INFO_REDUCED=y\n");
- no_debugging_data(INFO);
- }
- }
- }
- }
- }
- } while (ret > 0);
+ char *uncomp = NULL;
+ size = 10 * size;
+ if ((uncomp = (char *)malloc(size)) == NULL) {
+ error(WARNING, "cannot malloc IKCONFIG output buffer\n");
+ goto out2;
+ }
+
+ if (kernel_config_deflat(buf, bufsz, uncomp, size) != 0) {
+ error(WARNING, "kernel_config_deflat failed\n");
+ goto out1;
+ }
+ kernel_config_scan(command, uncomp);
out1:
free(uncomp);
out2:
@@ -10875,6 +10868,133 @@ out2:
return;
}
+static int
+kernel_config_deflat(char *buf, int bufsz, char *uncomp, int size)
+{
+ int ret = -1;
+
+ /* initialize zlib */
+ z_stream stream;
+ stream.next_in = (Bytef *)buf + GZIP_HEADER_SIZE;
+ stream.avail_in = (uInt)bufsz;
+ stream.next_out = (Bytef *)uncomp;
+ stream.avail_out = (uInt)size;
+ stream.zalloc = NULL;
+ stream.zfree = NULL;
+ stream.opaque = NULL;
+
+ ret = inflateInit2(&stream, -MAX_WBITS);
+ if (ret != Z_OK) {
+ read_in_kernel_config_err(ret, "initialize");
+ return -1;
+ }
+
+ ret = inflate(&stream, Z_FINISH);
+
+ if (ret != Z_STREAM_END) {
+ inflateEnd(&stream);
+ if (ret == Z_NEED_DICT ||
+ (ret == Z_BUF_ERROR && stream.avail_in == 0)) {
+ read_in_kernel_config_err(Z_DATA_ERROR, "uncompress");
+ return -1;
+ }
+ read_in_kernel_config_err(ret, "uncompress");
+ return -1;
+ }
+ size = stream.total_out;
+ ret = inflateEnd(&stream);
+
+ return ret;
+}
+
+static void
+kernel_config_scan(int command, char *pos)
+{
+ int ii, jj, ret;
+ char *ln, *head, *val;
+ char line[512];
+
+ while ((ret = sscanf(pos, "%511[^\n]\n%n", line, &ii)) > 0) {
+ if ((command == IKCFG_READ) || CRASHDEBUG(8))
+ fprintf(fp, "%s\n", line);
+
+ pos += ii;
+ ln = line;
+
+ /* skip leading whitespace */
+ while (whitespace(*ln))
+ ln++;
+
+ /* skip comments -- except when looking for "not set" */
+ if (*ln == '#') {
+ if (strstr(ln, "CONFIG_DEBUG_BUGVERBOSE") &&
+ strstr(ln, "not set"))
+ kt->flags |= BUGVERBOSE_OFF;
+ if (strstr(ln, "CONFIG_DEBUG_INFO_REDUCED"))
+ if (CRASHDEBUG(1))
+ error(INFO, "%s\n", ln);
+ continue;
+ }
+
+ /* Find '=' */
+ if ((head = strchr(ln, '=')) != NULL) {
+ *head = '\0';
+ val = head + 1;
+ head--;
+
+ /* skip trailing whitespace */
+ while (whitespace(*head)) {
+ *head = '\0';
+ head--;
+ }
+
+ /* skip whitespace */
+ while (whitespace(*val))
+ val++;
+ } else /* Bad line, skip it */
+ continue;
+
+ if (command != IKCFG_INIT)
+ continue;
+
+ for (jj = 0; ikconfig[jj]; jj++) {
+ if (STREQ(ln, ikconfig[jj])) {
+
+ if (STREQ(ln, "CONFIG_NR_CPUS")) {
+ kt->kernel_NR_CPUS = atoi(val);
+ if (CRASHDEBUG(1))
+ error(INFO,
+ "CONFIG_NR_CPUS: %d\n",
+ kt->kernel_NR_CPUS);
+ } else if (STREQ(ln, "CONFIG_PGTABLE_4")) {
+ machdep->flags |= VM_4_LEVEL;
+ if (CRASHDEBUG(1))
+ error(INFO, "CONFIG_PGTABLE_4\n");
+
+ } else if (STREQ(ln, "CONFIG_HZ")) {
+ machdep->hz = atoi(val);
+ if (CRASHDEBUG(1))
+ error(INFO,
+ "CONFIG_HZ: %d\n",
+ machdep->hz);
+ } else if (STREQ(ln, "CONFIG_DEBUG_INFO_REDUCED")) {
+ if (STREQ(val, "y")) {
+ error(WARNING,
+ "CONFIG_DEBUG_INFO_REDUCED=y\n");
+ no_debugging_data(INFO);
+ }
+ } else if (STREQ(ln, "CONFIG_ARM64_VA_BITS")) {
+ st->CONFIG_ARM64_VA_BITS = atol(val);
+ if (CRASHDEBUG(1))
+ error(INFO,
+ "CONFIG_ARM64_VA_BITS: %lu\n",
+ st->CONFIG_ARM64_VA_BITS);
+ }
+ }
+ }
+ }
+}
+
static void
read_in_kernel_config_err(int e, char *msg)
{
diff --git a/main.c b/main.c
index 71bcc15..a93d4e5 100644
--- a/main.c
+++ b/main.c
@@ -707,6 +707,7 @@ main(int argc, char **argv)
cmdline_init();
mem_init();
hq_init();
+ read_in_kernel_config_from_vmlinux(IKCFG_INIT);
machdep_init(PRE_SYMTAB);
symtab_init();
paravirt_init();
diff --git a/symbols.c b/symbols.c
index e30fafe..358df13 100644
--- a/symbols.c
+++ b/symbols.c
@@ -206,7 +206,7 @@ symtab_init(void)
asymbol *sort_x;
asymbol *sort_y;
- if ((st->bfd = bfd_openr(pc->namelist, NULL)) == NULL)
+ if (!st->bfd && (st->bfd = bfd_openr(pc->namelist, NULL)) == NULL)
error(FATAL, "cannot open object file: %s\n", pc->namelist);
if (!bfd_check_format_matches(st->bfd, bfd_object, &matching))
--
2.43.0
1 day, 19 hours
[PATCH] Use CC env var to get compiler version
by kelefa.sane@smile.fr
From: Kéléfa Sané <kelefa.sane(a)smile.fr>
The source file build_data.c, generated at compile time, defines a
variable compiler_version whose value is obtained by running the "gcc --version"
command. This call retrieves the native gcc compiler installed on the host build
machine, which is not necessarily the compiler used to build the project (e.g.
when cross-compiling).
The CC environment variable commonly used in Makefile projects defines the
compiler to use for the build, so it is the appropriate way to retrieve the
compiler version when the CC environment variable is defined.
Signed-off-by: Kéléfa Sané <kelefa.sane(a)smile.fr>
---
configure.c | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/configure.c b/configure.c
index 4668c9a..a20e174 100644
--- a/configure.c
+++ b/configure.c
@@ -1362,7 +1362,17 @@ make_build_data(char *target)
fp1 = popen("date", "r");
fp2 = popen("id", "r");
- fp3 = popen("gcc --version", "r");
+
+ const char *cc_env = getenv("CC");
+ if(NULL == cc_env) {
+ fp3 = popen("gcc --version", "r");
+ }
+ else {
+ char compiler_version_cmd[512];
+
+ snprintf(compiler_version_cmd, sizeof(compiler_version_cmd)-1, "%s --version", cc_env);
+ fp3 = popen(compiler_version_cmd, "r");
+ }
if ((fp4 = fopen("build_data.c", "w")) == NULL) {
perror("build_data.c");
2 days, 20 hours
[PATCH] Fix the "ps -m" command shows wrong duration of RU task
by Kenneth Yin
The RU/TASK_RUNNING state means the task is runnable.
It is either currently running or on a run queue waiting to run.
Currently, the crash tool uses the "rq_clock - sched_info->last_arrival" formula to
calculate the duration of task in RU state. This is for the scenario of a task running on a CPU.
But for the scenario of a task waiting in the CPU run queue (due to some reason
for example cfs/rt queue throttled), this formula could cause misunderstanding.
For example:
[ 220 10:36:38.026] [RU] PID: 12345 TASK: ffff8d674ab6b180 CPU: 1 COMMAND: "task"
Looking closer:
crash> rq.clock ffff8de438a5acc0
clock = 87029229985307234,
crash> task -R sched_info,se.exec_start
PID: 12345 TASK: ffff8d674ab6b180 CPU: 1 COMMAND: "task"
sched_info = {
pcount = 33,
run_delay = 0,
last_arrival = 67983031958439673,
last_queued = 87029224561119369
},
se.exec_start = 67983031958476937,
67983031 67983031 87029224 87029229
|<- running on CPU ->| <- IN ->|<- waiting in queue ->|
For this scenario, the "task" was waiting in the run queue of the CPU only for 5 seconds,
we should use the "rq_clock - sched_info->last_queued" formula.
We can trust sched_info->last_queued as it is only set when the task enters the CPU run queue.
Furthermore, when the task starts running on a CPU or is dequeued from the CPU run queue, it is reset to 0.
Therefore, my idea is simple:
If a task is in the RU state and sched_info->last_queued has a value (!= 0),
it means this task is waiting in the run queue, so use "rq_clock - sched_info->last_queued".
Otherwise, if a task is in the RU state, sched_info->last_queued = 0,
and sched_info->last_arrival has a value (it must), it means this task is running on the CPU, so
use "rq_clock - sched_info->last_arrival".
Signed-off-by: Kenneth Yin <kyin(a)redhat.com>
---
defs.h | 1 +
symbols.c | 2 ++
task.c | 21 +++++++++++++++------
3 files changed, 18 insertions(+), 6 deletions(-)
diff --git a/defs.h b/defs.h
index 4cf169c..66f5ce4 100644
--- a/defs.h
+++ b/defs.h
@@ -1787,6 +1787,7 @@ struct offset_table { /* stash of commonly-used offsets */
long vcpu_struct_rq;
long task_struct_sched_info;
long sched_info_last_arrival;
+ long sched_info_last_queued;
long page_objects;
long kmem_cache_oo;
long char_device_struct_cdev;
diff --git a/symbols.c b/symbols.c
index e30fafe..fb5035f 100644
--- a/symbols.c
+++ b/symbols.c
@@ -9930,6 +9930,8 @@ dump_offset_table(char *spec, ulong makestruct)
OFFSET(sched_rt_entity_run_list));
fprintf(fp, " sched_info_last_arrival: %ld\n",
OFFSET(sched_info_last_arrival));
+ fprintf(fp, " sched_info_last_queued: %ld\n",
+ OFFSET(sched_info_last_queued));
fprintf(fp, " task_struct_thread_info: %ld\n",
OFFSET(task_struct_thread_info));
fprintf(fp, " task_struct_stack: %ld\n",
diff --git a/task.c b/task.c
index 3bafe79..f5386ac 100644
--- a/task.c
+++ b/task.c
@@ -332,9 +332,12 @@ task_init(void)
MEMBER_OFFSET_INIT(task_struct_last_run, "task_struct", "last_run");
MEMBER_OFFSET_INIT(task_struct_timestamp, "task_struct", "timestamp");
MEMBER_OFFSET_INIT(task_struct_sched_info, "task_struct", "sched_info");
- if (VALID_MEMBER(task_struct_sched_info))
+ if (VALID_MEMBER(task_struct_sched_info)) {
MEMBER_OFFSET_INIT(sched_info_last_arrival,
"sched_info", "last_arrival");
+ MEMBER_OFFSET_INIT(sched_info_last_queued,
+ "sched_info", "last_queued");
+ }
if (VALID_MEMBER(task_struct_last_run) ||
VALID_MEMBER(task_struct_timestamp) ||
VALID_MEMBER(sched_info_last_arrival)) {
@@ -6035,7 +6038,7 @@ ulonglong
task_last_run(ulong task)
{
ulong last_run;
- ulonglong timestamp;
+ ulonglong timestamp,last_queued;
timestamp = 0;
fill_task_struct(task);
@@ -6047,10 +6050,16 @@ task_last_run(ulong task)
} else if (VALID_MEMBER(task_struct_timestamp))
timestamp = tt->last_task_read ? ULONGLONG(tt->task_struct +
OFFSET(task_struct_timestamp)) : 0;
- else if (VALID_MEMBER(sched_info_last_arrival))
- timestamp = tt->last_task_read ? ULONGLONG(tt->task_struct +
- OFFSET(task_struct_sched_info) +
- OFFSET(sched_info_last_arrival)) : 0;
+ else if (VALID_MEMBER(sched_info_last_queued))
+ last_queued = ULONGLONG(tt->task_struct +
+ OFFSET(task_struct_sched_info) +
+ OFFSET(sched_info_last_queued));
+ if (last_queued != 0) {
+ timestamp = tt->last_task_read ? last_queued : 0;
+ } else if (VALID_MEMBER(sched_info_last_arrival))
+ timestamp = tt->last_task_read ? ULONGLONG(tt->task_struct +
+ OFFSET(task_struct_sched_info) +
+ OFFSET(sched_info_last_arrival)) : 0;
return timestamp;
}
--
2.31.1
1 week
[PATCH] Fix incorrect task state during exit
by Stephen Brennan
task_state() assumes that exit_state is an unsigned long, when in
reality, it has been declared as an int since 97dc32cdb1b53 ("reduce
size of task_struct on 64-bit machines"), in Linux 2.6.22. So on 64-bit
machines, task_state() reads 8 bytes rather than 4, and gets the wrong
exit_state value by including the next field.
This has gone unnoticed because directly after exit_state comes
exit_code, which is generally zero while the task is alive. When the
exit_code is set, exit_state is usually set not long after. Since
task_state_string() only checks whether exit_state bits are set, it
never notices the presence of the exit code inside of the state.
But this leaves open a window during the process exit, when the
exit_code has been set (in do_exit()), but the exit_state has not (in
exit_notify()). In this case, crash reports a state of "??", but in
reality, the task is still running -- it's just running the exit()
system call. This race window can be long enough to be observed in core
dumps, for example if the mmput() takes a long time.
This should be considered a bug. A task state of "??" or "(unknown)" is
frequently of concern when debugging, as it could indicate that the
state fields had some sort of corruption, and draw the attention of the
debugger. To handle it properly, record the size of exit_state, and read
it conditionally as a UINT or ULONG, just like the state. This ensures
we retain compatibility with kernels before v2.6.22. Whether that is
actually desirable is anybody's guess.
Reported-by: Jeffery Yoder <jeffery.yoder(a)oracle.com>
Signed-off-by: Stephen Brennan <stephen.s.brennan(a)oracle.com>
---
defs.h | 1 +
task.c | 11 +++++++++--
2 files changed, 10 insertions(+), 2 deletions(-)
diff --git a/defs.h b/defs.h
index 4cf169c..58362d0 100644
--- a/defs.h
+++ b/defs.h
@@ -2435,6 +2435,7 @@ struct size_table { /* stash of commonly-used sizes */
long prb_desc;
long wait_queue_entry;
long task_struct_state;
+ long task_struct_exit_state;
long printk_safe_seq_buf_buffer;
long sbitmap_word;
long sbitmap;
diff --git a/task.c b/task.c
index 3bafe79..e07b479 100644
--- a/task.c
+++ b/task.c
@@ -306,6 +306,7 @@ task_init(void)
MEMBER_SIZE_INIT(task_struct_state, "task_struct", "__state");
}
MEMBER_OFFSET_INIT(task_struct_exit_state, "task_struct", "exit_state");
+ MEMBER_SIZE_INIT(task_struct_exit_state, "task_struct", "exit_state");
MEMBER_OFFSET_INIT(task_struct_pid, "task_struct", "pid");
MEMBER_OFFSET_INIT(task_struct_comm, "task_struct", "comm");
MEMBER_OFFSET_INIT(task_struct_next_task, "task_struct", "next_task");
@@ -5965,8 +5966,14 @@ task_state(ulong task)
state = ULONG(tt->task_struct + OFFSET(task_struct_state));
else
state = UINT(tt->task_struct + OFFSET(task_struct_state));
- exit_state = VALID_MEMBER(task_struct_exit_state) ?
- ULONG(tt->task_struct + OFFSET(task_struct_exit_state)) : 0;
+
+ if (VALID_MEMBER(task_struct_exit_state)
+ && SIZE(task_struct_exit_state) == sizeof(ulong))
+ exit_state = ULONG(tt->task_struct + OFFSET(task_struct_exit_state));
+ else if (VALID_MEMBER(task_struct_exit_state))
+ exit_state = UINT(tt->task_struct + OFFSET(task_struct_exit_state));
+ else
+ exit_state = 0;
return (state | exit_state);
}
--
2.43.5
1 week, 2 days
[PATCH] vmware_guestdump: Version 7 support
by Alexey Makhalov
ESXi 9.0 updated the debug.guest format. A CPU architecture type was
introduced and several header fields not used by crash
were moved around. It is version 7 now.
Make the corresponding changes in the debug.guest parser and keep it
backward compatible with older versions.
Fix typos in comments and log messages as well.
Signed-off-by: Alexey Makhalov <alexey.makhalov(a)broadcom.com>
---
vmware_guestdump.c | 48 ++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 44 insertions(+), 4 deletions(-)
diff --git a/vmware_guestdump.c b/vmware_guestdump.c
index 78f37fb..1a6ef9b 100644
--- a/vmware_guestdump.c
+++ b/vmware_guestdump.c
@@ -30,6 +30,7 @@
* 2. Number of Virtual CPUs (4 bytes) } - struct guestdumpheader
* 3. Reserved gap
* 4. Main Memory information - struct mainmeminfo{,_old}
+ * 5. Reserved gap #2. Only in v7+
* (use get_vcpus_offset() to get total size of guestdumpheader)
* vcpus_offset: ---------\
* 1. struct vcpu_state1 \
@@ -111,6 +112,22 @@ struct vcpu_state2 {
uint8_t reserved3[65];
} __attribute__((packed));
+typedef enum {
+ CPU_ARCH_AARCH64,
+ CPU_ARCH_X86,
+} cpu_arch;
+
+/*
+ * Returns the size of reserved gap #2 in the header right after the Main Mem.
+ */
+static inline long
+get_gap2_size(uint32_t version)
+{
+ if (version == 7)
+ return 11;
+ return 0;
+}
+
/*
* Returns the size of the guest dump header.
*/
@@ -128,6 +145,9 @@ get_vcpus_offset(uint32_t version, int mem_holes)
return sizeof(struct guestdumpheader) + 14 + sizeof(struct mainmeminfo);
case 6: /* ESXi 8.0u2 */
return sizeof(struct guestdumpheader) + 15 + sizeof(struct mainmeminfo);
+ case 7: /* ESXi 9.0 */
+ return sizeof(struct guestdumpheader) + 8 + sizeof(struct mainmeminfo) +
+ get_gap2_size(version);
}
return 0;
@@ -155,10 +175,10 @@ get_vcpu_gapsize(uint32_t version)
*
* guestdump (debug.guest) is a simplified version of the *.vmss which does
* not contain a full VM state, but minimal guest state, such as a memory
- * layout and CPUs state, needed for debugger. is_vmware_guestdump()
+ * layout and CPUs state, needed for the debugger. is_vmware_guestdump()
* and vmware_guestdump_init() functions parse guestdump header and
* populate vmss data structure (from vmware_vmss.c). In result, all
- * handlers (except mempry_dump) from vmware_vmss.c can be reused.
+ * handlers (except memory_dump) from vmware_vmss.c can be reused.
*
* debug.guest does not have a dedicated header magic or file format signature
* To probe debug.guest we need to perform series of validations. In addition,
@@ -225,7 +245,8 @@ is_vmware_guestdump(char *filename)
/* vcpu_offset adjustment for mem_holes is required only for version 1. */
vcpus_offset = get_vcpus_offset(hdr.version, mmi.mem_holes);
} else {
- if (fseek(fp, vcpus_offset - sizeof(struct mainmeminfo), SEEK_SET) == -1) {
+ if (fseek(fp, vcpus_offset - sizeof(struct mainmeminfo) - get_gap2_size(hdr.version),
+ SEEK_SET) == -1) {
if (CRASHDEBUG(1))
error(INFO, LOGPRX"Failed to fseek '%s': [Error %d] %s\n",
filename, errno, strerror(errno));
@@ -240,6 +261,25 @@ is_vmware_guestdump(char *filename)
fclose(fp);
return FALSE;
}
+
+ /* Check CPU architecture field. Next 4 bytes after the Main Mem */
+ if (hdr.version >= 7) {
+ cpu_arch arch;
+ if (fread(&arch, sizeof(cpu_arch), 1, fp) != 1) {
+ if (CRASHDEBUG(1))
+ error(INFO, LOGPRX"Failed to read '%s' from file '%s': [Error %d] %s\n",
+ "CPU arch", filename, errno, strerror(errno));
+ fclose(fp);
+ return FALSE;
+ }
+ if (arch != CPU_ARCH_X86) {
+ if (CRASHDEBUG(1))
+ error(INFO,
+ LOGPRX"Invalid or unsupported CPU architecture: %d\n", arch);
+ fclose(fp);
+ return FALSE;
+ }
+ }
}
if (fseek(fp, 0L, SEEK_END) == -1) {
if (CRASHDEBUG(1))
@@ -300,7 +340,7 @@ vmware_guestdump_init(char *filename, FILE *ofp)
if (!machine_type("X86") && !machine_type("X86_64")) {
error(INFO,
- LOGPRX"Invalid or unsupported host architecture for .vmss file: %s\n",
+ LOGPRX"Invalid or unsupported host architecture for .guest file: %s\n",
MACHINE_TYPE);
result = FALSE;
goto exit;
--
2.43.5
1 week, 3 days
[PATCH v2 0/5] gdb multi-stack unwinding support
by Tao Liu
This patchset is based on Alexy's work [1], and is the follow-up of the
previous "gdb stack unwinding support for crash utility" patchset.
Currently the gdb target analyzes only one task at a time and backtraces
only a single contiguous stack until the end of that stack. If stacks were concatenated
during exceptions or interrupts, gdb bt will show only the topmost one.
This patchset introduces multiple-stack support for gdb stack unwinding;
the stacks can be observed as different threads from gdb's perspective. A
short usage is as follows:
'set <PID>' - to switch to a specific task
'gdb info threads' - to see list of in-kernel stacks of this task.
'gdb thread <ID>' - to switch to the stack.
'gdb bt' - to unwind it.
E.g, with the patchset:
crash> bt
PID: 17636 TASK: ffff88032e0742c0 CPU: 11 COMMAND: "kworker/11:4"
#0 [ffff88037fca6b58] machine_kexec at ffffffff8103cef2
#1 [ffff88037fca6ba8] crash_kexec at ffffffff810c9aa3
#2 [ffff88037fca6c70] panic at ffffffff815f0444
...
#9 [ffff88037fca6ec8] do_nmi at ffffffff815fd980
#10 [ffff88037fca6ef0] end_repeat_nmi at ffffffff815fcec1
[exception RIP: memcpy+13]
RIP: ffffffff812f5b1d RSP: ffff88034f2a9728 RFLAGS: 00010046
RAX: ffffc900139fe000 RBX: ffff880374b7a1b0 RCX: 0000000000000030
RBP: ffff88034f2a9778 R8: 000000007fffffff R9: 00000000ffffffff
...
ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018
--- <NMI exception stack> ---
#11 [ffff88034f2a9728] memcpy at ffffffff812f5b1d
#12 [ffff88034f2a9728] mga_dirty_update at ffffffffa024ad2b [mgag200]
#13 [ffff88034f2a9780] mga_imageblit at ffffffffa024ae3f [mgag200]
#14 [ffff88034f2a97a0] bit_putcs at ffffffff813424ef
...
crash> info threads
Id Target Id Frame
* 1 17636 kworker/11:4 (stack 0) crash_setup_regs (oldregs=0x0, newregs=0xffff88037fca6bb0)
2 17636 kworker/11:4 (stack 1) 0xffffffff812f5b1d in memcpy ()
crash> thread 2
crash> gdb bt
#0 0xffffffff812f5b1d in memcpy () at arch/x86/lib/memcpy_64.S:69
...
There are 2 stacks of the current task, and we can list/switch-to/unwind
each stack.
[1]: https://www.mail-archive.com/devel@lists.crash-utility.osci.io/msg01204.html
v1 -> v2: 1) Rebase this patchset onto gdb-16.2 [2].
2) Improved the silent_call_bt() to catch the error FATAL.
[2]: https://www.mail-archive.com/devel@lists.crash-utility.osci.io/msg01354.html
Tao Liu (5):
Add multi-threads support in crash target
Call cmd_bt silently after "set pid"
x86_64: Add gdb multi-stack unwind support
arm64: Add gdb multi-stack unwind support
ppc64: Add gdb multi-stack unwind support
arm64.c | 85 +++++++++++++++++++++++++++++++--
crash_target.c | 49 +++++++++++++++++--
defs.h | 3 +-
gdb_interface.c | 6 +--
kernel.c | 43 +++++++++++++++++
ppc64.c | 70 +++++++++++++++++++++++----
task.c | 4 +-
x86_64.c | 123 +++++++++++++++++++++++++++++++++++++++++++++---
8 files changed, 354 insertions(+), 29 deletions(-)
--
2.47.0
2 weeks, 6 days