----- Original Message -----
On Mon, Nov 23, 2015 at 11:35:38AM -0500, Dave Anderson wrote:
>
>
> ----- Original Message -----
> > > > Would you like me to spin a v4 with this condition added? Or, since
> > > > it
> > > > actually seems to be addressing a non-qemu-generated dump issue,
then
> > > > maybe you just want to submit it as a new patch on top of the qemu
> > > > patch?
> > >
> > > No, I'm cleaning it up now -- there are a couple other gotcha's
> > > w/respect
> > > to backtracing, since we're now going to keep the
> > > ms->panic_task_regs[cpus]
> > > array intact, even if there's no note in some of them. I'm going
to
> > > make
> > > sure it's zero-filled in the not-found case, and I have to make sure
> > > that
> > > it's recognized on both architectures. I'll post the the updated
> > > additions
> > > to arm.c and arm64.c so that you can do a quick test on a legitimate
> > > QEMU dump.
> > >
> > > Might be Monday, I'm not sure...
> >
> > Sounds good to me :-)
> >
> > Thanks,
> > drew
>
> Drew,
>
> Can you sanity-test the attached patch? If it works OK for you, I'll post
> it as-is.
Works. I tested on both arm and aarch64. I still think we should commit
it as two patches though. The qemu one, with its commit message focused
on qemu, and then this one, with its commit message focused on handling
missing prstatus for some cpus for unknown reasons. Actually, I don't
think it should even be possible to be missing prstatus for some cpus in
the qemu case (qemu will always populate them all), so the "dumpfiles
created with qemu..." comment might need some tweaking as well, since the
else-continue is being added to its if-block.
Thanks,
drew
All right, I'll post your v3 patch as-is, and make a follow-up patch
with my additions (plus a tweak to the comment).
Thanks,
Dave
>
> Thanks,
> Dave
>
> diff --git a/arm.c b/arm.c
> index ffc7c06..114a6ae 100644
> --- a/arm.c
> +++ b/arm.c
> @@ -580,8 +580,8 @@ arm_get_crash_notes(void)
>
> buf = GETBUF(SIZE(note_buf));
>
> - if (!(panic_task_regs = malloc(kt->cpus*sizeof(*panic_task_regs))))
> - error(FATAL, "cannot malloc panic_task_regs space\n");
> + if (!(panic_task_regs = calloc((size_t)kt->cpus,
> sizeof(*panic_task_regs))))
> + error(FATAL, "cannot calloc panic_task_regs space\n");
>
> for (i=0;i<kt->cpus;i++) {
>
> @@ -597,6 +597,32 @@ arm_get_crash_notes(void)
> note = (Elf32_Nhdr *)buf;
> p = buf + sizeof(Elf32_Nhdr);
>
> + /*
> + * dumpfiles created with qemu won't have crash_notes, but there will
> + * be elf notes.
> + */
> + if (note->n_namesz == 0 && (DISKDUMP_DUMPFILE() || KDUMP_DUMPFILE()))
{
> + if (DISKDUMP_DUMPFILE())
> + note = diskdump_get_prstatus_percpu(i);
> + else if (KDUMP_DUMPFILE())
> + note = netdump_get_prstatus_percpu(i);
> + if (note) {
> + /*
> + * SIZE(note_buf) accounts for a "final note", which is a
> + * trailing empty elf note header.
> + */
> + long notesz = SIZE(note_buf) - sizeof(Elf32_Nhdr);
> +
> + if (sizeof(Elf32_Nhdr) + roundup(note->n_namesz, 4) +
> + note->n_descsz == notesz)
> + BCOPY((char *)note, buf, notesz);
> + } else {
> + error(WARNING,
> + "cannot find NT_PRSTATUS note for cpu: %d\n", i);
> + continue;
> + }
> + }
> +
> if (note->n_type != NT_PRSTATUS) {
> error(WARNING, "invalid note (n_type != NT_PRSTATUS)\n");
> goto fail;
> @@ -835,6 +861,9 @@ arm_back_trace(struct bt_info *bt)
> static void
> arm_back_trace_cmd(struct bt_info *bt)
> {
> + if (bt->flags & BT_REGS_NOT_FOUND)
> + return;
> +
> if (kt->flags & DWARF_UNWIND)
> unwind_backtrace(bt);
> else
> @@ -1301,12 +1330,13 @@ arm_get_dumpfile_stack_frame(struct bt_info *bt,
> ulong *nip, ulong *ksp)
> {
> const struct machine_specific *ms = machdep->machspec;
>
> - if (!ms->crash_task_regs)
> + if (!ms->crash_task_regs ||
> + (!ms->crash_task_regs[bt->tc->processor].ARM_pc &&
> + !ms->crash_task_regs[bt->tc->processor].ARM_sp)) {
> + bt->flags |= BT_REGS_NOT_FOUND;
> return FALSE;
> + }
>
> - if (!is_task_active(bt->task))
> - return FALSE;
> -
> /*
> * We got registers for panic task from crash_notes. Just return them.
> */
> @@ -1339,11 +1369,9 @@ arm_get_stack_frame(struct bt_info *bt, ulong *pcp,
> ulong *spp)
> else
> ret = arm_get_frame(bt, &ip, &sp);
>
> - if (!ret) {
> + if (!ret)
> error(WARNING, "cannot determine starting stack frame for task %lx\n",
> bt->task);
> - return;
> - }
>
> if (pcp)
> *pcp = ip;
> diff --git a/arm64.c b/arm64.c
> index 06b45c2..2304a4c 100644
> --- a/arm64.c
> +++ b/arm64.c
> @@ -1181,6 +1181,7 @@ arm64_back_trace_cmd(struct bt_info *bt)
> stackframe.pc = bt->hp->eip ?
> bt->hp->eip : GET_STACK_ULONG(bt->hp->esp);
> stackframe.sp = bt->hp->esp + 8;
> + bt->flags &= ~BT_REGS_NOT_FOUND;
> } else {
> stackframe.sp = bt->stkptr;
> stackframe.pc = bt->instptr;
> @@ -1197,6 +1198,9 @@ arm64_back_trace_cmd(struct bt_info *bt)
> return;
> }
>
> + if (bt->flags & BT_REGS_NOT_FOUND)
> + return;
> +
> if (!(bt->flags & BT_KDUMP_ADJUST)) {
> if (bt->flags & BT_USER_SPACE)
> goto complete_user;
> @@ -1336,8 +1340,12 @@ arm64_get_dumpfile_stackframe(struct bt_info *bt,
> struct arm64_stackframe *frame
> struct machine_specific *ms = machdep->machspec;
> struct arm64_pt_regs *ptregs;
>
> - if (!ms->panic_task_regs)
> + if (!ms->panic_task_regs ||
> + (!ms->panic_task_regs[bt->tc->processor].sp &&
> + !ms->panic_task_regs[bt->tc->processor].pc)) {
> + bt->flags |= BT_REGS_NOT_FOUND;
> return FALSE;
> + }
>
> ptregs = &ms->panic_task_regs[bt->tc->processor];
> frame->sp = ptregs->sp;
> @@ -1371,19 +1379,17 @@ static void
> arm64_get_stack_frame(struct bt_info *bt, ulong *pcp, ulong *spp)
> {
> int ret;
> - struct arm64_stackframe stackframe;
> + struct arm64_stackframe stackframe = { 0 };
>
> if (DUMPFILE() && is_task_active(bt->task))
> ret = arm64_get_dumpfile_stackframe(bt, &stackframe);
> else
> ret = arm64_get_stackframe(bt, &stackframe);
>
> - if (!ret) {
> + if (!ret)
> error(WARNING,
> "cannot determine starting stack frame for task %lx\n",
> bt->task);
> - return;
> - }
>
> bt->frameptr = stackframe.fp;
> if (pcp)
> @@ -1811,8 +1817,8 @@ arm64_get_crash_notes(void)
>
> buf = GETBUF(SIZE(note_buf));
>
> - if (!(ms->panic_task_regs = malloc(kt->cpus * sizeof(struct
> arm64_pt_regs))))
> - error(FATAL, "cannot malloc panic_task_regs space\n");
> + if (!(ms->panic_task_regs = calloc((size_t)kt->cpus, sizeof(struct
> arm64_pt_regs))))
> + error(FATAL, "cannot calloc panic_task_regs space\n");
>
> for (i = 0; i < kt->cpus; i++) {
>
> @@ -1828,6 +1834,32 @@ arm64_get_crash_notes(void)
> note = (Elf64_Nhdr *)buf;
> p = buf + sizeof(Elf64_Nhdr);
>
> + /*
> + * dumpfiles created with qemu won't have crash_notes, but there will
> + * be elf notes.
> + */
> + if (note->n_namesz == 0 && (DISKDUMP_DUMPFILE() || KDUMP_DUMPFILE()))
{
> + if (DISKDUMP_DUMPFILE())
> + note = diskdump_get_prstatus_percpu(i);
> + else if (KDUMP_DUMPFILE())
> + note = netdump_get_prstatus_percpu(i);
> + if (note) {
> + /*
> + * SIZE(note_buf) accounts for a "final note", which is a
> + * trailing empty elf note header.
> + */
> + long notesz = SIZE(note_buf) - sizeof(Elf64_Nhdr);
> +
> + if (sizeof(Elf64_Nhdr) + roundup(note->n_namesz, 4) +
> + note->n_descsz == notesz)
> + BCOPY((char *)note, buf, notesz);
> + } else {
> + error(WARNING,
> + "cannot find NT_PRSTATUS note for cpu: %d\n", i);
> + continue;
> + }
> + }
> +
> if (note->n_type != NT_PRSTATUS) {
> error(WARNING, "invalid note (n_type != NT_PRSTATUS)\n");
> goto fail;
> diff --git a/defs.h b/defs.h
> index 8e96461..dc79b9b 100644
> --- a/defs.h
> +++ b/defs.h
> @@ -5048,6 +5048,7 @@ ulong cpu_map_addr(const char *type);
> #define BT_EFRAME_TARGET (0x800000000000ULL)
> #define BT_CPUMASK (0x1000000000000ULL)
> #define BT_SHOW_ALL_REGS (0x2000000000000ULL)
> +#define BT_REGS_NOT_FOUND (0x4000000000000ULL)
> #define BT_SYMBOL_OFFSET (BT_SYMBOLIC_ARGS)
>
> #define BT_REF_HEXVAL (0x1)
> @@ -5721,6 +5722,7 @@ void dump_registers_for_qemu_mem_dump(void);
> void kdump_backup_region_init(void);
> void display_regs_from_elf_notes(int, FILE *);
> void display_ELF_note(int, int, void *, FILE *);
> +void *netdump_get_prstatus_percpu(int);
> #define PRSTATUS_NOTE (1)
> #define QEMU_NOTE (2)
>
> diff --git a/netdump.c b/netdump.c
> index bfa818f..3ff49f0 100644
> --- a/netdump.c
> +++ b/netdump.c
> @@ -2345,6 +2345,27 @@ dump_Elf64_Nhdr(Elf64_Off offset, int store)
> return len;
> }
>
> +void *
> +netdump_get_prstatus_percpu(int cpu)
> +{
> + int online;
> +
> + if ((cpu < 0) || (cpu >= nd->num_prstatus_notes))
> + return NULL;
> +
> + /*
> + * If no cpu mapping was done, then there must be
> + * a one-to-one relationship between the number
> + * of online cpus and the number of notes.
> + */
> + if ((online = get_cpus_online()) &&
> + (online == kt->cpus) &&
> + (online != nd->num_prstatus_notes))
> + return NULL;
> +
> + return nd->nt_prstatus_percpu[cpu];
> +}
> +
> /*
> * Send the request to the proper architecture hander.
> */