handle x86_64 xen code/data relocation
by Itsuro ODA
Hi all,
Recent version of xen (ex. RHEL5.2, 3.2.0) on the x86_64
moves the physical(machine) address of xen code/data area after
the system started up. The start address of this is stored in
'xen_phys_start'. Thus to get a machine address of a xen text symbol
from its virtual address, calculate
"va - __XEN_VIRT_START + xen_phys_start".
The crash and makedumpfile commands need the value of xen_phys_start.
They know the virtual address of the 'xen_phys_start' symbol but
have no way to extract its value.
My first thought is to add the xen_phys_start value to the CRASHINFO ElfNote
section. (Plan A: patch for the xen hypervisor code attached)
It is the smallest modification necessary overall.
On the other hand, there is an opinion that it is better to upgrade
a user package than a hypervisor or kernel package.
The xen_phys_start value can be obtained from /proc/iomem.
-------------------------------------------------------
# cat /proc/iomem
...
7e600000-7f5fffff : Hypervisor code and data *** this line
...
-------------------------------------------------------
So the kexec-tools can handle it theoretically.
Plan B is that kexec-tools adds another ElfNote section which
holds the xen_phys_start value. The attached patch works well,
though I am concerned that it is a bit tricky.
Which plan is better? Or is there a better implementation?
Please comment.
(Note that the crash and makedumpfile modifications are of the same degree
for both plans.)
Thanks.
Itsuro Oda
=== Plan A (modify the xen hypervisor. It is for RHEL5.2 but almost same for other version) ===
--- include/xen/elfcore.h.org 2008-04-17 14:11:41.000000000 +0900
+++ include/xen/elfcore.h 2008-04-17 14:11:57.000000000 +0900
@@ -66,6 +66,7 @@
unsigned long xen_compile_time;
unsigned long tainted;
#ifdef CONFIG_X86
+ unsigned long xen_phys_start;
unsigned long dom0_pfn_to_mfn_frame_list_list;
#endif
} crash_xen_info_t;
--- arch/x86/crash.c.org 2008-04-17 14:12:51.000000000 +0900
+++ arch/x86/crash.c 2008-04-17 14:13:13.000000000 +0900
@@ -102,6 +102,7 @@
hvm_disable();
info = kexec_crash_save_info();
+ info->xen_phys_start = xen_phys_start;
info->dom0_pfn_to_mfn_frame_list_list =
arch_get_pfn_to_mfn_frame_list_list(dom0);
}
================================================================
=== Plan B (modify the kexec-tools. proof of concept version) ===
diff -ru kexec-tools-testing-20080324.org/kexec/arch/x86_64/crashdump-x86_64.c kexec-tools-testing-20080324/kexec/arch/x86_64/crashdump-x86_64.c
--- kexec-tools-testing-20080324.org/kexec/arch/x86_64/crashdump-x86_64.c 2008-03-21 13:16:28.000000000 +0900
+++ kexec-tools-testing-20080324/kexec/arch/x86_64/crashdump-x86_64.c 2008-04-22 15:15:08.000000000 +0900
@@ -73,6 +73,25 @@
return -1;
}
+static int get_hypervisor_paddr(struct kexec_info *info)
+{
+ uint64_t start;
+
+ if (!xen_present())
+ return 0;
+
+ if (parse_iomem_single("Hypervisor code and data\n", &start, NULL) == 0) {
+ info->hypervisor_paddr_start = start;
+#ifdef DEBUG
+ printf("kernel load physical addr start = 0x%016Lx\n", start);
+#endif
+ return 0;
+ }
+
+ fprintf(stderr, "Cannot determine hypervisor physical load addr\n");
+ return -1;
+}
+
/* Retrieve info regarding virtual address kernel has been compiled for and
* size of the kernel from /proc/kcore. Current /proc/kcore parsing from
* from kexec-tools fails because of malformed elf notes. A kernel patch has
@@ -581,6 +600,9 @@
if (get_kernel_paddr(info))
return -1;
+ if (get_hypervisor_paddr(info))
+ return -1;
+
if (get_kernel_vaddr_and_size(info))
return -1;
@@ -620,6 +642,9 @@
*/
elfcorehdr = add_buffer(info, tmp, sz, 16*1024, align, min_base,
max_addr, -1);
+ if (info->hypervisor_paddr_start && xen_present()) {
+ *(info->hypervisor_paddr_loc) += elfcorehdr;
+ }
if (delete_memmap(memmap_p, elfcorehdr, sz) < 0)
return -1;
cmdline_add_memmap(mod_cmdline, memmap_p);
diff -ru kexec-tools-testing-20080324.org/kexec/crashdump.c kexec-tools-testing-20080324/kexec/crashdump.c
--- kexec-tools-testing-20080324.org/kexec/crashdump.c 2008-03-21 13:16:28.000000000 +0900
+++ kexec-tools-testing-20080324/kexec/crashdump.c 2008-04-22 15:33:47.000000000 +0900
@@ -36,8 +36,10 @@
#define FUNC crash_create_elf64_headers
#define EHDR Elf64_Ehdr
#define PHDR Elf64_Phdr
+#define NHDR Elf64_Nhdr
#include "crashdump-elf.c"
#undef ELF_WIDTH
+#undef NHDR
#undef PHDR
#undef EHDR
#undef FUNC
@@ -46,8 +48,10 @@
#define FUNC crash_create_elf32_headers
#define EHDR Elf32_Ehdr
#define PHDR Elf32_Phdr
+#define NHDR Elf32_Nhdr
#include "crashdump-elf.c"
#undef ELF_WIDTH
+#undef NHDR
#undef PHDR
#undef EHDR
#undef FUNC
diff -ru kexec-tools-testing-20080324.org/kexec/crashdump-elf.c kexec-tools-testing-20080324/kexec/crashdump-elf.c
--- kexec-tools-testing-20080324.org/kexec/crashdump-elf.c 2008-01-11 12:13:48.000000000 +0900
+++ kexec-tools-testing-20080324/kexec/crashdump-elf.c 2008-04-22 15:35:16.000000000 +0900
@@ -1,6 +1,6 @@
-#if !defined(FUNC) || !defined(EHDR) || !defined(PHDR)
-#error FUNC, EHDR and PHDR must be defined
+#if !defined(FUNC) || !defined(EHDR) || !defined(PHDR) || !defined(NHDR)
+#error FUNC, EHDR, PHDR and NHDR must be defined
#endif
#if (ELF_WIDTH == 64)
@@ -37,6 +37,7 @@
uint64_t vmcoreinfo_addr, vmcoreinfo_len;
int has_vmcoreinfo = 0;
int (*get_note_info)(int cpu, uint64_t *addr, uint64_t *len);
+ int has_hypervisor_paddr_start = 0;
if (xen_present())
nr_cpus = xen_get_nr_phys_cpus();
@@ -78,6 +79,11 @@
sz += sizeof(PHDR);
}
+ if (info->hypervisor_paddr_start && xen_present()) {
+ sz += sizeof(PHDR) + sizeof(NHDR) + 4 + sizeof(unsigned long);
+ has_hypervisor_paddr_start = 1;
+ }
+
/*
* Make sure the ELF core header is aligned to at least 1024.
* We do this because the secondary kernel gets the ELF core
@@ -168,6 +174,22 @@
dbgprintf_phdr("vmcoreinfo header", phdr);
}
+ if (has_hypervisor_paddr_start) {
+ phdr = (PHDR *) bufp;
+ bufp += sizeof(PHDR);
+ phdr->p_type = PT_NOTE;
+ phdr->p_flags = 0;
+ phdr->p_offset = phdr->p_paddr = 0;
+ phdr->p_vaddr = 0;
+ phdr->p_filesz = phdr->p_memsz = sizeof(NHDR) + 4 + sizeof(unsigned long);
+ phdr->p_align = 0;
+
+ (elf->e_phnum)++;
+ dbgprintf_phdr("hypervisor phys addr header", phdr);
+
+ info->hypervisor_paddr_loc = (unsigned long *)&phdr->p_offset;
+ }
+
/* Setup an PT_LOAD type program header for the region where
* Kernel is mapped if info->kern_size is non-zero.
*/
@@ -225,6 +247,24 @@
(elf->e_phnum)++;
dbgprintf_phdr("Elf header", phdr);
}
+
+ if (has_hypervisor_paddr_start) {
+ NHDR *nhdr;
+ unsigned int offset = (void *)bufp - *buf;
+
+ nhdr = (NHDR *) bufp;
+ bufp += sizeof(NHDR);
+ nhdr->n_namesz = 4;
+ nhdr->n_descsz = sizeof(unsigned long);
+ nhdr->n_type = 0x1000003;
+ memcpy(bufp, "Xen", 4);
+ bufp += 4;
+ *((unsigned long *)bufp) = info->hypervisor_paddr_start;
+ bufp += sizeof(unsigned long);
+
+ *(info->hypervisor_paddr_loc) = offset;
+ }
+
return 0;
}
diff -ru kexec-tools-testing-20080324.org/kexec/kexec.h kexec-tools-testing-20080324/kexec/kexec.h
--- kexec-tools-testing-20080324.org/kexec/kexec.h 2008-03-21 13:16:28.000000000 +0900
+++ kexec-tools-testing-20080324/kexec/kexec.h 2008-04-22 15:08:57.000000000 +0900
@@ -123,6 +123,8 @@
unsigned long kern_vaddr_start;
unsigned long kern_paddr_start;
unsigned long kern_size;
+ unsigned long hypervisor_paddr_start;
+ unsigned long *hypervisor_paddr_loc;
};
void usage(void);
======================================================================================
--
Itsuro ODA <oda(a)valinux.co.jp>
16 years, 6 months
Re: source line numbers and modules (on x86_64)
by Mike Snitzer
Hi,
I searched the archives and found that you've discussed an issue I'm
seeing with x86_64 kernels where crash doesn't have line numbers for
modules' symbols:
https://www.redhat.com/archives/crash-utility/2008-January/msg00021.html
I'm using crash-4.0-6.3 on a RHEL5U1 x86_64 system with a custom
2.6.22.19 kernel. Given that the RHEL5U1 x86_64 kernels clearly do
provide accurate line numbers for modules, has anyone identified how
that is? I have to believe the redhat kernel is patched to fix this
issue.
I looked over the various redhat patches that are applied to RHEL5's
2.6.18 sources but can't see a patch that stands out as specifically
addressing this x86_64 issue. But I could easily be overlooking some
patch.
please advise, thanks.
Mike
ps. please cc me as I've not yet been able to join the list
16 years, 7 months
crash version 4.0-6.3 is available
by Dave Anderson
- Support for Fedora FC9 kernels containing the linux-2.6.utrace.patch,
which removes the task_struct.parent member. Without the patch, the
crash session fails during initialization with the error message:
"crash: invalid structure member offset: task_struct_parent".
(anderson(a)redhat.com)
- Available in Fedora Rawhide devel branch:
build devel:crash-4.0-6.3
http://koji.fedoraproject.org/koji/buildinfo?buildID=47600
- Further scalability improvements to the "search -k" mechanisms.
(anderson(a)redhat.com)
- Changed ppc64 manner of determining the number of cpus to first check
the cpu_present_map, and only if that doesn't exist, continue to use
the cpu_online_map. Without the patch, depending upon which cpus
were offline, crash sessions could fail during initialization with
the error message: "crash: cannot determine idle task addresses from
init_tasks[] or runqueues[]". (anderson(a)redhat.com)
- Fix/workaround for the ppc64 "bt" command on panic/active tasks when
run against dumpfiles whose kernel had crashed with one or more
cpus offline. Without the patch, the "bt" command could cause a
segmentation violation, or fail because the starting stack location
and instruction pointer were invalid. With the patch, an error
message will be displayed, indicating that the NT_PRSTATUS note for
that task could not be determined. (anderson(a)redhat.com)
- Added support for vtop translation of 1MB large pages available on
new z10 (s390x) systems. (holzheu(a)linux.vnet.ibm.com)
- Prevent misleading init-time warning message for s390/s390x when
verifying the vmlinux file with respect to the host machine type.
Without the patch, this message would appear when running on s390
or s390x machines: "WARNING: machine type mismatch: crash utility:
S390X /usr/lib/debug/lib/modules/2.6.18-86.el5/vmlinux: (unknown)"
(holzheu(a)linux.vnet.ibm.com)
- Minor documentation fix to crash.8 man page, moving the "wr" command
from being munged into the "whatis" description into its own list
entry. (yamato(a)redhat.com)
- Support for running against an x86 xen-syms hypervisor binary based
upon xen 3.1.2 or later. Without the patch, the session would fail
to recognize that it was PAE, and "bt" commands on the non-active
task would fail with the error messages "bt: cannot resolve stack
trace" and "bt: invalid structure size: task_struct".
(oda(a)valinux.co.jp, anderson(a)redhat.com)
- Support for running against an x86_64 xen-syms hypervisor binary
based upon xen 3.1.2 or later. Without the patch, the session would
fail during initialization with the error message: "crash: cannot
resolve idle_pg_table_4". In addition, the x86_64 xen-syms
hypervisor is now relocatable, but the kdump vmcore does not
(currently) export the base physical address of the relocated
hypervisor text and static data. Without that knowledge, the crash
utility cannot make virtual to physical address translations, and
therefore cannot navigate through the vmcore. To address that
shortcoming, a patch is required for either the xen hypervisor code
or the kexec-tools package to export the value of the hypervisor's
"xen_phys_start" symbol to the vmcore. Until such time, however, a
workaround has been put in place to pass the value with a new command
line option that is invoked like so:
# crash --xen_phys_start <address> xen-syms vmcore
The value of the xen_phys_start <address> argument can be
determined in two ways, either from /proc/iomem on the live
system running the dom0 kernel that generated the kdump, or by
running crash on the target vmcore using the dom0 vmlinux file.
For example, on this system, the <address> argument would be
3ee00000:
# cat /proc/iomem | grep Hypervisor
3ee00000-3fdfffff : Hypervisor code and data
#
Alternatively, the vmcore file in this example indicates that the
<address> argument would be 0x3f000000:
# crash vmlinux vmcore
...
crash> px xen_hypervisor_res
xen_hypervisor_res = $3 = {
start = 0x3f000000,
end = 0x3fffffff,
name = 0xffffffff8049ab72 "Hypervisor code and data",
flags = 0x80000200,
parent = 0xffff880000001180,
sibling = 0x0,
child = 0xffff8800000000a8
}
If the --xen_phys_start command line option is not used, the session
will fail during initialization. However there will be a warning
message preceding the failure indicating: "WARNING: This hypervisor
is relocatable; if initialization fails below, try using the
--xen_phys_start <address> command line option". Eventually the
value of the hypervisor's "xen_phys_start" will be passed in the
vmcore header, obviating the need for this workaround.
(oda(a)valinux.co.jp, anderson(a)redhat.com)
Download from: http://people.redhat.com/anderson
16 years, 7 months
the kmem get some errors
by Yang Zhiguo
hi,
When I use the kmem command on an IA64 system with RHEL5.1 as follows, there are
some errors.
Can anyone explain them?
crash> kmem -c
kmem: cannot determine page cache size
crash> kmem -C
kmem: address_space page cache radix tree not supported
crash> kmem -la
kmem: active_list does not exist in this kernel
crash> kmem -La
kmem: active_list does not exist in this kernel
crash> kmem -li
kmem: inactive_list does not exist in this kernel
crash> kmem -Li
kmem: inactive_list does not exist in this kernel
crash> kmem -P e0000001843760c0
<segmentation violation>
Best Regards,
yang
16 years, 7 months
a strange thing
by Yang Zhiguo
hi, all
When i used crash in a bash script file, a strange thing happened.
step 1:
I created a bash script file: test.sh and put the following line to the
file.
crash -i inputfile > stdout
[root@rhel51rc2 gid]# echo "crash -i inputfile > stdout" > test.sh
step 2:
I ran the bash script file.
But I forgot to create the inputfile, so the test.sh process hung.
[root@rhel51rc2 gid]# chmod 777 test.sh
[root@rhel51rc2 gid]# ./test.sh
step 3:
I killed the process in another terminal.
[root@rhel51rc2 gid]# ps aux | grep crash
root 2290 1.3 2.6 156064 89648 pts/3 S+ 16:53 0:03 crash
-i inputfile
root 2310 0.0 0.0 61280 1792 pts/4 S+ 16:56 0:00 grep crash
[root@rhel51rc2 gid]# kill -9 2290
[root@rhel51rc2 gid]#
step 4:
I continued to use the first terminal. A strange thing happened: I
could not see
the commands (e.g. ls) that I typed, but when I pressed Enter, they were
executed. What's wrong?
[root@rhel51rc2 gid]# ./test.sh
./test.sh: line 1: 2290 Killed crash -i inputfile >stdout
[root@rhel51rc2 gid]# a.out CVS gid.c gid.stp Makefile run.sh
stdout test.sh
[root@rhel51rc2 gid]#
Best Regards,
yang
16 years, 7 months
some updates of xencrash
by Itsuro ODA
Hi,
Attached are some fixes which are necessary in order to use xencrash
with newer versions of xen (after 3.1.0, e.g. 3.1.2, 3.2.0).
* x86
- add some symbols which are used to know the boundary
condition at the tracing in 'bt'.
- fix for the symbol 'idle_pg_table_l3' going away.
* x86_64
- fix for the virtual address area of xen code/data is added.
- fix for the symbol 'idle_pg_table_l4' going away.
- add the option '--xen_phys_start_mfn='
newer version of xen (ex. RHEL5.2, 3.2.0) moves the
physical(machine) address of xen code/data area after
the system started up.
The relocated physical(machine) address of xen code/data
area is necessary to use xencrash.
The value can be got by 'cat /proc/iomem' under dom0.
-------------------------------------------------------
# cat /proc/iomem
...
7e600000-7f5fffff : Hypervisor code and data *** this line
...
-------------------------------------------------------
ex. to use xencrash:
# crash --xen_phys_start_mfn=0x7e600 xen-syms vmcore
- extract 'xen_phys_start_mfn' from the XEN_ELFNOTE_CRASH_INFO
ElfNote.
This is experimental code because it is necessary to modify
the xen to add the xen_phys_start_mfn value in the ElfNote
at crash time. (the patch is also attached in this mail.)
A sample dump is available at the following URL:
http://people.valinux.co.jp/~oda/x86_64_dump_080417.tar.gz
This patch is for crash-4.0-6.2.
Thanks.
Itsuro Oda
=== for x86 ===
--- lkcd_x86_trace.c.org 2008-04-17 10:13:05.000000000 +0900
+++ lkcd_x86_trace.c 2008-04-17 10:13:17.000000000 +0900
@@ -1423,6 +1423,7 @@
if (XEN_HYPER_MODE()) {
func_name = kl_funcname(pc);
if (STREQ(func_name, "idle_loop") || STREQ(func_name, "hypercall")
+ || STREQ(func_name, "tracing_off")
|| STREQ(func_name, "handle_exception")) {
UPDATE_FRAME(func_name, pc, 0, sp, bp, asp, 0, 0, bp - sp, 0);
return(trace->nframes);
@@ -1682,6 +1683,7 @@
if (func_name && XEN_HYPER_MODE()) {
if (STREQ(func_name, "continue_nmi") ||
STREQ(func_name, "vmx_asm_vmexit_handler") ||
+ STREQ(func_name, "handle_nmi_mce") ||
STREQ(func_name, "deferred_nmi")) {
/* Interrupt frame */
sp = curframe->fp + 4;
--- x86.c.org 2008-04-17 10:29:21.000000000 +0900
+++ x86.c 2008-04-17 10:31:51.000000000 +0900
@@ -2846,7 +2846,10 @@
*paddr = kvaddr - DIRECTMAP_VIRT_START;
return TRUE;
}
- pgd = (ulonglong *)symbol_value("idle_pg_table_l3");
+ if (symbol_exists("idle_pg_table_l3"))
+ pgd = (ulonglong *)symbol_value("idle_pg_table_l3");
+ else
+ pgd = (ulonglong *)symbol_value("idle_pg_table");
} else {
if (!vt->vmalloc_start) {
*paddr = VTOP(kvaddr);
@@ -4965,7 +4968,8 @@
break;
case PRE_GDB:
- if (symbol_exists("idle_pg_table_l3")) {
+ if (symbol_exists("create_pae_xen_mappings") ||
+ symbol_exists("idle_pg_table_l3")) {
machdep->flags |= PAE;
PGDIR_SHIFT = PGDIR_SHIFT_3LEVEL;
PTRS_PER_PTE = PTRS_PER_PTE_3LEVEL;
=== for x86_64 ===
--- defs.h.org 2008-04-17 15:03:14.000000000 +0900
+++ defs.h 2008-04-17 15:41:10.000000000 +0900
@@ -2162,10 +2162,13 @@
#define FILL_PML4_HYPER() { \
if (!machdep->machspec->last_pml4_read) { \
- readmem(symbol_value("idle_pg_table_4"), KVADDR, \
- machdep->machspec->pml4, PAGESIZE(), "idle_pg_table_4", \
+ unsigned long idle_pg_table = \
+ symbol_exists("idle_pg_table_4") ? symbol_value("idle_pg_table_4") : \
+ symbol_value("idle_pg_table"); \
+ readmem(idle_pg_table, KVADDR, \
+ machdep->machspec->pml4, PAGESIZE(), "idle_pg_table", \
FAULT_ON_ERROR); \
- machdep->machspec->last_pml4_read = symbol_value("idle_pg_table_4"); \
+ machdep->machspec->last_pml4_read = idle_pg_table; \
}\
}
@@ -4081,6 +4084,8 @@
void get_kdump_regs(struct bt_info *, ulong *, ulong *);
void xen_kdump_p2m_mfn(char *);
int is_sadump_xen(void);
+void set_xen_phys_start_mfn(char *);
+ulong xen_phys_start_mfn(void);
/*
* diskdump.c
--- main.c.org 2008-04-17 15:26:37.000000000 +0900
+++ main.c 2008-04-17 15:45:18.000000000 +0900
@@ -48,6 +48,7 @@
{"no_ikconfig", 0, 0, 0},
{"hyper", 0, 0, 0},
{"p2m_mfn", required_argument, 0, 0},
+ {"xen_phys_start_mfn", required_argument, 0, 0},
{"zero_excluded", 0, 0, 0},
{"no_panic", 0, 0, 0},
{"more", 0, 0, 0},
@@ -155,6 +156,9 @@
else if (STREQ(long_options[option_index].name, "p2m_mfn"))
xen_kdump_p2m_mfn(optarg);
+ else if (STREQ(long_options[option_index].name, "xen_phys_start_mfn"))
+ set_xen_phys_start_mfn(optarg);
+
else if (STREQ(long_options[option_index].name, "zero_excluded"))
*diskdump_flags |= ZERO_EXCLUDED;
--- netdump.c.org 2008-04-17 15:09:06.000000000 +0900
+++ netdump.c 2008-04-17 15:48:44.000000000 +0900
@@ -1521,6 +1521,8 @@
*/
if (!nd->xen_kdump_data->p2m_mfn)
nd->xen_kdump_data->p2m_mfn = *(uptr+(words-1));
+ if (words > 9 && !nd->xen_kdump_data->xen_phys_start_mfn)
+ nd->xen_kdump_data->xen_phys_start_mfn = *(uptr+(words-2));
}
}
break;
@@ -1724,6 +1726,8 @@
*/
if (!nd->xen_kdump_data->p2m_mfn)
nd->xen_kdump_data->p2m_mfn = *(up+(words-1));
+ if (words > 9 && !nd->xen_kdump_data->xen_phys_start_mfn)
+ nd->xen_kdump_data->xen_phys_start_mfn = *(up+(words-2));
}
}
break;
@@ -2312,3 +2316,22 @@
return FALSE;
}
+
+void
+set_xen_phys_start_mfn(char *arg)
+{
+ ulong value;
+ int errflag = 0;
+
+ value = htol(arg, RETURN_ON_ERROR|QUIET, &errflag);
+ if (!errflag)
+ xen_kdump_data.xen_phys_start_mfn = value;
+ else
+ error(WARNING, "invalid xen_phys_start_mfn argument: %s\n", arg);
+}
+
+ulong
+xen_phys_start_mfn(void)
+{
+ return nd->xen_kdump_data->xen_phys_start_mfn;
+}
--- netdump.h.org 2008-04-17 15:38:59.000000000 +0900
+++ netdump.h 2008-04-17 15:39:38.000000000 +0900
@@ -109,6 +109,7 @@
ulong accesses;
int p2m_frames;
ulong *p2m_mfn_frame_list;
+ ulong xen_phys_start_mfn;
};
#define KDUMP_P2M_INIT (0x1)
--- x86_64.c.org 2008-04-17 15:34:32.000000000 +0900
+++ x86_64.c 2008-04-17 15:36:53.000000000 +0900
@@ -1401,6 +1401,10 @@
return FALSE;
if (XEN_HYPER_MODE()) {
+ if (XENTEXT_VIRT_ADDR(kvaddr)) {
+ *paddr = kvaddr - __XEN_VIRT_START + (xen_phys_start_mfn() << PAGESHIFT());
+ return TRUE;
+ }
if (DIRECTMAP_VIRT_ADDR(kvaddr)) {
*paddr = kvaddr - DIRECTMAP_VIRT_START;
return TRUE;
--- xen_hyper_defs.h.org 2008-04-17 15:31:23.000000000 +0900
+++ xen_hyper_defs.h 2008-04-17 15:34:15.000000000 +0900
@@ -64,6 +64,9 @@
#define DIRECTMAP_VIRT_START (0xffff830000000000)
#define DIRECTMAP_VIRT_END (0xffff840000000000)
#define PAGE_OFFSET_XEN_HYPER DIRECTMAP_VIRT_START
+#define __XEN_VIRT_START (0xFFFF828C80000000)
+#define XENTEXT_VIRT_ADDR(vaddr) \
+ (((vaddr) >= __XEN_VIRT_START) && ((vaddr) < DIRECTMAP_VIRT_START))
#endif
#ifdef IA64
=== Xen (explanation purpose) ===
--- include/xen/elfcore.h.org 2008-04-17 14:11:41.000000000 +0900
+++ include/xen/elfcore.h 2008-04-17 14:11:57.000000000 +0900
@@ -66,6 +66,7 @@
unsigned long xen_compile_time;
unsigned long tainted;
#ifdef CONFIG_X86
+ unsigned long xen_phys_start_mfn;
unsigned long dom0_pfn_to_mfn_frame_list_list;
#endif
} crash_xen_info_t;
--- arch/x86/crash.c.org 2008-04-17 14:12:51.000000000 +0900
+++ arch/x86/crash.c 2008-04-17 14:13:13.000000000 +0900
@@ -102,6 +102,7 @@
hvm_disable();
info = kexec_crash_save_info();
+ info->xen_phys_start_mfn = xen_phys_start >> PAGE_SHIFT;
info->dom0_pfn_to_mfn_frame_list_list =
arch_get_pfn_to_mfn_frame_list_list(dom0);
}
--
Itsuro ODA <oda(a)valinux.co.jp>
16 years, 7 months
Ramin SHARIATIAN est absent(e).
by ramin.shariatian@ineo.com
Je serai absent(e) à partir du 21/04/2008 de retour le 05/05/2008.
Je répondrai à votre message dès mon retour.
16 years, 7 months
Re: Crash-utility Digest, Vol 31, Issue 10
by Durga Prasad
Hi,
I could not find kernel-debuginfo for SLES 9 kernels.
What do I do ?
On 4/16/08, crash-utility-request(a)redhat.com <
crash-utility-request(a)redhat.com> wrote:
>
> Send Crash-utility mailing list submissions to
> crash-utility(a)redhat.com
>
> To subscribe or unsubscribe via the World Wide Web, visit
> https://www.redhat.com/mailman/listinfo/crash-utility
> or, via email, send a message with subject or body 'help' to
> crash-utility-request(a)redhat.com
>
> You can reach the person managing the list at
> crash-utility-owner(a)redhat.com
>
> When replying, please edit your Subject line so it is more specific
> than "Re: Contents of Crash-utility digest..."
>
>
> Today's Topics:
>
> 1. x86_64 crash (cross-platform) (Cliff Wickman)
> 2. Re: x86_64 crash (cross-platform) (Chip Coldwell)
>
>
> ----------------------------------------------------------------------
>
> Message: 1
> Date: Tue, 15 Apr 2008 15:34:38 -0500
> From: Cliff Wickman <cpw(a)sgi.com>
> Subject: [Crash-utility] x86_64 crash (cross-platform)
> To: anderson(a)redhat.com
> Cc: crash-utility(a)redhat.com
> Message-ID: <E1JlrrS-0002i7-Oe(a)eag09.americas.sgi.com>
>
>
> Hi Dave,
>
> We've recently seen lcrash fail with a floating point exception
> very early in initialization.
> Seems to be a binary compiled under one distribution and then
> executed on another.
>
> And the solution seems to be the -Wl,--hash-style=both option.
>
> Have others reported this?
> There must be a cleaner solution than in my notes below. That is,
> an easier way to link with this option.
>
> Thanks.
> -Cliff
> --------------------------------------------------------------------------
>
> To make crash so that it runs under SLES or RedHat:
> cd /home/estes02/cpw/crash-4.0-6.2
> make
>
> Compile it on a SLES system:
> good:
> cpw@spandau:/tmp/cpw/crash-4.0-6.2> readelf -S crash | grep -A1 hash
> [ 4] .hash HASH 0000000000400290 00000290
> 0000000000009490 0000000000000004 A 5 0 8
> on a RHEL:
> bad:
> cpw@alcatraz crash-4.0-6.2 $ readelf -S crash | grep -A1 hash
> [ 3] .gnu.hash GNU_HASH 0000000000400240 00000240
> 000000000000a040 0000000000000000 A 4 0 8
> OR
>
> vi gdb-6.1/gdb/configure
> ---
> if test "$GCC" = yes; then
> CFLAGS="-g -O2 -Wl,--hash-style=both"
> else
> CFLAGS="-g -Wl,--hash-style=both"
> fi
> else
> if test "$GCC" = yes; then
> CFLAGS="-O2 -Wl,--hash-style=both"
> else
> CFLAGS="-Wl,--hash-style=both"
> ---
> make
> readelf -S crash | grep -A1 hash (should show GNU form)
>
> cpw@alcatraz crash-4.0-6.2 $ readelf -S crash | grep -A1 hash
> [ 3] .hash HASH 0000000000400240 00000240
> 0000000000009490 0000000000000004 A 5 0 8
> [ 4] .gnu.hash GNU_HASH 00000000004096d0 000096d0
> 000000000000a040 0000000000000000 A 5 0 8
> ------------------------------------------------------------------------
>
>
>
> ------------------------------
>
> Message: 2
> Date: Tue, 15 Apr 2008 16:43:30 -0400 (EDT)
> From: Chip Coldwell <coldwell(a)redhat.com>
> Subject: Re: [Crash-utility] x86_64 crash (cross-platform)
> To: "Discussion list for crash utility usage, maintenance and
> development" <crash-utility(a)redhat.com>
> Message-ID:
> <alpine.LRH.1.00.0804151642150.32524(a)bogart.boston.redhat.com>
> Content-Type: TEXT/PLAIN; charset=US-ASCII
>
> On Tue, 15 Apr 2008, Cliff Wickman wrote:
>
> >
> > Hi Dave,
> >
> > We've recently seen lcrash fail with a floating point exception
> > very early in initialization.
> > Seems to be a binary compiled under one distribution and then
> > executed on another.
> >
> > And the solution seems to be the -Wl,--hash-style=both option.
> >
> > Have others reported this?
>
> This isn't specific to crash; it's the result of a change in the
> Linux/glibc ABI. Specifically, the new ABI has a .gnu.hash section
> whereas the old one has .hash. Your linker option above causes both
> sections to be included, so that the old dynamic loader can load a
> binary built with a new toolchain.
>
> Chip
>
> --
> Charles M. "Chip" Coldwell
> Senior Software Engineer
> Red Hat, Inc
> 978-392-2426
>
> GPG ID: 852E052F
> GPG FPR: 77E5 2B51 4907 F08A 7E92 DE80 AFA9 9A8F 852E 052F
>
>
>
> ------------------------------
>
> --
> Crash-utility mailing list
> Crash-utility(a)redhat.com
> https://www.redhat.com/mailman/listinfo/crash-utility
>
>
> End of Crash-utility Digest, Vol 31, Issue 10
> *********************************************
>
--
---------
Keep dreaming, for dreams are the precursors of things to come real.. @!#$
16 years, 7 months
crash no longer works with x86_64 xen-syms / kdump-vmcore (xen 3.1.2-based)
by Dave Anderson
Hi Oda-san,
We have a problem with the RHEL5.2 xen hypervisor kdump vmcores.
The RHEL5.2 hypervisor sources have been upgraded to xen version 3.1.2,
and the x86_64 hypervisor is now relocatable to a dynamically-determined
physical address at boot-time. That being the case, the crash utility
cannot possibly work with xen-syms/vmcore pairs without knowing how to
translate hypervisor virtual addresses to their physical counterparts.
The makedumpfile utility would run into the same problem.
I suspect you may have seen this working with upstream xen?
This is how I understand it:
Prior to RHEL5.2, the hypervisor's text and static data region was
direct-mapped. There was (and still is) a direct-mapped region defined
like so:
DIRECTMAP_VIRT_START 0xffff830000000000
DIRECTMAP_VIRT_END 0xffff840000000000
and the hypervisor text and static data was located inside of that
direct-mapped region:
# nm -Bn xen-syms-2.6.18-53.el5.debug
ffff830000100000 A _start
ffff830000100000 A _stext
ffff830000100000 T start
ffff830000100048 t bad_cpu
ffff83000010004f t not_multiboot
ffff830000100054 t print_err
ffff830000100075 t __start
... [ snip ] ...
ffff83000020e6e0 b model
ffff83000020e700 b cpu_msrs
ffff83000020e900 b saved_lvtpc
ffff83000020ea00 b reset_value
ffff83000020ea40 b reset_value
ffff83000020ea60 b reset_value
ffff83000020ea80 A _end
#
Because the hypervisor text/static-data was direct-mapped, the
virtual-to-physical address translation was simple -- only requiring
that the DIRECTMAP_VIRT_START identifier (0xffff830000000000) be
subtracted from the virtual address, leaving the physical address.
In the upgraded RHEL5.2 hypervisor, the kernel text and static data
is dynamically relocated at boot time, to a location that is based
upon the physical memory layout of the host machine. It has its own
1GB mapped region, that exists here:
XEN_VIRT_START 0xffff828c80000000
XEN_VIRT_END 0xffff828cc0000000
# nm -Bn xen-syms-2.6.18-89.el5.debug
ffff828c80100000 A _start
ffff828c80100000 A _stext
ffff828c80100000 T start
ffff828c80100014 t __high_start
ffff828c801000b7 t int_msg
ffff828c801000d7 t hex_msg
... [ snip ] ...
ffff828c8024e4a0 b cpu_msrs
ffff828c8024e8a0 b saved_lvtpc
ffff828c8024eaa0 b cpu_type
ffff828c8024eac0 b reset_value
ffff828c8024eb00 b reset_value
ffff828c8024eb20 b reset_value
ffff828c8024eb40 A _end
#
So translating hypervisor virtual addresses to their physical
address can no longer be done by simply subtracting a direct-map
identifier like before.
Note that the hypervisor code's new version of __pa(), for example,
ends up doing this:
if ( va > DIRECTMAP_VIRT_START )
return va - DIRECTMAP_VIRT_START;
return va - XEN_VIRT_START + xen_phys_start;
where the value of "xen_phys_start" is the base physical address
of the relocated hypervisor text and static data.
Again, there is the problem -- when crash is looking at a xen-syms
binary and a vmcore, it does not know the value of "xen_phys_start"
and therefore cannot translate hypervisor virtual addresses, and
therefore is completely useless.
It seems to me that the xen kdump mechanism needs to be modified
to store the "xen_phys_start" value in the vmcore someplace?
Do you have any thoughts on how best to address this?
Thanks,
Dave
16 years, 7 months
x86_64 crash (cross-platform)
by Cliff Wickman
Hi Dave,
We've recently seen lcrash fail with a floating point exception
very early in initialization.
Seems to be a binary compiled under one distribution and then
executed on another.
And the solution seems to be the -Wl,--hash-style=both option.
Have others reported this?
There must be a cleaner solution than in my notes below. That is,
an easier way to link with this option.
Thanks.
-Cliff
--------------------------------------------------------------------------
To make crash so that it runs under SLES or RedHat:
cd /home/estes02/cpw/crash-4.0-6.2
make
Compile it on a SLES system:
good:
cpw@spandau:/tmp/cpw/crash-4.0-6.2> readelf -S crash | grep -A1 hash
[ 4] .hash HASH 0000000000400290 00000290
0000000000009490 0000000000000004 A 5 0 8
on a RHEL:
bad:
cpw@alcatraz crash-4.0-6.2 $ readelf -S crash | grep -A1 hash
[ 3] .gnu.hash GNU_HASH 0000000000400240 00000240
000000000000a040 0000000000000000 A 4 0 8
OR
vi gdb-6.1/gdb/configure
---
if test "$GCC" = yes; then
CFLAGS="-g -O2 -Wl,--hash-style=both"
else
CFLAGS="-g -Wl,--hash-style=both"
fi
else
if test "$GCC" = yes; then
CFLAGS="-O2 -Wl,--hash-style=both"
else
CFLAGS="-Wl,--hash-style=both"
---
make
readelf -S crash | grep -A1 hash (should show GNU form)
cpw@alcatraz crash-4.0-6.2 $ readelf -S crash | grep -A1 hash
[ 3] .hash HASH 0000000000400240 00000240
0000000000009490 0000000000000004 A 5 0 8
[ 4] .gnu.hash GNU_HASH 00000000004096d0 000096d0
000000000000a040 0000000000000000 A 5 0 8
------------------------------------------------------------------------
16 years, 7 months