Re: [Crash-utility] [RFC]Patch to add sparse memory support
by Dave Anderson
Thanks -- this one works OK...
Dave
Jeff Moyer wrote:
> try this:
>
> This patch adds sparsemem support to crash.
> Per-node memory display functions (kmem -n) are not yet working
> for sparsemem kernels. This will come in a later patch.
>
> Signed-off-by: David Wilder <dwilder(a)us.ibm.com>
>
> diff -Naur crash-4.0-2.21/defs.h crash-4.0-2.21-sparse.4/defs.h
> --- crash-4.0-2.21/defs.h 2006-02-14 14:40:02.000000000 -0800
> +++ crash-4.0-2.21-sparse.4/defs.h 2006-03-17 11:58:32.000000000 -0800
> @@ -663,6 +663,10 @@
> int ptrs_per_pgd;
> char *cmdline_arg;
> struct machine_specific *machspec;
> + ulong section_size_bits;
> + ulong max_physaddr_bits;
> + ulong max_physmem_bits;
> + ulong sections_per_root;
> };
>
> /*
> @@ -1230,6 +1234,7 @@
> long x8664_pda_cpunumber;
> long x8664_pda_me;
> long tss_struct_ist;
> + long mem_section_section_mem_map;
> };
>
> struct size_table { /* stash of commonly-used sizes */
> @@ -1464,6 +1469,7 @@
> ulong cached_vma_hits[VMA_CACHE];
> int vma_cache_index;
> ulong vma_cache_fills;
> + void *mem_sec;
> };
>
> #define NODES (0x1)
> @@ -1474,7 +1480,15 @@
> #define V_MEM_MAP (0x20)
> #define PERCPU_KMALLOC_V2 (0x40)
> #define KMEM_CACHE_UNAVAIL (0x80)
> -#define DISCONTIGMEM (0x100)
> +#define FLATMEM (0x100)
> +#define DISCONTIGMEM (0x200)
> +#define SPARSEMEM (0x400)
> +#define SPARSEMEM_EX (0x800)
> +
> +#define IS_FLATMEM() (vt->flags & FLATMEM)
> +#define IS_DISCONTIGMEM() (vt->flags & DISCONTIGMEM)
> +#define IS_SPARSEMEM() (vt->flags & SPARSEMEM)
> +#define IS_SPARSEMEM_EX() (vt->flags & SPARSEMEM_EX)
>
> #define COMMON_VADDR_SPACE() (vt->flags & COMMON_VADDR)
> #define PADDR_PRLEN (vt->paddr_prlen)
> @@ -1703,6 +1717,34 @@
> #define VIRTPAGEBASE(X) (((ulong)(X)) & (ulong)machdep->pagemask)
> #define PHYSPAGEBASE(X) (((physaddr_t)(X)) & (physaddr_t)machdep->pagemask)
>
> +/*
> + * Sparse memory stuff
> + * These must follow the definitions in the kernel mmzone.h
> + */
> +#define SECTION_SIZE_BITS() (machdep->section_size_bits)
> +#define MAX_PHYSADDR_BITS() (machdep->max_physaddr_bits)
> +#define MAX_PHYSMEM_BITS() (machdep->max_physmem_bits)
> +#define SECTIONS_SHIFT() (MAX_PHYSMEM_BITS() - SECTION_SIZE_BITS())
> +#define PA_SECTION_SHIFT() (SECTION_SIZE_BITS())
> +#define PFN_SECTION_SHIFT() (SECTION_SIZE_BITS() - PAGESHIFT())
> +#define NR_MEM_SECTIONS() (1UL << SECTIONS_SHIFT())
> +#define PAGES_PER_SECTION() (1UL << PFN_SECTION_SHIFT())
> +#define PAGE_SECTION_MASK() (~(PAGES_PER_SECTION()-1))
> +
> +#define pfn_to_section_nr(pfn) ((pfn) >> PFN_SECTION_SHIFT())
> +#define section_nr_to_pfn(sec) ((sec) << PFN_SECTION_SHIFT())
> +
> +#define SECTIONS_PER_ROOT() (machdep->sections_per_root)
> +
> +/* CONFIG_SPARSEMEM_EXTREME */
> +#define _SECTIONS_PER_ROOT_EXTREME() (PAGESIZE() / STRUCT_SIZE("mem_section"))
> +/* !CONFIG_SPARSEMEM_EXTREME */
> +#define _SECTIONS_PER_ROOT() (1)
> +
> +#define SECTION_NR_TO_ROOT(sec) ((sec) / SECTIONS_PER_ROOT())
> +#define NR_SECTION_ROOTS() (NR_MEM_SECTIONS() / SECTIONS_PER_ROOT())
> +#define SECTION_ROOT_MASK() (SECTIONS_PER_ROOT() - 1)
> +
> /*
> * Machine specific stuff
> */
> @@ -1747,6 +1789,16 @@
>
> #define TIF_SIGPENDING (2)
>
> +// CONFIG_X86_PAE
> +#define _SECTION_SIZE_BITS_PAE 30
> +#define _MAX_PHYSADDR_BITS_PAE 36
> +#define _MAX_PHYSMEM_BITS_PAE 36
> +
> +// !CONFIG_X86_PAE
> +#define _SECTION_SIZE_BITS 26
> +#define _MAX_PHYSADDR_BITS 32
> +#define _MAX_PHYSMEM_BITS 32
> +
> #endif /* X86 */
>
> #ifdef X86_64
> @@ -1861,6 +1913,10 @@
> #define VALID_LEVEL4_PGT_ADDR(X) \
> (((X) == VIRTPAGEBASE(X)) && IS_KVADDR(X) && !IS_VMALLOC_ADDR(X))
>
> +#define _SECTION_SIZE_BITS 27
> +#define _MAX_PHYSADDR_BITS 40
> +#define _MAX_PHYSMEM_BITS 40
> +
> #endif /* X86_64 */
>
> #ifdef ALPHA
> @@ -1939,6 +1995,10 @@
>
> #define TIF_SIGPENDING (2)
>
> +#define _SECTION_SIZE_BITS 24
> +#define _MAX_PHYSADDR_BITS 44
> +#define _MAX_PHYSMEM_BITS 44
> +
> #endif /* PPC */
>
> #ifdef IA64
> @@ -1963,6 +2023,9 @@
> #define KERNEL_UNCACHED_BASE ((ulong)KERNEL_UNCACHED_REGION << REGION_SHIFT)
> #define KERNEL_CACHED_BASE ((ulong)KERNEL_CACHED_REGION << REGION_SHIFT)
>
> +#define _SECTION_SIZE_BITS 30
> +#define _MAX_PHYSMEM_BITS 50
> +
> /*
> * As of 2.6, these are no longer straight forward.
> */
> @@ -2168,6 +2231,10 @@
> #define STACK_FRAME_OVERHEAD 112
> #define EXCP_FRAME_MARKER 0x7265677368657265
>
> +#define _SECTION_SIZE_BITS 24
> +#define _MAX_PHYSADDR_BITS 44
> +#define _MAX_PHYSMEM_BITS 44
> +
> #endif /* PPC64 */
>
> #ifdef S390
> diff -Naur crash-4.0-2.21/ia64.c crash-4.0-2.21-sparse.4/ia64.c
> --- crash-4.0-2.21/ia64.c 2006-02-14 14:40:02.000000000 -0800
> +++ crash-4.0-2.21-sparse.4/ia64.c 2006-03-16 11:28:57.000000000 -0800
> @@ -206,6 +206,9 @@
> ARRAY_LENGTH_INIT(machdep->nr_irqs, irq_desc,
> "_irq_desc", NULL, 0);
> machdep->hz = 1024;
> + machdep->section_size_bits = _SECTION_SIZE_BITS;
> + machdep->max_physaddr_bits = _MAX_PHYSADDR_BITS;
> + machdep->max_physmem_bits = _MAX_PHYSMEM_BITS;
> ia64_create_memmap();
> break;
>
> diff -Naur crash-4.0-2.21/memory.c crash-4.0-2.21-sparse.4/memory.c
> --- crash-4.0-2.21/memory.c 2006-02-14 14:40:02.000000000 -0800
> +++ crash-4.0-2.21-sparse.4/memory.c 2006-03-17 13:47:17.000000000 -0800
> @@ -132,8 +132,18 @@
> static void do_vm_flags(ulong);
> static void PG_reserved_flag_init(void);
> static ulong nr_blockdev_pages(void);
> -
> -
> +void sparse_mem_init(void);
> +void list_mem_sections(void);
> +void sparse_dump_mem_map(void);
> +ulong sparse_decode_mem_map(ulong, ulong);
> +char *read_mem_section(ulong);
> +ulong nr_to_section(ulong);
> +int valid_section(ulong);
> +int section_has_mem_map(ulong);
> +ulong section_mem_map_addr(ulong);
> +ulong valid_section_nr(ulong);
> +void list_mem_sections(void);
> +ulong pfn_to_map(ulong);
>
> /*
> * Memory display modes specific to this file.
> @@ -447,10 +457,17 @@
> error(FATAL, "no swapper_pg_dir or cpu_pgd symbols exist?\n");
>
> get_symbol_data("high_memory", sizeof(ulong), &vt->high_memory);
> - if (kernel_symbol_exists("mem_map"))
> +
> + if (kernel_symbol_exists("mem_map")){
> get_symbol_data("mem_map", sizeof(char *), &vt->mem_map);
> + vt->flags |= FLATMEM;
> + } else if (kernel_symbol_exists("mem_section"))
> + vt->flags |= SPARSEMEM;
> else
> vt->flags |= DISCONTIGMEM;
> +
> + sparse_mem_init();
> +
> vt->vmalloc_start = machdep->vmalloc_start();
> if (IS_VMALLOC_ADDR(vt->mem_map))
> vt->flags |= V_MEM_MAP;
> @@ -3480,9 +3497,9 @@
> static void
> dump_mem_map(struct meminfo *mi)
> {
> - long i, n;
> + long node;
> long total_pages;
> - int others, page_not_mapped, phys_not_mapped;
> + int others, page_not_mapped, phys_not_mapped, page_mapping;
> ulong pp, ppend;
> physaddr_t phys, physend;
> ulong tmp, reserved, shared, slabs;
> @@ -3502,6 +3519,7 @@
> char buf4[BUFSIZE];
> char *page_cache;
> char *pcache;
> + ulong section, section_nr, nr_mem_sections, node_or_section_size;
>
> v22 = VALID_MEMBER(page_inode); /* page.inode vs. page.mapping */
>
> @@ -3589,22 +3607,63 @@
> done = FALSE;
> total_pages = 0;
>
> - for (n = 0; n < vt->numnodes; n++) {
> + if (IS_SPARSEMEM()){
> + nr_mem_sections = NR_MEM_SECTIONS();
> + }else{
> + nr_mem_sections = 1;
> + }
> +
> + /* iterate over all possable section */
> + node = 0;
> + for (section_nr = 0; section_nr < nr_mem_sections ; section_nr++){
> +
> +
> + if (CRASHDEBUG(2))
> + printf("section_nr = %d\n",section_nr);
> +
> + /* If we are looking up a spicific address jump directly
> + to the section with that page */
> + if (IS_SPARSEMEM() && mi->flags&ADDRESS_SPECIFIED){
> + ulong pfn = mi->spec_addr >> PAGESHIFT();
> + section_nr=pfn_to_section_nr(pfn);
> + }
> +
> + if(IS_SPARSEMEM() && !(section=valid_section_nr(section_nr))){
> + /*break; on a real sparsemem system we need to check
> + * every section as gaps may exist. But this
> + * can be slow. If we know we dont have gaps
> + * just stop validating sections when we
> + * get to the end of the valid ones.
> + * In the future find a way to short circuit
> + * this loop.
> + */
> + continue;
> + }
> +
> if (print_hdr) {
> - fprintf(fp, "%s%s", n ? "\n" : "", hdr);
> + fprintf(fp, "%s%s", node ? "\n" : "", hdr);
> print_hdr = FALSE;
> }
>
> - nt = &vt->node_table[n];
> - total_pages += nt->size;
> - pp = nt->mem_map;
> - phys = nt->start_paddr;
> - if ((vt->flags & V_MEM_MAP) && (vt->numnodes == 1))
> - node_size = vt->max_mapnr;
> - else
> - node_size = nt->size;
> +next_node:
> + if (IS_SPARSEMEM()){
> + pp = section_mem_map_addr(section);
> + pp = sparse_decode_mem_map(pp,section_nr);
> + phys = section_nr*PAGES_PER_SECTION()*PAGESIZE();
> + node_or_section_size = PAGES_PER_SECTION();
> + }else{
> + nt = &vt->node_table[node];
> + total_pages += nt->size;
> + pp = nt->mem_map;
> + phys = nt->start_paddr;
> + if ((vt->flags & V_MEM_MAP) && (vt->numnodes == 1))
> + node_or_section_size = vt->max_mapnr;
> + else
> + node_or_section_size = nt->size;
> + }
>
> - for (i = 0; i < node_size;
> + ulong i;
> + for (i = 0; i < node_or_section_size;
> i++, pp += SIZE(page), phys += PAGESIZE()) {
>
> if ((i % PGMM_CACHED) == 0) {
> @@ -3693,11 +3752,12 @@
> }
> continue;
> }
> + page_mapping = VALID_MEMBER(page_mapping);
>
> if (v22) {
> inode = ULONG(pcache + OFFSET(page_inode));
> offset = ULONG(pcache + OFFSET(page_offset));
> - } else {
> + } else if (page_mapping) {
> mapping = ULONG(pcache +
> OFFSET(page_mapping));
> index = ULONG(pcache + OFFSET(page_index));
> @@ -3740,6 +3800,20 @@
> space(MINSPACE),
> mkstring(buf4, 8, CENTER|RJUST, " "),
> " ");
> + else if (!page_mapping)
> + fprintf(fp, "%s%s%s%s%s%s%s %2d ",
> + mkstring(buf0, VADDR_PRLEN,
> + LJUST|LONG_HEX, MKSTR(pp)),
> + space(MINSPACE),
> + mkstring(buf1, MAX(PADDR_PRLEN,
> + strlen("PHYSICAL")),
> + RJUST|LONGLONG_HEX, MKSTR(&phys)),
> + space(MINSPACE),
> + mkstring(buf3, VADDR_PRLEN,
> + CENTER|RJUST, "-------"),
> + space(MINSPACE),
> + mkstring(buf4, 8, CENTER|RJUST, "-----"),
> + count);
> else
> fprintf(fp, "%s%s%s%s%s%s%8ld %2d ",
> mkstring(buf0, VADDR_PRLEN,
> @@ -3801,6 +3875,7 @@
> fprintf(fp, "\n");
> } else if (THIS_KERNEL_VERSION > LINUX(2,4,9)) {
> fprintf(fp, "%lx\n", flags);
> + node=node;
> } else {
>
> if ((flags >> v24_PG_locked) & 1)
> @@ -3864,9 +3939,12 @@
> if (done)
> break;
> }
> -
> if (done)
> break;
> + node++;
> + if ( (node < vt->numnodes) && (!IS_SPARSEMEM())) {
> + goto next_node;
> + }
> }
>
> switch (mi->flags)
> @@ -8904,6 +8982,16 @@
> physaddr_t pstart, pend;
> ulong node_size;
>
> + if (IS_SPARSEMEM()){
> + ulong map;
> + map = pfn_to_map(phys >> PAGESHIFT());
> + if ( map ){
> + *pp = map;
> + return TRUE;
> + }
> + return FALSE;
> + }
> +
> for (n = 0; n < vt->numnodes; n++) {
> nt = &vt->node_table[n];
> if ((vt->flags & V_MEM_MAP) && (vt->numnodes == 1))
> @@ -9009,8 +9097,12 @@
> fprintf(fp, "%sV_MEM_MAP", others++ ? "|" : "");
> if (vt->flags & KMEM_CACHE_UNAVAIL)
> fprintf(fp, "%sKMEM_CACHE_UNAVAIL", others++ ? "|" : "");
> - if (vt->flags & DISCONTIGMEM)
> - fprintf(fp, "%sDISCONTIGMEM", others++ ? "|" : "");
> + if (vt->flags & FLATMEM)
> + fprintf(fp, "%sFLATMEM", others++ ? "|" : "");
> + if (vt->flags & SPARSEMEM)
> + fprintf(fp, "%sSPARSEMEM", others++ ? "|" : "");\
> + if (vt->flags & SPARSEMEM_EX)
> + fprintf(fp, "%sSPARSEMEM_EX", others++ ? "|" : "");\
> fprintf(fp, ")\n");
> if (vt->kernel_pgd[0] == vt->kernel_pgd[1])
> fprintf(fp, " kernel_pgd[NR_CPUS]: %lx ...\n",
> @@ -10015,6 +10107,11 @@
> char buf5[BUFSIZE];
> struct node_table *nt;
>
> + if (IS_SPARSEMEM() && !initialize){
> + error(WARNING,"Per node memory data is not available for this kernel\n");
> + return;
> + }
> +
> if (!(vt->flags & NODES)) {
> if (!initialize)
> error(FATAL,
> @@ -10054,12 +10151,6 @@
> "node_mem_map", FAULT_ON_ERROR);
> } else {
> node_mem_map = BADADDR;
> - if (!badaddr && initialize) {
> - error(INFO,
> - "pglist_data.node_mem_map structure member does not exist.\n");
> - error(INFO,
> - "certain memory-related commands will fail or display invalid data\n\n");
> - }
> badaddr = TRUE;
> }
>
> @@ -10570,3 +10661,172 @@
> return retval;
> }
>
> +
> +/* Functions for sparse mem support */
> +ulong sparse_decode_mem_map(ulong coded_mem_map, ulong section_nr)
> +{
> + return coded_mem_map + (section_nr_to_pfn(section_nr)*STRUCT_SIZE("struct page"));
> +}
> +
> +void
> +sparse_mem_init(void)
> +{
> + ulong addr;
> + ulong mem_section_size;
> +
> + if (!IS_SPARSEMEM())
> + return;
> +
> + /* How can I tell if SPARSEMEM_EX is set? */
> + vt->flags |= SPARSEMEM_EX;
> +
> +
> + if (IS_SPARSEMEM_EX()){
> + machdep->sections_per_root = _SECTIONS_PER_ROOT_EXTREME();
> + mem_section_size = sizeof(void *)*NR_SECTION_ROOTS();
> + }else{
> + machdep->sections_per_root = _SECTIONS_PER_ROOT();
> + mem_section_size = STRUCT_SIZE("struct mem_section")*NR_SECTION_ROOTS();
> + }
> +
> + if (CRASHDEBUG(1)){
> + fprintf(fp, "PAGESIZE=%d\n",PAGESIZE());
> + fprintf(fp,"mem_section_size = %d\n",mem_section_size);
> + fprintf(fp, "NR_SECTION_ROOTS=%d\n", NR_SECTION_ROOTS());
> + fprintf(fp, "NR_MEM_SECTIONS=%d\n",NR_MEM_SECTIONS());
> + fprintf(fp, "SECTIONS_PER_ROOT = %d\n",SECTIONS_PER_ROOT() );
> + fprintf(fp, "SECTION_ROOT_MASK=0x%x\n", SECTION_ROOT_MASK());
> + fprintf(fp, "PAGES_PER_SECTION=%d\n", PAGES_PER_SECTION());
> + }
> +
> + if (!(vt->mem_sec = malloc(mem_section_size)))
> + error(FATAL, "cannot malloc mem_sec cache\n");
> +
> + addr = symbol_value("mem_section");
> + readmem(addr, KVADDR,vt->mem_sec ,mem_section_size,
> + "memory section root table", FAULT_ON_ERROR);
> +
> + MEMBER_OFFSET_INIT(mem_section_section_mem_map, "mem_section","section_mem_map");
> +}
> +
> +char
> +*read_mem_section(ulong addr)
> +{
> + static char *mem_section;
> +
> + if (!mem_section){
> + mem_section = GETBUF(STRUCT_SIZE("struct mem_section"));
> + }
> +
> + if (!IS_KVADDR(addr))
> + return 0;
> +
> + readmem(addr,KVADDR,mem_section,STRUCT_SIZE("struct mem_section"),
> + "memory section", FAULT_ON_ERROR);
> +
> + return mem_section;
> +}
> +
> +ulong
> +nr_to_section(ulong nr)
> +{
> + ulong addr;
> +
> + ulong *mem_sec = vt->mem_sec;
> + if (!IS_KVADDR(mem_sec[SECTION_NR_TO_ROOT(nr)]))
> + return 0;
> +
> + if (IS_SPARSEMEM_EX())
> + addr=mem_sec[SECTION_NR_TO_ROOT(nr)] + (nr & SECTION_ROOT_MASK())* STRUCT_SIZE("struct mem_section");
> + else
> + addr=mem_sec[0] + (nr & SECTION_ROOT_MASK())* STRUCT_SIZE("struct mem_section");
> +
> + if (!IS_KVADDR(addr))
> + return 0;
> +
> + return addr;
> +}
> +
> +/*
> + * We use the lower bits of the mem_map pointer to store
> + * a little bit of information. There should be at least
> + * 3 bits here due to 32-bit alignment.
> + */
> +#define SECTION_MARKED_PRESENT (1UL<<0)
> +#define SECTION_HAS_MEM_MAP (1UL<<1)
> +#define SECTION_MAP_LAST_BIT (1UL<<2)
> +#define SECTION_MAP_MASK (~(SECTION_MAP_LAST_BIT-1))
> +
> +
> +int valid_section(ulong addr)
> +{
> + char *mem_section;
> +
> + if ((mem_section = read_mem_section(addr)))
> + return (ULONG(mem_section+OFFSET(mem_section_section_mem_map))
> + && SECTION_MARKED_PRESENT);
> + return 0;
> +}
> +
> +int section_has_mem_map(ulong addr)
> +{
> + char *mem_section;
> +
> + if ((mem_section = read_mem_section(addr)))
> + return (ULONG(mem_section+OFFSET(mem_section_section_mem_map))
> + && SECTION_HAS_MEM_MAP);
> + return 0;
> +}
> +
> +ulong section_mem_map_addr(ulong addr)
> +{
> + char *mem_section;
> + ulong map;
> +
> + if ((mem_section = read_mem_section(addr))){
> + map = ULONG(mem_section+OFFSET(mem_section_section_mem_map));
> + map &= SECTION_MAP_MASK;
> + return map;
> + }
> + return 0;
> +}
> +
> +
> +ulong valid_section_nr(ulong nr)
> +{
> + ulong addr = nr_to_section(nr);
> + if(valid_section(addr))
> + return addr;
> + return 0;
> +}
> +
> +ulong pfn_to_map(ulong pfn)
> +{
> + ulong section, page_offset;
> +
> + section=pfn_to_section_nr(pfn);
> + if (section_has_mem_map(section)){
> + page_offset = pfn - section_nr_to_pfn(section);
> + return(section_mem_map_addr(section) + (page_offset*PAGESIZE()));
> + }
> + return 0;
> +}
> +
> +void list_mem_sections(void)
> +{
> + ulong nr,addr;
> + ulong nr_mem_sections = NR_MEM_SECTIONS();
> + ulong coded_mem_map;
> +
> + for (nr = 0; nr <= nr_mem_sections ; nr++){
> + if( (addr=valid_section_nr(nr)) ){
> + coded_mem_map=section_mem_map_addr(addr);
> + fprintf(fp,"nr=%d section = %llx coded_mem_map=%lx pfn=%d mem_map=%lx\n",
> + nr,
> + addr,
> + coded_mem_map,
> + section_nr_to_pfn(nr),
> + sparse_decode_mem_map(coded_mem_map,nr));
> + }
> + }
> +}
> diff -Naur crash-4.0-2.21/ppc64.c crash-4.0-2.21-sparse.4/ppc64.c
> --- crash-4.0-2.21/ppc64.c 2006-02-14 14:40:02.000000000 -0800
> +++ crash-4.0-2.21-sparse.4/ppc64.c 2006-03-17 12:14:28.000000000 -0800
> @@ -162,7 +162,9 @@
> m->l3_shift = m->l2_shift + m->l2_index_size;
> m->l4_shift = m->l3_shift + m->l3_index_size;
> }
> -
> + machdep->section_size_bits = _SECTION_SIZE_BITS;
> + machdep->max_physaddr_bits = _MAX_PHYSADDR_BITS;
> + machdep->max_physmem_bits = _MAX_PHYSMEM_BITS;
> ppc64_paca_init();
> machdep->vmalloc_start = ppc64_vmalloc_start;
> MEMBER_OFFSET_INIT(thread_struct_pg_tables,
> diff -Naur crash-4.0-2.21/ppc.c crash-4.0-2.21-sparse.4/ppc.c
> --- crash-4.0-2.21/ppc.c 2006-02-14 14:40:02.000000000 -0800
> +++ crash-4.0-2.21-sparse.4/ppc.c 2006-03-17 12:20:36.000000000 -0800
> @@ -138,6 +138,9 @@
> machdep->hz = HZ;
> if (THIS_KERNEL_VERSION >= LINUX(2,6,0))
> machdep->hz = 1000;
> + machdep->section_size_bits = _SECTION_SIZE_BITS;
> + machdep->max_physaddr_bits = _MAX_PHYSADDR_BITS;
> + machdep->max_physmem_bits = _MAX_PHYSMEM_BITS;
> break;
>
> case POST_INIT:
> diff -Naur crash-4.0-2.21/s390.c crash-4.0-2.21-sparse.4/s390.c
> --- crash-4.0-2.21/s390.c 2006-02-14 14:40:02.000000000 -0800
> +++ crash-4.0-2.21-sparse.4/s390.c 2006-03-16 11:28:57.000000000 -0800
> @@ -159,6 +159,9 @@
> machdep->vmalloc_start = s390_vmalloc_start;
> machdep->dump_irq = s390_dump_irq;
> machdep->hz = HZ;
> + machdep->section_size_bits = _SECTION_SIZE_BITS;
> + machdep->max_physaddr_bits = _MAX_PHYSADDR_BITS;
> + machdep->max_physmem_bits = _MAX_PHYSMEM_BITS;
> break;
>
> case POST_INIT:
> diff -Naur crash-4.0-2.21/s390x.c crash-4.0-2.21-sparse.4/s390x.c
> --- crash-4.0-2.21/s390x.c 2006-02-14 14:40:02.000000000 -0800
> +++ crash-4.0-2.21-sparse.4/s390x.c 2006-03-16 11:28:57.000000000 -0800
> @@ -174,6 +174,9 @@
> machdep->vmalloc_start = s390x_vmalloc_start;
> machdep->dump_irq = s390x_dump_irq;
> machdep->hz = HZ;
> + machdep->section_size_bits = _SECTION_SIZE_BITS;
> + machdep->max_physaddr_bits = _MAX_PHYSADDR_BITS;
> + machdep->max_physmem_bits = _MAX_PHYSMEM_BITS;
> break;
>
> case POST_INIT:
> diff -Naur crash-4.0-2.21/x86_64.c crash-4.0-2.21-sparse.4/x86_64.c
> --- crash-4.0-2.21/x86_64.c 2006-02-14 14:40:02.000000000 -0800
> +++ crash-4.0-2.21-sparse.4/x86_64.c 2006-03-16 11:28:57.000000000 -0800
> @@ -211,6 +211,9 @@
> machdep->hz = HZ;
> if (THIS_KERNEL_VERSION >= LINUX(2,6,0))
> machdep->hz = 1000;
> + machdep->section_size_bits = _SECTION_SIZE_BITS;
> + machdep->max_physaddr_bits = _MAX_PHYSADDR_BITS;
> + machdep->max_physmem_bits = _MAX_PHYSMEM_BITS;
> break;
>
> case POST_INIT:
> diff -Naur crash-4.0-2.21/x86.c crash-4.0-2.21-sparse.4/x86.c
> --- crash-4.0-2.21/x86.c 2006-02-14 14:40:02.000000000 -0800
> +++ crash-4.0-2.21-sparse.4/x86.c 2006-03-16 11:28:57.000000000 -0800
> @@ -1741,6 +1741,9 @@
> machdep->hz = HZ;
> if (THIS_KERNEL_VERSION >= LINUX(2,6,0))
> machdep->hz = 1000;
> + machdep->section_size_bits = _SECTION_SIZE_BITS;
> + machdep->max_physaddr_bits = _MAX_PHYSADDR_BITS;
> + machdep->max_physmem_bits = _MAX_PHYSMEM_BITS;
> break;
>
> case POST_INIT:
18 years, 8 months
Error when compiling crash-4.0-2.21
by Nguyen Anh Quynh
Hi,
I am trying to compile crash-4.0-2.21 on my x86 box. I am running
Ubuntu 5.10, with gcc 4.0.2. I grabbed crash from
http://people.redhat.com/anderson/crash-4.0-2.21.tar.gz, untarred it, and
inside the newly created crash-4.0-2.21 directory, I compiled it with
"make". But there is a problem, see below. How could I fix it?
Thank you,
Q
^^^^
$make
......
......
ar: creating libgdb.a
ranlib libgdb.a
cc -c -g -DX86 -D_FILE_OFFSET_BITS=64 build_data.c
cc -c -g -DX86 -D_FILE_OFFSET_BITS=64 main.c
cc -c -g -DX86 -D_FILE_OFFSET_BITS=64 tools.c
cc -c -g -DX86 -D_FILE_OFFSET_BITS=64 global_data.c
cc -c -g -DX86 -D_FILE_OFFSET_BITS=64 memory.c
cc -c -g -DX86 -D_FILE_OFFSET_BITS=64 filesys.c
cc -c -g -DX86 -D_FILE_OFFSET_BITS=64 -DGDB_6_1 help.c
cc -c -g -DX86 -D_FILE_OFFSET_BITS=64 -DGDB_6_1 task.c
cc -c -g -DX86 -D_FILE_OFFSET_BITS=64 -DGDB_6_1 kernel.c
cc -c -g -DX86 -D_FILE_OFFSET_BITS=64 test.c
cc -c -g -DX86 -D_FILE_OFFSET_BITS=64 -DGDB_6_1 gdb_interface.c
cc -c -g -DX86 -D_FILE_OFFSET_BITS=64 net.c
cc -c -g -DX86 -D_FILE_OFFSET_BITS=64 dev.c
cc -c -g -DX86 -D_FILE_OFFSET_BITS=64 -DGDB_6_1 alpha.c
cc -c -g -DX86 -D_FILE_OFFSET_BITS=64 -DMCLX x86.c
cc -c -g -DX86 -D_FILE_OFFSET_BITS=64 ppc.c
cc -c -g -DX86 -D_FILE_OFFSET_BITS=64 ia64.c
cc -c -g -DX86 -D_FILE_OFFSET_BITS=64 s390.c
cc -c -g -DX86 -D_FILE_OFFSET_BITS=64 s390x.c
cc -c -g -DX86 -D_FILE_OFFSET_BITS=64 ppc64.c
cc -c -g -DX86 -D_FILE_OFFSET_BITS=64 x86_64.c
cc -c -g -DX86 -D_FILE_OFFSET_BITS=64 extensions.c
cc -c -g -DX86 -D_FILE_OFFSET_BITS=64 va_server.c
va_server.c:60: error: initializer element is not constant
make[4]: *** [va_server.o] Error 1
make[3]: *** [gdb] Error 2
make[2]: *** [all-gdb] Error 2
make[1]: *** [gdb_merge] Error 2
make: *** [all] Error 2
Here is my gcc version:
$ gcc -v
Using built-in specs.
Target: i486-linux-gnu
Configured with: ../src/configure -v
--enable-languages=c,c++,java,f95,objc,ada,treelang --prefix=/usr
--with-gxx-include-dir=/usr/include/c++/4.0.2 --enable-shared
--with-system-zlib --libexecdir=/usr/lib --enable-nls
--without-included-gettext --enable-threads=posix
--program-suffix=-4.0 --enable-__cxa_atexit
--enable-libstdcxx-allocator=mt --enable-clocale=gnu
--enable-libstdcxx-debug --enable-java-gc=boehm --enable-java-awt=gtk
--enable-gtk-cairo
--with-java-home=/usr/lib/jvm/java-1.4.2-gcj-4.0-1.4.2.0/jre
--enable-mpfr --disable-werror --enable-checking=release
i486-linux-gnu
Thread model: posix
gcc version 4.0.2 20050808 (prerelease) (Ubuntu 4.0.1-4ubuntu9)
18 years, 8 months
[patch] remove kernel header include file
by Nguyen Anh Quynh
hi,
lkcd_dump_v5.h includes <linux/list.h>, which unfortunately includes a
kernel-only header. below is the content of <linux/list.h> on my box
(Ubuntu 5.10): this file has only 1 line
^^^
#include <linux/err_kernel_only.h>
^^^
i guess you guys don't have this compiling problem because you are
running Redhat/FC, not Ubuntu. obviously it is a bad idea to use a
kernel header in userspace. so i propose a patch which puts a new
list.h file in the source tree. please see the patch below (this
"list.h" is taken from Xen source tree, but i guess you have already
seen it in many places)
Signed-off-by: Nguyen Anh Quynh <aquynh(a)gmail.com>
# diffstat crash.diff
list.h | 186 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
lkcd_dump_v5.h | 2
2 files changed, 187 insertions(+), 1 deletion(-)
18 years, 8 months
Need to update page->flags
by David Wilder
The definitions for page->flags in crash are no longer correct,
resulting in incorrect output for the kmem -i command. The page flags
are currently defined for V22, V24, and one flag for V26. The flag
definitions change often in the 2.6 kernel, so crash will need to
update these definitions for each 2.6.x. Yuck! Can we limit the flag
definitions to just a few kernel versions? Any suggestions on what
versions we should have definitions for?
--
David Wilder
IBM Linux Technology Center
Beaverton, Oregon, USA
dwilder(a)us.ibm.com
(503)578-3789
18 years, 8 months
missing word in "help list"
by Chip Coldwell
<nitpick>
There's a missing word in the DESCRIPTION for "help list":
DESCRIPTION
This command dumps the contents of a linked list. The entries in a linked
are typically data structures that are tied together in one of two formats:
"The entries in a linked are typically ..." should read "The entries
in a linked list are typically".
</nitpick>
Chip
--
Charles M. "Chip" Coldwell
Senior Software Engineer
Red Hat, Inc
18 years, 8 months
poisoned per_cpu data not handled on ppc64
by Olaf Hering
We use this patch to catch incorrect access to per_cpu data. But crash
can't deal with it, unfortunately.
http://patchwork.ozlabs.org/linuxppc/patch?id=4423
honeydew:~ # crash -s /root/2.6.15.42-kexec/vmlinux /proc/vmcore
crash: pglist_data.node_mem_map structure member does not exist.
crash: certain memory-related commands will fail or display invalid data
crash: invalid kernel virtual address: aeeeeeeeef3736ae type: "runqueues entry (per_cpu)"
18 years, 8 months
Problem with disassembling functions that use BUG()
by Gerard Snitselaar
Using an AS4 i386 based system with a 2.6.9-22ELsmp kernel. Currently BUG()
gets defined as the following:
#define BUG() \
__asm__ __volatile__( "ud2\n" \
"\t.word %c0\n" \
"\t.long %c1\n" \
: : "i" (__LINE__), "i" (__FILE__))
So after the ud2 opcode it places __LINE__ and __FILE__ in the next 6 bytes.
The trap handler for ud2 uses these to print a message saying where BUG() was
used.
Crash has no knowledge of this convention so it thinks the byte after the ud2
opcode is the start of the next instruction. This results in bad disassemblies
being generated. The example where I ran into it was flush_tlb_others() .
Below I have included the source for flush_tlb_others , the disassembly from
crash, the raw code for flush_tlb_others, and what I think the disassembly
should be if one takes into account the convention used in BUG(). What
initially made me suspicious was that I didn't see "call <_spin_lock>"
anywhere, and the offsets for jumps didn't line up with instructions. From
what I can tell this would probably have to be dealt with in print_insn() in
gdb/opcodes/i386-dis.c . Not sure how to go about it, or what should be done
since newer kernels allow you to configure whether those bytes get encoded
after the ud2 opcode with CONFIG_DEBUG_VERBOSE.
Any ideas on solving this?
source for flush_tlb_others:
static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
unsigned long va)
{
cpumask_t tmp;
/*
* A couple of (to be removed) sanity checks:
*
* - we do not send IPIs to not-yet booted CPUs.
* - current CPU must not be in mask
* - mask must exist :)
*/
BUG_ON(cpus_empty(cpumask));
cpus_and(tmp, cpumask, cpu_online_map);
BUG_ON(!cpus_equal(cpumask, tmp));
BUG_ON(cpu_isset(smp_processor_id(), cpumask));
BUG_ON(!mm);
/*
* i'm not happy about this global shared spinlock in the
* MM hot path, but we'll see how contended it is.
* Temporarily this turns IRQs off, so that lockups are
* detected by the NMI watchdog.
*/
spin_lock(&tlbstate_lock);
flush_mm = mm;
flush_va = va;
#if NR_CPUS <= BITS_PER_LONG
atomic_set_mask(cpumask, &flush_cpumask);
#else
{
int k;
unsigned long *flush_mask = (unsigned long *)&flush_cpumask;
unsigned long *cpu_mask = (unsigned long *)&cpumask;
for (k = 0; k < BITS_TO_LONGS(NR_CPUS); ++k)
atomic_set_mask(cpu_mask[k], &flush_mask[k]);
}
#endif
/*
* We have to send the IPI only to
* CPUs affected.
*/
send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR);
while (!cpus_empty(flush_cpumask))
/* nothing. lockup detection does not belong here */
mb();
flush_mm = NULL;
flush_va = 0;
spin_unlock(&tlbstate_lock);
}
========
crash disassembly of flush_tlb_others:
crash> dis flush_tlb_others
0xc011681a <flush_tlb_others>: push %esi
0xc011681b <flush_tlb_others+0x1>: test %eax,%eax
0xc011681d <flush_tlb_others+0x3>: push %ebx
0xc011681e <flush_tlb_others+0x4>: mov %edx,%ebx
0xc0116820 <flush_tlb_others+0x6>: mov %eax,%edx
0xc0116822 <flush_tlb_others+0x8>: push %esi
0xc0116823 <flush_tlb_others+0x9>: mov %ecx,%esi
0xc0116825 <flush_tlb_others+0xb>: mov %eax,(%esp)
0xc0116828 <flush_tlb_others+0xe>: jne 0xc0116832
<flush_tlb_others+0x18>
0xc011682a <flush_tlb_others+0x10>: ud2a
0xc011682c <flush_tlb_others+0x12>: add %sp,(%esi)
0xc011682f <flush_tlb_others+0x15>: add (%esi),%ebp
0xc0116831 <flush_tlb_others+0x17>: rorb $0x91,0x880523d0(%ecx)
0xc0116838 <flush_tlb_others+0x1e>: inc %eax
0xc0116839 <flush_tlb_others+0x1f>: sarb $0xc2,(%ecx)
0xc011683c <flush_tlb_others+0x22>: je 0xc0116846
<flush_tlb_others+0x2c>
0xc011683e <flush_tlb_others+0x24>: ud2a
0xc0116840 <flush_tlb_others+0x26>: imul $0xc02e0326,(%ecx),%eax
0xc0116846 <flush_tlb_others+0x2c>: mov $0xfffff000,%eax
0xc011684b <flush_tlb_others+0x31>: and %esp,%eax
0xc011684d <flush_tlb_others+0x33>: mov 0x10(%eax),%eax
0xc0116850 <flush_tlb_others+0x36>: bt %eax,(%esp)
0xc0116854 <flush_tlb_others+0x3a>: sbb %eax,%eax
0xc0116856 <flush_tlb_others+0x3c>: test %eax,%eax
0xc0116858 <flush_tlb_others+0x3e>: je 0xc0116862
<flush_tlb_others+0x48>
0xc011685a <flush_tlb_others+0x40>: ud2a
0xc011685c <flush_tlb_others+0x42>: push $0x1
0xc011685e <flush_tlb_others+0x44>: add %es:(%esi),%ebp
0xc0116861 <flush_tlb_others+0x47>: rolb $0xb,0xf0875db(%ebp)
0xc0116868 <flush_tlb_others+0x4e>: imul $0x26,(%ecx),%eax
0xc011686b <flush_tlb_others+0x51>: add (%esi),%ebp
0xc011686d <flush_tlb_others+0x53>: sarb $0xe8,0xc0321c3c(%eax)
0xc0116874 <flush_tlb_others+0x5a>: int $0x93
0xc0116876 <flush_tlb_others+0x5c>: sbb (%eax),%eax
0xc0116878 <flush_tlb_others+0x5e>: mov %ebx,0xc0409160
0xc011687e <flush_tlb_others+0x64>: mov (%esp),%eax
0xc0116881 <flush_tlb_others+0x67>: mov %esi,0xc0409164
0xc0116887 <flush_tlb_others+0x6d>: lock or %eax,0xc040915c
0xc011688e <flush_tlb_others+0x74>: mov 0xc031f300,%ecx
0xc0116894 <flush_tlb_others+0x7a>: mov $0xfd,%edx
0xc0116899 <flush_tlb_others+0x7f>: mov (%esp),%eax
0xc011689c <flush_tlb_others+0x82>: call *0x80(%ecx)
0xc01168a2 <flush_tlb_others+0x88>:
jmp 0xc01168aa <flush_tlb_others+0x90>
0xc01168a4 <flush_tlb_others+0x8a>: mfence
0xc01168a7 <flush_tlb_others+0x8d>: lea 0x0(%esi),%esi
0xc01168aa <flush_tlb_others+0x90>: cmpl $0x0,0xc040915c
0xc01168b1 <flush_tlb_others+0x97>:
jne 0xc01168a4 <flush_tlb_others+0x8a>
0xc01168b3 <flush_tlb_others+0x99>: movl $0x0,0xc0409160
0xc01168bd <flush_tlb_others+0xa3>: mov $0xc0321c3c,%eax
0xc01168c2 <flush_tlb_others+0xa8>: movl $0x0,0xc0409164
0xc01168cc <flush_tlb_others+0xb2>: call 0xc02cfcb6 <_spin_unlock>
0xc01168d1 <flush_tlb_others+0xb7>: pop %ebx
0xc01168d2 <flush_tlb_others+0xb8>: pop %ebx
0xc01168d3 <flush_tlb_others+0xb9>: pop %esi
0xc01168d4 <flush_tlb_others+0xba>: ret
========
raw code for flush_tlb_others:
crash> rd flush_tlb_others 0x2f
c011681a: 53c08556 c289d389 89ce8956 08752404 V..S....V....$u.
c011682a: 01660b0f c02e0326 0523d089 c0409188 ..f.&.....#...@.
c011683a: 0874c239 01690b0f c02e0326 fff000b8 9.t...i.&.......
c011684a: 8be021ff a30f1040 c0192404 0874c085 .!..@....$....t.
c011685a: 016a0b0f c02e0326 0875db85 016b0b0f ..j.&.....u...k.
c011686a: c02e0326 321c3cb8 93cde8c0 1d89001b &....<.2........
c011687a: c0409160 8924048b 40916435 0509f0c0 `.@...$.5d.@....
c011688a: c040915c f3000d8b fdbac031 8b000000 \.@.....1.......
c011689a: 91ff2404 00000080 ae0f06eb 00768df0 .$............v.
c01168aa: 915c3d83 7500c040 6005c7f1 00c04091 .=\.@..u...`.@..
c01168ba: b8000000 c0321c3c 916405c7 0000c040 ....<.2...d.@...
c01168ca: e5e80000 5b001b93 52c35e5b .......[[^.R
========
hand disassembly of flush_tlb_others taking into account
kernel convention of encoding __LINE__ and __FILE__ after
the ud2a opcode:
flush_tlb_others+0x000: 56 push %esi;
flush_tlb_others+0x001: 85c0 test %eax, %eax;
flush_tlb_others+0x003: 53 push %ebx;
flush_tlb_others+0x004: 89d3 mov %edx, %ebx;
flush_tlb_others+0x006: 89c2 mov %eax, %edx;
flush_tlb_others+0x008: 56 push %esi;
flush_tlb_others+0x009: 89ce mov %ecx, %esi;
flush_tlb_others+0x00b: 890424 mov %eax, (%esp);
flush_tlb_others+0x00e: 7508 jne flush_tlb_others+0x18
flush_tlb_others+0x010: 0f0b ud2;
flush_tlb_others+0x012: 660126032ec0 ; put __LINE__ __FILE__ here for trap
handler
358 arch/i386/kernel/smp.c
flush_tlb_others+0x018: 89d0 mov %edx, %eax;
flush_tlb_others+0x01a: 2305889140c0 and 0xc0409188, %eax;
flush_tlb_others+0x020: 39c2 cmp %eax, %edx;
flush_tlb_others+0x022: 7408 je flush_tlb_others+0x2c
flush_tlb_others+0x024: 0f0b ud2;
flush_tlb_others+0x026: 690126032ec0 ; put __LINE__ __FILE__ here for trap
handler
361 arch/i386/kernel/smp.c
flush_tlb_others+0x02c: b8000fffff mov $0xfffff000, %eax;
flush_tlb_others+0x031: 21e0 and %esp, %eax;
flush_tlb_others+0x033: 8b4010 mov 0x10(%eax), %eax;
flush_tlb_others+0x036: 0fa30424 bt %eax, (%esp);
flush_tlb_others+0x03a: 19c0 sbb %eax, %eax;
flush_tlb_others+0x03c: 85c0 test %eax, %eax;
flush_tlb_others+0x03e: 7408 je flush_tlb_others+0x48
flush_tlb_others+0x040: 0f0b ud2;
flush_tlb_others+0x042: 6a0126032ec0 ; put __LINE__ __FILE__ here for trap
handler
362 arch/i386/kernel/smp.c
flush_tlb_others+0x048: 85db test %ebx, %ebx;
flush_tlb_others+0x04a: 7508 jne flush_tlb_others+0x54
flush_tlb_others+0x04c: 0f0b ud2;
flush_tlb_others+0x04e: 6b0126032ec0 ; put __LINE__ __FILE__ here for trap
handler
363 arch/i386/kernel/smp.c
flush_tlb_others+0x054: b83c1c32c0 mov $0xc0321c3c, %eax;
flush_tlb_others+0x059: e8cd931b00 call <_spin_lock>
flush_tlb_others+0x05e: 891d609140c0 mov 0xc0409160, %ebx;
flush_tlb_others+0x064: 8b0424 mov (%esp),%eax;
flush_tlb_others+0x067: 8935649140c0 mov %esi, 0xc0409164;
flush_tlb_others+0x06d: f009055c9140c0 lock or %eax, 0xc040915c;
flush_tlb_others+0x074: 8b0d00f331c0 mov 0xc031f300, %ecx;
flush_tlb_others+0x07a: bafd000000 mov $0xfd, %edx;
flush_tlb_others+0x07f: 8b0424 mov (%esp), %eax;
flush_tlb_others+0x082: ff9180000000 call *0x80(%ecx);
flush_tlb_others+0x088: eb06 jmp flush_tlb_others+0x90
flush_tlb_others+0x08a: 0faef0 mfence
flush_tlb_others+0x08d: 8d7600 lea 0x0(%esi), %esi;
flush_tlb_others+0x090: 833d5c9140c000 cmp 0x0, 0xc040915c;
flush_tlb_others+0x097: 75f1 jne flush_tlb_others+0x8a
flush_tlb_others+0x099: c705609140c000000000 mov $0x0, 0xc0409160;
flush_tlb_others+0x0a3: b83c1c32c0 mov 0xc0321c3c, %eax
flush_tlb_others+0x0a8: c705649140c000000000 mov $0x0, 0xc0409164;
flush_tlb_others+0x0b2: e8e5931b00 call <_spin_unlock>
flush_tlb_others+0x0b7: 5b pop %ebx;
flush_tlb_others+0x0b8: 5b pop %ebx;
flush_tlb_others+0x0b9: 5e pop %esi;
flush_tlb_others+0x0ba: c3 ret;
18 years, 8 months
Re: [Crash-utility] Problem with disassembling functions that use BUG()
by Dave Anderson
>> #define BUG() \
>> __asm__ __volatile__( "ud2\n" \
>> "\t.word %c0\n" \
>> "\t.long %c1\n" \
>> : : "i" (__LINE__), "i" (__FILE__))
>>
> Ideally it should be handled in the gdb code, which is what's screwing it up.
Actually, I can't really blame it on gdb either. How can gdb be expected
to recognize this kind of asm-generated code sequence?
Dave
18 years, 8 months