Hi Dave & David,
Thanks very much for your time and effort!
After installing /lib/modules/3.12.49-6-xen/updates/crash.ko, all seems OK:
# crash
crash 7.1.3
Copyright (C) 2002-2014 Red Hat, Inc.
Copyright (C) 2004, 2005, 2006, 2010 IBM Corporation
Copyright (C) 1999-2006 Hewlett-Packard Co
Copyright (C) 2005, 2006, 2011, 2012 Fujitsu Limited
Copyright (C) 2006, 2007 VA Linux Systems Japan K.K.
Copyright (C) 2005, 2011 NEC Corporation
Copyright (C) 1999, 2002, 2007 Silicon Graphics, Inc.
Copyright (C) 1999, 2000, 2001, 2002 Mission Critical Linux, Inc.
This program is free software, covered by the GNU General Public License,
and you are welcome to change it and/or distribute copies of it under
certain conditions. Enter "help copying" to see the conditions.
This program has absolutely no warranty. Enter "help warranty" for details.
crash: /boot/xen-4.5.gz: original filename unknown
Use "-f /boot/xen-4.5.gz" on command line to prevent this message.
WARNING: machine type mismatch:
crash utility: X86_64
/var/tmp/xen-4.5.gz_ud3IRy: X86
crash: /boot/symtypes-3.12.49-6-default.gz: original filename unknown
Use "-f /boot/symtypes-3.12.49-6-default.gz" on command line to
prevent this message.
crash: /boot/symvers-3.12.49-6-default.gz: original filename unknown
Use "-f /boot/symvers-3.12.49-6-default.gz" on command line to
prevent this message.
GNU gdb (GDB) 7.6
Copyright (C) 2013 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law. Type "show copying"
and "show warranty" for details.
This GDB was configured as "x86_64-unknown-linux-gnu"...
KERNEL: /boot/vmlinux-3.12.49-6-xen.gz
DEBUGINFO: /usr/lib/debug/boot/vmlinux-3.12.49-6-xen.debug
DUMPFILE: /dev/crash
CPUS: 128
DATE: Fri Nov 20 06:55:06 2015
UPTIME: 18:51:36
LOAD AVERAGE: 1.76, 1.48, 1.21
TASKS: 1230
NODENAME: dl980-5
RELEASE: 3.12.49-6-xen
VERSION: #1 SMP Mon Oct 26 16:05:37 UTC 2015 (11560c3)
MACHINE: x86_64 (1995 Mhz)
MEMORY: 125.9 GB
PID: 6618
COMMAND: "crash"
TASK: ffff881ea93b2140 [THREAD_INFO: ffff881e869f2000]
CPU: 112
STATE: TASK_RUNNING (ACTIVE)
So just from a user's viewpoint, I think it is enough for normal use.
Is it right?
Thanks in advance!
Best Regards
Nan Xiao
On Fri, Nov 20, 2015 at 6:14 AM, David Mair <dmair(a)suse.com> wrote:
On 11/19/2015 02:38 PM, Dave Anderson wrote:
>
>
> ----- Original Message -----
>> On 11/19/2015 12:20 PM, Dave Anderson wrote:
>>>
>>>
>>> ----- Original Message -----
>>>> On 11/19/2015 08:37 AM, David Mair wrote:
>>>>> On 11/19/2015 07:45 AM, Dave Anderson wrote:
>>>>>
>>>>> Hi Dave,
>>>>>
>>>>>> ----- Original Message -----
>>>>> <snip>
>>>>>>
>>>>>>> (2) Execute "crash -d8" on physical machine will
cause crash utility
>>>>>>> core
>>>>>>> dump.
>>>>>>
>>>>>> I can reproduce this, so I'll look into it. It's related
to the
>>>>>> /dev/mem "test"
>>>>>> to determine whether the kernel was configured with
>>>>>> CONFIG_STRICT_DEVMEM, where
>>>>>> it tries to read from pfn 257 (just above the
CONFIG_STRICT_DEVMEM
>>>>>> limit), but
>>>>>> gets into an infinite loop when used in conjunction with -d.
>>>>>>
>>>>>> Anyway, just continue to use /proc/kcore and you should be fine.
>>>>>
>>>>> This is the cause in readmem():
>>>>>
>>>>> switch(READMEM(...))
>>>>> {
>>>>> .
>>>>> .
>>>>> .
>>>>> case READ_ERROR:
>>>>> if (PRINT_ERROR_MESSAGE) ********** THIS ***********
>>>>> {
>>>>> causes a nested readmem() call before the goto gives it
>>>>> to the caller to deal with
>>>>> }
>>>>> goto readmem_error
>>>>> }
>>>>> .
>>>>> .
>>>>> .
>>>>> switch(error_handle)
>>>>> {
>>>>> case (RETURN_ON_ERROR):
>>>>> }
>>>>>
>>>>> The PRINT_ERROR_MESSAGE I assume is an escalation from -d 8 in this
case.
>>>>>
>>>>
>>>> The whole switch_to_proc_kcore() probably shouldn't be conditional
on
>>>> the presence of PRINT_ERROR_MESSAGE, only the actual error message
>>>> should be.
>>>>
>>>> Patch shortly.
>>>>
>>>> --
>>>> David.
>>>
>>> Yeah, I agree. It was done that way to catch the very first non-QUIET
>>> readmem()
>>> call from kernel_init(), and make the bait-and-switch right there. Without
>>> the
>>> CRASHDEBUG(x) override, it works as-is because the readmem() calls in
>>> devmem_is_restricted() are purposely set to QUIET.
>>>
>>> So we're thinking something like this, right?:
>>>
>>> diff --git a/memory.c b/memory.c
>>> index 824b3ae..2282ba9 100644
>>> --- a/memory.c
>>> +++ b/memory.c
>>> @@ -2207,13 +2207,12 @@ readmem(ulonglong addr, int memtype, void *buffer,
>>> long size,
>>> goto readmem_error;
>>>
>>> case READ_ERROR:
>>> - if (PRINT_ERROR_MESSAGE) {
>>> - if ((pc->flags & DEVMEM) &&
(kt->flags &
>>> PRE_KERNEL_INIT) &&
>>> - devmem_is_restricted() &&
>>> switch_to_proc_kcore())
>>> - return(readmem(addr, memtype,
>>> bufptr, size,
>>> - type, error_handle));
>>> + if ((pc->flags & DEVMEM) &&
(kt->flags &
>>> PRE_KERNEL_INIT) &&
>>> + !(error_handle & QUIET) &&
>>> devmem_is_restricted() && switch_to_proc_kcore())
>>> + return(readmem(addr, memtype, bufptr, size,
>>> + type, error_handle));
>>> + if (PRINT_ERROR_MESSAGE)
>>> error(INFO, READ_ERRMSG,
>>> memtype_string(memtype, 0), addr, type);
>>> - }
>>> goto readmem_error;
>>>
>>> case PAGE_EXCLUDED:
>>>
>>> Works for me, and is better than hiding a recursive-call check in
>>> devmem_is_restricted().
>>
>> That protects from nesting devmem_is_restricted() as long as
>> devmem_is_restricted() continues to do QUIET readmem()s but fails to
>> switch_to_proc_kcore() for QUIET readmem() at other random failing
>> addresses that could be fixed via /proc/kcore. QUIET isn't a global flag
>> only for being in devmem_is_restricted().
>
> I'm not following...
>
> There are the 3 QUIET readmem() calls in devmem_is_restricted(). As soon
> as the first one fails, it comes into the code segment above, and will
> call switch_to_proc_kcore(). There are no readmem() calls there. So
> if /proc/kcore is usable, it switches to it, and we continue just fine.
>
> And, for example, if I force switch_to_proc_kcore() to return FALSE,
> it still works:
>
> $ crash -d1 /dev/mem
> ... [ cut ] ...
> GNU gdb (GDB) 7.6
> Copyright (C) 2013 Free Software Foundation, Inc.
> License GPLv3+: GNU GPL version 3 or later <
http://gnu.org/licenses/gpl.html>
> This is free software: you are free to change and redistribute it.
> There is NO WARRANTY, to the extent permitted by law. Type "show copying"
> and "show warranty" for details.
> This GDB was configured as "x86_64-unknown-linux-gnu"...
>
> crash: read error: physical address: 101000 type: "devmem_is_allowed - pfn
257"
> crash: read error: kernel virtual address: ffffffff81e08000 type:
"devmem_is_allowed - jiffies"
> crash: this kernel may be configured with CONFIG_STRICT_DEVMEM, which
> renders /dev/mem unusable as a live memory source.
> crash: read error: kernel virtual address: ffffffff81a114b0 type:
"cpu_possible_mask"
> #
>
> The first QUIET read fails, tries /proc/kcore and it fails, and the error message
> is printed. It returns back to devmem_is_restricted(), which calls readmem() again,
> but since DEVMEM has been cleared, it won't retry the /proc/kcore switch, prints
the
> "jiffies" error message and returns. At that point, it is marked as
restricted,
> returns back to readmem() which fails the original "cpu_possible_mask"
readmem()
> that came from kernel_init(). At that point the crash session is killed.
>
> Note that because we have /dev/crash built-in, I had to put /dev/mem on the command
line,
> but I wouldn't think that would make a difference.
>
> Am I missing something? Do you see something different?
I was originally thinking about the same as yourself but decided against
it due to thinking first that, in English, QUIET is a bit abstract for
"don't try to switch devmem source" as well as avoid output.
Plus, if any other readmem() that's QUIET fails READ_ERROR for any
random reason and devmem_is_restricted() will fail a
switch_to_proc_kcore() would have resolved it then that caller's use of
QUIET will prevent devmem_is_restricted() being tried and it won't
switch_to_proc_kcore(). I admit that means the status of
devmem_is_restricted() has changed between initialization and now, but
it's a live environment with it's own behavior.
Due to the small number of error_handle flags here's one that uses the
best of your idea without expanding the meaning of the word QUIET. It
adds another flag for the sole purpose of saying "don't try to switch
devmem source this time" and it avoids all the overhead I originally had.
The name for the flag is long enough that the value tab alignment
doesn't match with the other flags (I can't think of a shorter name for
the flag but maybe you can). I chose not to appear to make three extra
lines of changes just to expand the tab alignment on the other flags. It
also leaves PRINT_ERROR_MESSAGE over-riding QUIET as at present but I
don't have a preference for that -d8 has it's useful moments:
diff --git a/defs.h b/defs.h
index b0cdd42..29f46b4 100644
--- a/defs.h
+++ b/defs.h
@@ -328,6 +328,7 @@ struct number_option {
#define HEX_BIAS (0x8)
#define LONG_LONG (0x10)
#define RETURN_PARTIAL (0x20)
+#define NO_DEVMEM_SWITCH (0x40)
#define SEEK_ERROR (-1)
#define READ_ERROR (-2)
diff --git a/memory.c b/memory.c
index 72218e7..b4629a6 100644
--- a/memory.c
+++ b/memory.c
@@ -2204,14 +2204,14 @@ readmem(ulonglong addr, int memtype, void
*buffer, long size,
goto readmem_error;
case READ_ERROR:
- if (PRINT_ERROR_MESSAGE) {
- if ((pc->flags & DEVMEM) && (kt->flags
& PRE_KERNEL_INIT) &&
- devmem_is_restricted() &&
switch_to_proc_kcore())
- return(readmem(addr, memtype, bufptr, size,
- type, error_handle));
+ if (PRINT_ERROR_MESSAGE)
error(INFO, READ_ERRMSG, memtype_string(memtype, 0),
addr, type);
- }
- goto readmem_error;
+
+ if ((pc->flags & DEVMEM) && (kt->flags &
PRE_KERNEL_INIT) &&
+ !(error_handle & NO_DEVMEM_SWITCH) &&
devmem_is_restricted() &&
switch_to_proc_kcore())
+ return(readmem(addr, memtype, bufptr, size,
+ type, error_handle));
+ goto readmem_error;
case PAGE_EXCLUDED:
RETURN_ON_PARTIAL_READ();
@@ -2410,16 +2410,16 @@ devmem_is_restricted(void)
if (machine_type("X86") || machine_type("X86_64")) {
if (readmem(255*PAGESIZE(), PHYSADDR, &tmp,
sizeof(long), "devmem_is_allowed - pfn 255",
- QUIET|RETURN_ON_ERROR) &&
+ QUIET|RETURN_ON_ERROR|NO_DEVMEM_SWITCH) &&
!(readmem(257*PAGESIZE(), PHYSADDR, &tmp,
sizeof(long), "devmem_is_allowed - pfn 257",
- QUIET|RETURN_ON_ERROR)))
+ QUIET|RETURN_ON_ERROR|NO_DEVMEM_SWITCH)))
restricted = TRUE;
}
if (kernel_symbol_exists("jiffies") &&
!readmem(symbol_value("jiffies"), KVADDR, &tmp,
sizeof(ulong), "devmem_is_allowed - jiffies",
- QUIET|RETURN_ON_ERROR))
+ QUIET|RETURN_ON_ERROR|NO_DEVMEM_SWITCH))
restricted = TRUE;
if (restricted)
--
Crash-utility mailing list
Crash-utility(a)redhat.com
https://www.redhat.com/mailman/listinfo/crash-utility