Hi Matsumoto-san,
-----Original Message-----
We sometimes overlook logs written to printk safe buffers
(safe_print_seq/nmi_print_seq) which have not been flushed yet.
This patch will output unflushed logs of the safe buffers
at the bottom of log command output as follows:
[nmi_print_seq] CPU: 0 LEN: 188 MESSAGE_LOST: 0 WORK: ffff8c31fbc19ce8 BUFFER:
ffff8c31fbc19d00
message2 message2 message2 message2
Uhhuh. NMI received for unknown reason 30 on CPU 0.
Do you have a strange power saving mode enabled?
Dazed and confused, but trying to continue
[safe_print_seq] CPU: 1 LEN: 38 MESSAGE_LOST: 0 WORK: ffff8c31fbc9ad08 BUFFER:
ffff8c31fbc9ad20
message1 message1 message1 message1
Thank you for updating the patch and providing a test module!
I tried them and felt that it would be better to print only the ring
buffer simply with the "log" command and print the safe buffer separately,
as they have different formats and also the output of safe buffers doesn't
follow the "log" options.
-----
crash> log
...
[ 6.261629] Key type id_resolver registered
[ 6.261924] Key type id_legacy registered
[1717970.628952] safebuffer: loading out-of-tree module taints kernel.
[1717970.629299] safebuffer: module verification failed: signature and/or required key
missing - tainting kernel
[nmi_print_seq] CPU: 0 LEN: 188 MESSAGE_LOST: 0 WORK: ffff94af3ba16a28 BUFFER:
ffff94af3ba16a40
message2 message2 message2 message2
Uhhuh. NMI received for unknown reason 20 on CPU 0.
Do you have a strange power saving mode enabled?
Dazed and confused, but trying to continue
[safe_print_seq] CPU: 0 LEN: 38 MESSAGE_LOST: 0 WORK: ffff94af3ba18a48 BUFFER:
ffff94af3ba18a60
message1 message1 message1 message1
[safe_print_seq] CPU: 1 LEN: 38 MESSAGE_LOST: 0 WORK: ffff94af3ba98a48 BUFFER:
ffff94af3ba98a60
message1 message1 message1 message1
-----
This looks irregular in crash.
So I would suggest adding a new option e.g. "log -s" to display only the safe
buffers, add some help text to "help log", and print something like this:
-----
crash> log -s
PRINTK_SAFE_SEQ_BUF: nmi_print_seq
CPU: 0 ADDR: ffff94af3ba16a20 LEN: 188 MESSAGE_LOST: 0
message2 message2 message2 message2
Uhhuh. NMI received for unknown reason 20 on CPU 0.
Do you have a strange power saving mode enabled?
Dazed and confused, but trying to continue
CPU: 1 ADDR: ... LEN: 0 MESSAGE_LOST: 0
(empty)
...
PRINTK_SAFE_SEQ_BUF: safe_print_seq
CPU: 0 ADDR: ffff94af3ba18a40 LEN: 38 MESSAGE_LOST: 0
message1 message1 message1 message1
CPU: 1 ADDR: ffff94af3ba98a40 LEN: 38 MESSAGE_LOST: 0
message1 message1 message1 message1
...
-----
With this information above, we can also look into the raw values, e.g.:
crash> p nmi_print_seq:all
per_cpu(nmi_print_seq, 0) = $8 = {
len = {
counter = 188
},
message_lost = {
counter = 0
...
crash> printk_safe_seq_buf -o ffff9d4efba16a20
struct printk_safe_seq_buf {
[ffff9d4efba16a20] atomic_t len;
[ffff9d4efba16a24] atomic_t message_lost;
[ffff9d4efba16a28] struct irq_work work;
[ffff9d4efba16a40] unsigned char buffer[8160];
}
SIZE: 8192
crash> printk_safe_seq_buf.work ffff9d4efba16a20
work = {
flags = 0,
llnode = {
next = 0x0
},
func = 0xffffffffc04a3000
},
I think this is a crash-like output and usage. What do you think?
Note that the safe buffer (struct printk_safe_seq_buf) was introduced
in kernel-4.11 (f92bac3b141b8233e34ddf32d227e12bfba07b48,
099f1c84c0052ec1b27f1c3942eed5830d86bdbb, ddb9baa822265b55afffd9815a2758a4b70006c1)
Replaced with "(Merge commit 7d91de74436a69c2b78a7a72f1e7f97f8b4396fa)"
will be better for readability.
and removed in kernel-5.15
(93d102f094be9beab28e5afb656c188b16a3793b).
v2 Changes:
- add all members of struct printk_safe_seq_buf
- support help -o
- consider OFFSET(atomic_t_counter) for atomic_t member
- some code refactoring
Signed-off-by: Shogo Matsumoto <shogo.matsumoto(a)fujitsu.com>
---
defs.h | 5 ++++
kernel.c | 87 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
symbols.c | 6 ++++
3 files changed, 98 insertions(+)
diff --git a/defs.h b/defs.h
index b63741c..f590910 100644
--- a/defs.h
+++ b/defs.h
@@ -2146,6 +2146,10 @@ struct offset_table { /* stash of commonly-used
offsets */
long wait_queue_entry_private;
long wait_queue_head_head;
long wait_queue_entry_entry;
+ long printk_safe_seq_buf_len;
+ long printk_safe_seq_buf_message_lost;
+ long printk_safe_seq_buf_work;
+ long printk_safe_seq_buf_buffer;
};
struct size_table { /* stash of commonly-used sizes */
@@ -2310,6 +2314,7 @@ struct size_table { /* stash of commonly-used sizes */
long prb_desc;
long wait_queue_entry;
long task_struct_state;
+ long printk_safe_seq_buf_buffer;
};
struct array_table {
diff --git a/kernel.c b/kernel.c
index 37b7af7..b2a597c 100644
--- a/kernel.c
+++ b/kernel.c
@@ -93,6 +93,7 @@ static void source_tree_init(void);
static ulong dump_audit_skb_queue(ulong);
static ulong __dump_audit(char *);
static void dump_audit(void);
+static void dump_printk_safe_seq_buf(void);
static char *vmcoreinfo_read_string(const char *);
static void check_vmcoreinfo(void);
static int is_pvops_xen(void);
@@ -5048,6 +5049,7 @@ cmd_log(void)
}
dump_log(msg_flags);
+ dump_printk_safe_seq_buf();
}
@@ -11544,6 +11546,91 @@ dump_audit(void)
error(INFO, "kernel audit log is empty\n");
}
+static void
+__dump_printk_safe_seq_buf(char *buf_name)
+{
+ int cpu, buffer_size;
+ char *buffer;
+ ulong len_addr, message_lost_addr, work_addr, buffer_addr;
+
+ if (!symbol_exists(buf_name)) {
+ return;
+ }
+
+ len_addr = symbol_value(buf_name) + OFFSET(printk_safe_seq_buf_len)
+ + OFFSET(atomic_t_counter);
+ message_lost_addr = symbol_value(buf_name)
+ + OFFSET(printk_safe_seq_buf_message_lost)
+ + OFFSET(atomic_t_counter);
+ work_addr = symbol_value(buf_name) + OFFSET(printk_safe_seq_buf_work);
+ buffer_addr = symbol_value(buf_name) + OFFSET(printk_safe_seq_buf_buffer);
+
+ buffer_size = SIZE(printk_safe_seq_buf_buffer);
+ buffer = GETBUF(buffer_size);
+ for (cpu = 0; cpu < kt->cpus; cpu++) {
+ int len;
+ ulong per_cpu_offset;
+ per_cpu_offset = kt->__per_cpu_offset[cpu];
+
+ readmem(len_addr + per_cpu_offset, KVADDR, &len, sizeof(int),
+ "printk_safe_seq_buf len", FAULT_ON_ERROR);
+
+ if (len > 0) {
+ int message_lost;
+ int i, n;
+ char *p;
+
+ readmem(message_lost_addr + per_cpu_offset, KVADDR,
+ &message_lost, sizeof(int),
+ "printk_safe_seq_buf message_lost", FAULT_ON_ERROR);
+ readmem(buffer_addr + per_cpu_offset, KVADDR,
+ buffer, buffer_size,
+ "printk_safe_seq_buf buffer", FAULT_ON_ERROR);
+
+ fprintf(fp, "[%s] CPU: %d LEN: %d MESSAGE_LOST: %d"
+ " WORK: %lx BUFFER: %lx\n",
+ buf_name, cpu, len, message_lost,
+ work_addr + per_cpu_offset,
+ buffer_addr + per_cpu_offset);
If we add a new command, does it need to print the work_addr?
The buffer_addr is fine as we need its offset in the offset_table..
+
+ n = (len <= buffer_size) ? len : buffer_size;
+ for (i = 0, p = buffer; i < n; i++, p++) {
+ if (*p == 0x1) { //SOH
+ i++; p++;
+ continue;
According to include/linux/kern_levels.h, the byte right after the SOH
can be printed as log level?
crash> printk_safe_seq_buf.buffer ffff9d4efba16a20
buffer = "\001\066message2 message2 message2 message2\n\001\060Uhhuh. NMI ...
this is printed like
<6>messages2 message2 message2 message2
<0>Uhhuh. NMI received for unknown reason 20 on CPU 0.
But it's ok to support this later or another patch, just idea.
Thanks.
Kazu
+ } else {
+ fputc(ascii(*p) ? *p : '.', fp);
+ }
+ }
+ fputc('\n', fp);
+ }
+ }
+ FREEBUF(buffer);
+}
+
+static void
+dump_printk_safe_seq_buf(void)
+{
+ if (!STRUCT_EXISTS("printk_safe_seq_buf"))
+ return;
+
+ if (INVALID_SIZE(printk_safe_seq_buf_buffer)) {
+ MEMBER_OFFSET_INIT(printk_safe_seq_buf_len,
+ "printk_safe_seq_buf", "len");
+ MEMBER_OFFSET_INIT(printk_safe_seq_buf_message_lost,
+ "printk_safe_seq_buf", "message_lost");
+ MEMBER_OFFSET_INIT(printk_safe_seq_buf_work,
+ "printk_safe_seq_buf", "work");
+ MEMBER_OFFSET_INIT(printk_safe_seq_buf_buffer,
+ "printk_safe_seq_buf", "buffer");
+ MEMBER_SIZE_INIT(printk_safe_seq_buf_buffer,
+ "printk_safe_seq_buf", "buffer");
+ }
+
+ __dump_printk_safe_seq_buf("nmi_print_seq");
+ __dump_printk_safe_seq_buf("safe_print_seq");
+}
+
/*
* Reads a string value from the VMCOREINFO data stored in (live) memory.
*
diff --git a/symbols.c b/symbols.c
index 73baa95..5c575a9 100644
--- a/symbols.c
+++ b/symbols.c
@@ -10523,6 +10523,11 @@ dump_offset_table(char *spec, ulong makestruct)
fprintf(fp, " prb_data_ring_size_bits: %ld\n",
OFFSET(prb_data_ring_size_bits));
fprintf(fp, " prb_data_ring_data: %ld\n",
OFFSET(prb_data_ring_data));
fprintf(fp, " atomit_long_t_counter: %ld\n",
OFFSET(atomic_long_t_counter));
+ fprintf(fp, " printk_safe_seq_buf_len: %ld\n",
OFFSET(printk_safe_seq_buf_len));
+ fprintf(fp, "printk_safe_seq_buf_message_lost: %ld\n",
+ OFFSET(printk_safe_seq_buf_message_lost));
+ fprintf(fp, " printk_safe_seq_buf_work: %ld\n",
OFFSET(printk_safe_seq_buf_work));
+ fprintf(fp, " printk_safe_seq_buf_buffer: %ld\n",
OFFSET(printk_safe_seq_buf_buffer));
fprintf(fp, " sched_rt_entity_my_q: %ld\n",
OFFSET(sched_rt_entity_my_q));
@@ -10954,6 +10959,7 @@ dump_offset_table(char *spec, ulong makestruct)
fprintf(fp, " printk_info: %ld\n", SIZE(printk_info));
fprintf(fp, " printk_ringbuffer: %ld\n",
SIZE(printk_ringbuffer));
fprintf(fp, " prb_desc: %ld\n", SIZE(prb_desc));
+ fprintf(fp, " printk_safe_seq_buf_buffer: %ld\n",
SIZE(printk_safe_seq_buf_buffer));
fprintf(fp, "\n array_table:\n");
--
2.26.2
I also attach a test module printk_safe_buf_test.c
which I used to test this patch.
How to use:
1. Load the test module
2. Generate a NMI interrupt
3. Start crash on a live system and execute log command
I tested with the module in Fedora 32 (5.6.8-300.fc32.x86_64) and
CentOS 8 (4.18.0-348.el8.x86_64).
kallsyms_lookup_name should be replaced with the specific address
if kernel >= 5.7 is used.
printk_safe_buf_test.c:
=====
#include <linux/module.h>
#include <linux/kprobes.h>
#include <linux/irq_work.h>
#include <linux/nmi.h>
static int msg1_line = 0;
static int output_msg1(struct kprobe *p, struct pt_regs *regs) {
if (msg1_line++ < 1)
printk(KERN_INFO "message1 message1 message1 message1\n");
return 0;
}
static int output_msg2(unsigned int cmd, struct pt_regs *regs) {
int i;
for (i = 0; i < 1; i++)
printk(KERN_INFO "message2 message2 message2 message2\n");
return 0;
}
static void empty_work(struct irq_work *work) {
}
static struct kprobe kp = {
.symbol_name = "__printk_safe_exit",
.pre_handler = output_msg1,
};
static int __init printk_safe_buf_test_init(void) {
int i, ret = 0;
ulong safe_print_seq, nmi_print_seq, per_cpu_offset;
struct dummy_printk_safe_seq_buf {
atomic_t len;
atomic_t message_lost;
struct irq_work work;
};
safe_print_seq = kallsyms_lookup_name("safe_print_seq");
nmi_print_seq = kallsyms_lookup_name("nmi_print_seq");
per_cpu_offset = kallsyms_lookup_name("__per_cpu_offset");
if (!(safe_print_seq && nmi_print_seq && per_cpu_offset))
return -1;
for (i = 0; i < num_possible_cpus(); i++) {
ulong offset = ((ulong*)per_cpu_offset)[i];
init_irq_work(&((struct dummy_printk_safe_seq_buf*)(offset +
safe_print_seq))->work,
empty_work);
init_irq_work(&((struct dummy_printk_safe_seq_buf*)(offset +
nmi_print_seq))->work,
empty_work);
}
ret = register_kprobe(&kp);
if (ret < 0) {
pr_err("register kp failed%d\n", ret);
return ret;
}
register_nmi_handler(NMI_LOCAL, output_msg2, 0, "output_msg2");
return 0;
}
static void __exit printk_safe_buf_test_exit(void) {
unregister_nmi_handler(NMI_LOCAL, "output_msg2");
unregister_kprobe(&kp);
return;
}
module_init(printk_safe_buf_test_init);
module_exit(printk_safe_buf_test_exit);
MODULE_LICENSE("GPL");
=====