[PATCH] Add blk_mq shared tags support for dev -d/-D
by Tao Liu
When blk_mq shared tags are enabled for devices like SCSI, the I/O status is
incorrect, e.g.:
crash> dev -d
MAJOR GENDISK NAME REQUEST_QUEUE TOTAL ASYNC SYNC
8 ffff90528df86000 sda ffff9052a3d61800 144 144 0
8 ffff905280718c00 sdb ffff9052a3d63c00 48 48 0
crash> epython rqlist
ffff90528e94a5c0 sda is unknown, deadline: 89.992 (90) rq_alloc: 0.196
ffff90528e92f700 sda is unknown, deadline: 89.998 (90) rq_alloc: 0.202
ffff90528e95ccc0 sda is unknown, deadline: 89.999 (90) rq_alloc: 0.203
ffff90528e968bc0 sdb is unknown, deadline: 89.997 (90) rq_alloc: 0.201
The root cause is that, in the shared-tags case, only the shared tags should
be counted. Without this patch, the tags of every hw_ctx are counted,
which is incorrect.
After applying the patch:
crash> dev -d
MAJOR GENDISK NAME REQUEST_QUEUE TOTAL READ WRITE
8 ffff90528df86000 sda ffff9052a3d61800 3 3 0
8 ffff905280718c00 sdb ffff9052a3d63c00 1 1 0
This patch makes the following modifications:
1) blk_mq shared tag support.
2) Function renaming: queue_for_each_hw_ctx -> blk_mq_queue_tag_busy_iter,
because the latter more closely matches the corresponding kernel function.
3) Extract a new queue_for_each_hw_ctx() function to be called for both
shared-tags case and the hw_ctx case.
Note:
The patch is safe for earlier kernels which have no blk_mq shared tags
implemented, because the blk_mq_is_shared_tags() check will exit safely.
Signed-off-by: Tao Liu <ltao(a)redhat.com>
---
Please discard the previous patch "Filter repeated rq for cmd dev -d/-D",
because filtering is an incorrect fix.
---
defs.h | 3 ++
dev.c | 96 ++++++++++++++++++++++++++++++++++++++-----------------
symbols.c | 6 ++++
3 files changed, 76 insertions(+), 29 deletions(-)
diff --git a/defs.h b/defs.h
index bbd6d4b..4fecb83 100644
--- a/defs.h
+++ b/defs.h
@@ -2271,6 +2271,9 @@ struct offset_table { /* stash of commonly-used offsets */
long task_struct_thread_context_x28;
long neigh_table_hash_heads;
long neighbour_hash;
+ long request_queue_tag_set;
+ long blk_mq_tag_set_flags;
+ long blk_mq_tag_set_shared_tags;
};
struct size_table { /* stash of commonly-used sizes */
diff --git a/dev.c b/dev.c
index 9d38aef..0a4d5c9 100644
--- a/dev.c
+++ b/dev.c
@@ -4326,6 +4326,12 @@ struct bt_iter_data {
#define MQ_RQ_IN_FLIGHT 1
#define REQ_OP_BITS 8
#define REQ_OP_MASK ((1 << REQ_OP_BITS) - 1)
+#define BLK_MQ_F_TAG_HCTX_SHARED (1 << 3)
+
+static bool blk_mq_is_shared_tags(unsigned int flags)
+{
+ return flags & BLK_MQ_F_TAG_HCTX_SHARED;
+}
static uint op_is_write(uint op)
{
@@ -4403,43 +4409,72 @@ static void bt_for_each(ulong q, ulong tags, ulong sbq, uint reserved, uint nr_r
sbitmap_for_each_set(&sc, bt_iter, &iter_data);
}
-static void queue_for_each_hw_ctx(ulong q, ulong *hctx, uint cnt, struct diskio *dio)
+static bool queue_for_each_hw_ctx(ulong q, ulong blk_mq_tags_ptr,
+ bool bitmap_tags_is_ptr, struct diskio *dio)
{
- uint i;
+ uint i, nr_reserved_tags = 0;
+ ulong tags = 0, addr = 0;
+ bool ret = FALSE;
+
+ if (!readmem(blk_mq_tags_ptr, KVADDR, &tags, sizeof(ulong),
+ "blk_mq_hw_ctx.tags", RETURN_ON_ERROR))
+ goto out;
+
+ addr = tags + OFFSET(blk_mq_tags_nr_reserved_tags);
+ if (!readmem(addr, KVADDR, &nr_reserved_tags, sizeof(uint),
+ "blk_mq_tags_nr_reserved_tags", RETURN_ON_ERROR))
+ goto out;
+
+ if (nr_reserved_tags) {
+ addr = tags + OFFSET(blk_mq_tags_breserved_tags);
+ if (bitmap_tags_is_ptr &&
+ !readmem(addr, KVADDR, &addr, sizeof(ulong),
+ "blk_mq_tags.bitmap_tags", RETURN_ON_ERROR))
+ goto out;
+ bt_for_each(q, tags, addr, 1, nr_reserved_tags, dio);
+ }
+ addr = tags + OFFSET(blk_mq_tags_bitmap_tags);
+ if (bitmap_tags_is_ptr &&
+ !readmem(addr, KVADDR, &addr, sizeof(ulong),
+ "blk_mq_tags.bitmap_tags", RETURN_ON_ERROR))
+ goto out;
+ bt_for_each(q, tags, addr, 0, nr_reserved_tags, dio);
+
+ ret = TRUE;
+out:
+ return ret;
+}
+
+/*
+ * Replica of kernel block/blk-mq-tag.c:blk_mq_queue_tag_busy_iter()
+*/
+static void blk_mq_queue_tag_busy_iter(ulong q, ulong *hctx, uint cnt,
+ struct diskio *dio)
+{
+ uint i, flags;
int bitmap_tags_is_ptr = 0;
+ ulong addr = 0;
if (MEMBER_TYPE("blk_mq_tags", "bitmap_tags") == TYPE_CODE_PTR)
bitmap_tags_is_ptr = 1;
- for (i = 0; i < cnt; i++) {
- ulong addr = 0, tags = 0;
- uint nr_reserved_tags = 0;
+ readmem(q + OFFSET(request_queue_tag_set), KVADDR, &addr,
+ sizeof(ulong), "request_queue.tag_set", RETURN_ON_ERROR);
- /* Tags owned by the block driver */
- addr = hctx[i] + OFFSET(blk_mq_hw_ctx_tags);
- if (!readmem(addr, KVADDR, &tags, sizeof(ulong),
- "blk_mq_hw_ctx.tags", RETURN_ON_ERROR))
- break;
+ readmem(addr + OFFSET(blk_mq_tag_set_flags), KVADDR,
+ &flags, sizeof(uint), "blk_mq_tag_set.flags", RETURN_ON_ERROR);
- addr = tags + OFFSET(blk_mq_tags_nr_reserved_tags);
- if (!readmem(addr, KVADDR, &nr_reserved_tags, sizeof(uint),
- "blk_mq_tags_nr_reserved_tags", RETURN_ON_ERROR))
- break;
+ if (blk_mq_is_shared_tags(flags)) {
+ addr = addr + OFFSET(blk_mq_tag_set_shared_tags);
+ queue_for_each_hw_ctx(q, addr, bitmap_tags_is_ptr, dio);
+ return;
+ }
- if (nr_reserved_tags) {
- addr = tags + OFFSET(blk_mq_tags_breserved_tags);
- if (bitmap_tags_is_ptr &&
- !readmem(addr, KVADDR, &addr, sizeof(ulong),
- "blk_mq_tags.bitmap_tags", RETURN_ON_ERROR))
- break;
- bt_for_each(q, tags, addr, 1, nr_reserved_tags, dio);
- }
- addr = tags + OFFSET(blk_mq_tags_bitmap_tags);
- if (bitmap_tags_is_ptr &&
- !readmem(addr, KVADDR, &addr, sizeof(ulong),
- "blk_mq_tags.bitmap_tags", RETURN_ON_ERROR))
- break;
- bt_for_each(q, tags, addr, 0, nr_reserved_tags, dio);
+ for (i = 0; i < cnt; i++) {
+ /* Tags owned by the block driver */
+ addr = hctx[i] + OFFSET(blk_mq_hw_ctx_tags);
+ if (queue_for_each_hw_ctx(q, addr, bitmap_tags_is_ptr, dio) == FALSE)
+ return;
}
}
@@ -4489,7 +4524,7 @@ static void get_mq_diskio_from_hw_queues(ulong q, struct diskio *dio)
return;
}
- queue_for_each_hw_ctx(q, hctx_array, cnt, dio);
+ blk_mq_queue_tag_busy_iter(q, hctx_array, cnt, dio);
FREEBUF(hctx_array);
}
@@ -4914,6 +4949,9 @@ void diskio_init(void)
MEMBER_SIZE_INIT(class_private_devices, "class_private",
"class_devices");
MEMBER_OFFSET_INIT(disk_stats_in_flight, "disk_stats", "in_flight");
+ MEMBER_OFFSET_INIT(request_queue_tag_set, "request_queue", "tag_set");
+ MEMBER_OFFSET_INIT(blk_mq_tag_set_flags, "blk_mq_tag_set", "flags");
+ MEMBER_OFFSET_INIT(blk_mq_tag_set_shared_tags, "blk_mq_tag_set", "shared_tags");
dt->flags |= DISKIO_INIT;
}
diff --git a/symbols.c b/symbols.c
index e30fafe..794519a 100644
--- a/symbols.c
+++ b/symbols.c
@@ -11487,6 +11487,12 @@ dump_offset_table(char *spec, ulong makestruct)
OFFSET(blk_mq_tags_nr_reserved_tags));
fprintf(fp, " blk_mq_tags_rqs: %ld\n",
OFFSET(blk_mq_tags_rqs));
+ fprintf(fp, " request_queue_tag_set: %ld\n",
+ OFFSET(request_queue_tag_set));
+ fprintf(fp, " blk_mq_tag_set_flags: %ld\n",
+ OFFSET(blk_mq_tag_set_flags));
+ fprintf(fp, " blk_mq_tag_set_shared_tags: %ld\n",
+ OFFSET(blk_mq_tag_set_shared_tags));
fprintf(fp, " subsys_private_subsys: %ld\n", OFFSET(subsys_private_subsys));
fprintf(fp, " subsys_private_klist_devices: %ld\n",
--
2.47.0
2 weeks, 6 days
[PATCH] Disable DT_DEBUG lookup by GDB inside the vmcore
by Shivang Upadhyay
When running crash with GDB 16.2, the following warnings are printed:
crash>
crash: page excluded: kernel virtual address: c0000000022d6098 type: "gdb_readmem_callback"
crash: page excluded: kernel virtual address: c0000000022d6098 type: "gdb_readmem_callback"
This occurs because the elf_locate_base function in GDB 16.2 attempts
to read the address of the dynamic linker runtime structure, which is
present in the .dynamic section of the executable. However, this section
may be excluded from the dump by makedumpfile.
Commit e906eaca2b1a ("Fix the issue of "page excluded" messages flooding")
attempted to fix this by suppressing these warnings for regular users, but the
warnings still appear when crash is started in debug mode.
To fix this, remove the elf_locate_base call in GDB that tries to read the
.dynamic section, as this information is not useful for debugging kernel
images in either dump or live kernel scenarios.
Cc: Sourabh Jain <sourabhjain(a)linux.ibm.com>
Cc: Lianbo Jiang <lijiang(a)redhat.com>
Signed-off-by: Shivang Upadhyay <shivangu(a)linux.ibm.com>
---
gdb-16.2.patch | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
diff --git a/gdb-16.2.patch b/gdb-16.2.patch
index 151e4e2..7b79cdf 100644
--- a/gdb-16.2.patch
+++ b/gdb-16.2.patch
@@ -1952,3 +1952,19 @@ exit 0
}
/* Remember the bfd indexes for the .text, .data, .bss and
+--- gdb-16.2/gdb/solib-svr4.c.orig
++++ gdb-16.2/gdb/solib-svr4.c
+@@ -742,11 +742,13 @@ elf_locate_base (void)
+ return extract_typed_address (pbuf, ptr_type);
+ }
+
++#ifndef CRASH_MERGE
+ /* Find DT_DEBUG. */
+ if (gdb_bfd_scan_elf_dyntag (DT_DEBUG, current_program_space->exec_bfd (),
+ &dyn_ptr, NULL)
+ || scan_dyntag_auxv (DT_DEBUG, &dyn_ptr, NULL))
+ return dyn_ptr;
++#endif
+
+ /* This may be a static executable. Look for the symbol
+ conventionally named _r_debug, as a last resort. */
--
2.49.0
3 weeks, 5 days
[PATCH] x86_64: filter unwanted warning message for "bt -T" cmd
by Tao Liu
After patch "x86_64: Add gdb multi-stack unwind support" applied, a
warning message is observed for "bt -T" cmd:
crash> bt -T
bt: seek error: kernel virtual address: fffffffffffffffb type: "gdb_readmem_callback"
[ffffbaebc60d6fa8] srso_return_thunk at ffffffff82246fa5
...
The root cause is that "bt -T" sets BT_TEXT_SYMBOLS_ALL in bt->flags,
and eip is set to 0 in kernel.c:back_trace(). Later, in
x86_64_low_budget_back_trace_cmd(), eip - 5, i.e. 0xfffffffffffffffb, is
used as the address to disassemble with gdb "x/1i 0x%lx". This address is
invalid, so the warning message is output.
In fact, multi-stack unwind isn't designed for the "bt -T" and eip = 0 case.
To avoid the warning message, let's simply bypass the "bt -T" case for
x86_64. Other archs (arm64/ppc64) aren't affected by the issue because
the gdb "x/1i 0x%lx" command is not used on those archs.
After applying the patch:
crash> bt -T
[ffffbaebc60d6fa8] srso_return_thunk at ffffffff82246fa5
...
Signed-off-by: Tao Liu <ltao(a)redhat.com>
---
x86_64.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/x86_64.c b/x86_64.c
index cfefe3f..d7da536 100644
--- a/x86_64.c
+++ b/x86_64.c
@@ -3636,7 +3636,8 @@ x86_64_low_budget_back_trace_cmd(struct bt_info *bt_in)
level++;
}
- if (is_task_active(bt->task) && bt->flags & BT_DUMPFILE_SEARCH) {
+ if (is_task_active(bt->task) && bt->flags & BT_DUMPFILE_SEARCH &&
+ !(bt->flags & BT_TEXT_SYMBOLS_ALL)) {
if (!extra_stacks_regs[extra_stacks_idx]) {
extra_stacks_regs[extra_stacks_idx] =
(struct user_regs_bitmap_struct *)
--
2.47.0
1 month
[PATCH] Fix a regression for eppic extension on gdb-16.2
by Tao Liu
There is a regression found when testing eppic extension on gdb-16.2
crash:
crash> cgroup
/root/.eppic/cgroup.c : line 99 : Error: undefined variable 'cgroup_roots'
The root cause is that, during the gdb upgrade, the replacement code in
gdb_get_datatype() was incorrect:
The original gdb-10.2 version:
long value = SYMBOL_VALUE(expr->elts[2].symbol);
The incorrect gdb-16.2 replacement:
long value = value_as_long(expr->evaluate());
According to gdb/tracepoint.c, the correct gdb-16.2 replacement should be:
symbol *sym;
expr::var_value_operation *vvop
= (gdb::checked_static_cast<expr::var_value_operation *>
(exp->op.get ()));
sym = vvop->get_symbol ();
long value = sym->value_longest ();
Otherwise, value_as_long() will throw an exception when trying to
convert a struct into a long, such as "cgroup_roots". The reason this
issue is only observed with crash extensions is that the faulty code block
is only triggered when "req->tcb" is set, which is a callback for
gdb_interface(). The callback is used by the eppic extension, but the normal
crash internal calls hardly use it.
After:
crash> cgroup
0:/user.slice/user-1000.slice/session-2.scope
Signed-off-by: Tao Liu <ltao(a)redhat.com>
---
gdb-16.2.patch | 29 +++++++++++++++++++++++++++++
1 file changed, 29 insertions(+)
diff --git a/gdb-16.2.patch b/gdb-16.2.patch
index 151e4e2..eb620f7 100644
--- a/gdb-16.2.patch
+++ b/gdb-16.2.patch
@@ -1952,3 +1952,32 @@ exit 0
}
/* Remember the bfd indexes for the .text, .data, .bss and
+--- gdb-16.2/gdb/symtab.c.orig
++++ gdb-16.2/gdb/symtab.c
+@@ -7690,7 +7690,11 @@
+ console("expr->first_opcode(): OP_VAR_VALUE\n");
+ type = expr->evaluate_type()->type();
+ if (req->tcb) {
+- long value = value_as_long(expr->evaluate());
++ expr::var_value_operation *vvop
++ = (gdb::checked_static_cast<expr::var_value_operation *>
++ (expr->op.get ()));
++ sym = vvop->get_symbol ();
++ long value = sym->value_longest ();
+ /* callback with symbol value */
+ req->typecode = TYPE_CODE(type);
+ req->tcb(EOP_VALUE, req, &value, 0, 0, 0);
+@@ -7701,8 +7705,12 @@
+ req->length = type->length();
+ }
+ if (TYPE_CODE(type) == TYPE_CODE_ENUM) {
++ expr::var_value_operation *vvop
++ = (gdb::checked_static_cast<expr::var_value_operation *>
++ (expr->op.get ()));
++ sym = vvop->get_symbol ();
+ req->typecode = TYPE_CODE(type);
+- req->value = value_as_long(expr->evaluate());
++ req->value = sym->value_longest ();
+ req->tagname = (char *)TYPE_TAG_NAME(type);
+ if (!req->tagname) {
+ val = expr->evaluate_type();
--
2.47.0
1 month