When blk_mq shared tags enabled for devices like scsi, the IO status is
incorrect, e.g:
crash> dev -d
MAJOR GENDISK NAME REQUEST_QUEUE TOTAL ASYNC SYNC
8 ffff90528df86000 sda ffff9052a3d61800 144 144 0
8 ffff905280718c00 sdb ffff9052a3d63c00 48 48 0
crash> epython rqlist
ffff90528e94a5c0 sda is unknown, deadline: 89.992 (90) rq_alloc: 0.196
ffff90528e92f700 sda is unknown, deadline: 89.998 (90) rq_alloc: 0.202
ffff90528e95ccc0 sda is unknown, deadline: 89.999 (90) rq_alloc: 0.203
ffff90528e968bc0 sdb is unknown, deadline: 89.997 (90) rq_alloc: 0.201
The root cause is: for shared tags case, only the shared tags are put
into count. Without this patch, tags of all the hw_ctx are counted,
which is incorrect.
After apply the patch:
crash> dev -d
MAJOR GENDISK NAME REQUEST_QUEUE TOTAL READ WRITE
8 ffff90528df86000 sda ffff9052a3d61800 3 3 0
8 ffff905280718c00 sdb ffff9052a3d63c00 1 1 0
This patch makes the following modification:
1) blk_mq shared tag support.
2) Function renaming: queue_for_each_hw_ctx -> blk_mq_queue_tag_busy_iter,
because the latter is more close to the corresponding kernel function.
3) Extract a new queue_for_each_hw_ctx() function to be called for both
shared-tags case and the hw_ctx case.
Note:
The patch is safe for earlier kernels which have no blk_mq shared tags
implemented, because the blk_mq_is_shared_tags() check will exit safely.
Signed-off-by: Tao Liu <ltao(a)redhat.com>
---
Please discard the previous patch "Filter repeated rq for cmd dev -d/-D",
because filtering is an incorrect fix.
---
defs.h | 3 ++
dev.c | 96 ++++++++++++++++++++++++++++++++++++++-----------------
symbols.c | 6 ++++
3 files changed, 76 insertions(+), 29 deletions(-)
diff --git a/defs.h b/defs.h
index bbd6d4b..4fecb83 100644
--- a/defs.h
+++ b/defs.h
@@ -2271,6 +2271,9 @@ struct offset_table { /* stash of commonly-used
offsets */
long task_struct_thread_context_x28;
long neigh_table_hash_heads;
long neighbour_hash;
+ long request_queue_tag_set;
+ long blk_mq_tag_set_flags;
+ long blk_mq_tag_set_shared_tags;
};
struct size_table { /* stash of commonly-used sizes */
diff --git a/dev.c b/dev.c
index 9d38aef..0a4d5c9 100644
--- a/dev.c
+++ b/dev.c
@@ -4326,6 +4326,12 @@ struct bt_iter_data {
#define MQ_RQ_IN_FLIGHT 1
#define REQ_OP_BITS 8
#define REQ_OP_MASK ((1 << REQ_OP_BITS) - 1)
+#define BLK_MQ_F_TAG_HCTX_SHARED (1 << 3)
+
+static bool blk_mq_is_shared_tags(unsigned int flags)
+{
+ return flags & BLK_MQ_F_TAG_HCTX_SHARED;
+}
static uint op_is_write(uint op)
{
@@ -4403,43 +4409,72 @@ static void bt_for_each(ulong q, ulong tags, ulong sbq, uint
reserved, uint nr_r
sbitmap_for_each_set(&sc, bt_iter, &iter_data);
}
-static void queue_for_each_hw_ctx(ulong q, ulong *hctx, uint cnt, struct diskio *dio)
+static bool queue_for_each_hw_ctx(ulong q, ulong blk_mq_tags_ptr,
+ bool bitmap_tags_is_ptr, struct diskio *dio)
{
- uint i;
+ uint i, nr_reserved_tags = 0;
+ ulong tags = 0, addr = 0;
+ bool ret = FALSE;
+
+ if (!readmem(blk_mq_tags_ptr, KVADDR, &tags, sizeof(ulong),
+ "blk_mq_hw_ctx.tags", RETURN_ON_ERROR))
+ goto out;
+
+ addr = tags + OFFSET(blk_mq_tags_nr_reserved_tags);
+ if (!readmem(addr, KVADDR, &nr_reserved_tags, sizeof(uint),
+ "blk_mq_tags_nr_reserved_tags", RETURN_ON_ERROR))
+ goto out;
+
+ if (nr_reserved_tags) {
+ addr = tags + OFFSET(blk_mq_tags_breserved_tags);
+ if (bitmap_tags_is_ptr &&
+ !readmem(addr, KVADDR, &addr, sizeof(ulong),
+ "blk_mq_tags.bitmap_tags", RETURN_ON_ERROR))
+ goto out;
+ bt_for_each(q, tags, addr, 1, nr_reserved_tags, dio);
+ }
+ addr = tags + OFFSET(blk_mq_tags_bitmap_tags);
+ if (bitmap_tags_is_ptr &&
+ !readmem(addr, KVADDR, &addr, sizeof(ulong),
+ "blk_mq_tags.bitmap_tags", RETURN_ON_ERROR))
+ goto out;
+ bt_for_each(q, tags, addr, 0, nr_reserved_tags, dio);
+
+ ret = TRUE;
+out:
+ return ret;
+}
+
+/*
+ * Replica of kernel block/blk-mq-tag.c:blk_mq_queue_tag_busy_iter()
+*/
+static void blk_mq_queue_tag_busy_iter(ulong q, ulong *hctx, uint cnt,
+ struct diskio *dio)
+{
+ uint i, flags;
int bitmap_tags_is_ptr = 0;
+ ulong addr = 0;
if (MEMBER_TYPE("blk_mq_tags", "bitmap_tags") == TYPE_CODE_PTR)
bitmap_tags_is_ptr = 1;
- for (i = 0; i < cnt; i++) {
- ulong addr = 0, tags = 0;
- uint nr_reserved_tags = 0;
+ readmem(q + OFFSET(request_queue_tag_set), KVADDR, &addr,
+ sizeof(ulong), "request_queue.tag_set", RETURN_ON_ERROR);
- /* Tags owned by the block driver */
- addr = hctx[i] + OFFSET(blk_mq_hw_ctx_tags);
- if (!readmem(addr, KVADDR, &tags, sizeof(ulong),
- "blk_mq_hw_ctx.tags", RETURN_ON_ERROR))
- break;
+ readmem(addr + OFFSET(blk_mq_tag_set_flags), KVADDR,
+ &flags, sizeof(uint), "blk_mq_tag_set.flags", RETURN_ON_ERROR);
- addr = tags + OFFSET(blk_mq_tags_nr_reserved_tags);
- if (!readmem(addr, KVADDR, &nr_reserved_tags, sizeof(uint),
- "blk_mq_tags_nr_reserved_tags", RETURN_ON_ERROR))
- break;
+ if (blk_mq_is_shared_tags(flags)) {
+ addr = addr + OFFSET(blk_mq_tag_set_shared_tags);
+ queue_for_each_hw_ctx(q, addr, bitmap_tags_is_ptr, dio);
+ return;
+ }
- if (nr_reserved_tags) {
- addr = tags + OFFSET(blk_mq_tags_breserved_tags);
- if (bitmap_tags_is_ptr &&
- !readmem(addr, KVADDR, &addr, sizeof(ulong),
- "blk_mq_tags.bitmap_tags", RETURN_ON_ERROR))
- break;
- bt_for_each(q, tags, addr, 1, nr_reserved_tags, dio);
- }
- addr = tags + OFFSET(blk_mq_tags_bitmap_tags);
- if (bitmap_tags_is_ptr &&
- !readmem(addr, KVADDR, &addr, sizeof(ulong),
- "blk_mq_tags.bitmap_tags", RETURN_ON_ERROR))
- break;
- bt_for_each(q, tags, addr, 0, nr_reserved_tags, dio);
+ for (i = 0; i < cnt; i++) {
+ /* Tags owned by the block driver */
+ addr = hctx[i] + OFFSET(blk_mq_hw_ctx_tags);
+ if (queue_for_each_hw_ctx(q, addr, bitmap_tags_is_ptr, dio) == FALSE)
+ return;
}
}
@@ -4489,7 +4524,7 @@ static void get_mq_diskio_from_hw_queues(ulong q, struct diskio
*dio)
return;
}
- queue_for_each_hw_ctx(q, hctx_array, cnt, dio);
+ blk_mq_queue_tag_busy_iter(q, hctx_array, cnt, dio);
FREEBUF(hctx_array);
}
@@ -4914,6 +4949,9 @@ void diskio_init(void)
MEMBER_SIZE_INIT(class_private_devices, "class_private",
"class_devices");
MEMBER_OFFSET_INIT(disk_stats_in_flight, "disk_stats",
"in_flight");
+ MEMBER_OFFSET_INIT(request_queue_tag_set, "request_queue",
"tag_set");
+ MEMBER_OFFSET_INIT(blk_mq_tag_set_flags, "blk_mq_tag_set",
"flags");
+ MEMBER_OFFSET_INIT(blk_mq_tag_set_shared_tags, "blk_mq_tag_set",
"shared_tags");
dt->flags |= DISKIO_INIT;
}
diff --git a/symbols.c b/symbols.c
index e30fafe..794519a 100644
--- a/symbols.c
+++ b/symbols.c
@@ -11487,6 +11487,12 @@ dump_offset_table(char *spec, ulong makestruct)
OFFSET(blk_mq_tags_nr_reserved_tags));
fprintf(fp, " blk_mq_tags_rqs: %ld\n",
OFFSET(blk_mq_tags_rqs));
+ fprintf(fp, " request_queue_tag_set: %ld\n",
+ OFFSET(request_queue_tag_set));
+ fprintf(fp, " blk_mq_tag_set_flags: %ld\n",
+ OFFSET(blk_mq_tag_set_flags));
+ fprintf(fp, " blk_mq_tag_set_shared_tags: %ld\n",
+ OFFSET(blk_mq_tag_set_shared_tags));
fprintf(fp, " subsys_private_subsys: %ld\n",
OFFSET(subsys_private_subsys));
fprintf(fp, " subsys_private_klist_devices: %ld\n",
--
2.47.0