There have been some reports that the "dev -d|-D" options displayed
incorrect I/O stats due to racy blk_mq_ctx.rq_completed value.
To fix it, make the options use sbitmap to count I/O stats on RHEL8
and adjust to its blk_mq_tags structure.
Signed-off-by: Kazuhito Hagio <k-hagio-ab(a)nec.com>
---
The patch tested ok with upstream 4.0 to 5.18 kernels and several
RHEL7 and RHEL8 ones.
dev.c | 21 +++++++++++++++++++--
1 file changed, 19 insertions(+), 2 deletions(-)
diff --git a/dev.c b/dev.c
index 0172c83ffaea..733e3a8a40cd 100644
--- a/dev.c
+++ b/dev.c
@@ -4339,6 +4339,10 @@ static void bt_for_each(ulong q, ulong tags, ulong sbq, uint
reserved, uint nr_r
static void queue_for_each_hw_ctx(ulong q, ulong *hctx, uint cnt, struct diskio *dio)
{
uint i;
+ int bitmap_tags_is_ptr = 0;
+
+ if (MEMBER_TYPE("blk_mq_tags", "bitmap_tags") == TYPE_CODE_PTR)
+ bitmap_tags_is_ptr = 1;
for (i = 0; i < cnt; i++) {
ulong addr = 0, tags = 0;
@@ -4357,9 +4361,17 @@ static void queue_for_each_hw_ctx(ulong q, ulong *hctx, uint cnt,
struct diskio
if (nr_reserved_tags) {
addr = tags + OFFSET(blk_mq_tags_breserved_tags);
+ if (bitmap_tags_is_ptr &&
+ !readmem(addr, KVADDR, &addr, sizeof(ulong),
+ "blk_mq_tags.bitmap_tags", RETURN_ON_ERROR))
+ break;
bt_for_each(q, tags, addr, 1, nr_reserved_tags, dio);
}
addr = tags + OFFSET(blk_mq_tags_bitmap_tags);
+ if (bitmap_tags_is_ptr &&
+ !readmem(addr, KVADDR, &addr, sizeof(ulong),
+ "blk_mq_tags.bitmap_tags", RETURN_ON_ERROR))
+ break;
bt_for_each(q, tags, addr, 0, nr_reserved_tags, dio);
}
}
@@ -4423,8 +4435,13 @@ get_mq_diskio(unsigned long q, unsigned long *mq_count)
unsigned long mctx_addr;
struct diskio tmp = {0};
- if (INVALID_MEMBER(blk_mq_ctx_rq_dispatched) ||
- INVALID_MEMBER(blk_mq_ctx_rq_completed)) {
+ /*
+ * Currently it does not support earlier sbitmap and blk-mq
+ * implementation e.g. RHEL7, so filter them out.
+ */
+ if (VALID_STRUCT(sbitmap) &&
+ VALID_MEMBER(sbitmap_word_cleared) &&
+ VALID_MEMBER(request_state)) {
get_mq_diskio_from_hw_queues(q, &tmp);
mq_count[0] = tmp.read;
mq_count[1] = tmp.write;
--
2.27.0