few additional comments.
On 2022/05/26 16:34, HAGIO KAZUHITO(萩尾 一仁) wrote:
Hi Lianbo,
thank you for the update. Nice work, it became more readable :)
On 2022/05/23 19:06, Lianbo Jiang wrote:
> Currently, crash doesn't support to display disk I/O statistics
> for blk-mq devices. For more details, please refer to the following
> commit: <98b417fc6346> ("Handle blk_mq_ctx member changes for kernels
> 5.16-rc1 and later").
>
> Lets parse the bitmap in blk-mq layer to achieve it.
>
> Signed-off-by: Lianbo Jiang <lijiang(a)redhat.com>
> ---
> defs.h | 12 +++
> dev.c | 292 ++++++++++++++++++++++++++++++++++++++++++++++--------
> symbols.c | 24 +++++
> 3 files changed, 289 insertions(+), 39 deletions(-)
>
> diff --git a/defs.h b/defs.h
> index ecbced24d2e3..c1626bc79d59 100644
> --- a/defs.h
> +++ b/defs.h
> @@ -2170,6 +2170,17 @@ struct offset_table { /* stash of
commonly-used offsets */
> long sbq_wait_state_wait;
> long sbitmap_alloc_hint;
> long sbitmap_round_robin;
> + long request_q;
> + long request_cmd_flags;
> + long request_queue_queue_hw_ctx;
> + long request_queue_nr_hw_queues;
> + long blk_mq_hw_ctx_tags;
> + long blk_mq_hw_ctx_sched_tags;
> + long blk_mq_tags_bitmap_tags;
> + long blk_mq_tags_breserved_tags;
> + long blk_mq_tags_nr_reserved_tags;
> + long blk_mq_tags_rqs;
> + long blk_mq_tags_static_rqs;
> };
>
> struct size_table { /* stash of commonly-used sizes */
> @@ -2339,6 +2350,7 @@ struct size_table { /* stash of commonly-used sizes
*/
> long sbitmap;
> long sbitmap_queue;
> long sbq_wait_state;
> + long blk_mq_tags;
> };
>
> struct array_table {
> diff --git a/dev.c b/dev.c
> index a493e51ac95c..7e0b3d27888d 100644
> --- a/dev.c
> +++ b/dev.c
> @@ -4238,19 +4238,216 @@ get_one_mctx_diskio(unsigned long mctx, struct diskio *io)
> io->write = (dispatch[1] - comp[1]);
> }
>
> +typedef uint (busy_tag_iter_fn)(ulong rq, void *data);
> +
> +struct bt_iter_data {
> + ulong q;
> + struct diskio *dio;
> +};
> +
> +struct bt_tags_iter_data {
> + ulong tags;
> + uint flags;
> + uint nr_reserved_tags;
> + busy_tag_iter_fn *fn;
> + void *data;
> +};
> +
> +static uint op_is_write(uint op)
> +{
> +#define REQ_OP_BITS 8
> +#define REQ_OP_MASK ((1 << REQ_OP_BITS) - 1)
> +
> + return (op & REQ_OP_MASK) & 1;
> +}
> +
> +static uint calculate_disk_io(ulong rq, void *data)
> +{
> + uint cmd_flags = 0;
> + ulong addr = 0, queue = 0;
> + struct bt_iter_data *iter = data;
> +
> + if (!IS_KVADDR(rq))
> + return 1;
For confirmation, this 1 is intended/correct?
And please use TRUE or FALSE for readability.
> +
> + addr = rq + OFFSET(request_q);
> + if (!readmem(addr, KVADDR, &queue, sizeof(ulong), "request.q",
RETURN_ON_ERROR))
> + return 0;
> +
> + addr = rq + OFFSET(request_cmd_flags);
> + if (!readmem(addr, KVADDR, &cmd_flags, sizeof(uint),
"request.cmd_flags", RETURN_ON_ERROR))
> + return 0;
> +
> + if (queue == iter->q) {
> + if (op_is_write(cmd_flags))
> + iter->dio->write++;
> + else
> + iter->dio->read++;
> + }
> +
> + return 1;
> +}
> +
> +static bool bt_tags_iter(uint bitnr, void *data)
> +{
> + uint ret = 0;
Same as above.
Thanks,
kazu
>> + ulong addr = 0, rqs_addr = 0, rq = 0;
>> + struct bt_tags_iter_data *tag_iter = data;
>> + ulong tag = tag_iter->tags;
>> +
>> + if (!tag_iter->flags)
>> + bitnr += tag_iter->nr_reserved_tags;
>> +
>> + /* rqs */
>> + addr = tag + OFFSET(blk_mq_tags_rqs);
>> + if (!readmem(addr, KVADDR, &rqs_addr, sizeof(void *),
"blk_mq_tags.rqs", RETURN_ON_ERROR))
>> + return ret;
>> + addr = rqs_addr + bitnr * sizeof(ulong); /* rqs[bitnr] */
>> + if (!readmem(addr, KVADDR, &rq, sizeof(ulong),
"blk_mq_tags.rqs[]", RETURN_ON_ERROR))
>> + return ret;
>> + ret = tag_iter->fn(rq, tag_iter->data);
>> + if (!ret)
>> + return ret;
>> +
>
>> + /* static_rqs */
>> + ret = 0;
>> + addr = tag + OFFSET(blk_mq_tags_static_rqs);
>> + if (!readmem(addr, KVADDR, &rqs_addr, sizeof(void *),
"blk_mq_tags.static_rqs", RETURN_ON_ERROR))
>> + return ret;
>> + addr = rqs_addr + bitnr * sizeof(ulong); /* static_rqs[bitnr] */
>> + if (!readmem(addr, KVADDR, &rq, sizeof(ulong),
"blk_mq_tags.static_rqs[]", RETURN_ON_ERROR))
>> + return ret;
>> + ret = tag_iter->fn(rq, tag_iter->data);
>> +
>> + return ret;
>
> I'm not familiar with blk-mq, I'd like some information.
>
> I think that the "dev -d" displays the same inflight stats in
/proc/diskstats
> for devices without mq:
>
> diskstats_show
> part_in_flight
> count up per_cpu disk_stats->in_flight
>
> so, if we do the same thing for mq devices, we should imitate this path:
>
> diskstats_show
> blk_mq_in_flight
> blk_mq_queue_tag_busy_iter
> queue_for_each_hw_ctx
> bt_for_each
> sbitmap_for_each_set
> bt_iter
> blk_mq_check_inflight
>
> On the path, I cannot see the static_rqs being counted.
> Why does it need to be counted?
>
>> +}
>> +
>> +static void bt_tags_for_each(ulong q, ulong tags, ulong sbq, uint flags, uint
nr_resvd_tags, struct diskio *dio)
>> +{
>> + struct sbitmap_context sc = {0};
>> + struct diskio tmp = {0};
>> + struct bt_iter_data iter_data = {
>> + .q = q,
>> + .dio = &tmp,
>> + };
>> + struct bt_tags_iter_data tags_iter_data = {
>> + .tags = tags,
>> + .flags = flags,
>> + .nr_reserved_tags = nr_resvd_tags,
>> + .fn = calculate_disk_io,
>> + .data = &iter_data,
>> + };
>> +
>> + sbitmap_context_load(sbq + OFFSET(sbitmap_queue_sb), &sc);
>> + sbitmap_for_each_set(&sc, bt_tags_iter, &tags_iter_data);
>> +
>> + dio->read = tmp.read;
>> + dio->write = tmp.write;
>> +}
>> +
>> +static void queue_for_each_hw_ctx(ulong q, ulong *hctx, uint cnt, struct diskio
*dio)
>> +{
>> + uint i;
>> + struct diskio tmp = {0};
>> +
>> + if (!hctx || !dio)
>> + error(FATAL, "%s: Invalid parameter.\n", __func__);
>> +
>> + for (i = 0; i < cnt; i++) {
>> + ulong addr = 0, tags = 0, sched_tags = 0;
>> + uint nr_reserved_tags = 0;
>> +
>> + if (!IS_KVADDR(hctx[i]))
>> + continue;
>> +
>> + /* Tags owned by the block driver */
>> + addr = hctx[i] + OFFSET(blk_mq_hw_ctx_tags);
>> + if (!readmem(addr, KVADDR, &tags, sizeof(ulong),
>> + "blk_mq_hw_ctx.tags", RETURN_ON_ERROR))
>> + break;
>> +
>> + addr = tags + OFFSET(blk_mq_tags_nr_reserved_tags);
>> + if (!readmem(addr, KVADDR, &nr_reserved_tags, sizeof(uint),
>> + "blk_mq_tags_nr_reserved_tags", RETURN_ON_ERROR))
>> + break;
>> +
>> + if (nr_reserved_tags) {
>> + addr = tags + OFFSET(blk_mq_tags_breserved_tags);
>> + bt_tags_for_each(q, tags, addr, 1, nr_reserved_tags, &tmp);
>> + }
>> + addr = tags + OFFSET(blk_mq_tags_bitmap_tags);
>> + bt_tags_for_each(q, tags, addr, 0, nr_reserved_tags, &tmp);
>> +
>
>> + /* Tags owned by I/O scheduler */
>> + addr = hctx[i] + OFFSET(blk_mq_hw_ctx_sched_tags);
>> + if (!readmem(addr, KVADDR, &sched_tags, sizeof(ulong),
>> + "blk_mq_hw_ctx.sched_tags", RETURN_ON_ERROR))
>> + break;
>> +
>> + addr = sched_tags + OFFSET(blk_mq_tags_nr_reserved_tags);
>> + if (!readmem(addr, KVADDR, &nr_reserved_tags, sizeof(uint),
>> + "blk_mq_tags_nr_reserved_tags", RETURN_ON_ERROR))
>> + break;
>> +
>> + if (nr_reserved_tags) {
>> + addr = sched_tags + OFFSET(blk_mq_tags_breserved_tags);
>> + bt_tags_for_each(q, sched_tags, addr, 1, nr_reserved_tags, &tmp);
>> + }
>> + addr = sched_tags + OFFSET(blk_mq_tags_bitmap_tags);
>> + bt_tags_for_each(q, sched_tags, addr, 0, nr_reserved_tags, &tmp);
>
> Same as above, why does the sched_tags need to be counted?
>
>> + }
>> +
>> + dio->read = tmp.read;
>> + dio->write = tmp.write;
>> +}
>> +
>> +static void get_mq_diskio_from_hw_queues(ulong q, struct diskio *dio)
>> +{
>> + uint cnt = 0;
>> + ulong addr = 0, hctx_addr = 0;
>> + ulong *hctx_array = NULL;
>> + struct diskio tmp = {0};
>> +
>> + addr = q + OFFSET(request_queue_nr_hw_queues);
>> + readmem(addr, KVADDR, &cnt, sizeof(uint),
>> + "request_queue.nr_hw_queues", FAULT_ON_ERROR);
>> +
>> + addr = q + OFFSET(request_queue_queue_hw_ctx);
>> + readmem(addr, KVADDR, &hctx_addr, sizeof(void *),
>> + "request_queue.queue_hw_ctx", FAULT_ON_ERROR);
>> +
>> + hctx_array = (ulong *)GETBUF(sizeof(void *) * cnt);
>> + if (!hctx_array)
>> + error(FATAL, "fail to get memory for the hctx_array\n");
>> +
>> + if (!readmem(hctx_addr, KVADDR, hctx_array, sizeof(void *) * cnt,
>> + "request_queue.queue_hw_ctx[]", RETURN_ON_ERROR)) {
>> + FREEBUF(hctx_array);
>> + return;
>> + }
>> +
>> + queue_for_each_hw_ctx(q, hctx_array, cnt, &tmp);
>> + dio->read = tmp.read;
>> + dio->write = tmp.write;
>> +
>> + FREEBUF(hctx_array);
>> +}
>> +
>> static void
>> get_mq_diskio(unsigned long q, unsigned long *mq_count)
>> {
>> int cpu;
>> unsigned long queue_ctx;
>> unsigned long mctx_addr;
>> - struct diskio tmp;
>> + struct diskio tmp = {0};
>>
>> if (INVALID_MEMBER(blk_mq_ctx_rq_dispatched) ||
>> - INVALID_MEMBER(blk_mq_ctx_rq_completed))
>> + INVALID_MEMBER(blk_mq_ctx_rq_completed)) {
>> + get_mq_diskio_from_hw_queues(q, &tmp);
>> + mq_count[0] = tmp.read;
>> + mq_count[1] = tmp.write;
>> return;
>> -
>> - memset(&tmp, 0x00, sizeof(struct diskio));
>> + }
>>
>> readmem(q + OFFSET(request_queue_queue_ctx), KVADDR, &queue_ctx,
>> sizeof(ulong), "request_queue.queue_ctx",
>> @@ -4479,41 +4676,24 @@ display_one_diskio(struct iter *i, unsigned long gendisk,
ulong flags)
>> && (io.read + io.write == 0))
>> return;
>>
>> - if (use_mq_interface(queue_addr) &&
>> - (INVALID_MEMBER(blk_mq_ctx_rq_dispatched) ||
>> - INVALID_MEMBER(blk_mq_ctx_rq_completed)))
>> - fprintf(fp, "%s%s%s %s%s%s%s %s%s%s",
>> - mkstring(buf0, 5, RJUST|INT_DEC, (char *)(unsigned long)major),
>> - space(MINSPACE),
>> - mkstring(buf1, VADDR_PRLEN, LJUST|LONG_HEX, (char *)gendisk),
>> - space(MINSPACE),
>> - mkstring(buf2, 10, LJUST, disk_name),
>> - space(MINSPACE),
>> - mkstring(buf3, VADDR_PRLEN <= 11 ? 11 : VADDR_PRLEN,
>> - LJUST|LONG_HEX, (char *)queue_addr),
>> - space(MINSPACE),
>> - mkstring(buf4, 17, RJUST, "(not supported)"),
>> - space(MINSPACE));
>> -
>> - else
>> - fprintf(fp, "%s%s%s %s%s%s%s %s%5d%s%s%s%s%s",
>> - mkstring(buf0, 5, RJUST|INT_DEC, (char *)(unsigned long)major),
>> - space(MINSPACE),
>> - mkstring(buf1, VADDR_PRLEN, LJUST|LONG_HEX, (char *)gendisk),
>> - space(MINSPACE),
>> - mkstring(buf2, 10, LJUST, disk_name),
>> - space(MINSPACE),
>> - mkstring(buf3, VADDR_PRLEN <= 11 ? 11 : VADDR_PRLEN,
>> - LJUST|LONG_HEX, (char *)queue_addr),
>> - space(MINSPACE),
>> - io.read + io.write,
>> - space(MINSPACE),
>> - mkstring(buf4, 5, RJUST|INT_DEC,
>> - (char *)(unsigned long)io.read),
>> - space(MINSPACE),
>> - mkstring(buf5, 5, RJUST|INT_DEC,
>> - (char *)(unsigned long)io.write),
>> - space(MINSPACE));
>> + fprintf(fp, "%s%s%s %s%s%s%s %s%5d%s%s%s%s%s",
>> + mkstring(buf0, 5, RJUST|INT_DEC, (char *)(unsigned long)major),
>> + space(MINSPACE),
>> + mkstring(buf1, VADDR_PRLEN, LJUST|LONG_HEX, (char *)gendisk),
>> + space(MINSPACE),
>> + mkstring(buf2, 10, LJUST, disk_name),
>> + space(MINSPACE),
>> + mkstring(buf3, VADDR_PRLEN <= 11 ? 11 : VADDR_PRLEN,
>> + LJUST|LONG_HEX, (char *)queue_addr),
>> + space(MINSPACE),
>> + io.read + io.write,
>> + space(MINSPACE),
>> + mkstring(buf4, 5, RJUST|INT_DEC,
>> + (char *)(unsigned long)io.read),
>> + space(MINSPACE),
>> + mkstring(buf5, 5, RJUST|INT_DEC,
>> + (char *)(unsigned long)io.write),
>> + space(MINSPACE));
>>
>> if (VALID_MEMBER(request_queue_in_flight)) {
>> if (!use_mq_interface(queue_addr)) {
>> @@ -4603,15 +4783,49 @@ void diskio_init(void)
>> MEMBER_OFFSET_INIT(request_queue_rq, "request_queue",
"rq");
>> else
>> MEMBER_OFFSET_INIT(request_queue_rq, "request_queue",
"root_rl");
>
>> + if (MEMBER_EXISTS("request", "q"))
>> + MEMBER_OFFSET_INIT(request_q, "request", "q");
>> + if (MEMBER_EXISTS("request", "cmd_flags"))
>> + MEMBER_OFFSET_INIT(request_cmd_flags, "request",
"cmd_flags");
>
> No need to have these "if MEMBER_EXISTS()", because it doesn't save
> a bunch of MEMBER_OFFSET_INIT().
>
>> if (MEMBER_EXISTS("request_queue", "mq_ops")) {
>> MEMBER_OFFSET_INIT(request_queue_mq_ops, "request_queue",
>> "mq_ops");
>> ANON_MEMBER_OFFSET_INIT(request_queue_queue_ctx,
>> "request_queue", "queue_ctx");
>> + MEMBER_OFFSET_INIT(request_queue_queue_hw_ctx,
>> + "request_queue", "queue_hw_ctx");
>> + MEMBER_OFFSET_INIT(request_queue_nr_hw_queues,
>> + "request_queue", "nr_hw_queues");
>> MEMBER_OFFSET_INIT(blk_mq_ctx_rq_dispatched, "blk_mq_ctx",
>> "rq_dispatched");
>> MEMBER_OFFSET_INIT(blk_mq_ctx_rq_completed, "blk_mq_ctx",
>> "rq_completed");
>> + MEMBER_OFFSET_INIT(blk_mq_hw_ctx_tags, "blk_mq_hw_ctx",
>> + "tags");
>> + MEMBER_OFFSET_INIT(blk_mq_hw_ctx_sched_tags, "blk_mq_hw_ctx",
>> + "sched_tags");
>> + MEMBER_OFFSET_INIT(blk_mq_tags_bitmap_tags, "blk_mq_tags",
>> + "bitmap_tags");
>> + MEMBER_OFFSET_INIT(blk_mq_tags_breserved_tags, "blk_mq_tags",
>> + "breserved_tags");
>> + MEMBER_OFFSET_INIT(blk_mq_tags_nr_reserved_tags, "blk_mq_tags",
>> + "nr_reserved_tags");
>> + MEMBER_OFFSET_INIT(blk_mq_tags_rqs, "blk_mq_tags",
>> + "rqs");
>> + MEMBER_OFFSET_INIT(blk_mq_tags_static_rqs, "blk_mq_tags",
>> + "static_rqs");
>> + STRUCT_SIZE_INIT(blk_mq_tags, "blk_mq_tags");
>> + STRUCT_SIZE_INIT(sbitmap, "sbitmap");
>> + STRUCT_SIZE_INIT(sbitmap_word, "sbitmap_word");
>> + MEMBER_OFFSET_INIT(sbitmap_word_depth, "sbitmap_word",
"depth");
>> + MEMBER_OFFSET_INIT(sbitmap_word_word, "sbitmap_word",
"word");
>> + MEMBER_OFFSET_INIT(sbitmap_word_cleared, "sbitmap_word",
"cleared");
>> + MEMBER_OFFSET_INIT(sbitmap_depth, "sbitmap", "depth");
>> + MEMBER_OFFSET_INIT(sbitmap_shift, "sbitmap", "shift");
>> + MEMBER_OFFSET_INIT(sbitmap_map_nr, "sbitmap", "map_nr");
>> + MEMBER_OFFSET_INIT(sbitmap_map, "sbitmap", "map");
>> + MEMBER_OFFSET_INIT(sbitmap_queue_sb, "sbitmap_queue",
"sb");
>> +
>> }
>> MEMBER_OFFSET_INIT(subsys_private_klist_devices,
"subsys_private",
>> "klist_devices");
>> diff --git a/symbols.c b/symbols.c
>> index bfe506388fa3..c432469671a4 100644
>> --- a/symbols.c
>> +++ b/symbols.c
>> @@ -10385,6 +10385,10 @@ dump_offset_table(char *spec, ulong makestruct)
>> OFFSET(kset_list));
>> fprintf(fp, " request_list_count: %ld\n",
>> OFFSET(request_list_count));
>> + fprintf(fp, " request_q: %ld\n",
>> + OFFSET(request_q));
>> + fprintf(fp, " request_cmd_flags: %ld\n",
>> + OFFSET(request_cmd_flags));
>> fprintf(fp, " request_queue_in_flight: %ld\n",
>> OFFSET(request_queue_in_flight));
>> fprintf(fp, " request_queue_rq: %ld\n",
>> @@ -10393,10 +10397,29 @@ dump_offset_table(char *spec, ulong makestruct)
>> OFFSET(request_queue_mq_ops));
>> fprintf(fp, " request_queue_queue_ctx: %ld\n",
>> OFFSET(request_queue_queue_ctx));
>> + fprintf(fp, " request_queue_queue_hw_ctx: %ld\n",
>> + OFFSET(request_queue_queue_hw_ctx));
>> + fprintf(fp, " request_queue_nr_hw_queues: %ld\n",
>> + OFFSET(request_queue_nr_hw_queues));
>> fprintf(fp, " blk_mq_ctx_rq_dispatched: %ld\n",
>> OFFSET(blk_mq_ctx_rq_dispatched));
>> fprintf(fp, " blk_mq_ctx_rq_completed: %ld\n",
>> OFFSET(blk_mq_ctx_rq_completed));
>> + fprintf(fp, " blk_mq_hw_ctx_tags: %ld\n",
>> + OFFSET(blk_mq_hw_ctx_tags));
>> + fprintf(fp, " blk_mq_hw_ctx_sched_tags: %ld\n",
>> + OFFSET(blk_mq_hw_ctx_sched_tags));
>> + fprintf(fp, " blk_mq_tags_bitmap_tags: %ld\n",
>> + OFFSET(blk_mq_tags_bitmap_tags));
>> + fprintf(fp, " blk_mq_tags_breserved_tags: %ld\n",
>> + OFFSET(blk_mq_tags_breserved_tags));
>> + fprintf(fp, " blk_mq_tags_nr_reserved_tags: %ld\n",
>> + OFFSET(blk_mq_tags_nr_reserved_tags));
>> + fprintf(fp, " blk_mq_tags_rqs: %ld\n",
>> + OFFSET(blk_mq_tags_rqs));
>> + fprintf(fp, " blk_mq_tags_static_rqs: %ld\n",
>> + OFFSET(blk_mq_tags_static_rqs));
>
> Please arrange the position of colons:
>
> kobject_entry: 8
> kset_list: 0
> request_list_count: -1
> request_q: 0
> request_cmd_flags: 24
> ^
>
> The current patch:
>
> kobject_entry: 8
> kset_list: 0
> request_list_count: -1
> request_q: 0
> request_cmd_flags: 24
>
>> +
>> fprintf(fp, " subsys_private_klist_devices: %ld\n",
>> OFFSET(subsys_private_klist_devices));
>> fprintf(fp, " subsystem_kset: %ld\n",
>> @@ -11003,6 +11026,7 @@ dump_offset_table(char *spec, ulong makestruct)
>> fprintf(fp, " sbitmap: %ld\n",
SIZE(sbitmap));
>> fprintf(fp, " sbitmap_queue: %ld\n",
SIZE(sbitmap_queue));
>> fprintf(fp, " sbq_wait_state: %ld\n",
SIZE(sbq_wait_state));
>> + fprintf(fp, " blk_mq_tags: %ld\n",
SIZE(blk_mq_tags));
>
> Same as above.
>
> Thanks,
> Kazu
> --
> Crash-utility mailing list
> Crash-utility(a)redhat.com
>
https://listman.redhat.com/mailman/listinfo/crash-utility
> Contribution Guidelines:
https://github.com/crash-utility/crash/wiki