Sure, thanks!
On Thu, Jul 24, 2025 at 3:36 PM lijiang <lijiang(a)redhat.com> wrote:
On Thu, Jul 24, 2025 at 9:18 AM Tao Liu <ltao(a)redhat.com> wrote:
>
> Hi lianbo,
>
> Kindly ping for this patch...
>
Sorry about it. I will take a look next week.
Thanks
Lianbo
>
> Thanks,
> Tao Liu
>
> On Fri, Jun 27, 2025 at 11:33 PM Tao Liu <ltao(a)redhat.com> wrote:
> >
> > When blk_mq shared tags are enabled for devices like SCSI, the I/O status
> > is incorrect, e.g.:
> >
> > crash> dev -d
> > MAJOR GENDISK NAME REQUEST_QUEUE TOTAL ASYNC SYNC
> > 8 ffff90528df86000 sda ffff9052a3d61800 144 144 0
> > 8 ffff905280718c00 sdb ffff9052a3d63c00 48 48 0
> >
> > crash> epython rqlist
> > ffff90528e94a5c0 sda is unknown, deadline: 89.992 (90) rq_alloc: 0.196
> > ffff90528e92f700 sda is unknown, deadline: 89.998 (90) rq_alloc: 0.202
> > ffff90528e95ccc0 sda is unknown, deadline: 89.999 (90) rq_alloc: 0.203
> > ffff90528e968bc0 sdb is unknown, deadline: 89.997 (90) rq_alloc: 0.201
> >
> > The root cause is: in the shared-tags case, only the shared tags should
> > be counted. Without this patch, the tags of every hw_ctx are counted,
> > which is incorrect.
> >
> > After applying the patch:
> >
> > crash> dev -d
> > MAJOR GENDISK NAME REQUEST_QUEUE TOTAL READ WRITE
> > 8 ffff90528df86000 sda ffff9052a3d61800 3 3 0
> > 8 ffff905280718c00 sdb ffff9052a3d63c00 1 1 0
> >
> > This patch makes the following modifications:
> > 1) blk_mq shared tag support.
> > 2) Function renaming: queue_for_each_hw_ctx -> blk_mq_queue_tag_busy_iter,
> > because the latter is closer to the corresponding kernel function.
> > 3) Extract a new queue_for_each_hw_ctx() function to be called for both
> > the shared-tags case and the hw_ctx case.
> >
> > Note:
> > The patch is safe for earlier kernels which have no blk_mq shared tags
> > implemented, because the blk_mq_is_shared_tags() check will exit safely.
> >
> > Signed-off-by: Tao Liu <ltao(a)redhat.com>
> > ---
> >
> > Please discard the previous patch "Filter repeated rq for cmd dev -d/-D",
> > because filtering is an incorrect fix.
> >
> > ---
> > defs.h | 3 ++
> > dev.c | 96 ++++++++++++++++++++++++++++++++++++++-----------------
> > symbols.c | 6 ++++
> > 3 files changed, 76 insertions(+), 29 deletions(-)
> >
> > diff --git a/defs.h b/defs.h
> > index bbd6d4b..4fecb83 100644
> > --- a/defs.h
> > +++ b/defs.h
> > @@ -2271,6 +2271,9 @@ struct offset_table { /* stash of
commonly-used offsets */
> > long task_struct_thread_context_x28;
> > long neigh_table_hash_heads;
> > long neighbour_hash;
> > + long request_queue_tag_set;
> > + long blk_mq_tag_set_flags;
> > + long blk_mq_tag_set_shared_tags;
> > };
> >
> > struct size_table { /* stash of commonly-used sizes */
> > diff --git a/dev.c b/dev.c
> > index 9d38aef..0a4d5c9 100644
> > --- a/dev.c
> > +++ b/dev.c
> > @@ -4326,6 +4326,12 @@ struct bt_iter_data {
> > #define MQ_RQ_IN_FLIGHT 1
> > #define REQ_OP_BITS 8
> > #define REQ_OP_MASK ((1 << REQ_OP_BITS) - 1)
> > +#define BLK_MQ_F_TAG_HCTX_SHARED (1 << 3)
> > +
> > +static bool blk_mq_is_shared_tags(unsigned int flags)
> > +{
> > + return flags & BLK_MQ_F_TAG_HCTX_SHARED;
> > +}
> >
> > static uint op_is_write(uint op)
> > {
> > @@ -4403,43 +4409,72 @@ static void bt_for_each(ulong q, ulong tags, ulong sbq,
uint reserved, uint nr_r
> > sbitmap_for_each_set(&sc, bt_iter, &iter_data);
> > }
> >
> > -static void queue_for_each_hw_ctx(ulong q, ulong *hctx, uint cnt, struct diskio
*dio)
> > +static bool queue_for_each_hw_ctx(ulong q, ulong blk_mq_tags_ptr,
> > + bool bitmap_tags_is_ptr, struct diskio *dio)
> > {
> > - uint i;
> > + uint i, nr_reserved_tags = 0;
> > + ulong tags = 0, addr = 0;
> > + bool ret = FALSE;
> > +
> > + if (!readmem(blk_mq_tags_ptr, KVADDR, &tags, sizeof(ulong),
> > + "blk_mq_hw_ctx.tags", RETURN_ON_ERROR))
> > + goto out;
> > +
> > + addr = tags + OFFSET(blk_mq_tags_nr_reserved_tags);
> > + if (!readmem(addr, KVADDR, &nr_reserved_tags, sizeof(uint),
> > + "blk_mq_tags_nr_reserved_tags",
RETURN_ON_ERROR))
> > + goto out;
> > +
> > + if (nr_reserved_tags) {
> > + addr = tags + OFFSET(blk_mq_tags_breserved_tags);
> > + if (bitmap_tags_is_ptr &&
> > + !readmem(addr, KVADDR, &addr, sizeof(ulong),
> > + "blk_mq_tags.bitmap_tags",
RETURN_ON_ERROR))
> > + goto out;
> > + bt_for_each(q, tags, addr, 1, nr_reserved_tags, dio);
> > + }
> > + addr = tags + OFFSET(blk_mq_tags_bitmap_tags);
> > + if (bitmap_tags_is_ptr &&
> > + !readmem(addr, KVADDR, &addr, sizeof(ulong),
> > + "blk_mq_tags.bitmap_tags", RETURN_ON_ERROR))
> > + goto out;
> > + bt_for_each(q, tags, addr, 0, nr_reserved_tags, dio);
> > +
> > + ret = TRUE;
> > +out:
> > + return ret;
> > +}
> > +
> > +/*
> > + * Replica of kernel block/blk-mq-tag.c:blk_mq_queue_tag_busy_iter()
> > +*/
> > +static void blk_mq_queue_tag_busy_iter(ulong q, ulong *hctx, uint cnt,
> > + struct diskio *dio)
> > +{
> > + uint i, flags;
> > int bitmap_tags_is_ptr = 0;
> > + ulong addr = 0;
> >
> > if (MEMBER_TYPE("blk_mq_tags", "bitmap_tags") ==
TYPE_CODE_PTR)
> > bitmap_tags_is_ptr = 1;
> >
> > - for (i = 0; i < cnt; i++) {
> > - ulong addr = 0, tags = 0;
> > - uint nr_reserved_tags = 0;
> > + readmem(q + OFFSET(request_queue_tag_set), KVADDR, &addr,
> > + sizeof(ulong), "request_queue.tag_set",
RETURN_ON_ERROR);
> >
> > - /* Tags owned by the block driver */
> > - addr = hctx[i] + OFFSET(blk_mq_hw_ctx_tags);
> > - if (!readmem(addr, KVADDR, &tags, sizeof(ulong),
> > - "blk_mq_hw_ctx.tags",
RETURN_ON_ERROR))
> > - break;
> > + readmem(addr + OFFSET(blk_mq_tag_set_flags), KVADDR,
> > + &flags, sizeof(uint), "blk_mq_tag_set.flags",
RETURN_ON_ERROR);
> >
> > - addr = tags + OFFSET(blk_mq_tags_nr_reserved_tags);
> > - if (!readmem(addr, KVADDR, &nr_reserved_tags, sizeof(uint),
> > - "blk_mq_tags_nr_reserved_tags",
RETURN_ON_ERROR))
> > - break;
> > + if (blk_mq_is_shared_tags(flags)) {
> > + addr = addr + OFFSET(blk_mq_tag_set_shared_tags);
> > + queue_for_each_hw_ctx(q, addr, bitmap_tags_is_ptr, dio);
> > + return;
> > + }
> >
> > - if (nr_reserved_tags) {
> > - addr = tags + OFFSET(blk_mq_tags_breserved_tags);
> > - if (bitmap_tags_is_ptr &&
> > - !readmem(addr, KVADDR, &addr, sizeof(ulong),
> > - "blk_mq_tags.bitmap_tags",
RETURN_ON_ERROR))
> > - break;
> > - bt_for_each(q, tags, addr, 1, nr_reserved_tags, dio);
> > - }
> > - addr = tags + OFFSET(blk_mq_tags_bitmap_tags);
> > - if (bitmap_tags_is_ptr &&
> > - !readmem(addr, KVADDR, &addr, sizeof(ulong),
> > - "blk_mq_tags.bitmap_tags",
RETURN_ON_ERROR))
> > - break;
> > - bt_for_each(q, tags, addr, 0, nr_reserved_tags, dio);
> > + for (i = 0; i < cnt; i++) {
> > + /* Tags owned by the block driver */
> > + addr = hctx[i] + OFFSET(blk_mq_hw_ctx_tags);
> > + if (queue_for_each_hw_ctx(q, addr, bitmap_tags_is_ptr, dio) ==
FALSE)
> > + return;
> > }
> > }
> >
> > @@ -4489,7 +4524,7 @@ static void get_mq_diskio_from_hw_queues(ulong q, struct
diskio *dio)
> > return;
> > }
> >
> > - queue_for_each_hw_ctx(q, hctx_array, cnt, dio);
> > + blk_mq_queue_tag_busy_iter(q, hctx_array, cnt, dio);
> >
> > FREEBUF(hctx_array);
> > }
> > @@ -4914,6 +4949,9 @@ void diskio_init(void)
> > MEMBER_SIZE_INIT(class_private_devices, "class_private",
> > "class_devices");
> > MEMBER_OFFSET_INIT(disk_stats_in_flight, "disk_stats",
"in_flight");
> > + MEMBER_OFFSET_INIT(request_queue_tag_set, "request_queue",
"tag_set");
> > + MEMBER_OFFSET_INIT(blk_mq_tag_set_flags, "blk_mq_tag_set",
"flags");
> > + MEMBER_OFFSET_INIT(blk_mq_tag_set_shared_tags,
"blk_mq_tag_set", "shared_tags");
> >
> > dt->flags |= DISKIO_INIT;
> > }
> > diff --git a/symbols.c b/symbols.c
> > index e30fafe..794519a 100644
> > --- a/symbols.c
> > +++ b/symbols.c
> > @@ -11487,6 +11487,12 @@ dump_offset_table(char *spec, ulong makestruct)
> > OFFSET(blk_mq_tags_nr_reserved_tags));
> > fprintf(fp, " blk_mq_tags_rqs: %ld\n",
> > OFFSET(blk_mq_tags_rqs));
> > + fprintf(fp, " request_queue_tag_set: %ld\n",
> > + OFFSET(request_queue_tag_set));
> > + fprintf(fp, " blk_mq_tag_set_flags: %ld\n",
> > + OFFSET(blk_mq_tag_set_flags));
> > + fprintf(fp, " blk_mq_tag_set_shared_tags: %ld\n",
> > + OFFSET(blk_mq_tag_set_shared_tags));
> >
> > fprintf(fp, " subsys_private_subsys: %ld\n",
OFFSET(subsys_private_subsys));
> > fprintf(fp, " subsys_private_klist_devices: %ld\n",
> > --
> > 2.47.0
> >
>