blk-mq: remove synchronize_rcu() from blk_mq_del_queue_tag_set()
We have to remove the synchronize_rcu() call from blk_cleanup_queue();
otherwise it can cause long delays during LUN probing. To remove it, we
have to stop iterating set->tag_list in the I/O path, e.g., in
blk_mq_sched_restart().
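For scale, a minimal sketch of the old teardown shape (simplified; the
function name here is illustrative, not the real symbol): probing N LUNs
creates and destroys N scan-time queues back to back, and each destruction
paid a full RCU grace period, which can run to tens of milliseconds.

	/*
	 * Simplified shape of the old blk_mq_del_queue_tag_set(): the
	 * grace period existed only so lockless readers of set->tag_list
	 * (the restart path) could finish before the queue goes away.
	 */
	static void old_del_queue_tag_set(struct request_queue *q)
	{
		struct blk_mq_tag_set *set = q->tag_set;

		mutex_lock(&set->tag_list_lock);
		list_del_rcu(&q->tag_set_list);
		mutex_unlock(&set->tag_list_lock);

		synchronize_rcu();	/* one grace period per deleted queue */
		INIT_LIST_HEAD(&q->tag_set_list);
	}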
This patch reverts commit 5b79413946d (Revert "blk-mq: don't handle
TAG_SHARED in restart"). We have fixed enough I/O hang issues by now,
and there is no longer any reason to restart all queues sharing one tag
set, for the following reasons:
1) The blk-mq core can deal with the shared-tags case well via
blk_mq_get_driver_tag(), which can wake up queues waiting for a driver
tag (see the dispatch-side sketch after the first blk-mq-sched.c hunk
below).

2) SCSI is a bit special because it may return BLK_STS_RESOURCE when the
queue, target, or host is not ready, but SCSI's built-in restart covers
all of these cases well; see scsi_end_request(): the queue is rerun after
any request initiated from this host/target completes (a simplified
sketch follows this list).
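A simplified sketch of that SCSI built-in restart, paraphrasing the
completion-path logic in scsi_end_request() from this era; the wrapper
function name below is hypothetical and the starved-list details are
condensed:

	/*
	 * On completion of any request from this host/target, the queue
	 * is kicked again, so a queue that got BLK_STS_RESOURCE earlier
	 * is retried without any cross-queue restart in blk-mq itself.
	 */
	static void scsi_rerun_after_completion(struct scsi_device *sdev)
	{
		struct request_queue *q = sdev->request_queue;

		if (scsi_target(sdev)->single_lun ||
		    !list_empty(&sdev->host->starved_list))
			kblockd_schedule_work(&sdev->requeue_work);
		else
			blk_mq_run_hw_queues(q, true);
	}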
In my test on scsi_debug (8 LUNs), this patch improves IOPS by 20% ~ 30%
when running I/O on these 8 LUNs concurrently.
Fixes: 705cda97ee ("blk-mq: Make it safe to use RCU to iterate over blk_mq_tag_set.tag_list")
Cc: Omar Sandoval <osandov@fb.com>
Cc: Bart Van Assche <bart.vanassche@wdc.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: linux-scsi@vger.kernel.org
Reported-by: Andrew Jones <drjones@redhat.com>
Tested-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
commit 97889f9ac2 (parent 5815839b3c)
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -59,29 +59,16 @@ static void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
 	if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
 		return;
 
-	if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
-		struct request_queue *q = hctx->queue;
-
-		if (!test_and_set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
-			atomic_inc(&q->shared_hctx_restart);
-	} else
-		set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
+	set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
 }
 
-static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
+void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx)
 {
 	if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
-		return false;
-
-	if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
-		struct request_queue *q = hctx->queue;
-
-		if (test_and_clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
-			atomic_dec(&q->shared_hctx_restart);
-	} else
-		clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
+		return;
+	clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
 
-	return blk_mq_run_hw_queue(hctx, true);
+	blk_mq_run_hw_queue(hctx, true);
 }
 
 /*
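Why the per-hctx restart above is enough for shared tags: a tag-starved
hctx parks itself on the tag set's sbitmap waitqueue, and freeing a tag
wakes it directly. A rough outline of the dispatch side follows (helper
names and exact signatures vary across kernel versions; treat this as a
sketch, not verbatim code):

	/* Outline of driver-tag handling during dispatch in blk-mq. */
	static bool dispatch_with_driver_tag(struct blk_mq_hw_ctx *hctx,
					     struct request *rq)
	{
		if (blk_mq_get_driver_tag(rq))
			return true;	/* got a tag, dispatch now */

		/*
		 * Out of tags: add hctx->dispatch_wait to the sbitmap
		 * waitqueue. When any queue sharing this tag set frees a
		 * tag, the sbitmap wakeup reruns this hw queue, so no
		 * round-robin restart across the whole tag set is needed.
		 */
		return blk_mq_mark_tag_wait(hctx, rq);
	}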
@@ -380,68 +367,6 @@ static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
 	return false;
 }
 
-/**
- * list_for_each_entry_rcu_rr - iterate in a round-robin fashion over rcu list
- * @pos:    loop cursor.
- * @skip:   the list element that will not be examined. Iteration starts at
- *          @skip->next.
- * @head:   head of the list to examine. This list must have at least one
- *          element, namely @skip.
- * @member: name of the list_head structure within typeof(*pos).
- */
-#define list_for_each_entry_rcu_rr(pos, skip, head, member)		\
-	for ((pos) = (skip);						\
-	     (pos = (pos)->member.next != (head) ? list_entry_rcu(	\
-			(pos)->member.next, typeof(*pos), member) :	\
-	      list_entry_rcu((pos)->member.next->next, typeof(*pos), member)), \
-	     (pos) != (skip); )
-
-/*
- * Called after a driver tag has been freed to check whether a hctx needs to
- * be restarted. Restarts @hctx if its tag set is not shared. Restarts hardware
- * queues in a round-robin fashion if the tag set of @hctx is shared with other
- * hardware queues.
- */
-void blk_mq_sched_restart(struct blk_mq_hw_ctx *const hctx)
-{
-	struct blk_mq_tags *const tags = hctx->tags;
-	struct blk_mq_tag_set *const set = hctx->queue->tag_set;
-	struct request_queue *const queue = hctx->queue, *q;
-	struct blk_mq_hw_ctx *hctx2;
-	unsigned int i, j;
-
-	if (set->flags & BLK_MQ_F_TAG_SHARED) {
-		/*
-		 * If this is 0, then we know that no hardware queues
-		 * have RESTART marked. We're done.
-		 */
-		if (!atomic_read(&queue->shared_hctx_restart))
-			return;
-
-		rcu_read_lock();
-		list_for_each_entry_rcu_rr(q, queue, &set->tag_list,
-					   tag_set_list) {
-			queue_for_each_hw_ctx(q, hctx2, i)
-				if (hctx2->tags == tags &&
-				    blk_mq_sched_restart_hctx(hctx2))
-					goto done;
-		}
-		j = hctx->queue_num + 1;
-		for (i = 0; i < queue->nr_hw_queues; i++, j++) {
-			if (j == queue->nr_hw_queues)
-				j = 0;
-			hctx2 = queue->queue_hw_ctx[j];
-			if (hctx2->tags == tags &&
-			    blk_mq_sched_restart_hctx(hctx2))
-				break;
-		}
-	done:
-		rcu_read_unlock();
-	} else {
-		blk_mq_sched_restart_hctx(hctx);
-	}
-}
-
 void blk_mq_sched_insert_request(struct request *rq, bool at_head,
 				 bool run_queue, bool async)
 {
diff --git a/block/blk-mq.c b/block/blk-mq.c
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2335,15 +2335,10 @@ static void queue_set_hctx_shared(struct request_queue *q, bool shared)
 	int i;
 
 	queue_for_each_hw_ctx(q, hctx, i) {
-		if (shared) {
-			if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
-				atomic_inc(&q->shared_hctx_restart);
+		if (shared)
 			hctx->flags |= BLK_MQ_F_TAG_SHARED;
-		} else {
-			if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
-				atomic_dec(&q->shared_hctx_restart);
+		else
 			hctx->flags &= ~BLK_MQ_F_TAG_SHARED;
-		}
 	}
 }
 
@@ -2374,7 +2369,6 @@ static void blk_mq_del_queue_tag_set(struct request_queue *q)
 		blk_mq_update_tag_set_depth(set, false);
 	}
 	mutex_unlock(&set->tag_list_lock);
-	synchronize_rcu();
 	INIT_LIST_HEAD(&q->tag_set_list);
 }
 
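After this hunk, the remaining walkers of set->tag_list all run under
tag_list_lock, along the lines of the sketch below (modeled on
blk_mq_update_tag_set_depth(); simplified), which is what makes dropping
the synchronize_rcu() safe:

	/*
	 * tag_list is now only iterated with tag_list_lock held, so a
	 * queue leaving the list no longer needs to wait out an RCU
	 * grace period.
	 */
	static void update_tag_set_depth(struct blk_mq_tag_set *set, bool shared)
	{
		struct request_queue *q;

		lockdep_assert_held(&set->tag_list_lock);

		list_for_each_entry(q, &set->tag_list, tag_set_list) {
			blk_mq_freeze_queue(q);
			queue_set_hctx_shared(q, shared);
			blk_mq_unfreeze_queue(q);
		}
	}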
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -442,8 +442,6 @@ struct request_queue {
 	int			nr_rqs[2];	/* # allocated [a]sync rqs */
 	int			nr_rqs_elvpriv;	/* # allocated rqs w/ elvpriv */
 
-	atomic_t		shared_hctx_restart;
-
 	struct blk_queue_stats	*stats;
 	struct rq_wb		*rq_wb;
 