drbd: simplify retry path of failed READ requests
If a local or remote READ request fails, just push it back to the retry
workqueue.  It will re-enter __drbd_make_request, and be re-assigned to
a suitable local or remote path, or failed, if we do not have access to
good data anymore.

This obsoletes w_read_retry_remote(), and eliminates two goto...retry
blocks in __req_mod().

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
commit 4439c400ab
parent 2415308eb9
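For orientation, below is a minimal sketch of the pattern the commit message describes: instead of completing a failed READ with an error, the request is marked postponed and pushed onto a retry workqueue, whose callback later feeds it back into the normal submit path so it can be re-assigned to a still-usable local or remote path. All names here (retry_ctx, pending_read, postpone_read, retry_worker) are hypothetical, and generic_make_request() stands in for the driver's actual re-entry point __drbd_make_request; this is not the DRBD code itself.

/* Illustrative sketch only -- not DRBD driver code. */
#include <linux/bio.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>

struct pending_read {
	struct list_head list;
	struct bio *master_bio;		/* bio postponed instead of failed */
};

struct retry_ctx {
	spinlock_t lock;
	struct list_head reads;		/* READs postponed on I/O error */
	struct work_struct work;
};

/* Completion path: instead of failing the READ, postpone it. */
static void postpone_read(struct retry_ctx *ctx, struct bio *bio)
{
	struct pending_read *r = kmalloc(sizeof(*r), GFP_ATOMIC);
	unsigned long flags;

	if (!r)
		return;			/* real code would fail the bio here */
	r->master_bio = bio;

	spin_lock_irqsave(&ctx->lock, flags);
	list_add_tail(&r->list, &ctx->reads);
	spin_unlock_irqrestore(&ctx->lock, flags);

	schedule_work(&ctx->work);
}

/* Workqueue callback: resubmit each postponed READ, so it is
 * re-assigned to a suitable local or remote path, or failed for
 * good if no up-to-date data is reachable anymore. */
static void retry_worker(struct work_struct *ws)
{
	struct retry_ctx *ctx = container_of(ws, struct retry_ctx, work);
	struct pending_read *r, *tmp;
	LIST_HEAD(resubmit);

	spin_lock_irq(&ctx->lock);
	list_splice_init(&ctx->reads, &resubmit);
	spin_unlock_irq(&ctx->lock);

	list_for_each_entry_safe(r, tmp, &resubmit, list) {
		list_del(&r->list);
		generic_make_request(r->master_bio);
		kfree(r);
	}
}

The design point, per the message above, is that retry and path re-selection share one entry point instead of a dedicated w_read_retry_remote() callback.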
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1439,7 +1439,6 @@ extern void drbd_csum_bio(struct drbd_conf *, struct crypto_hash *, struct bio *
 extern void drbd_csum_ee(struct drbd_conf *, struct crypto_hash *,
			 struct drbd_peer_request *, void *);
 /* worker callbacks */
-extern int w_read_retry_remote(struct drbd_work *, int);
 extern int w_e_end_data_req(struct drbd_work *, int);
 extern int w_e_end_rsdata_req(struct drbd_work *, int);
 extern int w_e_end_csum_rs_req(struct drbd_work *, int);
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -263,7 +263,6 @@ void req_may_be_completed(struct drbd_request *req, struct bio_and_error *m)
 {
	const unsigned long s = req->rq_state;
	struct drbd_conf *mdev = req->w.mdev;
-	int rw = req->rq_state & RQ_WRITE ? WRITE : READ;
 
	/* we must not complete the master bio, while it is
	 * still being processed by _drbd_send_zc_bio (drbd_send_dblock)
@@ -282,6 +281,8 @@ void req_may_be_completed(struct drbd_request *req, struct bio_and_error *m)
		return;
 
	if (req->master_bio) {
+		int rw = bio_rw(req->master_bio);
+
		/* this is DATA_RECEIVED (remote read)
		 * or protocol C P_WRITE_ACK
		 * or protocol B P_RECV_ACK
@@ -326,7 +327,18 @@ void req_may_be_completed(struct drbd_request *req, struct bio_and_error *m)
		/* Update disk stats */
		_drbd_end_io_acct(mdev, req);
 
-		if (!(s & RQ_POSTPONED)) {
+		/* if READ failed,
+		 * have it be pushed back to the retry work queue,
+		 * so it will re-enter __drbd_make_request,
+		 * and be re-assigned to a suitable local or remote path,
+		 * or failed if we do not have access to good data anymore.
+		 * READA may fail.
+		 * WRITE should have used all available paths already.
+		 */
+		if (!ok && rw == READ)
+			req->rq_state |= RQ_POSTPONED;
+
+		if (!(req->rq_state & RQ_POSTPONED)) {
			m->error = ok ? 0 : (error ?: -EIO);
			m->bio = req->master_bio;
			req->master_bio = NULL;
@@ -420,10 +432,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 
	case ABORT_DISK_IO:
		req->rq_state |= RQ_LOCAL_ABORTED;
-		if (req->rq_state & RQ_WRITE)
-			req_may_be_completed_not_susp(req, m);
-		else
-			goto goto_queue_for_net_read;
+		req_may_be_completed_not_susp(req, m);
		break;
 
	case WRITE_COMPLETED_WITH_ERROR:
@@ -451,20 +460,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
		D_ASSERT(!(req->rq_state & RQ_NET_MASK));
 
		__drbd_chk_io_error(mdev, false);
+		break;
 
-	goto_queue_for_net_read:
-
-		/* no point in retrying if there is no good remote data,
-		 * or we have no connection. */
-		if (mdev->state.pdsk != D_UP_TO_DATE) {
-			req_may_be_completed_not_susp(req, m);
-			break;
-		}
-
-		/* _req_mod(req,TO_BE_SENT); oops, recursion... */
-		req->rq_state |= RQ_NET_PENDING;
-		inc_ap_pending(mdev);
-		/* fall through: _req_mod(req,QUEUE_FOR_NET_READ); */
-
	case QUEUE_FOR_NET_READ:
		/* READ or READA, and
@@ -483,10 +479,9 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
		set_bit(UNPLUG_REMOTE, &mdev->flags);
 
		D_ASSERT(req->rq_state & RQ_NET_PENDING);
+		D_ASSERT((req->rq_state & RQ_LOCAL_MASK) == 0);
		req->rq_state |= RQ_NET_QUEUED;
-		req->w.cb = (req->rq_state & RQ_LOCAL_MASK)
-			? w_read_retry_remote
-			: w_send_read_req;
+		req->w.cb = w_send_read_req;
		drbd_queue_work(&mdev->tconn->data.work, &req->w);
		break;
 
@@ -604,13 +599,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
		if (req->rq_state & RQ_NET_SENT && req->rq_state & RQ_WRITE)
			atomic_sub(req->i.size >> 9, &mdev->ap_in_flight);
 
-		/* if it is still queued, we may not complete it here.
-		 * it will be canceled soon. */
-		if (!(req->rq_state & RQ_NET_QUEUED)) {
-			if (p)
-				goto goto_read_retry_local;
-			req_may_be_completed(req, m); /* Allowed while state.susp */
-		}
+		req_may_be_completed(req, m); /* Allowed while state.susp */
		break;
 
	case DISCARD_WRITE:
@@ -668,27 +657,11 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 
		req->rq_state |= RQ_NET_DONE;
 
-		if (!(req->rq_state & RQ_WRITE))
-			goto goto_read_retry_local;
-
		maybe_wakeup_conflicting_requests(req);
		req_may_be_completed_not_susp(req, m);
		/* else: done by HANDED_OVER_TO_NETWORK */
		break;
 
-	goto_read_retry_local:
-		if (!drbd_may_do_local_read(mdev, req->i.sector, req->i.size)) {
-			req_may_be_completed_not_susp(req, m);
-			break;
-		}
-		D_ASSERT(!(req->rq_state & RQ_LOCAL_PENDING));
-		req->rq_state |= RQ_LOCAL_PENDING;
-
-		get_ldev(mdev);
-		req->w.cb = w_restart_disk_io;
-		drbd_queue_work(&mdev->tconn->data.work, &req->w);
-		break;
-
	case FAIL_FROZEN_DISK_IO:
		if (!(req->rq_state & RQ_LOCAL_COMPLETED))
			break;
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -244,26 +244,6 @@ void drbd_request_endio(struct bio *bio, int error)
	complete_master_bio(mdev, &m);
 }
 
-int w_read_retry_remote(struct drbd_work *w, int cancel)
-{
-	struct drbd_request *req = container_of(w, struct drbd_request, w);
-	struct drbd_conf *mdev = w->mdev;
-
-	/* We should not detach for read io-error,
-	 * but try to WRITE the P_DATA_REPLY to the failed location,
-	 * to give the disk the chance to relocate that block */
-
-	spin_lock_irq(&mdev->tconn->req_lock);
-	if (cancel || mdev->state.pdsk != D_UP_TO_DATE) {
-		_req_mod(req, READ_RETRY_REMOTE_CANCELED);
-		spin_unlock_irq(&mdev->tconn->req_lock);
-		return 0;
-	}
-	spin_unlock_irq(&mdev->tconn->req_lock);
-
-	return w_send_read_req(w, 0);
-}
-
 void drbd_csum_ee(struct drbd_conf *mdev, struct crypto_hash *tfm,
		  struct drbd_peer_request *peer_req, void *digest)
 {