From a4b2673e3c040bcd7cc8749e27607098aef9a2c2 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Tue, 25 Jul 2023 12:03:59 +0800 Subject: [PATCH] ceph: defer stopping mdsc delayed_work commit e7e607bd00481745550389a29ecabe33e13d67cf upstream. Flushing the dirty buffer may take a long time if the cluster is overloaded or if there is network issue. So we should ping the MDSs periodically to keep alive, else the MDS will blocklist the kclient. Cc: stable@vger.kernel.org Link: https://tracker.ceph.com/issues/61843 Signed-off-by: Xiubo Li Reviewed-by: Milind Changire Signed-off-by: Ilya Dryomov Signed-off-by: Greg Kroah-Hartman --- fs/ceph/mds_client.c | 4 ++-- fs/ceph/mds_client.h | 5 +++++ fs/ceph/super.c | 10 ++++++++++ 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 87a9e9096421..df1ecb8bfebf 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -4511,7 +4511,7 @@ static void delayed_work(struct work_struct *work) dout("mdsc delayed_work\n"); - if (mdsc->stopping) + if (mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHED) return; mutex_lock(&mdsc->mutex); @@ -4701,7 +4701,7 @@ void send_flush_mdlog(struct ceph_mds_session *s) void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc) { dout("pre_umount\n"); - mdsc->stopping = 1; + mdsc->stopping = CEPH_MDSC_STOPPING_BEGIN; ceph_mdsc_iterate_sessions(mdsc, send_flush_mdlog, true); ceph_mdsc_iterate_sessions(mdsc, lock_unlock_session, false); diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index a92e42e8a9f8..1c958510f00f 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -372,6 +372,11 @@ struct cap_wait { int want; }; +enum { + CEPH_MDSC_STOPPING_BEGIN = 1, + CEPH_MDSC_STOPPING_FLUSHED = 2, +}; + /* * mds client state */ diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 08c8d34c9809..f2aff97348bc 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -1222,6 +1222,16 @@ static void ceph_kill_sb(struct super_block *s) ceph_mdsc_pre_umount(fsc->mdsc); flush_fs_workqueues(fsc); + /* + * Though the kill_anon_super() will finally trigger the + * sync_filesystem() anyway, we still need to do it here + * and then bump the stage of shutdown to stop the work + * queue as earlier as possible. + */ + sync_filesystem(s); + + fsc->mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHED; + kill_anon_super(s); fsc->client->extra_mon_dispatch = NULL;