btrfs: scrub: try to fix super block errors
[ Upstream commit f9eab5f0bba76742af654f33d517bf62a0db8f12 ] [BUG] The following script shows that, although scrub can detect super block errors, it never tries to fix them: mkfs.btrfs -f -d raid1 -m raid1 $dev1 $dev2 xfs_io -c "pwrite 67108864 4k" $dev2 mount $dev1 $mnt btrfs scrub start -B $dev2 btrfs scrub start -Br $dev2 umount $mnt The first scrub reports the super error correctly: scrub done for f3289218-abd3-41ac-a630-202f766c0859 Scrub started: Tue Aug 2 14:44:11 2022 Status: finished Duration: 0:00:00 Total to scrub: 1.26GiB Rate: 0.00B/s Error summary: super=1 Corrected: 0 Uncorrectable: 0 Unverified: 0 But the second read-only scrub still reports the same super error: Scrub started: Tue Aug 2 14:44:11 2022 Status: finished Duration: 0:00:00 Total to scrub: 1.26GiB Rate: 0.00B/s Error summary: super=1 Corrected: 0 Uncorrectable: 0 Unverified: 0 [CAUSE] The existing comment already suggests that super block errors can be easily fixed by committing a transaction: /* * If we find an error in a super block, we just report it. * They will get written with the next transaction commit * anyway */ But in reality that assumption does not always hold, and since scrub should try to repair every error it finds (except for read-only scrub), we should actively commit a transaction to fix this. [FIX] Just commit a transaction if we find any super block errors, after everything else is done. We cannot do this right after scrub_supers(), as btrfs_commit_transaction() will try to pause and wait for the running scrub, so we cannot call it with scrub_lock held. Signed-off-by: Qu Wenruo <wqu@suse.com> Reviewed-by: David Sterba <dsterba@suse.com> Signed-off-by: David Sterba <dsterba@suse.com> Signed-off-by: Sasha Levin <sashal@kernel.org>
This commit is contained in:
parent
8054f824a7
commit
715fe15785
@ -3849,6 +3849,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
|
||||
int ret;
|
||||
struct btrfs_device *dev;
|
||||
unsigned int nofs_flag;
|
||||
bool need_commit = false;
|
||||
|
||||
if (btrfs_fs_closing(fs_info))
|
||||
return -EAGAIN;
|
||||
@ -3961,6 +3962,12 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
|
||||
*/
|
||||
nofs_flag = memalloc_nofs_save();
|
||||
if (!is_dev_replace) {
|
||||
u64 old_super_errors;
|
||||
|
||||
spin_lock(&sctx->stat_lock);
|
||||
old_super_errors = sctx->stat.super_errors;
|
||||
spin_unlock(&sctx->stat_lock);
|
||||
|
||||
btrfs_info(fs_info, "scrub: started on devid %llu", devid);
|
||||
/*
|
||||
* by holding device list mutex, we can
|
||||
@ -3969,6 +3976,16 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
|
||||
mutex_lock(&fs_info->fs_devices->device_list_mutex);
|
||||
ret = scrub_supers(sctx, dev);
|
||||
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
|
||||
|
||||
spin_lock(&sctx->stat_lock);
|
||||
/*
|
||||
* Super block errors found, but we can not commit transaction
|
||||
* at current context, since btrfs_commit_transaction() needs
|
||||
* to pause the current running scrub (hold by ourselves).
|
||||
*/
|
||||
if (sctx->stat.super_errors > old_super_errors && !sctx->readonly)
|
||||
need_commit = true;
|
||||
spin_unlock(&sctx->stat_lock);
|
||||
}
|
||||
|
||||
if (!ret)
|
||||
@ -3995,6 +4012,25 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
|
||||
scrub_workers_put(fs_info);
|
||||
scrub_put_ctx(sctx);
|
||||
|
||||
/*
|
||||
* We found some super block errors before, now try to force a
|
||||
* transaction commit, as scrub has finished.
|
||||
*/
|
||||
if (need_commit) {
|
||||
struct btrfs_trans_handle *trans;
|
||||
|
||||
trans = btrfs_start_transaction(fs_info->tree_root, 0);
|
||||
if (IS_ERR(trans)) {
|
||||
ret = PTR_ERR(trans);
|
||||
btrfs_err(fs_info,
|
||||
"scrub: failed to start transaction to fix super block errors: %d", ret);
|
||||
return ret;
|
||||
}
|
||||
ret = btrfs_commit_transaction(trans);
|
||||
if (ret < 0)
|
||||
btrfs_err(fs_info,
|
||||
"scrub: failed to commit transaction to fix super block errors: %d", ret);
|
||||
}
|
||||
return ret;
|
||||
out:
|
||||
scrub_workers_put(fs_info);
|
||||
|
Loading…
Reference in New Issue
Block a user