BACKPORT: erofs: fix infinite loop due to a race of filling compressed_bvecs

I encountered a race issue after lengthy (~594647 sec) stress tests on
a 64k-page arm64 VM with several 4k-block EROFS images. The timing is
as follows:

  z_erofs_try_inplace_io                  z_erofs_fill_bio_vec
    cmpxchg(&compressed_bvecs[].page,
            NULL, ..)
                                          [access bufvec]
    compressed_bvecs[] = *bvec;

Previously, z_erofs_submit_queue() just accessed bufvec->page only, so
other fields in bufvec didn't matter. After subpage block support
landed, .offset and .end can be used too, but filling bufvec isn't an
atomic operation, which can cause inconsistency.

Let's use a spinlock to keep the atomicity of each bufvec.

Fixes: 192351616a9d ("erofs: support I/O submission for sub-page compressed blocks")
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Reviewed-by: Sandeep Dhavale <dhavale@google.com>
Reviewed-by: Yue Hu <huyue2@coolpad.com>
Link: https://lore.kernel.org/r/20240125120039.3228103-1-hsiangkao@linux.alibaba.com

Bug: 324640522
(cherry picked from commit cc4b2dd95f0d1eba8c691b36e8f4d1795582f1ff
 https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/ master)

[dhavale: introduced a spinlock in struct erofs_workgroup. Upstream has
 a change where the atomic is replaced by a lockref, but pulling that
 and related changes would cause unnecessary churn. Adding a spinlock
 keeps the spirit of the change intact by fixing the race. Also updated
 the commit message as we are not using lockref.]
Change-Id: Id20a0a433277ab71d46bce48c81824564d1b391d
Signed-off-by: Sandeep Dhavale <dhavale@google.com>
This commit is contained in:
parent
886c9d1fc2
commit
8481b97df5
@ -208,6 +208,7 @@ struct erofs_workgroup {
|
||||
|
||||
/* overall workgroup reference count */
|
||||
atomic_t refcount;
|
||||
spinlock_t lock;
|
||||
};
|
||||
|
||||
static inline bool erofs_workgroup_try_to_freeze(struct erofs_workgroup *grp,
|
||||
|
@ -575,21 +575,19 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe)
|
||||
__GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;
|
||||
unsigned int i;
|
||||
|
||||
if (i_blocksize(fe->inode) != PAGE_SIZE)
|
||||
return;
|
||||
if (fe->mode < Z_EROFS_PCLUSTER_FOLLOWED)
|
||||
if (i_blocksize(fe->inode) != PAGE_SIZE ||
|
||||
fe->mode < Z_EROFS_PCLUSTER_FOLLOWED)
|
||||
return;
|
||||
|
||||
for (i = 0; i < pclusterpages; ++i) {
|
||||
struct page *page, *newpage;
|
||||
void *t; /* mark pages just found for debugging */
|
||||
|
||||
/* the compressed page was loaded before */
|
||||
/* Inaccurate check w/o locking to avoid unneeded lookups */
|
||||
if (READ_ONCE(pcl->compressed_bvecs[i].page))
|
||||
continue;
|
||||
|
||||
page = find_get_page(mc, pcl->obj.index + i);
|
||||
|
||||
if (page) {
|
||||
t = (void *)((unsigned long)page | 1);
|
||||
newpage = NULL;
|
||||
@ -609,9 +607,13 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe)
|
||||
set_page_private(newpage, Z_EROFS_PREALLOCATED_PAGE);
|
||||
t = (void *)((unsigned long)newpage | 1);
|
||||
}
|
||||
|
||||
if (!cmpxchg_relaxed(&pcl->compressed_bvecs[i].page, NULL, t))
|
||||
spin_lock(&pcl->obj.lock);
|
||||
if (!pcl->compressed_bvecs[i].page) {
|
||||
pcl->compressed_bvecs[i].page = t;
|
||||
spin_unlock(&pcl->obj.lock);
|
||||
continue;
|
||||
}
|
||||
spin_unlock(&pcl->obj.lock);
|
||||
|
||||
if (page)
|
||||
put_page(page);
|
||||
@ -729,31 +731,25 @@ int erofs_init_managed_cache(struct super_block *sb)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool z_erofs_try_inplace_io(struct z_erofs_decompress_frontend *fe,
|
||||
struct z_erofs_bvec *bvec)
|
||||
{
|
||||
struct z_erofs_pcluster *const pcl = fe->pcl;
|
||||
|
||||
while (fe->icur > 0) {
|
||||
if (!cmpxchg(&pcl->compressed_bvecs[--fe->icur].page,
|
||||
NULL, bvec->page)) {
|
||||
pcl->compressed_bvecs[fe->icur] = *bvec;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/* callers must be with pcluster lock held */
|
||||
static int z_erofs_attach_page(struct z_erofs_decompress_frontend *fe,
|
||||
struct z_erofs_bvec *bvec, bool exclusive)
|
||||
{
|
||||
struct z_erofs_pcluster *pcl = fe->pcl;
|
||||
int ret;
|
||||
|
||||
if (exclusive) {
|
||||
/* give priority for inplaceio to use file pages first */
|
||||
if (z_erofs_try_inplace_io(fe, bvec))
|
||||
spin_lock(&pcl->obj.lock);
|
||||
while (fe->icur > 0) {
|
||||
if (pcl->compressed_bvecs[--fe->icur].page)
|
||||
continue;
|
||||
pcl->compressed_bvecs[fe->icur] = *bvec;
|
||||
spin_unlock(&pcl->obj.lock);
|
||||
return 0;
|
||||
}
|
||||
spin_unlock(&pcl->obj.lock);
|
||||
|
||||
/* otherwise, check if it can be used as a bvpage */
|
||||
if (fe->mode >= Z_EROFS_PCLUSTER_FOLLOWED &&
|
||||
!fe->candidate_bvpage)
|
||||
@ -803,6 +799,7 @@ static int z_erofs_register_pcluster(struct z_erofs_decompress_frontend *fe)
|
||||
if (IS_ERR(pcl))
|
||||
return PTR_ERR(pcl);
|
||||
|
||||
spin_lock_init(&pcl->obj.lock);
|
||||
atomic_set(&pcl->obj.refcount, 1);
|
||||
pcl->algorithmformat = map->m_algorithmformat;
|
||||
pcl->length = 0;
|
||||
@ -1450,23 +1447,26 @@ static void z_erofs_fill_bio_vec(struct bio_vec *bvec,
|
||||
{
|
||||
gfp_t gfp = mapping_gfp_mask(mc);
|
||||
bool tocache = false;
|
||||
struct z_erofs_bvec *zbv = pcl->compressed_bvecs + nr;
|
||||
struct z_erofs_bvec zbv;
|
||||
struct address_space *mapping;
|
||||
struct page *page, *oldpage;
|
||||
struct page *page;
|
||||
int justfound, bs = i_blocksize(f->inode);
|
||||
|
||||
/* Except for inplace pages, the entire page can be used for I/Os */
|
||||
bvec->bv_offset = 0;
|
||||
bvec->bv_len = PAGE_SIZE;
|
||||
repeat:
|
||||
oldpage = READ_ONCE(zbv->page);
|
||||
if (!oldpage)
|
||||
spin_lock(&pcl->obj.lock);
|
||||
zbv = pcl->compressed_bvecs[nr];
|
||||
page = zbv.page;
|
||||
justfound = (unsigned long)page & 1UL;
|
||||
page = (struct page *)((unsigned long)page & ~1UL);
|
||||
pcl->compressed_bvecs[nr].page = page;
|
||||
spin_unlock(&pcl->obj.lock);
|
||||
if (!page)
|
||||
goto out_allocpage;
|
||||
|
||||
justfound = (unsigned long)oldpage & 1UL;
|
||||
page = (struct page *)((unsigned long)oldpage & ~1UL);
|
||||
bvec->bv_page = page;
|
||||
|
||||
DBG_BUGON(z_erofs_is_shortlived_page(page));
|
||||
/*
|
||||
* Handle preallocated cached pages. We tried to allocate such pages
|
||||
@ -1475,7 +1475,6 @@ static void z_erofs_fill_bio_vec(struct bio_vec *bvec,
|
||||
*/
|
||||
if (page->private == Z_EROFS_PREALLOCATED_PAGE) {
|
||||
set_page_private(page, 0);
|
||||
WRITE_ONCE(zbv->page, page);
|
||||
tocache = true;
|
||||
goto out_tocache;
|
||||
}
|
||||
@ -1486,9 +1485,9 @@ static void z_erofs_fill_bio_vec(struct bio_vec *bvec,
|
||||
* therefore it is impossible for `mapping` to be NULL.
|
||||
*/
|
||||
if (mapping && mapping != mc) {
|
||||
if (zbv->offset < 0)
|
||||
bvec->bv_offset = round_up(-zbv->offset, bs);
|
||||
bvec->bv_len = round_up(zbv->end, bs) - bvec->bv_offset;
|
||||
if (zbv.offset < 0)
|
||||
bvec->bv_offset = round_up(-zbv.offset, bs);
|
||||
bvec->bv_len = round_up(zbv.end, bs) - bvec->bv_offset;
|
||||
return;
|
||||
}
|
||||
|
||||
@ -1498,7 +1497,6 @@ static void z_erofs_fill_bio_vec(struct bio_vec *bvec,
|
||||
|
||||
/* the cached page is still in managed cache */
|
||||
if (page->mapping == mc) {
|
||||
WRITE_ONCE(zbv->page, page);
|
||||
/*
|
||||
* The cached page is still available but without a valid
|
||||
* `->private` pcluster hint. Let's reconnect them.
|
||||
@ -1530,11 +1528,15 @@ static void z_erofs_fill_bio_vec(struct bio_vec *bvec,
|
||||
put_page(page);
|
||||
out_allocpage:
|
||||
page = erofs_allocpage(&f->pagepool, gfp | __GFP_NOFAIL);
|
||||
if (oldpage != cmpxchg(&zbv->page, oldpage, page)) {
|
||||
spin_lock(&pcl->obj.lock);
|
||||
if (pcl->compressed_bvecs[nr].page) {
|
||||
erofs_pagepool_add(&f->pagepool, page);
|
||||
spin_unlock(&pcl->obj.lock);
|
||||
cond_resched();
|
||||
goto repeat;
|
||||
}
|
||||
pcl->compressed_bvecs[nr].page = page;
|
||||
spin_unlock(&pcl->obj.lock);
|
||||
bvec->bv_page = page;
|
||||
out_tocache:
|
||||
if (!tocache || bs != PAGE_SIZE ||
|
||||
@ -1712,6 +1714,7 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
|
||||
|
||||
if (cur + bvec.bv_len > end)
|
||||
bvec.bv_len = end - cur;
|
||||
DBG_BUGON(bvec.bv_len < sb->s_blocksize);
|
||||
if (!bio_add_page(bio, bvec.bv_page, bvec.bv_len,
|
||||
bvec.bv_offset))
|
||||
goto submit_bio_retry;
|
||||
|
Loading…
Reference in New Issue
Block a user