Revert "ext4: properly sync file size update after O_SYNC direct IO"
This reverts commit dde4c1e166, which is
commit 91562895f8030cb9a0470b1db49de79346a69f91 upstream.
It breaks the Android kernel tests, and can be brought back in the future
if it is really needed.
Bug: 161946584
Change-Id: Ia4acc78370c913f2c3691a68148db000f3034eaf
Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
This commit is contained in:
parent
f46870ab3d
commit
ce691439c0
157
fs/ext4/file.c
157
fs/ext4/file.c
@@ -282,38 +282,80 @@ static ssize_t ext4_buffered_write_iter(struct kiocb *iocb,
|
||||
}
|
||||
|
||||
static ssize_t ext4_handle_inode_extension(struct inode *inode, loff_t offset,
|
||||
ssize_t count)
|
||||
ssize_t written, size_t count)
|
||||
{
|
||||
handle_t *handle;
|
||||
bool truncate = false;
|
||||
u8 blkbits = inode->i_blkbits;
|
||||
ext4_lblk_t written_blk, end_blk;
|
||||
int ret;
|
||||
|
||||
lockdep_assert_held_write(&inode->i_rwsem);
|
||||
handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
|
||||
if (IS_ERR(handle))
|
||||
return PTR_ERR(handle);
|
||||
/*
|
||||
* Note that EXT4_I(inode)->i_disksize can get extended up to
|
||||
* inode->i_size while the I/O was running due to writeback of delalloc
|
||||
* blocks. But, the code in ext4_iomap_alloc() is careful to use
|
||||
* zeroed/unwritten extents if this is possible; thus we won't leave
|
||||
* uninitialized blocks in a file even if we didn't succeed in writing
|
||||
* as much as we intended.
|
||||
*/
|
||||
WARN_ON_ONCE(i_size_read(inode) < EXT4_I(inode)->i_disksize);
|
||||
if (offset + count <= EXT4_I(inode)->i_disksize) {
|
||||
/*
|
||||
* We need to ensure that the inode is removed from the orphan
|
||||
* list if it has been added prematurely, due to writeback of
|
||||
* delalloc blocks.
|
||||
*/
|
||||
if (!list_empty(&EXT4_I(inode)->i_orphan) && inode->i_nlink) {
|
||||
handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
|
||||
|
||||
if (ext4_update_inode_size(inode, offset + count)) {
|
||||
int ret = ext4_mark_inode_dirty(handle, inode);
|
||||
if (unlikely(ret)) {
|
||||
if (IS_ERR(handle)) {
|
||||
ext4_orphan_del(NULL, inode);
|
||||
return PTR_ERR(handle);
|
||||
}
|
||||
|
||||
ext4_orphan_del(handle, inode);
|
||||
ext4_journal_stop(handle);
|
||||
return ret;
|
||||
}
|
||||
|
||||
return written;
|
||||
}
|
||||
|
||||
if (written < 0)
|
||||
goto truncate;
|
||||
|
||||
handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
|
||||
if (IS_ERR(handle)) {
|
||||
written = PTR_ERR(handle);
|
||||
goto truncate;
|
||||
}
|
||||
|
||||
if (ext4_update_inode_size(inode, offset + written)) {
|
||||
ret = ext4_mark_inode_dirty(handle, inode);
|
||||
if (unlikely(ret)) {
|
||||
written = ret;
|
||||
ext4_journal_stop(handle);
|
||||
goto truncate;
|
||||
}
|
||||
}
|
||||
|
||||
if (inode->i_nlink)
|
||||
/*
|
||||
* We may need to truncate allocated but not written blocks beyond EOF.
|
||||
*/
|
||||
written_blk = ALIGN(offset + written, 1 << blkbits);
|
||||
end_blk = ALIGN(offset + count, 1 << blkbits);
|
||||
if (written_blk < end_blk && ext4_can_truncate(inode))
|
||||
truncate = true;
|
||||
|
||||
/*
|
||||
* Remove the inode from the orphan list if it has been extended and
|
||||
* everything went OK.
|
||||
*/
|
||||
if (!truncate && inode->i_nlink)
|
||||
ext4_orphan_del(handle, inode);
|
||||
ext4_journal_stop(handle);
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
/*
|
||||
* Clean up the inode after DIO or DAX extending write has completed and the
|
||||
* inode size has been updated using ext4_handle_inode_extension().
|
||||
*/
|
||||
static void ext4_inode_extension_cleanup(struct inode *inode, ssize_t count)
|
||||
{
|
||||
lockdep_assert_held_write(&inode->i_rwsem);
|
||||
if (count < 0) {
|
||||
if (truncate) {
|
||||
truncate:
|
||||
ext4_truncate_failed_write(inode);
|
||||
/*
|
||||
* If the truncate operation failed early, then the inode may
|
||||
@@ -322,28 +364,9 @@ static void ext4_inode_extension_cleanup(struct inode *inode, ssize_t count)
|
||||
*/
|
||||
if (inode->i_nlink)
|
||||
ext4_orphan_del(NULL, inode);
|
||||
return;
|
||||
}
|
||||
/*
|
||||
* If i_disksize got extended due to writeback of delalloc blocks while
|
||||
* the DIO was running we could fail to cleanup the orphan list in
|
||||
* ext4_handle_inode_extension(). Do it now.
|
||||
*/
|
||||
if (!list_empty(&EXT4_I(inode)->i_orphan) && inode->i_nlink) {
|
||||
handle_t *handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
|
||||
|
||||
if (IS_ERR(handle)) {
|
||||
/*
|
||||
* The write has successfully completed. Not much to
|
||||
* do with the error here so just cleanup the orphan
|
||||
* list and hope for the best.
|
||||
*/
|
||||
ext4_orphan_del(NULL, inode);
|
||||
return;
|
||||
}
|
||||
ext4_orphan_del(handle, inode);
|
||||
ext4_journal_stop(handle);
|
||||
}
|
||||
return written;
|
||||
}
|
||||
|
||||
static int ext4_dio_write_end_io(struct kiocb *iocb, ssize_t size,
|
||||
@@ -352,22 +375,31 @@ static int ext4_dio_write_end_io(struct kiocb *iocb, ssize_t size,
|
||||
loff_t pos = iocb->ki_pos;
|
||||
struct inode *inode = file_inode(iocb->ki_filp);
|
||||
|
||||
if (!error && size && flags & IOMAP_DIO_UNWRITTEN)
|
||||
error = ext4_convert_unwritten_extents(NULL, inode, pos, size);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
if (size && flags & IOMAP_DIO_UNWRITTEN) {
|
||||
error = ext4_convert_unwritten_extents(NULL, inode, pos, size);
|
||||
if (error < 0)
|
||||
return error;
|
||||
}
|
||||
/*
|
||||
* Note that EXT4_I(inode)->i_disksize can get extended up to
|
||||
* inode->i_size while the I/O was running due to writeback of delalloc
|
||||
* blocks. But the code in ext4_iomap_alloc() is careful to use
|
||||
* zeroed/unwritten extents if this is possible; thus we won't leave
|
||||
* uninitialized blocks in a file even if we didn't succeed in writing
|
||||
* as much as we intended.
|
||||
* If we are extending the file, we have to update i_size here before
|
||||
* page cache gets invalidated in iomap_dio_rw(). Otherwise racing
|
||||
* buffered reads could zero out too much from page cache pages. Update
|
||||
* of on-disk size will happen later in ext4_dio_write_iter() where
|
||||
* we have enough information to also perform orphan list handling etc.
|
||||
* Note that we perform all extending writes synchronously under
|
||||
* i_rwsem held exclusively so i_size update is safe here in that case.
|
||||
* If the write was not extending, we cannot see pos > i_size here
|
||||
* because operations reducing i_size like truncate wait for all
|
||||
* outstanding DIO before updating i_size.
|
||||
*/
|
||||
WARN_ON_ONCE(i_size_read(inode) < READ_ONCE(EXT4_I(inode)->i_disksize));
|
||||
if (pos + size <= READ_ONCE(EXT4_I(inode)->i_disksize))
|
||||
return size;
|
||||
return ext4_handle_inode_extension(inode, pos, size);
|
||||
pos += size;
|
||||
if (pos > i_size_read(inode))
|
||||
i_size_write(inode, pos);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct iomap_dio_ops ext4_dio_write_ops = {
|
||||
@@ -542,16 +574,9 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
|
||||
is_sync_kiocb(iocb) || unaligned_io || extend);
|
||||
if (ret == -ENOTBLK)
|
||||
ret = 0;
|
||||
if (extend) {
|
||||
/*
|
||||
* We always perform extending DIO write synchronously so by
|
||||
* now the IO is completed and ext4_handle_inode_extension()
|
||||
* was called. Cleanup the inode in case of error or race with
|
||||
* writeback of delalloc blocks.
|
||||
*/
|
||||
WARN_ON_ONCE(ret == -EIOCBQUEUED);
|
||||
ext4_inode_extension_cleanup(inode, ret);
|
||||
}
|
||||
|
||||
if (extend)
|
||||
ret = ext4_handle_inode_extension(inode, offset, ret, count);
|
||||
|
||||
out:
|
||||
if (ilock_shared)
|
||||
@@ -632,10 +657,8 @@ ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
|
||||
|
||||
ret = dax_iomap_rw(iocb, from, &ext4_iomap_ops);
|
||||
|
||||
if (extend) {
|
||||
ret = ext4_handle_inode_extension(inode, offset, ret);
|
||||
ext4_inode_extension_cleanup(inode, ret);
|
||||
}
|
||||
if (extend)
|
||||
ret = ext4_handle_inode_extension(inode, offset, ret, count);
|
||||
out:
|
||||
inode_unlock(inode);
|
||||
if (ret > 0)
|
||||
|
Loading…
Reference in New Issue
Block a user