ceph: fix splice read for no Fc capability case
When the iov_iter type is ITER_PIPE, copy_page_to_iter() increases the page's reference count and adds the page to a pipe_buffer. It also sets the pipe_buffer's ops to page_cache_pipe_buf_ops. The confirm callback in page_cache_pipe_buf_ops expects the page to be from the page cache and uptodate; otherwise it returns an error. In the ceph_sync_read() case, the pages are not from the page cache, so we can't call copy_page_to_iter() when the iov_iter type is ITER_PIPE. The fix is to use iov_iter_get_pages_alloc() to allocate pages for the pipe (the code is similar to default_file_splice_read()). Signed-off-by: Yan, Zheng <zyan@redhat.com>
This commit is contained in:
parent
2b1ac852eb
commit
7ce469a53e
118
fs/ceph/file.c
118
fs/ceph/file.c
@ -458,71 +458,60 @@ enum {
|
|||||||
* only return a short read to the caller if we hit EOF.
|
* only return a short read to the caller if we hit EOF.
|
||||||
*/
|
*/
|
||||||
static int striped_read(struct inode *inode,
|
static int striped_read(struct inode *inode,
|
||||||
u64 off, u64 len,
|
u64 pos, u64 len,
|
||||||
struct page **pages, int num_pages,
|
struct page **pages, int num_pages,
|
||||||
int *checkeof)
|
int page_align, int *checkeof)
|
||||||
{
|
{
|
||||||
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
|
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
|
||||||
struct ceph_inode_info *ci = ceph_inode(inode);
|
struct ceph_inode_info *ci = ceph_inode(inode);
|
||||||
u64 pos, this_len, left;
|
u64 this_len;
|
||||||
loff_t i_size;
|
loff_t i_size;
|
||||||
int page_align, pages_left;
|
int page_idx;
|
||||||
int read, ret;
|
int ret, read = 0;
|
||||||
struct page **page_pos;
|
|
||||||
bool hit_stripe, was_short;
|
bool hit_stripe, was_short;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* we may need to do multiple reads. not atomic, unfortunately.
|
* we may need to do multiple reads. not atomic, unfortunately.
|
||||||
*/
|
*/
|
||||||
pos = off;
|
|
||||||
left = len;
|
|
||||||
page_pos = pages;
|
|
||||||
pages_left = num_pages;
|
|
||||||
read = 0;
|
|
||||||
|
|
||||||
more:
|
more:
|
||||||
page_align = pos & ~PAGE_MASK;
|
this_len = len;
|
||||||
this_len = left;
|
page_idx = (page_align + read) >> PAGE_SHIFT;
|
||||||
ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode),
|
ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode),
|
||||||
&ci->i_layout, pos, &this_len,
|
&ci->i_layout, pos, &this_len,
|
||||||
ci->i_truncate_seq,
|
ci->i_truncate_seq, ci->i_truncate_size,
|
||||||
ci->i_truncate_size,
|
pages + page_idx, num_pages - page_idx,
|
||||||
page_pos, pages_left, page_align);
|
((page_align + read) & ~PAGE_MASK));
|
||||||
if (ret == -ENOENT)
|
if (ret == -ENOENT)
|
||||||
ret = 0;
|
ret = 0;
|
||||||
hit_stripe = this_len < left;
|
hit_stripe = this_len < len;
|
||||||
was_short = ret >= 0 && ret < this_len;
|
was_short = ret >= 0 && ret < this_len;
|
||||||
dout("striped_read %llu~%llu (read %u) got %d%s%s\n", pos, left, read,
|
dout("striped_read %llu~%llu (read %u) got %d%s%s\n", pos, len, read,
|
||||||
ret, hit_stripe ? " HITSTRIPE" : "", was_short ? " SHORT" : "");
|
ret, hit_stripe ? " HITSTRIPE" : "", was_short ? " SHORT" : "");
|
||||||
|
|
||||||
i_size = i_size_read(inode);
|
i_size = i_size_read(inode);
|
||||||
if (ret >= 0) {
|
if (ret >= 0) {
|
||||||
int didpages;
|
|
||||||
if (was_short && (pos + ret < i_size)) {
|
if (was_short && (pos + ret < i_size)) {
|
||||||
int zlen = min(this_len - ret, i_size - pos - ret);
|
int zlen = min(this_len - ret, i_size - pos - ret);
|
||||||
int zoff = (off & ~PAGE_MASK) + read + ret;
|
int zoff = page_align + read + ret;
|
||||||
dout(" zero gap %llu to %llu\n",
|
dout(" zero gap %llu to %llu\n",
|
||||||
pos + ret, pos + ret + zlen);
|
pos + ret, pos + ret + zlen);
|
||||||
ceph_zero_page_vector_range(zoff, zlen, pages);
|
ceph_zero_page_vector_range(zoff, zlen, pages);
|
||||||
ret += zlen;
|
ret += zlen;
|
||||||
}
|
}
|
||||||
|
|
||||||
didpages = (page_align + ret) >> PAGE_SHIFT;
|
read += ret;
|
||||||
pos += ret;
|
pos += ret;
|
||||||
read = pos - off;
|
len -= ret;
|
||||||
left -= ret;
|
|
||||||
page_pos += didpages;
|
|
||||||
pages_left -= didpages;
|
|
||||||
|
|
||||||
/* hit stripe and need continue*/
|
/* hit stripe and need continue*/
|
||||||
if (left && hit_stripe && pos < i_size)
|
if (len && hit_stripe && pos < i_size)
|
||||||
goto more;
|
goto more;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (read > 0) {
|
if (read > 0) {
|
||||||
ret = read;
|
ret = read;
|
||||||
/* did we bounce off eof? */
|
/* did we bounce off eof? */
|
||||||
if (pos + left > i_size)
|
if (pos + len > i_size)
|
||||||
*checkeof = CHECK_EOF;
|
*checkeof = CHECK_EOF;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -536,15 +525,16 @@ static int striped_read(struct inode *inode,
|
|||||||
*
|
*
|
||||||
* If the read spans object boundary, just do multiple reads.
|
* If the read spans object boundary, just do multiple reads.
|
||||||
*/
|
*/
|
||||||
static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i,
|
static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
|
||||||
int *checkeof)
|
int *checkeof)
|
||||||
{
|
{
|
||||||
struct file *file = iocb->ki_filp;
|
struct file *file = iocb->ki_filp;
|
||||||
struct inode *inode = file_inode(file);
|
struct inode *inode = file_inode(file);
|
||||||
struct page **pages;
|
struct page **pages;
|
||||||
u64 off = iocb->ki_pos;
|
u64 off = iocb->ki_pos;
|
||||||
int num_pages, ret;
|
int num_pages;
|
||||||
size_t len = iov_iter_count(i);
|
ssize_t ret;
|
||||||
|
size_t len = iov_iter_count(to);
|
||||||
|
|
||||||
dout("sync_read on file %p %llu~%u %s\n", file, off,
|
dout("sync_read on file %p %llu~%u %s\n", file, off,
|
||||||
(unsigned)len,
|
(unsigned)len,
|
||||||
@ -563,35 +553,56 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i,
|
|||||||
if (ret < 0)
|
if (ret < 0)
|
||||||
return ret;
|
return ret;
|
||||||
|
|
||||||
num_pages = calc_pages_for(off, len);
|
if (unlikely(to->type & ITER_PIPE)) {
|
||||||
pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
|
size_t page_off;
|
||||||
if (IS_ERR(pages))
|
ret = iov_iter_get_pages_alloc(to, &pages, len,
|
||||||
return PTR_ERR(pages);
|
&page_off);
|
||||||
ret = striped_read(inode, off, len, pages,
|
if (ret <= 0)
|
||||||
num_pages, checkeof);
|
return -ENOMEM;
|
||||||
if (ret > 0) {
|
num_pages = DIV_ROUND_UP(ret + page_off, PAGE_SIZE);
|
||||||
int l, k = 0;
|
|
||||||
size_t left = ret;
|
|
||||||
|
|
||||||
while (left) {
|
ret = striped_read(inode, off, ret, pages, num_pages,
|
||||||
size_t page_off = off & ~PAGE_MASK;
|
page_off, checkeof);
|
||||||
size_t copy = min_t(size_t, left,
|
if (ret > 0) {
|
||||||
PAGE_SIZE - page_off);
|
iov_iter_advance(to, ret);
|
||||||
l = copy_page_to_iter(pages[k++], page_off, copy, i);
|
off += ret;
|
||||||
off += l;
|
} else {
|
||||||
left -= l;
|
iov_iter_advance(to, 0);
|
||||||
if (l < copy)
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
ceph_put_page_vector(pages, num_pages, false);
|
||||||
|
} else {
|
||||||
|
num_pages = calc_pages_for(off, len);
|
||||||
|
pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
|
||||||
|
if (IS_ERR(pages))
|
||||||
|
return PTR_ERR(pages);
|
||||||
|
|
||||||
|
ret = striped_read(inode, off, len, pages, num_pages,
|
||||||
|
(off & ~PAGE_MASK), checkeof);
|
||||||
|
if (ret > 0) {
|
||||||
|
int l, k = 0;
|
||||||
|
size_t left = ret;
|
||||||
|
|
||||||
|
while (left) {
|
||||||
|
size_t page_off = off & ~PAGE_MASK;
|
||||||
|
size_t copy = min_t(size_t, left,
|
||||||
|
PAGE_SIZE - page_off);
|
||||||
|
l = copy_page_to_iter(pages[k++], page_off,
|
||||||
|
copy, to);
|
||||||
|
off += l;
|
||||||
|
left -= l;
|
||||||
|
if (l < copy)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ceph_release_page_vector(pages, num_pages);
|
||||||
}
|
}
|
||||||
ceph_release_page_vector(pages, num_pages);
|
|
||||||
|
|
||||||
if (off > iocb->ki_pos) {
|
if (off > iocb->ki_pos) {
|
||||||
ret = off - iocb->ki_pos;
|
ret = off - iocb->ki_pos;
|
||||||
iocb->ki_pos = off;
|
iocb->ki_pos = off;
|
||||||
}
|
}
|
||||||
|
|
||||||
dout("sync_read result %d\n", ret);
|
dout("sync_read result %zd\n", ret);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1771,6 +1782,7 @@ const struct file_operations ceph_file_fops = {
|
|||||||
.fsync = ceph_fsync,
|
.fsync = ceph_fsync,
|
||||||
.lock = ceph_lock,
|
.lock = ceph_lock,
|
||||||
.flock = ceph_flock,
|
.flock = ceph_flock,
|
||||||
|
.splice_read = generic_file_splice_read,
|
||||||
.splice_write = iter_file_splice_write,
|
.splice_write = iter_file_splice_write,
|
||||||
.unlocked_ioctl = ceph_ioctl,
|
.unlocked_ioctl = ceph_ioctl,
|
||||||
.compat_ioctl = ceph_ioctl,
|
.compat_ioctl = ceph_ioctl,
|
||||||
|
Loading…
Reference in New Issue
Block a user