Changes in 5.10.225 fuse: Initialize beyond-EOF page contents before setting uptodate ALSA: usb-audio: Support Yamaha P-125 quirk entry xhci: Fix Panther point NULL pointer deref at full-speed re-enumeration thunderbolt: Mark XDomain as unplugged when router is removed s390/dasd: fix error recovery leading to data corruption on ESE devices arm64: ACPI: NUMA: initialize all values of acpi_early_node_map to NUMA_NO_NODE dm resume: don't return EINVAL when signalled dm persistent data: fix memory allocation failure vfs: Don't evict inode under the inode lru traversing context bitmap: introduce generic optimized bitmap_size() fix bitmap corruption on close_range() with CLOSE_RANGE_UNSHARE selinux: fix potential counting error in avc_add_xperms_decision() btrfs: tree-checker: add dev extent item checks drm/amdgpu: Actually check flags for all context ops. memcg_write_event_control(): fix a user-triggerable oops drm/amdgpu/jpeg2: properly set atomics vmid field s390/cio: rename bitmap_size() -> idset_bitmap_size() btrfs: rename bitmap_set_bits() -> btrfs_bitmap_set_bits() s390/uv: Panic for set and remove shared access UVC errors net/mlx5e: Correctly report errors for ethtool rx flows atm: idt77252: prevent use after free in dequeue_rx() net: axienet: Fix register defines comment description net: dsa: vsc73xx: pass value in phy_write operation net: dsa: vsc73xx: use read_poll_timeout instead delay loop net: dsa: vsc73xx: check busy flag in MDIO operations mptcp: correct MPTCP_SUBFLOW_ATTR_SSN_OFFSET reserved size netfilter: flowtable: initialise extack before use net: hns3: fix wrong use of semaphore up net: hns3: fix a deadlock problem when config TC during resetting ALSA: hda/realtek: Fix noise from speakers on Lenovo IdeaPad 3 15IAU7 ssb: Fix division by zero issue in ssb_calc_clock_rate wifi: mac80211: fix BA session teardown race wifi: cw1200: Avoid processing an invalid TIM IE i2c: riic: avoid potential division by zero RDMA/rtrs: Fix the problem of variable not 
initialized fully s390/smp,mcck: fix early IPI handling media: radio-isa: use dev_name to fill in bus_info staging: iio: resolver: ad2s1210: fix use before initialization drm/amd/display: Validate hw_points_num before using it staging: ks7010: disable bh on tx_dev_lock binfmt_misc: cleanup on filesystem umount media: qcom: venus: fix incorrect return value scsi: spi: Fix sshdr use gfs2: setattr_chown: Add missing initialization wifi: iwlwifi: abort scan when rfkill on but device enabled IB/hfi1: Fix potential deadlock on &irq_src_lock and &dd->uctxt_lock powerpc/xics: Check return value of kasprintf in icp_native_map_one_cpu nvmet-trace: avoid dereferencing pointer too early ext4: do not trim the group with corrupted block bitmap quota: Remove BUG_ON from dqget() media: pci: cx23885: check cx23885_vdev_init() return fs: binfmt_elf_efpic: don't use missing interpreter's properties scsi: lpfc: Initialize status local variable in lpfc_sli4_repost_sgl_list() net/sun3_82586: Avoid reading past buffer in debug output drm/lima: set gp bus_stop bit before hard reset virtiofs: forbid newlines in tags netlink: hold nlk->cb_mutex longer in __netlink_dump_start() md: clean up invalid BUG_ON in md_ioctl x86: Increase brk randomness entropy for 64-bit systems memory: stm32-fmc2-ebi: check regmap_read return value parisc: Use irq_enter_rcu() to fix warning at kernel/context_tracking.c:367 powerpc/boot: Handle allocation failure in simple_realloc() powerpc/boot: Only free if realloc() succeeds btrfs: change BUG_ON to assertion when checking for delayed_node root btrfs: handle invalid root reference found in may_destroy_subvol() btrfs: send: handle unexpected data in header buffer in begin_cmd() btrfs: delete pointless BUG_ON check on quota root in btrfs_qgroup_account_extent() f2fs: fix to do sanity check in update_sit_entry usb: gadget: fsl: Increase size of name buffer for endpoints Bluetooth: bnep: Fix out-of-bound access net: hns3: add checking for vf id of mailbox nvmet-tcp: 
do not continue for invalid icreq NFS: avoid infinite loop in pnfs_update_layout. openrisc: Call setup_memory() earlier in the init sequence s390/iucv: fix receive buffer virtual vs physical address confusion usb: dwc3: core: Skip setting event buffers for host only controllers irqchip/gic-v3-its: Remove BUG_ON in its_vpe_irq_domain_alloc ext4: set the type of max_zeroout to unsigned int to avoid overflow nvmet-rdma: fix possible bad dereference when freeing rsps hrtimer: Prevent queuing of hrtimer without a function callback gtp: pull network headers in gtp_dev_xmit() block: use "unsigned long" for blk_validate_block_size(). media: solo6x10: replace max(a, min(b, c)) by clamp(b, a, c) dm suspend: return -ERESTARTSYS instead of -EINTR Bluetooth: hci_core: Fix LE quote calculation Bluetooth: SMP: Fix assumption of Central always being Initiator tc-testing: don't access non-existent variable on exception kcm: Serialise kcm_sendmsg() for the same socket. netfilter: nft_counter: Synchronize nft_counter_reset() against reader. 
ip6_tunnel: Fix broken GRO bonding: fix bond_ipsec_offload_ok return type bonding: fix null pointer deref in bond_ipsec_offload_ok bonding: fix xfrm real_dev null pointer dereference bonding: fix xfrm state handling when clearing active slave ice: fix ICE_LAST_OFFSET formula net: dsa: mv88e6xxx: read FID when handling ATU violations net: dsa: mv88e6xxx: replace ATU violation prints with trace points net: dsa: mv88e6xxx: Fix out-of-bound access netem: fix return value if duplicate enqueue fails ipv6: prevent UAF in ip6_send_skb() net: xilinx: axienet: Always disable promiscuous mode net: xilinx: axienet: Fix dangling multicast addresses drm/msm/dpu: don't play tricks with debug macros drm/msm/dp: reset the link phy params before link training mmc: mmc_test: Fix NULL dereference on allocation failure Bluetooth: MGMT: Add error handling to pair_device() binfmt_misc: pass binfmt_misc flags to the interpreter MIPS: Loongson64: Set timer mode in cpu-probe HID: wacom: Defer calculation of resolution until resolution_code is known HID: microsoft: Add rumble support to latest xbox controllers cxgb4: add forgotten u64 ivlan cast before shift KVM: arm64: Make ICC_*SGI*_EL1 undef in the absence of a vGICv3 mmc: dw_mmc: allow biu and ciu clocks to defer Revert "drm/amd/display: Validate hw_points_num before using it" ALSA: timer: Relax start tick time check for slave timer elements nfsd: Don't call freezable_schedule_timeout() after each successful page allocation in svc_alloc_arg(). 
Bluetooth: hci_ldisc: check HCI_UART_PROTO_READY flag in HCIUARTGETPROTO Input: MT - limit max slots tools: move alignment-related macros to new <linux/align.h> drm/amdgpu: Using uninitialized value *size when calling amdgpu_vce_cs_reloc KVM: arm64: Don't use cbz/adr with external symbols pinctrl: rockchip: correct RK3328 iomux width flag for GPIO2-B pins pinctrl: single: fix potential NULL dereference in pcs_get_function() wifi: mwifiex: duplicate static structs used in driver instances mptcp: sched: check both backup in retrans ipc: replace costly bailout check in sysvipc_find_ipc() drm/amdkfd: don't allow mapping the MMIO HDP page with large pages media: uvcvideo: Fix integer overflow calculating timestamp Revert "Input: ioc3kbd - convert to platform remove callback returning void" ata: libata-core: Fix null pointer dereference on error cgroup/cpuset: Prevent UAF in proc_cpuset_show() net:rds: Fix possible deadlock in rds_message_put ovl: do not fail because of O_NOATIME soundwire: stream: fix programming slave ports for non-continous port maps dmaengine: dw: Add peripheral bus width verification dmaengine: dw: Add memory bus width verification ethtool: check device is present when getting link settings gtp: fix a potential NULL pointer dereference net: busy-poll: use ktime_get_ns() instead of local_clock() nfc: pn533: Add poll mod list filling check soc: qcom: cmd-db: Map shared memory as WC, not WB cdc-acm: Add DISABLE_ECHO quirk for GE HealthCare UI Controller USB: serial: option: add MeiG Smart SRM825L usb: dwc3: omap: add missing depopulate in probe error path usb: dwc3: core: Prevent USB core invalid event buffer address access usb: dwc3: st: fix probed platform device ref count on probe error path usb: dwc3: st: add missing depopulate in probe error path usb: core: sysfs: Unmerge @usb3_hardware_lpm_attr_group in remove_power_attributes() scsi: aacraid: Fix double-free on probe failure apparmor: fix policy_unpack_test on big endian systems Linux 5.10.225 
Change-Id: I5028ef07db680262d45fba4096094fe8b19dd052 Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
802 lines
18 KiB
C
802 lines
18 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
/*
|
|
* Copyright (C) 2017 Red Hat, Inc.
|
|
*/
|
|
|
|
#include <linux/cred.h>
|
|
#include <linux/file.h>
|
|
#include <linux/mount.h>
|
|
#include <linux/xattr.h>
|
|
#include <linux/uio.h>
|
|
#include <linux/uaccess.h>
|
|
#include <linux/splice.h>
|
|
#include <linux/security.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/fs.h>
|
|
#include "overlayfs.h"
|
|
|
|
/* kiocb flags mask passed to iocb_to_rw_flags() when forwarding I/O to the real file */
#define OVL_IOCB_MASK (IOCB_DSYNC | IOCB_HIPRI | IOCB_NOWAIT | IOCB_SYNC)
|
|
|
|
struct ovl_aio_req {
|
|
struct kiocb iocb;
|
|
refcount_t ref;
|
|
struct kiocb *orig_iocb;
|
|
};
|
|
|
|
/* Slab cache for struct ovl_aio_req, created by ovl_aio_request_cache_init() */
static struct kmem_cache *ovl_aio_request_cachep;
|
|
|
|
/*
 * Single-character tag for @realinode, used by the debug trace in
 * ovl_open_realfile():
 *   'l' - @realinode is not the upper inode of @inode,
 *   'u' - it is the upper inode and ovl_has_upperdata(@inode) is true,
 *   'm' - it is the upper inode and ovl_has_upperdata(@inode) is false.
 */
static char ovl_whatisit(struct inode *inode, struct inode *realinode)
{
	if (realinode != ovl_inode_upper(inode))
		return 'l';

	return ovl_has_upperdata(inode) ? 'u' : 'm';
}
|
|
|
|
/* No atime modification nor notify on underlying */
|
|
#define OVL_OPEN_FLAGS (O_NOATIME | FMODE_NONOTIFY)
|
|
|
|
static struct file *ovl_open_realfile(const struct file *file,
|
|
struct inode *realinode)
|
|
{
|
|
struct inode *inode = file_inode(file);
|
|
struct file *realfile;
|
|
const struct cred *old_cred;
|
|
int flags = file->f_flags | OVL_OPEN_FLAGS;
|
|
int acc_mode = ACC_MODE(flags);
|
|
int err;
|
|
|
|
if (flags & O_APPEND)
|
|
acc_mode |= MAY_APPEND;
|
|
|
|
old_cred = ovl_override_creds(inode->i_sb);
|
|
err = inode_permission(realinode, MAY_OPEN | acc_mode);
|
|
if (err) {
|
|
realfile = ERR_PTR(err);
|
|
} else {
|
|
if (!inode_owner_or_capable(realinode))
|
|
flags &= ~O_NOATIME;
|
|
|
|
realfile = open_with_fake_path(&file->f_path, flags, realinode,
|
|
current_cred());
|
|
}
|
|
ovl_revert_creds(inode->i_sb, old_cred);
|
|
|
|
pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n",
|
|
file, file, ovl_whatisit(inode, realinode), file->f_flags,
|
|
realfile, IS_ERR(realfile) ? 0 : realfile->f_flags);
|
|
|
|
return realfile;
|
|
}
|
|
|
|
/* File status flags that ovl_change_flags() is allowed to update on an open file */
#define OVL_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT)
|
|
|
|
static int ovl_change_flags(struct file *file, unsigned int flags)
|
|
{
|
|
struct inode *inode = file_inode(file);
|
|
int err;
|
|
|
|
flags &= OVL_SETFL_MASK;
|
|
|
|
if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode))
|
|
return -EPERM;
|
|
|
|
if (flags & O_DIRECT) {
|
|
if (!file->f_mapping->a_ops ||
|
|
!file->f_mapping->a_ops->direct_IO)
|
|
return -EINVAL;
|
|
}
|
|
|
|
if (file->f_op->check_flags) {
|
|
err = file->f_op->check_flags(flags);
|
|
if (err)
|
|
return err;
|
|
}
|
|
|
|
spin_lock(&file->f_lock);
|
|
file->f_flags = (file->f_flags & ~OVL_SETFL_MASK) | flags;
|
|
spin_unlock(&file->f_lock);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int ovl_real_fdget_meta(const struct file *file, struct fd *real,
|
|
bool allow_meta)
|
|
{
|
|
struct inode *inode = file_inode(file);
|
|
struct inode *realinode;
|
|
|
|
real->flags = 0;
|
|
real->file = file->private_data;
|
|
|
|
if (allow_meta)
|
|
realinode = ovl_inode_real(inode);
|
|
else
|
|
realinode = ovl_inode_realdata(inode);
|
|
|
|
/* Has it been copied up since we'd opened it? */
|
|
if (unlikely(file_inode(real->file) != realinode)) {
|
|
real->flags = FDPUT_FPUT;
|
|
real->file = ovl_open_realfile(file, realinode);
|
|
|
|
return PTR_ERR_OR_ZERO(real->file);
|
|
}
|
|
|
|
/* Did the flags change since open? */
|
|
if (unlikely((file->f_flags ^ real->file->f_flags) & ~OVL_OPEN_FLAGS))
|
|
return ovl_change_flags(real->file, file->f_flags);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int ovl_real_fdget(const struct file *file, struct fd *real)
|
|
{
|
|
if (d_is_dir(file_dentry(file))) {
|
|
real->flags = 0;
|
|
real->file = ovl_dir_real_file(file, false);
|
|
|
|
return PTR_ERR_OR_ZERO(real->file);
|
|
}
|
|
|
|
return ovl_real_fdget_meta(file, real, false);
|
|
}
|
|
|
|
static int ovl_open(struct inode *inode, struct file *file)
|
|
{
|
|
struct file *realfile;
|
|
int err;
|
|
|
|
err = ovl_maybe_copy_up(file_dentry(file), file->f_flags);
|
|
if (err)
|
|
return err;
|
|
|
|
/* No longer need these flags, so don't pass them on to underlying fs */
|
|
file->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
|
|
|
|
realfile = ovl_open_realfile(file, ovl_inode_realdata(inode));
|
|
if (IS_ERR(realfile))
|
|
return PTR_ERR(realfile);
|
|
|
|
file->private_data = realfile;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int ovl_release(struct inode *inode, struct file *file)
|
|
{
|
|
fput(file->private_data);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * ->llseek() for overlay files.
 *
 * Returns the resulting position, or a negative errno.
 */
static loff_t ovl_llseek(struct file *file, loff_t offset, int whence)
{
	struct inode *ovl_inode = file_inode(file);
	const struct cred *saved_cred;
	struct fd real;
	loff_t pos;

	/*
	 * The two special cases below do not need to involve the real fs,
	 * so we can optimize for concurrent callers.
	 */
	if (!offset) {
		switch (whence) {
		case SEEK_CUR:
			return file->f_pos;
		case SEEK_SET:
			return vfs_setpos(file, 0, 0);
		default:
			break;
		}
	}

	pos = ovl_real_fdget(file, &real);
	if (pos)
		return pos;

	/*
	 * Overlay file f_pos is the master copy that is preserved
	 * through copy up and modified on read/write, but only real
	 * fs knows how to SEEK_HOLE/SEEK_DATA and real fs may impose
	 * limitations that are more strict than ->s_maxbytes for specific
	 * files, so we use the real file to perform seeks.
	 */
	ovl_inode_lock(ovl_inode);
	real.file->f_pos = file->f_pos;

	saved_cred = ovl_override_creds(ovl_inode->i_sb);
	pos = vfs_llseek(real.file, offset, whence);
	ovl_revert_creds(ovl_inode->i_sb, saved_cred);

	/* Copy the real file's position back to the overlay file */
	file->f_pos = real.file->f_pos;
	ovl_inode_unlock(ovl_inode);

	fdput(real);

	return pos;
}
|
|
|
|
static void ovl_file_accessed(struct file *file)
|
|
{
|
|
struct inode *inode, *upperinode;
|
|
|
|
if (file->f_flags & O_NOATIME)
|
|
return;
|
|
|
|
inode = file_inode(file);
|
|
upperinode = ovl_inode_upper(inode);
|
|
|
|
if (!upperinode)
|
|
return;
|
|
|
|
if ((!timespec64_equal(&inode->i_mtime, &upperinode->i_mtime) ||
|
|
!timespec64_equal(&inode->i_ctime, &upperinode->i_ctime))) {
|
|
inode->i_mtime = upperinode->i_mtime;
|
|
inode->i_ctime = upperinode->i_ctime;
|
|
}
|
|
|
|
touch_atime(&file->f_path);
|
|
}
|
|
|
|
static inline void ovl_aio_put(struct ovl_aio_req *aio_req)
|
|
{
|
|
if (refcount_dec_and_test(&aio_req->ref)) {
|
|
fput(aio_req->iocb.ki_filp);
|
|
kmem_cache_free(ovl_aio_request_cachep, aio_req);
|
|
}
|
|
}
|
|
|
|
static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req)
|
|
{
|
|
struct kiocb *iocb = &aio_req->iocb;
|
|
struct kiocb *orig_iocb = aio_req->orig_iocb;
|
|
|
|
if (iocb->ki_flags & IOCB_WRITE) {
|
|
struct inode *inode = file_inode(orig_iocb->ki_filp);
|
|
|
|
/* Actually acquired in ovl_write_iter() */
|
|
__sb_writers_acquired(file_inode(iocb->ki_filp)->i_sb,
|
|
SB_FREEZE_WRITE);
|
|
file_end_write(iocb->ki_filp);
|
|
ovl_copyattr(ovl_inode_real(inode), inode);
|
|
}
|
|
|
|
orig_iocb->ki_pos = iocb->ki_pos;
|
|
ovl_aio_put(aio_req);
|
|
}
|
|
|
|
static void ovl_aio_rw_complete(struct kiocb *iocb, long res, long res2)
|
|
{
|
|
struct ovl_aio_req *aio_req = container_of(iocb,
|
|
struct ovl_aio_req, iocb);
|
|
struct kiocb *orig_iocb = aio_req->orig_iocb;
|
|
|
|
ovl_aio_cleanup_handler(aio_req);
|
|
orig_iocb->ki_complete(orig_iocb, res, res2);
|
|
}
|
|
|
|
static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
|
|
{
|
|
struct file *file = iocb->ki_filp;
|
|
struct fd real;
|
|
const struct cred *old_cred;
|
|
ssize_t ret;
|
|
|
|
if (!iov_iter_count(iter))
|
|
return 0;
|
|
|
|
ret = ovl_real_fdget(file, &real);
|
|
if (ret)
|
|
return ret;
|
|
|
|
ret = -EINVAL;
|
|
if (iocb->ki_flags & IOCB_DIRECT &&
|
|
(!real.file->f_mapping->a_ops ||
|
|
!real.file->f_mapping->a_ops->direct_IO))
|
|
goto out_fdput;
|
|
|
|
old_cred = ovl_override_creds(file_inode(file)->i_sb);
|
|
if (is_sync_kiocb(iocb)) {
|
|
ret = vfs_iter_read(real.file, iter, &iocb->ki_pos,
|
|
iocb_to_rw_flags(iocb->ki_flags,
|
|
OVL_IOCB_MASK));
|
|
} else {
|
|
struct ovl_aio_req *aio_req;
|
|
|
|
ret = -ENOMEM;
|
|
aio_req = kmem_cache_zalloc(ovl_aio_request_cachep, GFP_KERNEL);
|
|
if (!aio_req)
|
|
goto out;
|
|
|
|
real.flags = 0;
|
|
aio_req->orig_iocb = iocb;
|
|
kiocb_clone(&aio_req->iocb, iocb, get_file(real.file));
|
|
aio_req->iocb.ki_complete = ovl_aio_rw_complete;
|
|
refcount_set(&aio_req->ref, 2);
|
|
ret = vfs_iocb_iter_read(real.file, &aio_req->iocb, iter);
|
|
ovl_aio_put(aio_req);
|
|
if (ret != -EIOCBQUEUED)
|
|
ovl_aio_cleanup_handler(aio_req);
|
|
}
|
|
out:
|
|
ovl_revert_creds(file_inode(file)->i_sb, old_cred);
|
|
|
|
ovl_file_accessed(file);
|
|
out_fdput:
|
|
fdput(real);
|
|
|
|
return ret;
|
|
}
|
|
|
|
static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
|
|
{
|
|
struct file *file = iocb->ki_filp;
|
|
struct inode *inode = file_inode(file);
|
|
struct fd real;
|
|
const struct cred *old_cred;
|
|
ssize_t ret;
|
|
int ifl = iocb->ki_flags;
|
|
|
|
if (!iov_iter_count(iter))
|
|
return 0;
|
|
|
|
inode_lock(inode);
|
|
/* Update mode */
|
|
ovl_copyattr(ovl_inode_real(inode), inode);
|
|
ret = file_remove_privs(file);
|
|
if (ret)
|
|
goto out_unlock;
|
|
|
|
ret = ovl_real_fdget(file, &real);
|
|
if (ret)
|
|
goto out_unlock;
|
|
|
|
ret = -EINVAL;
|
|
if (iocb->ki_flags & IOCB_DIRECT &&
|
|
(!real.file->f_mapping->a_ops ||
|
|
!real.file->f_mapping->a_ops->direct_IO))
|
|
goto out_fdput;
|
|
|
|
if (!ovl_should_sync(OVL_FS(inode->i_sb)))
|
|
ifl &= ~(IOCB_DSYNC | IOCB_SYNC);
|
|
|
|
old_cred = ovl_override_creds(file_inode(file)->i_sb);
|
|
if (is_sync_kiocb(iocb)) {
|
|
file_start_write(real.file);
|
|
ret = vfs_iter_write(real.file, iter, &iocb->ki_pos,
|
|
iocb_to_rw_flags(ifl, OVL_IOCB_MASK));
|
|
file_end_write(real.file);
|
|
/* Update size */
|
|
ovl_copyattr(ovl_inode_real(inode), inode);
|
|
} else {
|
|
struct ovl_aio_req *aio_req;
|
|
|
|
ret = -ENOMEM;
|
|
aio_req = kmem_cache_zalloc(ovl_aio_request_cachep, GFP_KERNEL);
|
|
if (!aio_req)
|
|
goto out;
|
|
|
|
file_start_write(real.file);
|
|
/* Pacify lockdep, same trick as done in aio_write() */
|
|
__sb_writers_release(file_inode(real.file)->i_sb,
|
|
SB_FREEZE_WRITE);
|
|
real.flags = 0;
|
|
aio_req->orig_iocb = iocb;
|
|
kiocb_clone(&aio_req->iocb, iocb, get_file(real.file));
|
|
aio_req->iocb.ki_flags = ifl;
|
|
aio_req->iocb.ki_complete = ovl_aio_rw_complete;
|
|
refcount_set(&aio_req->ref, 2);
|
|
ret = vfs_iocb_iter_write(real.file, &aio_req->iocb, iter);
|
|
ovl_aio_put(aio_req);
|
|
if (ret != -EIOCBQUEUED)
|
|
ovl_aio_cleanup_handler(aio_req);
|
|
}
|
|
out:
|
|
ovl_revert_creds(file_inode(file)->i_sb, old_cred);
|
|
out_fdput:
|
|
fdput(real);
|
|
|
|
out_unlock:
|
|
inode_unlock(inode);
|
|
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* Calling iter_file_splice_write() directly from overlay's f_op may deadlock
|
|
* due to lock order inversion between pipe->mutex in iter_file_splice_write()
|
|
* and file_start_write(real.file) in ovl_write_iter().
|
|
*
|
|
* So do everything ovl_write_iter() does and call iter_file_splice_write() on
|
|
* the real file.
|
|
*/
|
|
static ssize_t ovl_splice_write(struct pipe_inode_info *pipe, struct file *out,
|
|
loff_t *ppos, size_t len, unsigned int flags)
|
|
{
|
|
struct fd real;
|
|
const struct cred *old_cred;
|
|
struct inode *inode = file_inode(out);
|
|
struct inode *realinode = ovl_inode_real(inode);
|
|
ssize_t ret;
|
|
|
|
inode_lock(inode);
|
|
/* Update mode */
|
|
ovl_copyattr(realinode, inode);
|
|
ret = file_remove_privs(out);
|
|
if (ret)
|
|
goto out_unlock;
|
|
|
|
ret = ovl_real_fdget(out, &real);
|
|
if (ret)
|
|
goto out_unlock;
|
|
|
|
old_cred = ovl_override_creds(inode->i_sb);
|
|
file_start_write(real.file);
|
|
|
|
ret = iter_file_splice_write(pipe, real.file, ppos, len, flags);
|
|
|
|
file_end_write(real.file);
|
|
/* Update size */
|
|
ovl_copyattr(realinode, inode);
|
|
ovl_revert_creds(inode->i_sb, old_cred);
|
|
fdput(real);
|
|
|
|
out_unlock:
|
|
inode_unlock(inode);
|
|
|
|
return ret;
|
|
}
|
|
|
|
/*
 * ->fsync() for overlay files.
 *
 * A result <= 0 from ovl_sync_status() is returned as-is.  Otherwise the
 * real file is looked up (metadata inode allowed unless @datasync) and
 * synced only when it is the upper file.
 */
static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
	struct inode *ovl_inode = file_inode(file);
	const struct cred *saved_cred;
	struct fd real;
	int ret;

	ret = ovl_sync_status(OVL_FS(ovl_inode->i_sb));
	if (ret <= 0)
		return ret;

	ret = ovl_real_fdget_meta(file, &real, !datasync);
	if (ret)
		return ret;

	/* Don't sync lower file for fear of receiving EROFS error */
	if (file_inode(real.file) == ovl_inode_upper(ovl_inode)) {
		saved_cred = ovl_override_creds(ovl_inode->i_sb);
		ret = vfs_fsync_range(real.file, start, end, datasync);
		ovl_revert_creds(ovl_inode->i_sb, saved_cred);
	}

	fdput(real);

	return ret;
}
|
|
|
|
static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
|
|
{
|
|
struct file *realfile = file->private_data;
|
|
const struct cred *old_cred;
|
|
int ret;
|
|
|
|
if (!realfile->f_op->mmap)
|
|
return -ENODEV;
|
|
|
|
if (WARN_ON(file != vma->vm_file))
|
|
return -EIO;
|
|
|
|
vma->vm_file = get_file(realfile);
|
|
|
|
old_cred = ovl_override_creds(file_inode(file)->i_sb);
|
|
ret = call_mmap(vma->vm_file, vma);
|
|
ovl_revert_creds(file_inode(file)->i_sb, old_cred);
|
|
|
|
if (ret) {
|
|
/* Drop reference count from new vm_file value */
|
|
fput(realfile);
|
|
} else {
|
|
/* Drop reference count from previous vm_file value */
|
|
fput(file);
|
|
}
|
|
|
|
ovl_file_accessed(file);
|
|
|
|
return ret;
|
|
}
|
|
|
|
/*
 * ->fallocate() for overlay files: run vfs_fallocate() on the real file
 * under the overlay inode lock, refreshing the overlay inode attributes
 * before and after the operation.
 */
static long ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
{
	struct inode *ovl_inode = file_inode(file);
	const struct cred *saved_cred;
	struct fd real;
	int ret;

	inode_lock(ovl_inode);

	/* Update mode */
	ovl_copyattr(ovl_inode_real(ovl_inode), ovl_inode);
	ret = file_remove_privs(file);
	if (!ret)
		ret = ovl_real_fdget(file, &real);
	if (ret)
		goto out_unlock;

	saved_cred = ovl_override_creds(file_inode(file)->i_sb);
	ret = vfs_fallocate(real.file, mode, offset, len);
	ovl_revert_creds(file_inode(file)->i_sb, saved_cred);

	/* Update size */
	ovl_copyattr(ovl_inode_real(ovl_inode), ovl_inode);

	fdput(real);

out_unlock:
	inode_unlock(ovl_inode);

	return ret;
}
|
|
|
|
/* ->fadvise() for overlay files: pass the advice on to the real file */
static int ovl_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
{
	struct super_block *ovl_sb = file_inode(file)->i_sb;
	const struct cred *saved_cred;
	struct fd real;
	int err;

	err = ovl_real_fdget(file, &real);
	if (err)
		return err;

	saved_cred = ovl_override_creds(ovl_sb);
	err = vfs_fadvise(real.file, offset, len, advice);
	ovl_revert_creds(ovl_sb, saved_cred);

	fdput(real);

	return err;
}
|
|
|
|
static long ovl_real_ioctl(struct file *file, unsigned int cmd,
|
|
unsigned long arg)
|
|
{
|
|
struct fd real;
|
|
long ret;
|
|
|
|
ret = ovl_real_fdget(file, &real);
|
|
if (ret)
|
|
return ret;
|
|
|
|
ret = security_file_ioctl(real.file, cmd, arg);
|
|
if (!ret) {
|
|
/*
|
|
* Don't override creds, since we currently can't safely check
|
|
* permissions before doing so.
|
|
*/
|
|
ret = vfs_ioctl(real.file, cmd, arg);
|
|
}
|
|
|
|
fdput(real);
|
|
|
|
return ret;
|
|
}
|
|
|
|
static long ovl_ioctl_set_flags(struct file *file, unsigned int cmd,
|
|
unsigned long arg)
|
|
{
|
|
long ret;
|
|
struct inode *inode = file_inode(file);
|
|
|
|
if (!inode_owner_or_capable(inode))
|
|
return -EACCES;
|
|
|
|
ret = mnt_want_write_file(file);
|
|
if (ret)
|
|
return ret;
|
|
|
|
inode_lock(inode);
|
|
|
|
/*
|
|
* Prevent copy up if immutable and has no CAP_LINUX_IMMUTABLE
|
|
* capability.
|
|
*/
|
|
ret = -EPERM;
|
|
if (!ovl_has_upperdata(inode) && IS_IMMUTABLE(inode) &&
|
|
!capable(CAP_LINUX_IMMUTABLE))
|
|
goto unlock;
|
|
|
|
ret = ovl_maybe_copy_up(file_dentry(file), O_WRONLY);
|
|
if (ret)
|
|
goto unlock;
|
|
|
|
ret = ovl_real_ioctl(file, cmd, arg);
|
|
|
|
ovl_copyflags(ovl_inode_real(inode), inode);
|
|
unlock:
|
|
inode_unlock(inode);
|
|
|
|
mnt_drop_write_file(file);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
long ovl_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
|
|
{
|
|
long ret;
|
|
|
|
switch (cmd) {
|
|
case FS_IOC_GETFLAGS:
|
|
case FS_IOC_FSGETXATTR:
|
|
ret = ovl_real_ioctl(file, cmd, arg);
|
|
break;
|
|
|
|
case FS_IOC_FSSETXATTR:
|
|
case FS_IOC_SETFLAGS:
|
|
ret = ovl_ioctl_set_flags(file, cmd, arg);
|
|
break;
|
|
|
|
default:
|
|
ret = -ENOTTY;
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
#ifdef CONFIG_COMPAT
|
|
long ovl_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
|
|
{
|
|
switch (cmd) {
|
|
case FS_IOC32_GETFLAGS:
|
|
cmd = FS_IOC_GETFLAGS;
|
|
break;
|
|
|
|
case FS_IOC32_SETFLAGS:
|
|
cmd = FS_IOC_SETFLAGS;
|
|
break;
|
|
|
|
default:
|
|
return -ENOIOCTLCMD;
|
|
}
|
|
|
|
return ovl_ioctl(file, cmd, arg);
|
|
}
|
|
#endif
|
|
|
|
/* Operation selector for ovl_copyfile() */
enum ovl_copyop {
	OVL_COPY,	/* vfs_copy_file_range() */
	OVL_CLONE,	/* vfs_clone_file_range() */
	OVL_DEDUPE,	/* vfs_dedupe_file_range_one() */
};
|
|
|
|
/*
 * Common implementation of copy_file_range / clone / dedupe between two
 * overlay files: resolve both real files and run the VFS helper selected
 * by @op on them, under the destination overlay inode lock.
 *
 * For everything but dedupe the destination's attributes are refreshed and
 * file_remove_privs() is applied first.  Destination attributes are
 * refreshed again once the operation has run.
 *
 * Returns the helper's result, or a negative errno from the setup steps.
 */
static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in,
			   struct file *file_out, loff_t pos_out,
			   loff_t len, unsigned int flags, enum ovl_copyop op)
{
	struct inode *dst_inode = file_inode(file_out);
	const struct cred *saved_cred;
	struct fd real_in, real_out;
	loff_t ret;

	inode_lock(dst_inode);
	if (op != OVL_DEDUPE) {
		/* Update mode */
		ovl_copyattr(ovl_inode_real(dst_inode), dst_inode);
		ret = file_remove_privs(file_out);
		if (ret)
			goto out_unlock;
	}

	ret = ovl_real_fdget(file_out, &real_out);
	if (ret)
		goto out_unlock;

	ret = ovl_real_fdget(file_in, &real_in);
	if (ret)
		goto out_fdput_out;

	saved_cred = ovl_override_creds(file_inode(file_out)->i_sb);
	switch (op) {
	case OVL_COPY:
		ret = vfs_copy_file_range(real_in.file, pos_in,
					  real_out.file, pos_out, len, flags);
		break;

	case OVL_CLONE:
		ret = vfs_clone_file_range(real_in.file, pos_in,
					   real_out.file, pos_out, len, flags);
		break;

	case OVL_DEDUPE:
		ret = vfs_dedupe_file_range_one(real_in.file, pos_in,
						real_out.file, pos_out, len,
						flags);
		break;
	}
	ovl_revert_creds(file_inode(file_out)->i_sb, saved_cred);

	/* Update size */
	ovl_copyattr(ovl_inode_real(dst_inode), dst_inode);

	fdput(real_in);
out_fdput_out:
	fdput(real_out);
out_unlock:
	inode_unlock(dst_inode);

	return ret;
}
|
|
|
|
static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in,
|
|
struct file *file_out, loff_t pos_out,
|
|
size_t len, unsigned int flags)
|
|
{
|
|
return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags,
|
|
OVL_COPY);
|
|
}
|
|
|
|
/*
 * ->remap_file_range(): clone or dedupe via ovl_copyfile().
 *
 * Returns -EINVAL for remap flags other than REMAP_FILE_DEDUP and
 * REMAP_FILE_ADVISORY, and -EPERM for a dedupe request where either file
 * has no upper inode.
 */
static loff_t ovl_remap_file_range(struct file *file_in, loff_t pos_in,
				   struct file *file_out, loff_t pos_out,
				   loff_t len, unsigned int remap_flags)
{
	const bool dedupe = remap_flags & REMAP_FILE_DEDUP;

	if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
		return -EINVAL;

	/*
	 * Don't copy up because of a dedupe request, this wouldn't make sense
	 * most of the time (data would be duplicated instead of deduplicated).
	 */
	if (dedupe && (!ovl_inode_upper(file_inode(file_in)) ||
		       !ovl_inode_upper(file_inode(file_out))))
		return -EPERM;

	return ovl_copyfile(file_in, pos_in, file_out, pos_out, len,
			    remap_flags, dedupe ? OVL_DEDUPE : OVL_CLONE);
}
|
|
|
|
const struct file_operations ovl_file_operations = {
|
|
.open = ovl_open,
|
|
.release = ovl_release,
|
|
.llseek = ovl_llseek,
|
|
.read_iter = ovl_read_iter,
|
|
.write_iter = ovl_write_iter,
|
|
.fsync = ovl_fsync,
|
|
.mmap = ovl_mmap,
|
|
.fallocate = ovl_fallocate,
|
|
.fadvise = ovl_fadvise,
|
|
.unlocked_ioctl = ovl_ioctl,
|
|
#ifdef CONFIG_COMPAT
|
|
.compat_ioctl = ovl_compat_ioctl,
|
|
#endif
|
|
.splice_read = generic_file_splice_read,
|
|
.splice_write = ovl_splice_write,
|
|
|
|
.copy_file_range = ovl_copy_file_range,
|
|
.remap_file_range = ovl_remap_file_range,
|
|
};
|
|
|
|
/*
 * Create the slab cache used for struct ovl_aio_req.
 *
 * Returns 0 on success, -ENOMEM if the cache could not be created.
 */
int __init ovl_aio_request_cache_init(void)
{
	ovl_aio_request_cachep = kmem_cache_create("ovl_aio_req",
						   sizeof(struct ovl_aio_req),
						   0, SLAB_HWCACHE_ALIGN, NULL);

	return ovl_aio_request_cachep ? 0 : -ENOMEM;
}
|
|
|
|
void ovl_aio_request_cache_destroy(void)
|
|
{
|
|
kmem_cache_destroy(ovl_aio_request_cachep);
|
|
}
|