android_kernel_xiaomi_sm8450/fs/overlayfs/copy_up.c
Greg Kroah-Hartman e04ba5f57f This is the 5.10.199 stable release
-----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEEZH8oZUiU471FcZm+ONu9yGCSaT4FAmU45eIACgkQONu9yGCS
 aT6OGw//TD8VOR/VIUsdCH4keamvfoOjW7IdUMI6WWrXGw4TBQhOb1S5OFmUXLIW
 1TQKvnJSpkukW9oQXChEPiVm9LMXq0dsWOaI11I23HmAzenZQ+cdHLFa8Rod3DeJ
 t17qefmsZxvI3U5nXJiCYRlUcqWF8rgpYR8NaJass8xOOGKEDk9JMXy1hvCG1N8C
 1Zvth4wJmaDvJxSLHCL0gZkYBQBddePtrHwxWqLJ9vwUJEHGWf6AlwaASFUtRMut
 am2sWYx7aDKQT4w6B4MEJfA3bcTbLAglZG5s85ENhYYAMYW+cX/YtQH182KcpRTx
 mRmDc2vk1tJsSAuuE94OAAESjCdWF7V5SUkk/GLawnRiR7NeOax7vvS634uPtN+g
 LdTOlWMlcum46LmrJd6pu7oLXyZHGrr0/cBPewwYTlcRsmSS+WADUfH1yZL14lDC
 Nf8JASLIj68jrxnTn1lWGtShn8unNV9ZVauA8krsXJzvgjYNpaQSRhxOnltc+Zuy
 GFC0oipwgbzM8Y3lSPfF8rwBA85tmvF397oBM5c4uzZ+ULn7XWPJG+wIYtk7R9N7
 57rKAKyu+s3hHEUCyF7Z/HF7pHiL6vg4hQzhgIKqDMYkZmyHYV3iIAy5j5jvCkvD
 8zjBiV3iBC9PYzNYghVRVm5LjRwlXvqSpy88YwgkB1iD+5rZ3RQ=
 =PvGf
 -----END PGP SIGNATURE-----

Merge 5.10.199 into android12-5.10-lts

Changes in 5.10.199
        RDMA/srp: Make struct scsi_cmnd and struct srp_request adjacent
        RDMA/srp: Do not call scsi_done() from srp_abort()
        RDMA/cxgb4: Check skb value for failure to allocate
        perf/arm-cmn: Fix the unhandled overflow status of counter 4 to 7
        lib/test_meminit: fix off-by-one error in test_pages()
        HID: logitech-hidpp: Fix kernel crash on receiver USB disconnect
        quota: Fix slow quotaoff
        net: prevent address rewrite in kernel_bind()
        drm/msm/dp: do not reinitialize phy unless retry during link training
        drm/msm/dsi: skip the wait for video mode done if not applicable
        drm/msm/dpu: change _dpu_plane_calc_bw() to use u64 to avoid overflow
        ravb: Fix up dma_free_coherent() call in ravb_remove()
        ieee802154: ca8210: Fix a potential UAF in ca8210_probe
        mlxsw: fix mlxsw_sp2_nve_vxlan_learning_set() return type
        xen-netback: use default TX queue size for vifs
        riscv, bpf: Factor out emit_call for kernel and bpf context
        riscv, bpf: Sign-extend return values
        drm/vmwgfx: fix typo of sizeof argument
        net: macsec: indicate next pn update when offloading
        net: phy: mscc: macsec: reject PN update requests
        ixgbe: fix crash with empty VF macvlan list
        net: nfc: fix races in nfc_llcp_sock_get() and nfc_llcp_sock_get_sn()
        pinctrl: renesas: rzn1: Enable missing PINMUX
        nfc: nci: assert requested protocol is valid
        workqueue: Override implicit ordered attribute in workqueue_apply_unbound_cpumask()
        Revert "spi: zynqmp-gqspi: fix clock imbalance on probe failure"
        Revert "spi: spi-zynqmp-gqspi: Fix runtime PM imbalance in zynqmp_qspi_probe"
        net: add sysctl accept_ra_min_rtr_lft
        net: change accept_ra_min_rtr_lft to affect all RA lifetimes
        net: release reference to inet6_dev pointer
        media: mtk-jpeg: Fix use after free bug due to uncanceled work
        dmaengine: stm32-mdma: abort resume if no ongoing transfer
        usb: xhci: xhci-ring: Use sysdev for mapping bounce buffer
        net: usb: dm9601: fix uninitialized variable use in dm9601_mdio_read
        usb: dwc3: Soft reset phy on probe for host
        usb: musb: Get the musb_qh poniter after musb_giveback
        usb: musb: Modify the "HWVers" register address
        iio: pressure: bmp280: Fix NULL pointer exception
        iio: pressure: dps310: Adjust Timeout Settings
        iio: pressure: ms5611: ms5611_prom_is_valid false negative bug
        x86/cpu: Fix AMD erratum #1485 on Zen4-based CPUs
        mcb: remove is_added flag from mcb_device struct
        thunderbolt: Check that lane 1 is in CL0 before enabling lane bonding
        libceph: use kernel_connect()
        ceph: fix incorrect revoked caps assert in ceph_fill_file_size()
        ceph: fix type promotion bug on 32bit systems
        Input: powermate - fix use-after-free in powermate_config_complete
        Input: psmouse - fix fast_reconnect function for PS/2 mode
        Input: xpad - add PXN V900 support
        Input: i8042 - add Fujitsu Lifebook E5411 to i8042 quirk table
        Input: goodix - ensure int GPIO is in input for gpio_count == 1 && gpio_int_idx == 0 case
        tee: amdtee: fix use-after-free vulnerability in amdtee_close_session
        cgroup: Remove duplicates in cgroup v1 tasks file
        pinctrl: avoid unsafe code pattern in find_pinctrl()
        counter: microchip-tcb-capture: Fix the use of internal GCLK logic
        usb: gadget: udc-xilinx: replace memcpy with memcpy_toio
        usb: gadget: ncm: Handle decoding of multiple NTB's in unwrap call
        dmaengine: mediatek: Fix deadlock caused by synchronize_irq()
        powerpc/8xx: Fix pte_access_permitted() for PAGE_NONE
        powerpc/64e: Fix wrong test in __ptep_test_and_clear_young()
        x86/alternatives: Disable KASAN in apply_alternatives()
        arm64: report EL1 UNDEFs better
        arm64: die(): pass 'err' as long
        arm64: consistently pass ESR_ELx to die()
        arm64: rework FPAC exception handling
        arm64: rework BTI exception handling
        arm64: allow kprobes on EL0 handlers
        arm64: split EL0/EL1 UNDEF handlers
        arm64: factor out EL1 SSBS emulation hook
        arm64: factor insn read out of call_undef_hook()
        arm64: rework EL0 MRS emulation
        arm64: armv8_deprecated: fold ops into insn_emulation
        arm64: armv8_deprecated move emulation functions
        arm64: armv8_deprecated: move aarch32 helper earlier
        arm64: armv8_deprecated: rework deprected instruction handling
        arm64: armv8_deprecated: fix unused-function error
        RDMA/srp: Set scmnd->result only when scmnd is not NULL
        RDMA/srp: Fix srp_abort()
        ravb: Fix use-after-free issue in ravb_tx_timeout_work()
        dev_forward_skb: do not scrub skb mark within the same name space
        lib/Kconfig.debug: do not enable DEBUG_PREEMPT by default
        mm/memory_hotplug: rate limit page migration warnings
        Documentation: sysctl: align cells in second content column
        usb: hub: Guard against accesses to uninitialized BOS descriptors
        Bluetooth: hci_event: Ignore NULL link key
        Bluetooth: Reject connection with the device which has same BD_ADDR
        Bluetooth: Fix a refcnt underflow problem for hci_conn
        Bluetooth: vhci: Fix race when opening vhci device
        Bluetooth: hci_event: Fix coding style
        Bluetooth: avoid memcmp() out of bounds warning
        ice: fix over-shifted variable
        ice: reset first in crash dump kernels
        nfc: nci: fix possible NULL pointer dereference in send_acknowledge()
        regmap: fix NULL deref on lookup
        KVM: x86: Mask LVTPC when handling a PMI
        x86/sev: Disable MMIO emulation from user mode
        x86/sev: Check IOBM for IOIO exceptions from user-space
        x86/sev: Check for user-space IOIO pointing to kernel space
        tcp: check mptcp-level constraints for backlog coalescing
        netfilter: nft_payload: fix wrong mac header matching
        nvmet-tcp: Fix a possible UAF in queue intialization setup
        drm/i915: Retry gtt fault when out of fence registers
        qed: fix LL2 RX buffer allocation
        xfrm: fix a data-race in xfrm_gen_index()
        xfrm: interface: use DEV_STATS_INC()
        net: ipv4: fix return value check in esp_remove_trailer
        net: ipv6: fix return value check in esp_remove_trailer
        net: rfkill: gpio: prevent value glitch during probe
        tcp: fix excessive TLP and RACK timeouts from HZ rounding
        tcp: tsq: relax tcp_small_queue_check() when rtx queue contains a single skb
        tun: prevent negative ifindex
        ipv4: fib: annotate races around nh->nh_saddr_genid and nh->nh_saddr
        net: usb: smsc95xx: Fix an error code in smsc95xx_reset()
        i40e: prevent crash on probe if hw registers have invalid values
        net: dsa: bcm_sf2: Fix possible memory leak in bcm_sf2_mdio_register()
        net/sched: sch_hfsc: upgrade 'rt' to 'sc' when it becomes a inner curve
        neighbor: tracing: Move pin6 inside CONFIG_IPV6=y section
        netfilter: nft_set_rbtree: .deactivate fails if element has expired
        net: pktgen: Fix interface flags printing
        thunderbolt: Workaround an IOMMU fault on certain systems with Intel Maple Ridge
        resource: Add irqresource_disabled()
        ACPI: Drop acpi_dev_irqresource_disabled()
        ACPI: resources: Add DMI-based legacy IRQ override quirk
        ACPI: resource: Skip IRQ override on Asus Vivobook K3402ZA/K3502ZA
        ACPI: resource: Add ASUS model S5402ZA to quirks
        ACPI: resource: Skip IRQ override on Asus Vivobook S5602ZA
        ACPI: resource: Add Asus ExpertBook B2502 to Asus quirks
        ACPI: resource: Skip IRQ override on Asus Expertbook B2402CBA
        ACPI: resource: Skip IRQ override on ASUS ExpertBook B1502CBA
        ACPI: resource: Skip IRQ override on ASUS ExpertBook B1402CBA
        selftests/vm: make charge_reserved_hugetlb.sh work with existing cgroup setting
        selftests/mm: fix awk usage in charge_reserved_hugetlb.sh and hugetlb_reparenting_test.sh that may cause error
        usb: core: Track SuperSpeed Plus GenXxY
        xhci: cleanup xhci_hub_control port references
        xhci: move port specific items such as state completions to port structure
        xhci: rename resume_done to resume_timestamp
        xhci: clear usb2 resume related variables in one place.
        xhci: decouple usb2 port resume and get_port_status request handling
        xhci: track port suspend state correctly in unsuccessful resume cases
        serial: 8250: omap: Fix imprecise external abort for omap_8250_pm()
        serial: 8250_omap: Fix errors with no_console_suspend
        drm/amd/display: only check available pipe to disable vbios mode.
        drm/amd/display: Don't set dpms_off for seamless boot
        drm/connector: Give connector sysfs devices there own device_type
        drm/connector: Add a fwnode pointer to drm_connector and register with ACPI (v2)
        drm/connector: Add drm_connector_find_by_fwnode() function (v3)
        drm/connector: Add support for out-of-band hotplug notification (v3)
        usb: typec: altmodes/displayport: Notify drm subsys of hotplug events
        usb: typec: altmodes/displayport: Signal hpd low when exiting mode
        ARM: dts: ti: omap: Fix noisy serial with overrun-throttle-ms for mapphone
        btrfs: return -EUCLEAN for delayed tree ref with a ref count not equals to 1
        btrfs: initialize start_slot in btrfs_log_prealloc_extents
        i2c: mux: Avoid potential false error message in i2c_mux_add_adapter
        overlayfs: set ctime when setting mtime and atime
        gpio: timberdale: Fix potential deadlock on &tgpio->lock
        ata: libata-eh: Fix compilation warning in ata_eh_link_report()
        tracing: relax trace_event_eval_update() execution with cond_resched()
        HID: holtek: fix slab-out-of-bounds Write in holtek_kbd_input_event
        Bluetooth: Avoid redundant authentication
        Bluetooth: hci_core: Fix build warnings
        wifi: cfg80211: Fix 6GHz scan configuration
        wifi: mac80211: allow transmitting EAPOL frames with tainted key
        wifi: cfg80211: avoid leaking stack data into trace
        regulator/core: Revert "fix kobject release warning and memory leak in regulator_register()"
        sky2: Make sure there is at least one frag_addr available
        ipv4/fib: send notify when delete source address routes
        drm: panel-orientation-quirks: Add quirk for One Mix 2S
        btrfs: fix some -Wmaybe-uninitialized warnings in ioctl.c
        HID: multitouch: Add required quirk for Synaptics 0xcd7e device
        platform/x86: touchscreen_dmi: Add info for the Positivo C4128B
        net/mlx5: Handle fw tracer change ownership event based on MTRC
        Bluetooth: hci_event: Fix using memcmp when comparing keys
        mtd: rawnand: qcom: Unmap the right resource upon probe failure
        mtd: rawnand: marvell: Ensure program page operations are successful
        mtd: rawnand: arasan: Ensure program page operations are successful
        mtd: spinand: micron: correct bitmask for ecc status
        mtd: physmap-core: Restore map_rom fallback
        mmc: core: sdio: hold retuning if sdio in 1-bit mode
        mmc: core: Capture correct oemid-bits for eMMC cards
        Revert "pinctrl: avoid unsafe code pattern in find_pinctrl()"
        pNFS: Fix a hang in nfs4_evict_inode()
        ACPI: irq: Fix incorrect return value in acpi_register_gsi()
        nvme-pci: add BOGUS_NID for Intel 0a54 device
        nvme-rdma: do not try to stop unallocated queues
        USB: serial: option: add Telit LE910C4-WWX 0x1035 composition
        USB: serial: option: add entry for Sierra EM9191 with new firmware
        USB: serial: option: add Fibocom to DELL custom modem FM101R-GL
        perf: Disallow mis-matched inherited group reads
        s390/pci: fix iommu bitmap allocation
        platform/x86: asus-wmi: Change ASUS_WMI_BRN_DOWN code from 0x20 to 0x2e
        platform/x86: asus-wmi: Map 0x2a code, Ignore 0x2b and 0x2c events
        gpio: vf610: set value before the direction to avoid a glitch
        ASoC: pxa: fix a memory leak in probe()
        phy: mapphone-mdm6600: Fix runtime disable on probe
        phy: mapphone-mdm6600: Fix runtime PM for remove
        phy: mapphone-mdm6600: Fix pinctrl_pm handling for sleep pins
        Bluetooth: hci_sock: fix slab oob read in create_monitor_event
        Bluetooth: hci_sock: Correctly bounds check and pad HCI_MON_NEW_INDEX name
        xfrm6: fix inet6_dev refcount underflow problem
        Linux 5.10.199

NOTE, this reverts the following commits in order to apply things
cleanly and avoid ABI breakage.  Due to the complexity involved,
individual reverts would not work properly:
        fc778e9d79 xhci: track port suspend state correctly in unsuccessful resume cases
        1c034c6e22 xhci: decouple usb2 port resume and get_port_status request handling
        92088dd886 xhci: clear usb2 resume related variables in one place.
        e7abc4b18d xhci: rename resume_done to resume_timestamp
        d44c9285ce xhci: move port specific items such as state completions to port structure
        e2b4de13e5 xhci: cleanup xhci_hub_control port references
        489818719a arm64: armv8_deprecated: fix unused-function error
        da7603cedb arm64: armv8_deprecated: rework deprected instruction handling
        45a26d2a53 arm64: armv8_deprecated: move aarch32 helper earlier
        0b6a7a9f6d arm64: armv8_deprecated move emulation functions
        2202536144 arm64: armv8_deprecated: fold ops into insn_emulation
        5aa232345e arm64: rework EL0 MRS emulation
        15e964971f arm64: factor insn read out of call_undef_hook()
        0edde7fd1c arm64: factor out EL1 SSBS emulation hook
        7a76df1ae1 arm64: split EL0/EL1 UNDEF handlers
        8a8d4cc303 arm64: allow kprobes on EL0 handlers
        793ed958b6 arm64: rework BTI exception handling
        9113333d7c arm64: rework FPAC exception handling
        a8d7c8484f arm64: consistently pass ESR_ELx to die()
        004bdab6ed arm64: die(): pass 'err' as long
        835cb1f78d arm64: report EL1 UNDEFs better

Change-Id: I54f6d79ae4886b808d6e3c017343f1f25c5254c3
Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
2023-10-27 11:58:41 +00:00

1013 lines
23 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/*
*
* Copyright (C) 2011 Novell Inc.
*/
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/splice.h>
#include <linux/xattr.h>
#include <linux/security.h>
#include <linux/uaccess.h>
#include <linux/sched/signal.h>
#include <linux/cred.h>
#include <linux/namei.h>
#include <linux/fdtable.h>
#include <linux/ratelimit.h>
#include <linux/exportfs.h>
#include "overlayfs.h"
/*
 * Upper bound, in bytes (1 << 20 = 1 MiB), on the amount of data spliced
 * per iteration of the copy loop in ovl_copy_up_data().
 */
#define OVL_COPY_UP_CHUNK_SIZE (1 << 20)
/*
 * Setter for the obsolete "check_copy_up" module parameter.
 *
 * @buf:   value written by the user (ignored)
 * @param: parameter descriptor (ignored)
 *
 * Logs a warning that the option is obsolete and reports success so that
 * a write to the parameter never fails.
 *
 * Return: always 0.
 */
static int ovl_ccup_set(const char *buf, const struct kernel_param *param)
{
	const int ret = 0;

	pr_warn("\"check_copy_up\" module option is obsolete\n");

	return ret;
}
/*
 * Getter for the obsolete "check_copy_up" module parameter.
 *
 * @buf:   output buffer that receives the textual value
 * @param: parameter descriptor (ignored)
 *
 * The option no longer has any effect, so the reported value is always
 * "N" followed by a newline.
 *
 * Return: number of characters written to @buf.
 */
static int ovl_ccup_get(char *buf, const struct kernel_param *param)
{
	int written;

	written = sprintf(buf, "N\n");

	return written;
}
/*
 * Register the obsolete "check_copy_up" parameter with mode 0644, wired to
 * ovl_ccup_set()/ovl_ccup_get() and with no backing argument (NULL).
 */
module_param_call(check_copy_up, ovl_ccup_set, ovl_ccup_get, NULL, 0644);
MODULE_PARM_DESC(check_copy_up, "Obsolete; does nothing");
static bool ovl_must_copy_xattr(const char *name)
{
return !strcmp(name, XATTR_POSIX_ACL_ACCESS) ||
!strcmp(name, XATTR_POSIX_ACL_DEFAULT) ||
!strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN);
}
int ovl_copy_xattr(struct super_block *sb, struct dentry *old,
struct dentry *new)
{
ssize_t list_size, size, value_size = 0;
char *buf, *name, *value = NULL;
int error = 0;
size_t slen;
if (!(old->d_inode->i_opflags & IOP_XATTR) ||
!(new->d_inode->i_opflags & IOP_XATTR))
return 0;
list_size = vfs_listxattr(old, NULL, 0);
if (list_size <= 0) {
if (list_size == -EOPNOTSUPP)
return 0;
return list_size;
}
buf = kzalloc(list_size, GFP_KERNEL);
if (!buf)
return -ENOMEM;
list_size = vfs_listxattr(old, buf, list_size);
if (list_size <= 0) {
error = list_size;
goto out;
}
for (name = buf; list_size; name += slen) {
slen = strnlen(name, list_size) + 1;
/* underlying fs providing us with an broken xattr list? */
if (WARN_ON(slen > list_size)) {
error = -EIO;
break;
}
list_size -= slen;
if (ovl_is_private_xattr(sb, name))
continue;
error = security_inode_copy_up_xattr(name);
if (error < 0 && error != -EOPNOTSUPP)
break;
if (error == 1) {
error = 0;
continue; /* Discard */
}
retry:
size = vfs_getxattr(old, name, value, value_size);
if (size == -ERANGE)
size = vfs_getxattr(old, name, NULL, 0);
if (size < 0) {
error = size;
break;
}
if (size > value_size) {
void *new;
new = krealloc(value, size, GFP_KERNEL);
if (!new) {
error = -ENOMEM;
break;
}
value = new;
value_size = size;
goto retry;
}
error = vfs_setxattr(new, name, value, size, 0);
if (error) {
if (error != -EOPNOTSUPP || ovl_must_copy_xattr(name))
break;
/* Ignore failure to copy unknown xattrs */
error = 0;
}
}
kfree(value);
out:
kfree(buf);
return error;
}
/*
 * Copy @len bytes of file data from @old to @new.
 *
 * @ofs: overlay fs instance; consulted via ovl_should_sync() to decide
 *       whether the new file is fsync'ed after a successful copy
 * @old: path of the file to read from
 * @new: path of the file to write to
 * @len: number of bytes to copy; 0 returns immediately
 *
 * A clone of the whole range is attempted first.  If it does not cover
 * exactly @len bytes, the data is spliced in chunks of at most
 * OVL_COPY_UP_CHUNK_SIZE, skipping holes with SEEK_DATA when the source
 * file supports seeking.
 *
 * Return: 0 on success, -EINTR when a fatal signal is pending, or another
 * negative errno from opening, splicing or syncing.
 */
static int ovl_copy_up_data(struct ovl_fs *ofs, struct path *old,
struct path *new, loff_t len)
{
struct file *old_file;
struct file *new_file;
loff_t old_pos = 0;
loff_t new_pos = 0;
loff_t cloned;
/* -1 is below any valid position, so the first loop pass always seeks */
loff_t data_pos = -1;
loff_t hole_len;
bool skip_hole = false;
int error = 0;
if (len == 0)
return 0;
old_file = ovl_path_open(old, O_LARGEFILE | O_RDONLY);
if (IS_ERR(old_file))
return PTR_ERR(old_file);
new_file = ovl_path_open(new, O_LARGEFILE | O_WRONLY);
if (IS_ERR(new_file)) {
error = PTR_ERR(new_file);
goto out_fput;
}
/* Try to use clone_file_range to clone up within the same fs */
cloned = do_clone_file_range(old_file, 0, new_file, 0, len, 0);
/* Any other result (error or short clone) falls through to the copy */
if (cloned == len)
goto out;
/* Couldn't clone, so now we try to copy the data */
/* Check if lower fs supports seek operation */
if (old_file->f_mode & FMODE_LSEEK &&
old_file->f_op->llseek)
skip_hole = true;
while (len) {
size_t this_len = OVL_COPY_UP_CHUNK_SIZE;
long bytes;
/* The last chunk may be shorter than the chunk size */
if (len < this_len)
this_len = len;
if (signal_pending_state(TASK_KILLABLE, current)) {
error = -EINTR;
break;
}
/*
* Fill zero for hole will cost unnecessary disk space
* and meanwhile slow down the copy-up speed, so we do
* an optimization for hole during copy-up, it relies
* on SEEK_DATA implementation in lower fs so if lower
* fs does not support it, copy-up will behave as before.
*
* Detail logic of hole detection as below:
* When we detect next data position is larger than current
* position we will skip that hole, otherwise we copy
* data in the size of OVL_COPY_UP_CHUNK_SIZE. Actually,
* it may not recognize all kind of holes and sometimes
* only skips partial of hole area. However, it will be
* enough for most of the use cases.
*/
if (skip_hole && data_pos < old_pos) {
data_pos = vfs_llseek(old_file, old_pos, SEEK_DATA);
if (data_pos > old_pos) {
/* Skip the hole: advance both positions without writing */
hole_len = data_pos - old_pos;
len -= hole_len;
old_pos = new_pos = data_pos;
continue;
} else if (data_pos == -ENXIO) {
/* Loop ends here with error still 0 */
break;
} else if (data_pos < 0) {
/* Any other seek failure: stop probing and copy plainly */
skip_hole = false;
}
}
bytes = do_splice_direct(old_file, &old_pos,
new_file, &new_pos,
this_len, SPLICE_F_MOVE);
/* A return of 0 also ends the loop, leaving error == 0 */
if (bytes <= 0) {
error = bytes;
break;
}
WARN_ON(old_pos != new_pos);
len -= bytes;
}
out:
if (!error && ovl_should_sync(ofs))
error = vfs_fsync(new_file, 0);
fput(new_file);
out_fput:
fput(old_file);
return error;
}
static int ovl_set_size(struct dentry *upperdentry, struct kstat *stat)
{
struct iattr attr = {
.ia_valid = ATTR_SIZE,
.ia_size = stat->size,
};
return notify_change(upperdentry, &attr, NULL);
}
static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat)
{
struct iattr attr = {
.ia_valid =
ATTR_ATIME | ATTR_MTIME | ATTR_ATIME_SET | ATTR_MTIME_SET | ATTR_CTIME,
.ia_atime = stat->atime,
.ia_mtime = stat->mtime,
};
return notify_change(upperdentry, &attr, NULL);
}
int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
{
int err = 0;
if (!S_ISLNK(stat->mode)) {
struct iattr attr = {
.ia_valid = ATTR_MODE,
.ia_mode = stat->mode,
};
err = notify_change(upperdentry, &attr, NULL);
}
if (!err) {
struct iattr attr = {
.ia_valid = ATTR_UID | ATTR_GID,
.ia_uid = stat->uid,
.ia_gid = stat->gid,
};
err = notify_change(upperdentry, &attr, NULL);
}
if (!err)
ovl_set_timestamps(upperdentry, stat);
return err;
}
/*
 * Encode an overlay file handle for the real dentry @real.
 *
 * @real:     dentry on the underlying filesystem to encode
 * @is_upper: set OVL_FH_FLAG_PATH_UPPER in the handle flags
 *
 * Return: a kzalloc'ed handle that the caller must kfree(), or
 * ERR_PTR(-ENOMEM) / ERR_PTR(-EIO) on failure.
 */
struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool is_upper)
{
	uuid_t *sb_uuid = &real->d_sb->s_uuid;
	int fid_bytes = MAX_HANDLE_SZ;
	int fid_dwords, type;
	struct ovl_fh *handle;

	/* Make sure the real fid stays 32bit aligned */
	BUILD_BUG_ON(OVL_FH_FID_OFFSET % 4);
	BUILD_BUG_ON(MAX_HANDLE_SZ + OVL_FH_FID_OFFSET > 255);

	handle = kzalloc(fid_bytes + OVL_FH_FID_OFFSET, GFP_KERNEL);
	if (!handle)
		return ERR_PTR(-ENOMEM);

	/*
	 * A non-connectable file handle is encoded for non-dir: only the
	 * lower inode number is needed, and we do not want to pay the price
	 * of reconnecting the dentry.
	 */
	fid_dwords = fid_bytes >> 2;
	type = exportfs_encode_fh(real, (void *)handle->fb.fid, &fid_dwords, 0);
	fid_bytes = (fid_dwords << 2);

	if (WARN_ON(type < 0) ||
	    WARN_ON(fid_bytes > MAX_HANDLE_SZ) ||
	    WARN_ON(type == FILEID_INVALID)) {
		kfree(handle);
		return ERR_PTR(-EIO);
	}

	handle->fb.version = OVL_FH_VERSION;
	handle->fb.magic = OVL_FH_MAGIC;
	handle->fb.type = type;
	handle->fb.flags = OVL_FH_FLAG_CPU_ENDIAN;

	/*
	 * When an overlay dentry is later decoded from this handle and all
	 * layers are on the same fs, a disconnected real dentry can only be
	 * assigned to upperdentry or to lowerstack by checking this flag.
	 */
	if (is_upper)
		handle->fb.flags |= OVL_FH_FLAG_PATH_UPPER;

	handle->fb.len = sizeof(handle->fb) + fid_bytes;
	handle->fb.uuid = *sb_uuid;

	return handle;
}
/*
 * Store the copy up origin of @upper in its OVL_XATTR_ORIGIN xattr.
 *
 * @dentry: overlay dentry being copied up
 * @lower:  lower dentry that is the origin of the copy up
 * @upper:  upper dentry that receives the xattr
 *
 * Return: 0 on success or when ovl_check_setxattr() tolerates the failure,
 * otherwise a negative errno (including a failure to encode the handle).
 */
int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
		   struct dentry *upper)
{
	const struct ovl_fh *fh = NULL;
	int err;

	/*
	 * When lower layer doesn't support export operations store a 'null' fh,
	 * so we can use the overlay.origin xattr to distinguish between a copy
	 * up and a pure upper inode.
	 */
	if (ovl_can_decode_fh(lower->d_sb)) {
		fh = ovl_encode_real_fh(lower, false);
		if (IS_ERR(fh))
			return PTR_ERR(fh);
	}

	/*
	 * Do not fail when upper doesn't support xattrs.
	 *
	 * For the 'null' fh case pass an explicit NULL value with length 0
	 * instead of forming &fh->buf from a NULL pointer.
	 */
	err = ovl_check_setxattr(dentry, upper, OVL_XATTR_ORIGIN,
				 fh ? fh->buf : NULL,
				 fh ? fh->fb.len : 0, 0);
	kfree(fh);

	return err;
}
/*
 * Store file handle of @upper dir in @index dir entry.
 *
 * The handle is encoded with the "upper" flag set and written to the
 * OVL_XATTR_UPPER xattr of @index.
 *
 * Return: 0 on success, otherwise a negative errno from encoding the handle
 * or from setting the xattr.
 */
static int ovl_set_upper_fh(struct ovl_fs *ofs, struct dentry *upper,
			    struct dentry *index)
{
	const struct ovl_fh *upper_fh = ovl_encode_real_fh(upper, true);
	int ret;

	if (IS_ERR(upper_fh))
		return PTR_ERR(upper_fh);

	ret = ovl_do_setxattr(ofs, index, OVL_XATTR_UPPER, upper_fh->buf,
			      upper_fh->fb.len);
	kfree(upper_fh);

	return ret;
}
/*
 * Create and install the index entry for a directory being copied up.
 *
 * @dentry: overlay dentry being copied up; must be a directory that does
 *          not yet have the OVL_INDEX flag
 * @origin: dentry from which the index name is derived
 * @upper:  upper dentry whose file handle is stored in the index entry
 *
 * Caller must hold i_mutex on indexdir.
 *
 * Return: 0 on success, otherwise a negative errno.
 */
static int ovl_create_index(struct dentry *dentry, struct dentry *origin,
			    struct dentry *upper)
{
	struct dentry *indexdir = ovl_indexdir(dentry->d_sb);
	struct inode *index_inode = d_inode(indexdir);
	struct dentry *entry = NULL;
	struct dentry *tmp = NULL;
	struct qstr name = { };
	int err;

	/*
	 * For now this is only used for creating index entry for directories,
	 * because non-dir are copied up directly to index and then hardlinked
	 * to upper dir.
	 *
	 * TODO: implement create index for non-dir, so we can call it when
	 * encoding file handle for non-dir in case index does not exist.
	 */
	if (WARN_ON(!d_is_dir(dentry)))
		return -EIO;

	/* Directory not expected to be indexed before copy up */
	if (WARN_ON(ovl_test_flag(OVL_INDEX, d_inode(dentry))))
		return -EIO;

	err = ovl_get_index_name(origin, &name);
	if (err)
		return err;

	/* Build the entry under a temporary name first */
	tmp = ovl_create_temp(indexdir, OVL_CATTR(S_IFDIR | 0));
	if (IS_ERR(tmp)) {
		err = PTR_ERR(tmp);
		goto free_name;
	}

	err = ovl_set_upper_fh(OVL_FS(dentry->d_sb), upper, tmp);
	if (err)
		goto out;

	entry = lookup_one_len(name.name, indexdir, name.len);
	if (IS_ERR(entry)) {
		err = PTR_ERR(entry);
		goto out;
	}

	/* Move the temporary entry to its final index name */
	err = ovl_do_rename(index_inode, tmp, index_inode, entry, 0);
	dput(entry);
out:
	if (err)
		ovl_cleanup(index_inode, tmp);
	dput(tmp);
free_name:
	kfree(name.name);
	return err;
}
/* State shared by the helpers that perform one copy up operation. */
struct ovl_copy_up_ctx {
/* Overlay dentry of the parent; may be NULL (see the WARN_ON in ovl_do_copy_up()) */
struct dentry *parent;
/* Overlay dentry being copied up */
struct dentry *dentry;
/* Lower path; its dentry is the xattr source and the origin/index-name source */
struct path lowerpath;
/* Attributes applied to the copy (mode, size, rdev, nlink, owner, times) */
struct kstat stat;
/* Parent attributes, used to restore the parent's timestamps afterwards */
struct kstat pstat;
/* Passed as the .link creation attribute; presumably the symlink target - confirm against caller */
const char *link;
/* Directory in which the finished copy is installed */
struct dentry *destdir;
/* Name under which the finished copy is installed in destdir */
struct qstr destname;
/* Directory in which the temporary copy is created */
struct dentry *workdir;
/* Whether to store the origin xattr on the copy */
bool origin;
/* Set when ovl_need_index() is true for the dentry */
bool indexed;
/* Metadata-only copy up: data is not copied and the metacopy xattr is set */
bool metacopy;
};
/*
 * Hardlink the already existing upper inode of c->dentry into the upper
 * directory of c->parent under the dentry's own name.
 *
 * The parent is first marked impure and the lower nlink is recorded.  The
 * link itself is created under the upper directory's inode lock.
 *
 * Return: 0 on success, otherwise a negative errno.
 */
static int ovl_link_up(struct ovl_copy_up_ctx *c)
{
	int err;
	struct dentry *upper;
	struct dentry *upperdir = ovl_dentry_upper(c->parent);
	struct inode *udir = d_inode(upperdir);

	/* Mark parent "impure" because it may now contain non-pure upper */
	err = ovl_set_impure(c->parent, upperdir);
	if (err)
		return err;

	err = ovl_set_nlink_lower(c->dentry);
	if (err)
		return err;

	inode_lock_nested(udir, I_MUTEX_PARENT);
	upper = lookup_one_len(c->dentry->d_name.name, upperdir,
			       c->dentry->d_name.len);
	err = PTR_ERR(upper);
	if (!IS_ERR(upper)) {
		err = ovl_do_link(ovl_dentry_upper(c->dentry), udir, upper);
		if (!err) {
			/* Restore timestamps on parent (best effort) */
			ovl_set_timestamps(upperdir, &c->pstat);
			ovl_dentry_set_upper_alias(c->dentry);
			ovl_dentry_update_reval(c->dentry, upper);
		}
		/*
		 * Drop our lookup reference only after the last use of
		 * @upper, so the dentry cannot go away underneath
		 * ovl_dentry_update_reval().
		 */
		dput(upper);
	}
	inode_unlock(udir);
	if (err)
		return err;

	err = ovl_set_nlink_upper(c->dentry);

	return err;
}
static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp)
{
struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb);
int err;
/*
* Copy up data first and then xattrs. Writing data after
* xattrs will remove security.capability xattr automatically.
*/
if (S_ISREG(c->stat.mode) && !c->metacopy) {
struct path upperpath, datapath;
ovl_path_upper(c->dentry, &upperpath);
if (WARN_ON(upperpath.dentry != NULL))
return -EIO;
upperpath.dentry = temp;
ovl_path_lowerdata(c->dentry, &datapath);
err = ovl_copy_up_data(ofs, &datapath, &upperpath,
c->stat.size);
if (err)
return err;
}
err = ovl_copy_xattr(c->dentry->d_sb, c->lowerpath.dentry, temp);
if (err)
return err;
/*
* Store identifier of lower inode in upper inode xattr to
* allow lookup of the copy up origin inode.
*
* Don't set origin when we are breaking the association with a lower
* hard link.
*/
if (c->origin) {
err = ovl_set_origin(c->dentry, c->lowerpath.dentry, temp);
if (err)
return err;
}
if (c->metacopy) {
err = ovl_check_setxattr(c->dentry, temp, OVL_XATTR_METACOPY,
NULL, 0, -EOPNOTSUPP);
if (err)
return err;
}
inode_lock(temp->d_inode);
if (S_ISREG(c->stat.mode))
err = ovl_set_size(temp, &c->stat);
if (!err)
err = ovl_set_attr(temp, &c->stat);
inode_unlock(temp->d_inode);
return err;
}
/* Credential pair used to temporarily override creds around a copy up step. */
struct ovl_cu_creds {
/* Credentials returned by override_creds(); restored by ovl_revert_cu_creds() */
const struct cred *old;
/* Credentials filled in by security_inode_copy_up(); NULL when none were provided */
struct cred *new;
};
/*
 * Ask the security layer for copy up credentials and, if it supplies any,
 * install them as the current override.
 *
 * @dentry: overlay dentry being copied up
 * @cc:     receives the new credentials and the ones they replaced
 *
 * Return: 0 on success, or the negative value returned by
 * security_inode_copy_up().
 */
static int ovl_prep_cu_creds(struct dentry *dentry, struct ovl_cu_creds *cc)
{
	int ret;

	cc->new = NULL;
	cc->old = NULL;

	ret = security_inode_copy_up(dentry, &cc->new);
	if (ret < 0)
		return ret;

	if (cc->new)
		cc->old = override_creds(cc->new);

	return 0;
}
/*
 * Undo ovl_prep_cu_creds(): restore the saved credentials and drop the
 * reference on the override.  Does nothing when no override was installed.
 */
static void ovl_revert_cu_creds(struct ovl_cu_creds *cc)
{
	if (!cc->new)
		return;

	revert_creds(cc->old);
	put_cred(cc->new);
}
/*
 * Copyup using workdir to prepare temp file. Used when copying up directories,
 * special files or when upper fs doesn't support O_TMPFILE.
 *
 * The temp object is created in c->workdir, populated by
 * ovl_copy_up_inode(), optionally indexed (indexed directories), and then
 * renamed to c->destname in c->destdir.  Both directories stay locked via
 * lock_rename() for the whole sequence.
 *
 * Return: 0 on success, otherwise a negative errno.  On failure after the
 * temp object exists, it is removed with ovl_cleanup().
 */
static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c)
{
struct inode *inode;
struct inode *udir = d_inode(c->destdir), *wdir = d_inode(c->workdir);
struct dentry *temp, *upper;
struct ovl_cu_creds cc;
int err;
struct ovl_cattr cattr = {
/* Can't properly set mode on creation because of the umask */
.mode = c->stat.mode & S_IFMT,
.rdev = c->stat.rdev,
.link = c->link
};
/* workdir and destdir could be the same when copying up to indexdir */
err = -EIO;
/* A non-NULL return from lock_rename() is treated as an error here */
if (lock_rename(c->workdir, c->destdir) != NULL)
goto unlock;
err = ovl_prep_cu_creds(c->dentry, &cc);
if (err)
goto unlock;
/* The credential override covers only the creation of the temp object */
temp = ovl_create_temp(c->workdir, &cattr);
ovl_revert_cu_creds(&cc);
err = PTR_ERR(temp);
if (IS_ERR(temp))
goto unlock;
err = ovl_copy_up_inode(c, temp);
if (err)
goto cleanup;
if (S_ISDIR(c->stat.mode) && c->indexed) {
err = ovl_create_index(c->dentry, c->lowerpath.dentry, temp);
if (err)
goto cleanup;
}
upper = lookup_one_len(c->destname.name, c->destdir, c->destname.len);
err = PTR_ERR(upper);
if (IS_ERR(upper))
goto cleanup;
/* Install the finished copy under its final name */
err = ovl_do_rename(wdir, temp, udir, upper, 0);
dput(upper);
if (err)
goto cleanup;
if (!c->metacopy)
ovl_set_upperdata(d_inode(c->dentry));
inode = d_inode(c->dentry);
/* temp is handed to ovl_inode_update() and is not dput() on this path */
ovl_inode_update(inode, temp);
if (S_ISDIR(inode->i_mode))
ovl_set_flag(OVL_WHITEOUTS, inode);
unlock:
unlock_rename(c->workdir, c->destdir);
return err;
cleanup:
/* Error after temp was created: remove it and drop our reference */
ovl_cleanup(wdir, temp);
dput(temp);
goto unlock;
}
/*
 * Copyup using O_TMPFILE which does not require cross dir locking.
 *
 * A tmpfile is created in c->workdir, populated by ovl_copy_up_inode() and
 * then linked as c->destname into c->destdir under that directory's inode
 * lock.
 *
 * Return: 0 on success, otherwise a negative errno; on failure the
 * reference on the tmpfile dentry is dropped.
 */
static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c)
{
	struct inode *dest_inode = d_inode(c->destdir);
	struct dentry *tmp, *target;
	struct ovl_cu_creds cc;
	int err;

	err = ovl_prep_cu_creds(c->dentry, &cc);
	if (err)
		return err;

	/* Only the tmpfile creation runs with the overridden creds */
	tmp = ovl_do_tmpfile(c->workdir, c->stat.mode);
	ovl_revert_cu_creds(&cc);

	if (IS_ERR(tmp))
		return PTR_ERR(tmp);

	err = ovl_copy_up_inode(c, tmp);
	if (err)
		goto out_dput;

	inode_lock_nested(dest_inode, I_MUTEX_PARENT);

	target = lookup_one_len(c->destname.name, c->destdir, c->destname.len);
	if (IS_ERR(target)) {
		err = PTR_ERR(target);
	} else {
		err = ovl_do_link(tmp, dest_inode, target);
		dput(target);
	}

	inode_unlock(dest_inode);

	if (err)
		goto out_dput;

	if (!c->metacopy)
		ovl_set_upperdata(d_inode(c->dentry));
	ovl_inode_update(d_inode(c->dentry), tmp);

	return 0;

out_dput:
	dput(tmp);
	return err;
}
/*
 * Copy up a single dentry
 *
 * All renames start with copy up of source if necessary. The actual
 * rename will only proceed once the copy up was successful. Copy up uses
 * upper parent i_mutex for exclusion. Since rename can change d_parent it
 * is possible that the copy up will lock the old parent. At that point
 * the file will have already been copied up anyway.
 *
 * This function fills in c->indexed, c->origin and, for copies that go to
 * the index directory, c->workdir / c->destdir / c->destname, then
 * dispatches to the tmpfile or workdir variant.
 *
 * Return: 0 on success, otherwise a negative errno.
 */
static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
{
int err;
struct ovl_fs *ofs = c->dentry->d_sb->s_fs_info;
/* True when a non-dir is copied straight into the index directory */
bool to_index = false;
/*
 * Indexed non-dir is copied up directly to the index entry and then
 * hardlinked to upper dir. Indexed dir is copied up to indexdir,
 * then index entry is created and then copied up dir installed.
 * Copying dir up to indexdir instead of workdir simplifies locking.
 */
if (ovl_need_index(c->dentry)) {
c->indexed = true;
if (S_ISDIR(c->stat.mode))
c->workdir = ovl_indexdir(c->dentry->d_sb);
else
to_index = true;
}
/* Origin is recorded for dirs, single-link files and anything indexed */
if (S_ISDIR(c->stat.mode) || c->stat.nlink == 1 || to_index)
c->origin = true;
if (to_index) {
c->destdir = ovl_indexdir(c->dentry->d_sb);
/* destname is allocated here and freed at the out label below */
err = ovl_get_index_name(c->lowerpath.dentry, &c->destname);
if (err)
return err;
} else if (WARN_ON(!c->parent)) {
/* Disconnected dentry must be copied up to index dir */
return -EIO;
} else {
/*
 * Mark parent "impure" because it may now contain non-pure
 * upper
 */
err = ovl_set_impure(c->parent, c->destdir);
if (err)
return err;
}
/* Should we copyup with O_TMPFILE or with workdir? */
if (S_ISREG(c->stat.mode) && ofs->tmpfile)
err = ovl_copy_up_tmpfile(c);
else
err = ovl_copy_up_workdir(c);
if (err)
goto out;
if (c->indexed)
ovl_set_flag(OVL_INDEX, d_inode(c->dentry));
if (to_index) {
/* Initialize nlink for copy up of disconnected dentry */
err = ovl_set_nlink_upper(c->dentry);
} else {
struct inode *udir = d_inode(c->destdir);
/* Restore timestamps on parent (best effort) */
inode_lock(udir);
ovl_set_timestamps(c->destdir, &c->pstat);
inode_unlock(udir);
ovl_dentry_set_upper_alias(c->dentry);
ovl_dentry_update_reval(c->dentry, ovl_dentry_upper(c->dentry));
}
out:
/* Only the to_index path allocated destname in this function */
if (to_index)
kfree(c->destname.name);
return err;
}
/*
 * Decide whether a metadata-only copy up is sufficient for @dentry.
 *
 * @dentry: overlay dentry being copied up (used to reach the fs config)
 * @mode:   file mode of the object being copied up
 * @flags:  open flags that triggered the copy up, or 0
 *
 * Returns true only when the metacopy config option is set, @mode is a
 * regular file, and @flags neither opens for write nor carries O_TRUNC.
 */
static bool ovl_need_meta_copy_up(struct dentry *dentry, umode_t mode,
				  int flags)
{
	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
	bool wants_data;

	/* Feature must be enabled and it only applies to regular files */
	if (!ofs->config.metacopy || !S_ISREG(mode))
		return false;

	/* Zero flags never ask for data; otherwise write or truncate does */
	wants_data = flags &&
		     ((OPEN_FMODE(flags) & FMODE_WRITE) || (flags & O_TRUNC));

	return !wants_data;
}
/*
 * Read xattr @name of @dentry into a freshly allocated buffer.
 *
 * @dentry: dentry to read the xattr from
 * @name:   xattr name
 * @value:  on a non-negative return from the second read, set to the
 *          kzalloc()ed buffer, which the caller must kfree(); otherwise
 *          left untouched
 *
 * Returns the value length as reported by vfs_getxattr(), 0 when the
 * xattr is absent (-ENODATA) or unsupported (-EOPNOTSUPP) or empty,
 * -ENOMEM on allocation failure, or another negative error from
 * vfs_getxattr().
 */
static ssize_t ovl_getxattr(struct dentry *dentry, char *name, char **value)
{
	char *data;
	ssize_t len;

	/* First pass: query the size only */
	len = vfs_getxattr(dentry, name, NULL, 0);
	if (len == -ENODATA || len == -EOPNOTSUPP)
		return 0;
	if (len <= 0)
		return len;

	data = kzalloc(len, GFP_KERNEL);
	if (!data)
		return -ENOMEM;

	/* Second pass: fetch the value itself */
	len = vfs_getxattr(dentry, name, data, len);
	if (len < 0) {
		kfree(data);
		return len;
	}

	*value = data;
	return len;
}
/* Copy up data of an inode which was copied up metadata only in the past. */
static int ovl_copy_up_meta_inode_data(struct ovl_copy_up_ctx *c)
{
struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb);
struct path upperpath, datapath;
int err;
char *capability = NULL;
ssize_t cap_size;
ovl_path_upper(c->dentry, &upperpath);
if (WARN_ON(upperpath.dentry == NULL))
return -EIO;
ovl_path_lowerdata(c->dentry, &datapath);
if (WARN_ON(datapath.dentry == NULL))
return -EIO;
if (c->stat.size) {
err = cap_size = ovl_getxattr(upperpath.dentry, XATTR_NAME_CAPS,
&capability);
if (cap_size < 0)
goto out;
}
err = ovl_copy_up_data(ofs, &datapath, &upperpath, c->stat.size);
if (err)
goto out_free;
/*
* Writing to upper file will clear security.capability xattr. We
* don't want that to happen for normal copy-up operation.
*/
if (capability) {
err = vfs_setxattr(upperpath.dentry, XATTR_NAME_CAPS,
capability, cap_size, 0);
if (err)
goto out_free;
}
err = ovl_do_removexattr(ofs, upperpath.dentry, OVL_XATTR_METACOPY);
if (err)
goto out_free;
ovl_set_upperdata(d_inode(c->dentry));
out_free:
kfree(capability);
out:
return err;
}
/*
* Copy up @dentry, whose parent (if any) is already copied up.
*
* @parent: overlay parent of @dentry, or NULL (the caller in this file
*          passes NULL for a disconnected dentry)
* @dentry: overlay dentry to copy up
* @flags:  open flags that triggered the copy up, or 0
*
* Returns 0 on success or when another copy up of the same dentry won the
* race, -EROFS (with a warning) when there is no workdir, or the negative
* error of the step that failed.
*/
static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
int flags)
{
int err;
/* Releases the symlink body obtained by vfs_get_link() below */
DEFINE_DELAYED_CALL(done);
struct path parentpath;
struct ovl_copy_up_ctx ctx = {
.parent = parent,
.dentry = dentry,
.workdir = ovl_workdir(dentry),
};
if (WARN_ON(!ctx.workdir))
return -EROFS;
/* Snapshot the lower attributes that the copy up will reproduce */
ovl_path_lower(dentry, &ctx.lowerpath);
err = vfs_getattr(&ctx.lowerpath, &ctx.stat,
STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
if (err)
return err;
ctx.metacopy = ovl_need_meta_copy_up(dentry, ctx.stat.mode, flags);
if (parent) {
/*
* Destination is the upper dentry of the parent, under the
* dentry's current name. The parent's atime/mtime are saved in
* ctx.pstat.
*/
ovl_path_upper(parent, &parentpath);
ctx.destdir = parentpath.dentry;
ctx.destname = dentry->d_name;
err = vfs_getattr(&parentpath, &ctx.pstat,
STATX_ATIME | STATX_MTIME,
AT_STATX_SYNC_AS_STAT);
if (err)
return err;
}
/* maybe truncate regular file. this has no effect on dirs */
if (flags & O_TRUNC)
ctx.stat.size = 0;
if (S_ISLNK(ctx.stat.mode)) {
/* Fetch the link target before starting the copy up itself */
ctx.link = vfs_get_link(ctx.lowerpath.dentry, &done);
if (IS_ERR(ctx.link))
return PTR_ERR(ctx.link);
}
err = ovl_copy_up_start(dentry, flags);
/* err < 0: interrupted, err > 0: raced with another copy-up */
if (unlikely(err)) {
if (err > 0)
err = 0;
} else {
/* Each step runs only if the previous ones succeeded */
if (!ovl_dentry_upper(dentry))
err = ovl_do_copy_up(&ctx);
/* Upper exists but this alias is not linked in the upper dir yet */
if (!err && parent && !ovl_dentry_has_upper_alias(dentry))
err = ovl_link_up(&ctx);
/* Upper holds metadata only but the caller needs the data */
if (!err && ovl_dentry_needs_data_copy_up_locked(dentry, flags))
err = ovl_copy_up_meta_inode_data(&ctx);
ovl_copy_up_end(dentry);
}
do_delayed_call(&done);
return err;
}
/*
 * Copy up @dentry together with every ancestor that has no upper yet.
 *
 * @dentry: overlay dentry to copy up
 * @flags:  open flags that triggered the copy up, or 0
 *
 * Each iteration copies up the topmost ancestor (or @dentry itself) whose
 * parent already has an upper dentry, until @dentry is reported as copied
 * up or an error occurs. Runs with the overlay's overridden credentials.
 *
 * Returns 0 on success, -EIO (with a warning) for a disconnected
 * directory, or the first error returned by ovl_copy_up_one().
 */
static int ovl_copy_up_flags(struct dentry *dentry, int flags)
{
	const bool disconnected = dentry->d_flags & DCACHE_DISCONNECTED;
	const struct cred *saved_cred;
	int err = 0;

	/*
	 * With NFS export, copy up can get called for a disconnected non-dir.
	 * In this case, we will copy up lower inode to index dir without
	 * linking it to upper dir.
	 */
	if (WARN_ON(disconnected && d_is_dir(dentry)))
		return -EIO;

	saved_cred = ovl_override_creds(dentry->d_sb);

	while (!err && !ovl_already_copied_up(dentry, flags)) {
		struct dentry *parent = NULL;
		struct dentry *next = dget(dentry);

		/*
		 * Find the topmost dentry not yet copied up. A disconnected
		 * dentry is copied up on its own, with a NULL parent.
		 */
		while (!disconnected) {
			parent = dget_parent(next);
			if (ovl_dentry_upper(parent))
				break;
			dput(next);
			next = parent;
		}

		err = ovl_copy_up_one(parent, next, flags);

		dput(parent);
		dput(next);
	}

	ovl_revert_creds(dentry->d_sb, saved_cred);
	return err;
}
/*
 * Tell whether opening @dentry with @flags requires a copy up first.
 *
 * Returns false when the dentry is already copied up, when it is a
 * special file, or when the open flags do not call for a copy up.
 */
static bool ovl_open_need_copy_up(struct dentry *dentry, int flags)
{
	/* Copy up of disconnected dentry does not set upper alias */
	return !ovl_already_copied_up(dentry, flags) &&
	       !special_file(d_inode(dentry)->i_mode) &&
	       ovl_open_flags_need_copy_up(flags);
}
/*
 * Copy up @dentry if an open with @flags requires it.
 *
 * Write access is taken with ovl_want_write() for the duration of the
 * copy up and dropped afterwards.
 *
 * Returns 0 when no copy up is needed or it succeeded, otherwise the
 * error from ovl_want_write() or ovl_copy_up_flags().
 */
int ovl_maybe_copy_up(struct dentry *dentry, int flags)
{
	int err;

	if (!ovl_open_need_copy_up(dentry, flags))
		return 0;

	err = ovl_want_write(dentry);
	if (err)
		return err;

	err = ovl_copy_up_flags(dentry, flags);
	ovl_drop_write(dentry);

	return err;
}
/*
* Copy up @dentry as if it were being opened with O_WRONLY.
*
* Returns the result of ovl_copy_up_flags().
* NOTE(review): passing a write open flag presumably forces a full data
* copy up rather than a metadata-only one - confirm against
* ovl_need_meta_copy_up().
*/
int ovl_copy_up_with_data(struct dentry *dentry)
{
return ovl_copy_up_flags(dentry, O_WRONLY);
}
/*
* Copy up @dentry with no open flags (flags == 0).
*
* Returns the result of ovl_copy_up_flags().
*/
int ovl_copy_up(struct dentry *dentry)
{
return ovl_copy_up_flags(dentry, 0);
}