66e91da883
-----BEGIN PGP SIGNATURE----- iQIzBAABCAAdFiEEZH8oZUiU471FcZm+ONu9yGCSaT4FAmXYTLkACgkQONu9yGCS aT4+fhAAqqR/Cvx53ZKMQ8GZTCudAZnr/Dz6kWYwxhhhIbQjDpCaf9mgsrEDaQS2 ancSZjzYaOUIXq/IsthXxQIUhiZbuM3iuSEi7+odWgSYdkFyzuUt8MWLBGSaB5Er ojn+APtq7vPXTSnp7uMwqMC3/BHCKkeYIjRVevhhHBKG5d3lzkV1xU8NcvMkLaly CIRxpWXD3w2b7K0GEbb/zN1GQEHDCQcxjuaJoe/5FKGJkqd3T31eyiJTRumCCMcz j8vkGkYmcMJpWf04iLgVA1p13I5/HGrXdEBI/GutN8IABIC3Cp42jW8phHYKW5ZM a4R25LZG5buND1Ubpq+EDrYn3EaPek5XRki0w8ZAXfNa3rYc+N6mQjkzNSOzhJ/5 VNsn3EAE1Dwtar5Z3ASe9ugDbh+0bgx85PbfaADK88V+qWb3DVr1TBWmDNu2vfVP rv4I0EKu9r3vOE8aNMEBuhAVkIK3mEQUxwab6RKNrMby/5Uwa+ugrrUtQd8V+T1S j6r6v7u7aZ8mhYO7d6WSvAKL85lCWGbs3WRIKCJZmDRyqWrWW9tVWRN9wrZ2QnRr iaCQKk8P474P7/j1zwnmih8l4wS1oszveNziWwd0fi1Nn/WQYM+JKYQvpuQijmQ+ J9jLyWo7a59zffIE6mzJdNwFy9hlw9X+VnJmExk/Q88Z7Bt5wPQ= =laYd -----END PGP SIGNATURE----- Merge 5.10.210 into android12-5.10-lts Changes in 5.10.210 usb: cdns3: Fixes for sparse warnings usb: cdns3: fix uvc failure work since sg support enabled usb: cdns3: fix incorrect calculation of ep_buf_size when more than one config usb: cdns3: fix iso transfer error when mult is not zero usb: cdns3: Fix uvc fail when DMA cross 4k boundery since sg enabled PCI: mediatek: Clear interrupt status before dispatching handler units: change from 'L' to 'UL' units: add the HZ macros serial: sc16is7xx: set safe default SPI clock frequency spi: introduce SPI_MODE_X_MASK macro serial: sc16is7xx: add check for unsupported SPI modes during probe iio: adc: ad7091r: Set alert bit in config register iio: adc: ad7091r: Allow users to configure device events iio: adc: ad7091r: Enable internal vref if external vref is not supplied dmaengine: fix NULL pointer in channel unregistration function iio:adc:ad7091r: Move exports into IIO_AD7091R namespace. ext4: allow for the last group to be marked as trimmed crypto: api - Disallow identical driver names PM: hibernate: Enforce ordering during image compression/decompression hwrng: core - Fix page fault dead lock on mmap-ed hwrng crypto: s390/aes - Fix buffer overread in CTR mode rpmsg: virtio: Free driver_override when rpmsg_remove() bus: mhi: host: Drop chan lock before queuing buffers parisc/firmware: Fix F-extend for PDC addresses async: Split async_schedule_node_domain() async: Introduce async_schedule_dev_nocall() arm64: dts: qcom: sdm845: fix USB wakeup interrupt types arm64: dts: qcom: sdm845: fix USB DP/DM HS PHY interrupts lsm: new security_file_ioctl_compat() hook scripts/get_abi: fix source path leak mmc: core: Use mrq.sbc in close-ended ffu mmc: mmc_spi: remove custom DMA mapped buffers rtc: Adjust failure return code for cmos_set_alarm() nouveau/vmm: don't set addr on the fail path to avoid warning ubifs: ubifs_symlink: Fix memleak of inode->i_link in error path rename(): fix the locking of subdirectories block: Remove special-casing of compound pages stddef: Introduce DECLARE_FLEX_ARRAY() helper smb3: Replace smb2pdu 1-element arrays with flex-arrays mm: vmalloc: introduce array allocation functions KVM: use __vcalloc for very large allocations net/smc: fix illegal rmb_desc access in SMC-D connection dump tcp: make sure init the accept_queue's spinlocks once bnxt_en: Wait for FLR to complete during probe vlan: skip nested type that is not IFLA_VLAN_QOS_MAPPING llc: make llc_ui_sendmsg() more robust against bonding changes llc: Drop support for ETH_P_TR_802_2. net/rds: Fix UBSAN: array-index-out-of-bounds in rds_cmsg_recv tracing: Ensure visibility when inserting an element into tracing_map afs: Hide silly-rename files from userspace tcp: Add memory barrier to tcp_push() netlink: fix potential sleeping issue in mqueue_flush_file ipv6: init the accept_queue's spinlocks in inet6_create net/mlx5: DR, Use the right GVMI number for drop action net/mlx5e: fix a double-free in arfs_create_groups netfilter: nf_tables: restrict anonymous set and map names to 16 bytes netfilter: nf_tables: validate NFPROTO_* family net: mvpp2: clear BM pool before initialization selftests: netdevsim: fix the udp_tunnel_nic test fjes: fix memleaks in fjes_hw_setup net: fec: fix the unhandled context fault from smmu btrfs: ref-verify: free ref cache before clearing mount opt btrfs: tree-checker: fix inline ref size in error messages btrfs: don't warn if discard range is not aligned to sector btrfs: defrag: reject unknown flags of btrfs_ioctl_defrag_range_args btrfs: don't abort filesystem when attempting to snapshot deleted subvolume rbd: don't move requests to the running list on errors exec: Fix error handling in begin_new_exec() wifi: iwlwifi: fix a memory corruption netfilter: nft_chain_filter: handle NETDEV_UNREGISTER for inet/ingress basechain netfilter: nf_tables: reject QUEUE/DROP verdict parameters gpiolib: acpi: Ignore touchpad wakeup on GPD G1619-04 drm: Don't unref the same fb many times by mistake due to deadlock handling drm/bridge: nxp-ptn3460: fix i2c_master_send() error checking drm/tidss: Fix atomic_flush check drm/bridge: nxp-ptn3460: simplify some error checking PM: sleep: Use dev_printk() when possible PM: sleep: Avoid calling put_device() under dpm_list_mtx PM: core: Remove unnecessary (void *) conversions PM: sleep: Fix possible deadlocks in core system-wide PM code fs/pipe: move check to pipe_has_watch_queue() pipe: wakeup wr_wait after setting max_usage ARM: dts: samsung: exynos4210-i9100: Unconditionally enable LDO12 arm64: dts: qcom: sc7180: Use pdc interrupts for USB instead of GIC interrupts arm64: dts: qcom: sc7180: fix USB wakeup interrupt types media: mtk-jpeg: Fix use after free bug due to error path handling in mtk_jpeg_dec_device_run mm: use __pfn_to_section() instead of open coding it mm/sparsemem: fix race in accessing memory_section->usage btrfs: remove err variable from btrfs_delete_subvolume btrfs: avoid copying BTRFS_ROOT_SUBVOL_DEAD flag to snapshot of subvolume being deleted drm: panel-simple: add missing bus flags for Tianma tm070jvhg[30/33] drm/exynos: fix accidental on-stack copy of exynos_drm_plane drm/exynos: gsc: minor fix for loop iteration in gsc_runtime_resume gpio: eic-sprd: Clear interrupt after set the interrupt type spi: bcm-qspi: fix SFDP BFPT read by usig mspi read mips: Call lose_fpu(0) before initializing fcr31 in mips_set_personality_nan tick/sched: Preserve number of idle sleeps across CPU hotplug events x86/entry/ia32: Ensure s32 is sign extended to s64 powerpc/mm: Fix null-pointer dereference in pgtable_cache_add drivers/perf: pmuv3: don't expose SW_INCR event in sysfs powerpc: Fix build error due to is_valid_bugaddr() powerpc/mm: Fix build failures due to arch_reserved_kernel_pages() x86/boot: Ignore NMIs during very early boot powerpc: pmd_move_must_withdraw() is only needed for CONFIG_TRANSPARENT_HUGEPAGE powerpc/lib: Validate size for vector operations x86/mce: Mark fatal MCE's page as poison to avoid panic in the kdump kernel perf/core: Fix narrow startup race when creating the perf nr_addr_filters sysfs file debugobjects: Stop accessing objects after releasing hash bucket lock regulator: core: Only increment use_count when enable_count changes audit: Send netlink ACK before setting connection in auditd_set ACPI: video: Add quirk for the Colorful X15 AT 23 Laptop PNP: ACPI: fix fortify warning ACPI: extlog: fix NULL pointer dereference check PM / devfreq: Synchronize devfreq_monitor_[start/stop] ACPI: APEI: set memory failure flags as MF_ACTION_REQUIRED on synchronous events FS:JFS:UBSAN:array-index-out-of-bounds in dbAdjTree UBSAN: array-index-out-of-bounds in dtSplitRoot jfs: fix slab-out-of-bounds Read in dtSearch jfs: fix array-index-out-of-bounds in dbAdjTree jfs: fix uaf in jfs_evict_inode pstore/ram: Fix crash when setting number of cpus to an odd number crypto: stm32/crc32 - fix parsing list of devices afs: fix the usage of read_seqbegin_or_lock() in afs_lookup_volume_rcu() afs: fix the usage of read_seqbegin_or_lock() in afs_find_server*() rxrpc_find_service_conn_rcu: fix the usage of read_seqbegin_or_lock() jfs: fix array-index-out-of-bounds in diNewExt s390/ptrace: handle setting of fpc register correctly KVM: s390: fix setting of fpc register SUNRPC: Fix a suspicious RCU usage warning ecryptfs: Reject casefold directory inodes ext4: fix inconsistent between segment fstrim and full fstrim ext4: unify the type of flexbg_size to unsigned int ext4: remove unnecessary check from alloc_flex_gd() ext4: avoid online resizing failures due to oversized flex bg wifi: rt2x00: restart beacon queue when hardware reset selftests/bpf: satisfy compiler by having explicit return in btf test selftests/bpf: Fix pyperf180 compilation failure with clang18 scsi: lpfc: Fix possible file string name overflow when updating firmware PCI: Add no PM reset quirk for NVIDIA Spectrum devices bonding: return -ENOMEM instead of BUG in alb_upper_dev_walk scsi: arcmsr: Support new PCI device IDs 1883 and 1886 ARM: dts: imx7d: Fix coresight funnel ports ARM: dts: imx7s: Fix lcdif compatible ARM: dts: imx7s: Fix nand-controller #size-cells wifi: ath9k: Fix potential array-index-out-of-bounds read in ath9k_htc_txstatus() bpf: Add map and need_defer parameters to .map_fd_put_ptr() scsi: libfc: Don't schedule abort twice scsi: libfc: Fix up timeout error in fc_fcp_rec_error() bpf: Set uattr->batch.count as zero before batched update or deletion ARM: dts: rockchip: fix rk3036 hdmi ports node ARM: dts: imx25/27-eukrea: Fix RTC node name ARM: dts: imx: Use flash@0,0 pattern ARM: dts: imx27: Fix sram node ARM: dts: imx1: Fix sram node ionic: pass opcode to devcmd_wait block/rnbd-srv: Check for unlikely string overflow ARM: dts: imx25: Fix the iim compatible string ARM: dts: imx25/27: Pass timing0 ARM: dts: imx27-apf27dev: Fix LED name ARM: dts: imx23-sansa: Use preferred i2c-gpios properties ARM: dts: imx23/28: Fix the DMA controller node name net: dsa: mv88e6xxx: Fix mv88e6352_serdes_get_stats error path block: prevent an integer overflow in bvec_try_merge_hw_page md: Whenassemble the array, consult the superblock of the freshest device arm64: dts: qcom: msm8996: Fix 'in-ports' is a required property arm64: dts: qcom: msm8998: Fix 'out-ports' is a required property wifi: rtl8xxxu: Add additional USB IDs for RTL8192EU devices wifi: rtlwifi: rtl8723{be,ae}: using calculate_bit_shift() wifi: cfg80211: free beacon_ies when overridden from hidden BSS Bluetooth: qca: Set both WIDEBAND_SPEECH and LE_STATES quirks for QCA2066 Bluetooth: L2CAP: Fix possible multiple reject send i40e: Fix VF disable behavior to block all traffic f2fs: fix to check return value of f2fs_reserve_new_block() ALSA: hda: Refer to correct stream index at loops ASoC: doc: Fix undefined SND_SOC_DAPM_NOPM argument fast_dput(): handle underflows gracefully RDMA/IPoIB: Fix error code return in ipoib_mcast_join drm/amd/display: Fix tiled display misalignment f2fs: fix write pointers on zoned device after roll forward drm/drm_file: fix use of uninitialized variable drm/framebuffer: Fix use of uninitialized variable drm/mipi-dsi: Fix detach call without attach media: stk1160: Fixed high volume of stk1160_dbg messages media: rockchip: rga: fix swizzling for RGB formats PCI: add INTEL_HDA_ARL to pci_ids.h ALSA: hda: Intel: add HDA_ARL PCI ID support ALSA: hda: intel-dspcfg: add filters for ARL-S and ARL drm/exynos: Call drm_atomic_helper_shutdown() at shutdown/unbind time IB/ipoib: Fix mcast list locking media: ddbridge: fix an error code problem in ddb_probe drm/msm/dpu: Ratelimit framedone timeout msgs clk: hi3620: Fix memory leak in hi3620_mmc_clk_init() clk: mmp: pxa168: Fix memory leak in pxa168_clk_init() watchdog: it87_wdt: Keep WDTCTRL bit 3 unmodified for IT8784/IT8786 drm/amdgpu: Let KFD sync with VM fences drm/amdgpu: Drop 'fence' check in 'to_amdgpu_amdkfd_fence()' leds: trigger: panic: Don't register panic notifier if creating the trigger failed um: Fix naming clash between UML and scheduler um: Don't use vfprintf() for os_info() um: net: Fix return type of uml_net_start_xmit() i3c: master: cdns: Update maximum prescaler value for i2c clock xen/gntdev: Fix the abuse of underlying struct page in DMA-buf import mfd: ti_am335x_tscadc: Fix TI SoC dependencies PCI: Only override AMD USB controller if required PCI: switchtec: Fix stdev_release() crash after surprise hot remove usb: hub: Replace hardcoded quirk value with BIT() macro tty: allow TIOCSLCKTRMIOS with CAP_CHECKPOINT_RESTORE fs/kernfs/dir: obey S_ISGID PCI/AER: Decode Requester ID when no error info found libsubcmd: Fix memory leak in uniq() virtio_net: Fix "‘%d’ directive writing between 1 and 11 bytes into a region of size 10" warnings blk-mq: fix IO hang from sbitmap wakeup race ceph: fix deadlock or deadcode of misusing dget() drm/amd/powerplay: Fix kzalloc parameter 'ATOM_Tonga_PPM_Table' in 'get_platform_power_management_table()' drm/amdgpu: Release 'adev->pm.fw' before return in 'amdgpu_device_need_post()' perf: Fix the nr_addr_filters fix wifi: cfg80211: fix RCU dereference in __cfg80211_bss_update drm: using mul_u32_u32() requires linux/math64.h scsi: isci: Fix an error code problem in isci_io_request_build() scsi: core: Introduce enum scsi_disposition scsi: core: Move scsi_host_busy() out of host lock for waking up EH handler ip6_tunnel: use dev_sw_netstats_rx_add() ip6_tunnel: make sure to pull inner header in __ip6_tnl_rcv() net-zerocopy: Refactor frag-is-remappable test. tcp: add sanity checks to rx zerocopy ixgbe: Remove non-inclusive language ixgbe: Refactor returning internal error codes ixgbe: Refactor overtemp event handling ixgbe: Fix an error handling path in ixgbe_read_iosf_sb_reg_x550() ipv6: Ensure natural alignment of const ipv6 loopback and router addresses llc: call sock_orphan() at release time netfilter: nf_log: replace BUG_ON by WARN_ON_ONCE when putting logger netfilter: nft_ct: sanitize layer 3 and 4 protocol number in custom expectations net: ipv4: fix a memleak in ip_setup_cork af_unix: fix lockdep positive in sk_diag_dump_icons() net: sysfs: Fix /sys/class/net/<iface> path HID: apple: Add support for the 2021 Magic Keyboard HID: apple: Add 2021 magic keyboard FN key mapping bonding: remove print in bond_verify_device_path uapi: stddef.h: Fix __DECLARE_FLEX_ARRAY for C++ PM: sleep: Fix error handling in dpm_prepare() dmaengine: fsl-dpaa2-qdma: Fix the size of dma pools dmaengine: ti: k3-udma: Report short packet errors dmaengine: fsl-qdma: Fix a memory leak related to the status queue DMA dmaengine: fsl-qdma: Fix a memory leak related to the queue command DMA phy: renesas: rcar-gen3-usb2: Fix returning wrong error code dmaengine: fix is_slave_direction() return false when DMA_DEV_TO_DEV phy: ti: phy-omap-usb2: Fix NULL pointer dereference for SRP drm/msm/dp: return correct Colorimetry for DP_TEST_DYNAMIC_RANGE_CEA case net: stmmac: xgmac: fix handling of DPP safety error for DMA channels selftests: net: avoid just another constant wait tunnels: fix out of bounds access when building IPv6 PMTU error atm: idt77252: fix a memleak in open_card_ubr0 hwmon: (aspeed-pwm-tacho) mutex for tach reading hwmon: (coretemp) Fix out-of-bounds memory access hwmon: (coretemp) Fix bogus core_id to attr name mapping inet: read sk->sk_family once in inet_recv_error() rxrpc: Fix response to PING RESPONSE ACKs to a dead call tipc: Check the bearer type before calling tipc_udp_nl_bearer_add() ppp_async: limit MRU to 64K netfilter: nft_compat: reject unused compat flag netfilter: nft_compat: restrict match/target protocol to u16 netfilter: nft_ct: reject direction for ct id netfilter: nft_set_pipapo: store index in scratch maps netfilter: nft_set_pipapo: add helper to release pcpu scratch area netfilter: nft_set_pipapo: remove scratch_aligned pointer scsi: core: Move scsi_host_busy() out of host lock if it is for per-command blk-iocost: Fix an UBSAN shift-out-of-bounds warning net/af_iucv: clean up a try_then_request_module() USB: serial: qcserial: add new usb-id for Dell Wireless DW5826e USB: serial: option: add Fibocom FM101-GL variant USB: serial: cp210x: add ID for IMST iM871A-USB usb: host: xhci-plat: Add support for XHCI_SG_TRB_CACHE_SIZE_QUIRK hrtimer: Report offline hrtimer enqueue Input: i8042 - fix strange behavior of touchpad on Clevo NS70PU Input: atkbd - skip ATKBD_CMD_SETLEDS when skipping ATKBD_CMD_GETID vhost: use kzalloc() instead of kmalloc() followed by memset() clocksource: Skip watchdog check for large watchdog intervals net: stmmac: xgmac: use #define for string constants net: stmmac: xgmac: fix a typo of register name in DPP safety handling netfilter: nft_set_rbtree: skip end interval element from gc btrfs: forbid creating subvol qgroups btrfs: do not ASSERT() if the newly created subvolume already got read btrfs: forbid deleting live subvol qgroup btrfs: send: return EOPNOTSUPP on unknown flags of: unittest: Fix compile in the non-dynamic case net: openvswitch: limit the number of recursions from action sets spi: ppc4xx: Drop write-only variable ASoC: rt5645: Fix deadlock in rt5645_jack_detect_work() net: sysfs: Fix /sys/class/net/<iface> path for statistics MIPS: Add 'memory' clobber to csum_ipv6_magic() inline assembler i40e: Fix waiting for queues of all VSIs to be disabled tracing/trigger: Fix to return error if failed to alloc snapshot mm/writeback: fix possible divide-by-zero in wb_dirty_limits(), again ALSA: hda/realtek: Fix the external mic not being recognised for Acer Swift 1 SF114-32 ALSA: hda/realtek: Enable Mute LED on HP Laptop 14-fq0xxx HID: wacom: generic: Avoid reporting a serial of '0' to userspace HID: wacom: Do not register input devices until after hid_hw_start usb: ucsi_acpi: Fix command completion handling USB: hub: check for alternate port before enabling A_ALT_HNP_SUPPORT usb: f_mass_storage: forbid async queue when shutdown happen media: ir_toy: fix a memleak in irtoy_tx powerpc/kasan: Fix addr error caused by page alignment i2c: i801: Remove i801_set_block_buffer_mode i2c: i801: Fix block process call transactions modpost: trim leading spaces when processing source files list scsi: Revert "scsi: fcoe: Fix potential deadlock on &fip->ctlr_lock" lsm: fix the logic in security_inode_getsecctx() firewire: core: correct documentation of fw_csr_string() kernel API kbuild: Fix changing ELF file type for output of gen_btf for big endian nfc: nci: free rx_data_reassembly skb on NCI device cleanup net: hsr: remove WARN_ONCE() in send_hsr_supervision_frame() xen-netback: properly sync TX responses ALSA: hda/realtek: Enable headset mic on Vaio VJFE-ADL binder: signal epoll threads of self-work misc: fastrpc: Mark all sessions as invalid in cb_remove ext4: fix double-free of blocks due to wrong extents moved_len tracing: Fix wasted memory in saved_cmdlines logic staging: iio: ad5933: fix type mismatch regression iio: magnetometer: rm3100: add boundary check for the value read from RM3100_REG_TMRC iio: accel: bma400: Fix a compilation problem media: rc: bpf attach/detach requires write permission hv_netvsc: Fix race condition between netvsc_probe and netvsc_remove ring-buffer: Clean ring_buffer_poll_wait() error return serial: max310x: set default value when reading clock ready bit serial: max310x: improve crystal stable clock detection x86/Kconfig: Transmeta Crusoe is CPU family 5, not 6 x86/mm/ident_map: Use gbpages only where full GB page should be mapped. mmc: slot-gpio: Allow non-sleeping GPIO ro ALSA: hda/conexant: Add quirk for SWS JS201D nilfs2: fix data corruption in dsync block recovery for small block sizes nilfs2: fix hang in nilfs_lookup_dirty_data_buffers() crypto: ccp - Fix null pointer dereference in __sev_platform_shutdown_locked nfp: use correct macro for LengthSelect in BAR config nfp: flower: prevent re-adding mac index for bonded port wifi: mac80211: reload info pointer in ieee80211_tx_dequeue() irqchip/irq-brcmstb-l2: Add write memory barrier before exit irqchip/gic-v3-its: Fix GICv4.1 VPE affinity update s390/qeth: Fix potential loss of L3-IP@ in case of network issues ceph: prevent use-after-free in encode_cap_msg() of: property: fix typo in io-channels can: j1939: Fix UAF in j1939_sk_match_filter during setsockopt(SO_J1939_FILTER) pmdomain: core: Move the unused cleanup to a _sync initcall tracing: Inform kmemleak of saved_cmdlines allocation Revert "md/raid5: Wait for MD_SB_CHANGE_PENDING in raid5d" bus: moxtet: Add spi device table PCI: dwc: endpoint: Fix dw_pcie_ep_raise_msix_irq() alignment support mips: Fix max_mapnr being uninitialized on early stages crypto: lib/mpi - Fix unexpected pointer access in mpi_ec_init serial: Add rs485_supported to uart_port serial: 8250_exar: Fill in rs485_supported serial: 8250_exar: Set missing rs485_supported flag scripts/decode_stacktrace.sh: silence stderr messages from addr2line/nm scripts/decode_stacktrace.sh: support old bash version scripts: decode_stacktrace: demangle Rust symbols scripts/decode_stacktrace.sh: optionally use LLVM utilities netfilter: ipset: fix performance regression in swap operation netfilter: ipset: Missing gc cancellations fixed hrtimer: Ignore slack time for RT tasks in schedule_hrtimeout_range() Revert "arm64: Stash shadow stack pointer in the task struct on interrupt" net: prevent mss overflow in skb_segment() sched/membarrier: reduce the ability to hammer on sys_membarrier nilfs2: fix potential bug in end_buffer_async_write nilfs2: replace WARN_ONs for invalid DAT metadata block requests dm: limit the number of targets and parameter size area PM: runtime: add devm_pm_runtime_enable helper PM: runtime: Have devm_pm_runtime_enable() handle pm_runtime_dont_use_autosuspend() drm/msm/dsi: Enable runtime PM netfilter: nf_tables: fix pointer math issue in nft_byteorder_eval() net: bcmgenet: Fix EEE implementation PCI: dwc: Fix a 64bit bug in dw_pcie_ep_raise_msix_irq() Linux 5.10.210 Change-Id: I5e7327f58dd6abd26ac2b1e328a81c1010d1147c Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
1453 lines
35 KiB
C
1453 lines
35 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* linux/fs/pipe.c
|
|
*
|
|
* Copyright (C) 1991, 1992, 1999 Linus Torvalds
|
|
*/
|
|
|
|
#include <linux/mm.h>
|
|
#include <linux/file.h>
|
|
#include <linux/poll.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/module.h>
|
|
#include <linux/init.h>
|
|
#include <linux/fs.h>
|
|
#include <linux/log2.h>
|
|
#include <linux/mount.h>
|
|
#include <linux/pseudo_fs.h>
|
|
#include <linux/magic.h>
|
|
#include <linux/pipe_fs_i.h>
|
|
#include <linux/uio.h>
|
|
#include <linux/highmem.h>
|
|
#include <linux/pagemap.h>
|
|
#include <linux/audit.h>
|
|
#include <linux/syscalls.h>
|
|
#include <linux/fcntl.h>
|
|
#include <linux/memcontrol.h>
|
|
#include <linux/watch_queue.h>
|
|
|
|
#include <linux/uaccess.h>
|
|
#include <asm/ioctls.h>
|
|
|
|
#include "internal.h"
|
|
|
|
/*
|
|
* New pipe buffers will be restricted to this size while the user is exceeding
|
|
* their pipe buffer quota. The general pipe use case needs at least two
|
|
* buffers: one for data yet to be read, and one for new data. If this is less
|
|
* than two, then a write to a non-empty pipe may block even if the pipe is not
|
|
* full. This can occur with GNU make jobserver or similar uses of pipes as
|
|
* semaphores: multiple processes may be waiting to write tokens back to the
|
|
* pipe before reading tokens: https://lore.kernel.org/lkml/1628086770.5rn8p04n6j.none@localhost/.
|
|
*
|
|
* Users can reduce their pipe buffers with F_SETPIPE_SZ below this at their
|
|
* own risk, namely: pipe writes to non-full pipes may block until the pipe is
|
|
* emptied.
|
|
*/
|
|
#define PIPE_MIN_DEF_BUFFERS 2
|
|
|
|
/*
|
|
* The max size that a non-root user is allowed to grow the pipe. Can
|
|
* be set by root in /proc/sys/fs/pipe-max-size
|
|
*/
|
|
unsigned int pipe_max_size = 1048576;
|
|
|
|
/* Maximum allocatable pages per user. Hard limit is unset by default, soft
|
|
* matches default values.
|
|
*/
|
|
unsigned long pipe_user_pages_hard;
|
|
unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;
|
|
|
|
/*
|
|
* We use head and tail indices that aren't masked off, except at the point of
|
|
* dereference, but rather they're allowed to wrap naturally. This means there
|
|
* isn't a dead spot in the buffer, but the ring has to be a power of two and
|
|
* <= 2^31.
|
|
* -- David Howells 2019-09-23.
|
|
*
|
|
* Reads with count = 0 should always return 0.
|
|
* -- Julian Bradfield 1999-06-07.
|
|
*
|
|
* FIFOs and Pipes now generate SIGIO for both readers and writers.
|
|
* -- Jeremy Elson <jelson@circlemud.org> 2001-08-16
|
|
*
|
|
* pipe_read & write cleanup
|
|
* -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
|
|
*/
|
|
|
|
static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass)
|
|
{
|
|
if (pipe->files)
|
|
mutex_lock_nested(&pipe->mutex, subclass);
|
|
}
|
|
|
|
void pipe_lock(struct pipe_inode_info *pipe)
|
|
{
|
|
/*
|
|
* pipe_lock() nests non-pipe inode locks (for writing to a file)
|
|
*/
|
|
pipe_lock_nested(pipe, I_MUTEX_PARENT);
|
|
}
|
|
EXPORT_SYMBOL(pipe_lock);
|
|
|
|
void pipe_unlock(struct pipe_inode_info *pipe)
|
|
{
|
|
if (pipe->files)
|
|
mutex_unlock(&pipe->mutex);
|
|
}
|
|
EXPORT_SYMBOL(pipe_unlock);
|
|
|
|
static inline void __pipe_lock(struct pipe_inode_info *pipe)
|
|
{
|
|
mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
|
|
}
|
|
|
|
static inline void __pipe_unlock(struct pipe_inode_info *pipe)
|
|
{
|
|
mutex_unlock(&pipe->mutex);
|
|
}
|
|
|
|
void pipe_double_lock(struct pipe_inode_info *pipe1,
|
|
struct pipe_inode_info *pipe2)
|
|
{
|
|
BUG_ON(pipe1 == pipe2);
|
|
|
|
if (pipe1 < pipe2) {
|
|
pipe_lock_nested(pipe1, I_MUTEX_PARENT);
|
|
pipe_lock_nested(pipe2, I_MUTEX_CHILD);
|
|
} else {
|
|
pipe_lock_nested(pipe2, I_MUTEX_PARENT);
|
|
pipe_lock_nested(pipe1, I_MUTEX_CHILD);
|
|
}
|
|
}
|
|
|
|
static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
|
|
struct pipe_buffer *buf)
|
|
{
|
|
struct page *page = buf->page;
|
|
|
|
/*
|
|
* If nobody else uses this page, and we don't already have a
|
|
* temporary page, let's keep track of it as a one-deep
|
|
* allocation cache. (Otherwise just release our reference to it)
|
|
*/
|
|
if (page_count(page) == 1 && !pipe->tmp_page)
|
|
pipe->tmp_page = page;
|
|
else
|
|
put_page(page);
|
|
}
|
|
|
|
static bool anon_pipe_buf_try_steal(struct pipe_inode_info *pipe,
|
|
struct pipe_buffer *buf)
|
|
{
|
|
struct page *page = buf->page;
|
|
|
|
if (page_count(page) != 1)
|
|
return false;
|
|
memcg_kmem_uncharge_page(page, 0);
|
|
__SetPageLocked(page);
|
|
return true;
|
|
}
|
|
|
|
/**
|
|
* generic_pipe_buf_try_steal - attempt to take ownership of a &pipe_buffer
|
|
* @pipe: the pipe that the buffer belongs to
|
|
* @buf: the buffer to attempt to steal
|
|
*
|
|
* Description:
|
|
* This function attempts to steal the &struct page attached to
|
|
* @buf. If successful, this function returns 0 and returns with
|
|
* the page locked. The caller may then reuse the page for whatever
|
|
* he wishes; the typical use is insertion into a different file
|
|
* page cache.
|
|
*/
|
|
bool generic_pipe_buf_try_steal(struct pipe_inode_info *pipe,
|
|
struct pipe_buffer *buf)
|
|
{
|
|
struct page *page = buf->page;
|
|
|
|
/*
|
|
* A reference of one is golden, that means that the owner of this
|
|
* page is the only one holding a reference to it. lock the page
|
|
* and return OK.
|
|
*/
|
|
if (page_count(page) == 1) {
|
|
lock_page(page);
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
EXPORT_SYMBOL(generic_pipe_buf_try_steal);
|
|
|
|
/**
|
|
* generic_pipe_buf_get - get a reference to a &struct pipe_buffer
|
|
* @pipe: the pipe that the buffer belongs to
|
|
* @buf: the buffer to get a reference to
|
|
*
|
|
* Description:
|
|
* This function grabs an extra reference to @buf. It's used in
|
|
* in the tee() system call, when we duplicate the buffers in one
|
|
* pipe into another.
|
|
*/
|
|
bool generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
|
|
{
|
|
return try_get_page(buf->page);
|
|
}
|
|
EXPORT_SYMBOL(generic_pipe_buf_get);
|
|
|
|
/**
|
|
* generic_pipe_buf_release - put a reference to a &struct pipe_buffer
|
|
* @pipe: the pipe that the buffer belongs to
|
|
* @buf: the buffer to put a reference to
|
|
*
|
|
* Description:
|
|
* This function releases a reference to @buf.
|
|
*/
|
|
void generic_pipe_buf_release(struct pipe_inode_info *pipe,
|
|
struct pipe_buffer *buf)
|
|
{
|
|
put_page(buf->page);
|
|
}
|
|
EXPORT_SYMBOL(generic_pipe_buf_release);
|
|
|
|
static const struct pipe_buf_operations anon_pipe_buf_ops = {
|
|
.release = anon_pipe_buf_release,
|
|
.try_steal = anon_pipe_buf_try_steal,
|
|
.get = generic_pipe_buf_get,
|
|
};
|
|
|
|
/* Done while waiting without holding the pipe lock - thus the READ_ONCE() */
|
|
static inline bool pipe_readable(const struct pipe_inode_info *pipe)
|
|
{
|
|
unsigned int head = READ_ONCE(pipe->head);
|
|
unsigned int tail = READ_ONCE(pipe->tail);
|
|
unsigned int writers = READ_ONCE(pipe->writers);
|
|
|
|
return !pipe_empty(head, tail) || !writers;
|
|
}
|
|
|
|
static ssize_t
|
|
pipe_read(struct kiocb *iocb, struct iov_iter *to)
|
|
{
|
|
size_t total_len = iov_iter_count(to);
|
|
struct file *filp = iocb->ki_filp;
|
|
struct pipe_inode_info *pipe = filp->private_data;
|
|
bool was_full, wake_next_reader = false;
|
|
ssize_t ret;
|
|
|
|
/* Null read succeeds. */
|
|
if (unlikely(total_len == 0))
|
|
return 0;
|
|
|
|
ret = 0;
|
|
__pipe_lock(pipe);
|
|
|
|
/*
|
|
* We only wake up writers if the pipe was full when we started
|
|
* reading in order to avoid unnecessary wakeups.
|
|
*
|
|
* But when we do wake up writers, we do so using a sync wakeup
|
|
* (WF_SYNC), because we want them to get going and generate more
|
|
* data for us.
|
|
*/
|
|
was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
|
|
for (;;) {
|
|
/* Read ->head with a barrier vs post_one_notification() */
|
|
unsigned int head = smp_load_acquire(&pipe->head);
|
|
unsigned int tail = pipe->tail;
|
|
unsigned int mask = pipe->ring_size - 1;
|
|
|
|
#ifdef CONFIG_WATCH_QUEUE
|
|
if (pipe->note_loss) {
|
|
struct watch_notification n;
|
|
|
|
if (total_len < 8) {
|
|
if (ret == 0)
|
|
ret = -ENOBUFS;
|
|
break;
|
|
}
|
|
|
|
n.type = WATCH_TYPE_META;
|
|
n.subtype = WATCH_META_LOSS_NOTIFICATION;
|
|
n.info = watch_sizeof(n);
|
|
if (copy_to_iter(&n, sizeof(n), to) != sizeof(n)) {
|
|
if (ret == 0)
|
|
ret = -EFAULT;
|
|
break;
|
|
}
|
|
ret += sizeof(n);
|
|
total_len -= sizeof(n);
|
|
pipe->note_loss = false;
|
|
}
|
|
#endif
|
|
|
|
if (!pipe_empty(head, tail)) {
|
|
struct pipe_buffer *buf = &pipe->bufs[tail & mask];
|
|
size_t chars = buf->len;
|
|
size_t written;
|
|
int error;
|
|
|
|
if (chars > total_len) {
|
|
if (buf->flags & PIPE_BUF_FLAG_WHOLE) {
|
|
if (ret == 0)
|
|
ret = -ENOBUFS;
|
|
break;
|
|
}
|
|
chars = total_len;
|
|
}
|
|
|
|
error = pipe_buf_confirm(pipe, buf);
|
|
if (error) {
|
|
if (!ret)
|
|
ret = error;
|
|
break;
|
|
}
|
|
|
|
written = copy_page_to_iter(buf->page, buf->offset, chars, to);
|
|
if (unlikely(written < chars)) {
|
|
if (!ret)
|
|
ret = -EFAULT;
|
|
break;
|
|
}
|
|
ret += chars;
|
|
buf->offset += chars;
|
|
buf->len -= chars;
|
|
|
|
/* Was it a packet buffer? Clean up and exit */
|
|
if (buf->flags & PIPE_BUF_FLAG_PACKET) {
|
|
total_len = chars;
|
|
buf->len = 0;
|
|
}
|
|
|
|
if (!buf->len) {
|
|
pipe_buf_release(pipe, buf);
|
|
spin_lock_irq(&pipe->rd_wait.lock);
|
|
#ifdef CONFIG_WATCH_QUEUE
|
|
if (buf->flags & PIPE_BUF_FLAG_LOSS)
|
|
pipe->note_loss = true;
|
|
#endif
|
|
tail++;
|
|
pipe->tail = tail;
|
|
spin_unlock_irq(&pipe->rd_wait.lock);
|
|
}
|
|
total_len -= chars;
|
|
if (!total_len)
|
|
break; /* common path: read succeeded */
|
|
if (!pipe_empty(head, tail)) /* More to do? */
|
|
continue;
|
|
}
|
|
|
|
if (!pipe->writers)
|
|
break;
|
|
if (ret)
|
|
break;
|
|
if (filp->f_flags & O_NONBLOCK) {
|
|
ret = -EAGAIN;
|
|
break;
|
|
}
|
|
__pipe_unlock(pipe);
|
|
|
|
/*
|
|
* We only get here if we didn't actually read anything.
|
|
*
|
|
* However, we could have seen (and removed) a zero-sized
|
|
* pipe buffer, and might have made space in the buffers
|
|
* that way.
|
|
*
|
|
* You can't make zero-sized pipe buffers by doing an empty
|
|
* write (not even in packet mode), but they can happen if
|
|
* the writer gets an EFAULT when trying to fill a buffer
|
|
* that already got allocated and inserted in the buffer
|
|
* array.
|
|
*
|
|
* So we still need to wake up any pending writers in the
|
|
* _very_ unlikely case that the pipe was full, but we got
|
|
* no data.
|
|
*/
|
|
if (unlikely(was_full)) {
|
|
wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
|
|
kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
|
|
}
|
|
|
|
/*
|
|
* But because we didn't read anything, at this point we can
|
|
* just return directly with -ERESTARTSYS if we're interrupted,
|
|
* since we've done any required wakeups and there's no need
|
|
* to mark anything accessed. And we've dropped the lock.
|
|
*/
|
|
if (wait_event_interruptible_exclusive(pipe->rd_wait, pipe_readable(pipe)) < 0)
|
|
return -ERESTARTSYS;
|
|
|
|
__pipe_lock(pipe);
|
|
was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
|
|
wake_next_reader = true;
|
|
}
|
|
if (pipe_empty(pipe->head, pipe->tail))
|
|
wake_next_reader = false;
|
|
__pipe_unlock(pipe);
|
|
|
|
if (was_full) {
|
|
wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
|
|
kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
|
|
}
|
|
if (wake_next_reader)
|
|
wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
|
|
if (ret > 0)
|
|
file_accessed(filp);
|
|
return ret;
|
|
}
|
|
|
|
static inline int is_packetized(struct file *file)
|
|
{
|
|
return (file->f_flags & O_DIRECT) != 0;
|
|
}
|
|
|
|
/* Done while waiting without holding the pipe lock - thus the READ_ONCE() */
|
|
static inline bool pipe_writable(const struct pipe_inode_info *pipe)
|
|
{
|
|
unsigned int head = READ_ONCE(pipe->head);
|
|
unsigned int tail = READ_ONCE(pipe->tail);
|
|
unsigned int max_usage = READ_ONCE(pipe->max_usage);
|
|
|
|
return !pipe_full(head, tail, max_usage) ||
|
|
!READ_ONCE(pipe->readers);
|
|
}
|
|
|
|
static ssize_t
|
|
pipe_write(struct kiocb *iocb, struct iov_iter *from)
|
|
{
|
|
struct file *filp = iocb->ki_filp;
|
|
struct pipe_inode_info *pipe = filp->private_data;
|
|
unsigned int head;
|
|
ssize_t ret = 0;
|
|
size_t total_len = iov_iter_count(from);
|
|
ssize_t chars;
|
|
bool was_empty = false;
|
|
bool wake_next_writer = false;
|
|
|
|
/* Null write succeeds. */
|
|
if (unlikely(total_len == 0))
|
|
return 0;
|
|
|
|
__pipe_lock(pipe);
|
|
|
|
if (!pipe->readers) {
|
|
send_sig(SIGPIPE, current, 0);
|
|
ret = -EPIPE;
|
|
goto out;
|
|
}
|
|
|
|
if (pipe_has_watch_queue(pipe)) {
|
|
ret = -EXDEV;
|
|
goto out;
|
|
}
|
|
|
|
/*
|
|
* Epoll nonsensically wants a wakeup whether the pipe
|
|
* was already empty or not.
|
|
*
|
|
* If it wasn't empty we try to merge new data into
|
|
* the last buffer.
|
|
*
|
|
* That naturally merges small writes, but it also
|
|
* page-aligns the rest of the writes for large writes
|
|
* spanning multiple pages.
|
|
*/
|
|
head = pipe->head;
|
|
was_empty = true;
|
|
chars = total_len & (PAGE_SIZE-1);
|
|
if (chars && !pipe_empty(head, pipe->tail)) {
|
|
unsigned int mask = pipe->ring_size - 1;
|
|
struct pipe_buffer *buf = &pipe->bufs[(head - 1) & mask];
|
|
int offset = buf->offset + buf->len;
|
|
|
|
if ((buf->flags & PIPE_BUF_FLAG_CAN_MERGE) &&
|
|
offset + chars <= PAGE_SIZE) {
|
|
ret = pipe_buf_confirm(pipe, buf);
|
|
if (ret)
|
|
goto out;
|
|
|
|
ret = copy_page_from_iter(buf->page, offset, chars, from);
|
|
if (unlikely(ret < chars)) {
|
|
ret = -EFAULT;
|
|
goto out;
|
|
}
|
|
|
|
buf->len += ret;
|
|
if (!iov_iter_count(from))
|
|
goto out;
|
|
}
|
|
}
|
|
|
|
for (;;) {
|
|
if (!pipe->readers) {
|
|
send_sig(SIGPIPE, current, 0);
|
|
if (!ret)
|
|
ret = -EPIPE;
|
|
break;
|
|
}
|
|
|
|
head = pipe->head;
|
|
if (!pipe_full(head, pipe->tail, pipe->max_usage)) {
|
|
unsigned int mask = pipe->ring_size - 1;
|
|
struct pipe_buffer *buf = &pipe->bufs[head & mask];
|
|
struct page *page = pipe->tmp_page;
|
|
int copied;
|
|
|
|
if (!page) {
|
|
page = alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT);
|
|
if (unlikely(!page)) {
|
|
ret = ret ? : -ENOMEM;
|
|
break;
|
|
}
|
|
pipe->tmp_page = page;
|
|
}
|
|
|
|
/* Allocate a slot in the ring in advance and attach an
|
|
* empty buffer. If we fault or otherwise fail to use
|
|
* it, either the reader will consume it or it'll still
|
|
* be there for the next write.
|
|
*/
|
|
spin_lock_irq(&pipe->rd_wait.lock);
|
|
|
|
head = pipe->head;
|
|
if (pipe_full(head, pipe->tail, pipe->max_usage)) {
|
|
spin_unlock_irq(&pipe->rd_wait.lock);
|
|
continue;
|
|
}
|
|
|
|
pipe->head = head + 1;
|
|
spin_unlock_irq(&pipe->rd_wait.lock);
|
|
|
|
/* Insert it into the buffer array */
|
|
buf = &pipe->bufs[head & mask];
|
|
buf->page = page;
|
|
buf->ops = &anon_pipe_buf_ops;
|
|
buf->offset = 0;
|
|
buf->len = 0;
|
|
if (is_packetized(filp))
|
|
buf->flags = PIPE_BUF_FLAG_PACKET;
|
|
else
|
|
buf->flags = PIPE_BUF_FLAG_CAN_MERGE;
|
|
pipe->tmp_page = NULL;
|
|
|
|
copied = copy_page_from_iter(page, 0, PAGE_SIZE, from);
|
|
if (unlikely(copied < PAGE_SIZE && iov_iter_count(from))) {
|
|
if (!ret)
|
|
ret = -EFAULT;
|
|
break;
|
|
}
|
|
ret += copied;
|
|
buf->offset = 0;
|
|
buf->len = copied;
|
|
|
|
if (!iov_iter_count(from))
|
|
break;
|
|
}
|
|
|
|
if (!pipe_full(head, pipe->tail, pipe->max_usage))
|
|
continue;
|
|
|
|
/* Wait for buffer space to become available. */
|
|
if (filp->f_flags & O_NONBLOCK) {
|
|
if (!ret)
|
|
ret = -EAGAIN;
|
|
break;
|
|
}
|
|
if (signal_pending(current)) {
|
|
if (!ret)
|
|
ret = -ERESTARTSYS;
|
|
break;
|
|
}
|
|
|
|
/*
|
|
* We're going to release the pipe lock and wait for more
|
|
* space. We wake up any readers if necessary, and then
|
|
* after waiting we need to re-check whether the pipe
|
|
* become empty while we dropped the lock.
|
|
*/
|
|
__pipe_unlock(pipe);
|
|
if (was_empty) {
|
|
wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
|
|
kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
|
|
}
|
|
wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe));
|
|
__pipe_lock(pipe);
|
|
was_empty = pipe_empty(pipe->head, pipe->tail);
|
|
wake_next_writer = true;
|
|
}
|
|
out:
|
|
if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
|
|
wake_next_writer = false;
|
|
__pipe_unlock(pipe);
|
|
|
|
/*
|
|
* If we do do a wakeup event, we do a 'sync' wakeup, because we
|
|
* want the reader to start processing things asap, rather than
|
|
* leave the data pending.
|
|
*
|
|
* This is particularly important for small writes, because of
|
|
* how (for example) the GNU make jobserver uses small writes to
|
|
* wake up pending jobs
|
|
*/
|
|
if (was_empty) {
|
|
wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
|
|
kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
|
|
}
|
|
if (wake_next_writer)
|
|
wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
|
|
if (ret > 0 && sb_start_write_trylock(file_inode(filp)->i_sb)) {
|
|
int err = file_update_time(filp);
|
|
if (err)
|
|
ret = err;
|
|
sb_end_write(file_inode(filp)->i_sb);
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
|
|
{
|
|
struct pipe_inode_info *pipe = filp->private_data;
|
|
int count, head, tail, mask;
|
|
|
|
switch (cmd) {
|
|
case FIONREAD:
|
|
__pipe_lock(pipe);
|
|
count = 0;
|
|
head = pipe->head;
|
|
tail = pipe->tail;
|
|
mask = pipe->ring_size - 1;
|
|
|
|
while (tail != head) {
|
|
count += pipe->bufs[tail & mask].len;
|
|
tail++;
|
|
}
|
|
__pipe_unlock(pipe);
|
|
|
|
return put_user(count, (int __user *)arg);
|
|
|
|
#ifdef CONFIG_WATCH_QUEUE
|
|
case IOC_WATCH_QUEUE_SET_SIZE: {
|
|
int ret;
|
|
__pipe_lock(pipe);
|
|
ret = watch_queue_set_size(pipe, arg);
|
|
__pipe_unlock(pipe);
|
|
return ret;
|
|
}
|
|
|
|
case IOC_WATCH_QUEUE_SET_FILTER:
|
|
return watch_queue_set_filter(
|
|
pipe, (struct watch_notification_filter __user *)arg);
|
|
#endif
|
|
|
|
default:
|
|
return -ENOIOCTLCMD;
|
|
}
|
|
}
|
|
|
|
/* No kernel lock held - fine */
|
|
static __poll_t
|
|
pipe_poll(struct file *filp, poll_table *wait)
|
|
{
|
|
__poll_t mask;
|
|
struct pipe_inode_info *pipe = filp->private_data;
|
|
unsigned int head, tail;
|
|
|
|
/*
|
|
* Reading pipe state only -- no need for acquiring the semaphore.
|
|
*
|
|
* But because this is racy, the code has to add the
|
|
* entry to the poll table _first_ ..
|
|
*/
|
|
if (filp->f_mode & FMODE_READ)
|
|
poll_wait(filp, &pipe->rd_wait, wait);
|
|
if (filp->f_mode & FMODE_WRITE)
|
|
poll_wait(filp, &pipe->wr_wait, wait);
|
|
|
|
/*
|
|
* .. and only then can you do the racy tests. That way,
|
|
* if something changes and you got it wrong, the poll
|
|
* table entry will wake you up and fix it.
|
|
*/
|
|
head = READ_ONCE(pipe->head);
|
|
tail = READ_ONCE(pipe->tail);
|
|
|
|
mask = 0;
|
|
if (filp->f_mode & FMODE_READ) {
|
|
if (!pipe_empty(head, tail))
|
|
mask |= EPOLLIN | EPOLLRDNORM;
|
|
if (!pipe->writers && filp->f_version != pipe->w_counter)
|
|
mask |= EPOLLHUP;
|
|
}
|
|
|
|
if (filp->f_mode & FMODE_WRITE) {
|
|
if (!pipe_full(head, tail, pipe->max_usage))
|
|
mask |= EPOLLOUT | EPOLLWRNORM;
|
|
/*
|
|
* Most Unices do not set EPOLLERR for FIFOs but on Linux they
|
|
* behave exactly like pipes for poll().
|
|
*/
|
|
if (!pipe->readers)
|
|
mask |= EPOLLERR;
|
|
}
|
|
|
|
return mask;
|
|
}
|
|
|
|
static void put_pipe_info(struct inode *inode, struct pipe_inode_info *pipe)
|
|
{
|
|
int kill = 0;
|
|
|
|
spin_lock(&inode->i_lock);
|
|
if (!--pipe->files) {
|
|
inode->i_pipe = NULL;
|
|
kill = 1;
|
|
}
|
|
spin_unlock(&inode->i_lock);
|
|
|
|
if (kill)
|
|
free_pipe_info(pipe);
|
|
}
|
|
|
|
static int
|
|
pipe_release(struct inode *inode, struct file *file)
|
|
{
|
|
struct pipe_inode_info *pipe = file->private_data;
|
|
|
|
__pipe_lock(pipe);
|
|
if (file->f_mode & FMODE_READ)
|
|
pipe->readers--;
|
|
if (file->f_mode & FMODE_WRITE)
|
|
pipe->writers--;
|
|
|
|
/* Was that the last reader or writer, but not the other side? */
|
|
if (!pipe->readers != !pipe->writers) {
|
|
wake_up_interruptible_all(&pipe->rd_wait);
|
|
wake_up_interruptible_all(&pipe->wr_wait);
|
|
kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
|
|
kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
|
|
}
|
|
__pipe_unlock(pipe);
|
|
|
|
put_pipe_info(inode, pipe);
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
pipe_fasync(int fd, struct file *filp, int on)
|
|
{
|
|
struct pipe_inode_info *pipe = filp->private_data;
|
|
int retval = 0;
|
|
|
|
__pipe_lock(pipe);
|
|
if (filp->f_mode & FMODE_READ)
|
|
retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
|
|
if ((filp->f_mode & FMODE_WRITE) && retval >= 0) {
|
|
retval = fasync_helper(fd, filp, on, &pipe->fasync_writers);
|
|
if (retval < 0 && (filp->f_mode & FMODE_READ))
|
|
/* this can happen only if on == T */
|
|
fasync_helper(-1, filp, 0, &pipe->fasync_readers);
|
|
}
|
|
__pipe_unlock(pipe);
|
|
return retval;
|
|
}
|
|
|
|
unsigned long account_pipe_buffers(struct user_struct *user,
|
|
unsigned long old, unsigned long new)
|
|
{
|
|
return atomic_long_add_return(new - old, &user->pipe_bufs);
|
|
}
|
|
|
|
bool too_many_pipe_buffers_soft(unsigned long user_bufs)
|
|
{
|
|
unsigned long soft_limit = READ_ONCE(pipe_user_pages_soft);
|
|
|
|
return soft_limit && user_bufs > soft_limit;
|
|
}
|
|
|
|
bool too_many_pipe_buffers_hard(unsigned long user_bufs)
|
|
{
|
|
unsigned long hard_limit = READ_ONCE(pipe_user_pages_hard);
|
|
|
|
return hard_limit && user_bufs > hard_limit;
|
|
}
|
|
|
|
bool pipe_is_unprivileged_user(void)
|
|
{
|
|
return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
|
|
}
|
|
|
|
struct pipe_inode_info *alloc_pipe_info(void)
|
|
{
|
|
struct pipe_inode_info *pipe;
|
|
unsigned long pipe_bufs = PIPE_DEF_BUFFERS;
|
|
struct user_struct *user = get_current_user();
|
|
unsigned long user_bufs;
|
|
unsigned int max_size = READ_ONCE(pipe_max_size);
|
|
|
|
pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL_ACCOUNT);
|
|
if (pipe == NULL)
|
|
goto out_free_uid;
|
|
|
|
if (pipe_bufs * PAGE_SIZE > max_size && !capable(CAP_SYS_RESOURCE))
|
|
pipe_bufs = max_size >> PAGE_SHIFT;
|
|
|
|
user_bufs = account_pipe_buffers(user, 0, pipe_bufs);
|
|
|
|
if (too_many_pipe_buffers_soft(user_bufs) && pipe_is_unprivileged_user()) {
|
|
user_bufs = account_pipe_buffers(user, pipe_bufs, PIPE_MIN_DEF_BUFFERS);
|
|
pipe_bufs = PIPE_MIN_DEF_BUFFERS;
|
|
}
|
|
|
|
if (too_many_pipe_buffers_hard(user_bufs) && pipe_is_unprivileged_user())
|
|
goto out_revert_acct;
|
|
|
|
pipe->bufs = kcalloc(pipe_bufs, sizeof(struct pipe_buffer),
|
|
GFP_KERNEL_ACCOUNT);
|
|
|
|
if (pipe->bufs) {
|
|
init_waitqueue_head(&pipe->rd_wait);
|
|
init_waitqueue_head(&pipe->wr_wait);
|
|
pipe->r_counter = pipe->w_counter = 1;
|
|
pipe->max_usage = pipe_bufs;
|
|
pipe->ring_size = pipe_bufs;
|
|
pipe->nr_accounted = pipe_bufs;
|
|
pipe->user = user;
|
|
mutex_init(&pipe->mutex);
|
|
return pipe;
|
|
}
|
|
|
|
out_revert_acct:
|
|
(void) account_pipe_buffers(user, pipe_bufs, 0);
|
|
kfree(pipe);
|
|
out_free_uid:
|
|
free_uid(user);
|
|
return NULL;
|
|
}
|
|
|
|
void free_pipe_info(struct pipe_inode_info *pipe)
|
|
{
|
|
int i;
|
|
|
|
#ifdef CONFIG_WATCH_QUEUE
|
|
if (pipe->watch_queue)
|
|
watch_queue_clear(pipe->watch_queue);
|
|
#endif
|
|
|
|
(void) account_pipe_buffers(pipe->user, pipe->nr_accounted, 0);
|
|
free_uid(pipe->user);
|
|
for (i = 0; i < pipe->ring_size; i++) {
|
|
struct pipe_buffer *buf = pipe->bufs + i;
|
|
if (buf->ops)
|
|
pipe_buf_release(pipe, buf);
|
|
}
|
|
#ifdef CONFIG_WATCH_QUEUE
|
|
if (pipe->watch_queue)
|
|
put_watch_queue(pipe->watch_queue);
|
|
#endif
|
|
if (pipe->tmp_page)
|
|
__free_page(pipe->tmp_page);
|
|
kfree(pipe->bufs);
|
|
kfree(pipe);
|
|
}
|
|
|
|
static struct vfsmount *pipe_mnt __read_mostly;
|
|
|
|
/*
|
|
* pipefs_dname() is called from d_path().
|
|
*/
|
|
static char *pipefs_dname(struct dentry *dentry, char *buffer, int buflen)
|
|
{
|
|
return dynamic_dname(dentry, buffer, buflen, "pipe:[%lu]",
|
|
d_inode(dentry)->i_ino);
|
|
}
|
|
|
|
static const struct dentry_operations pipefs_dentry_operations = {
|
|
.d_dname = pipefs_dname,
|
|
};
|
|
|
|
static struct inode * get_pipe_inode(void)
|
|
{
|
|
struct inode *inode = new_inode_pseudo(pipe_mnt->mnt_sb);
|
|
struct pipe_inode_info *pipe;
|
|
|
|
if (!inode)
|
|
goto fail_inode;
|
|
|
|
inode->i_ino = get_next_ino();
|
|
|
|
pipe = alloc_pipe_info();
|
|
if (!pipe)
|
|
goto fail_iput;
|
|
|
|
inode->i_pipe = pipe;
|
|
pipe->files = 2;
|
|
pipe->readers = pipe->writers = 1;
|
|
inode->i_fop = &pipefifo_fops;
|
|
|
|
/*
|
|
* Mark the inode dirty from the very beginning,
|
|
* that way it will never be moved to the dirty
|
|
* list because "mark_inode_dirty()" will think
|
|
* that it already _is_ on the dirty list.
|
|
*/
|
|
inode->i_state = I_DIRTY;
|
|
inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
|
|
inode->i_uid = current_fsuid();
|
|
inode->i_gid = current_fsgid();
|
|
inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
|
|
|
|
return inode;
|
|
|
|
fail_iput:
|
|
iput(inode);
|
|
|
|
fail_inode:
|
|
return NULL;
|
|
}
|
|
|
|
int create_pipe_files(struct file **res, int flags)
|
|
{
|
|
struct inode *inode = get_pipe_inode();
|
|
struct file *f;
|
|
int error;
|
|
|
|
if (!inode)
|
|
return -ENFILE;
|
|
|
|
if (flags & O_NOTIFICATION_PIPE) {
|
|
error = watch_queue_init(inode->i_pipe);
|
|
if (error) {
|
|
free_pipe_info(inode->i_pipe);
|
|
iput(inode);
|
|
return error;
|
|
}
|
|
}
|
|
|
|
f = alloc_file_pseudo(inode, pipe_mnt, "",
|
|
O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT)),
|
|
&pipefifo_fops);
|
|
if (IS_ERR(f)) {
|
|
free_pipe_info(inode->i_pipe);
|
|
iput(inode);
|
|
return PTR_ERR(f);
|
|
}
|
|
|
|
f->private_data = inode->i_pipe;
|
|
|
|
res[0] = alloc_file_clone(f, O_RDONLY | (flags & O_NONBLOCK),
|
|
&pipefifo_fops);
|
|
if (IS_ERR(res[0])) {
|
|
put_pipe_info(inode, inode->i_pipe);
|
|
fput(f);
|
|
return PTR_ERR(res[0]);
|
|
}
|
|
res[0]->private_data = inode->i_pipe;
|
|
res[1] = f;
|
|
stream_open(inode, res[0]);
|
|
stream_open(inode, res[1]);
|
|
return 0;
|
|
}
|
|
|
|
static int __do_pipe_flags(int *fd, struct file **files, int flags)
|
|
{
|
|
int error;
|
|
int fdw, fdr;
|
|
|
|
if (flags & ~(O_CLOEXEC | O_NONBLOCK | O_DIRECT | O_NOTIFICATION_PIPE))
|
|
return -EINVAL;
|
|
|
|
error = create_pipe_files(files, flags);
|
|
if (error)
|
|
return error;
|
|
|
|
error = get_unused_fd_flags(flags);
|
|
if (error < 0)
|
|
goto err_read_pipe;
|
|
fdr = error;
|
|
|
|
error = get_unused_fd_flags(flags);
|
|
if (error < 0)
|
|
goto err_fdr;
|
|
fdw = error;
|
|
|
|
audit_fd_pair(fdr, fdw);
|
|
fd[0] = fdr;
|
|
fd[1] = fdw;
|
|
return 0;
|
|
|
|
err_fdr:
|
|
put_unused_fd(fdr);
|
|
err_read_pipe:
|
|
fput(files[0]);
|
|
fput(files[1]);
|
|
return error;
|
|
}
|
|
|
|
int do_pipe_flags(int *fd, int flags)
|
|
{
|
|
struct file *files[2];
|
|
int error = __do_pipe_flags(fd, files, flags);
|
|
if (!error) {
|
|
fd_install(fd[0], files[0]);
|
|
fd_install(fd[1], files[1]);
|
|
}
|
|
return error;
|
|
}
|
|
|
|
/*
|
|
* sys_pipe() is the normal C calling standard for creating
|
|
* a pipe. It's not the way Unix traditionally does this, though.
|
|
*/
|
|
static int do_pipe2(int __user *fildes, int flags)
|
|
{
|
|
struct file *files[2];
|
|
int fd[2];
|
|
int error;
|
|
|
|
error = __do_pipe_flags(fd, files, flags);
|
|
if (!error) {
|
|
if (unlikely(copy_to_user(fildes, fd, sizeof(fd)))) {
|
|
fput(files[0]);
|
|
fput(files[1]);
|
|
put_unused_fd(fd[0]);
|
|
put_unused_fd(fd[1]);
|
|
error = -EFAULT;
|
|
} else {
|
|
fd_install(fd[0], files[0]);
|
|
fd_install(fd[1], files[1]);
|
|
}
|
|
}
|
|
return error;
|
|
}
|
|
|
|
SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags)
|
|
{
|
|
return do_pipe2(fildes, flags);
|
|
}
|
|
|
|
SYSCALL_DEFINE1(pipe, int __user *, fildes)
|
|
{
|
|
return do_pipe2(fildes, 0);
|
|
}
|
|
|
|
/*
|
|
* This is the stupid "wait for pipe to be readable or writable"
|
|
* model.
|
|
*
|
|
* See pipe_read/write() for the proper kind of exclusive wait,
|
|
* but that requires that we wake up any other readers/writers
|
|
* if we then do not end up reading everything (ie the whole
|
|
* "wake_next_reader/writer" logic in pipe_read/write()).
|
|
*/
|
|
void pipe_wait_readable(struct pipe_inode_info *pipe)
|
|
{
|
|
pipe_unlock(pipe);
|
|
wait_event_interruptible(pipe->rd_wait, pipe_readable(pipe));
|
|
pipe_lock(pipe);
|
|
}
|
|
|
|
void pipe_wait_writable(struct pipe_inode_info *pipe)
|
|
{
|
|
pipe_unlock(pipe);
|
|
wait_event_interruptible(pipe->wr_wait, pipe_writable(pipe));
|
|
pipe_lock(pipe);
|
|
}
|
|
|
|
/*
|
|
* This depends on both the wait (here) and the wakeup (wake_up_partner)
|
|
* holding the pipe lock, so "*cnt" is stable and we know a wakeup cannot
|
|
* race with the count check and waitqueue prep.
|
|
*
|
|
* Normally in order to avoid races, you'd do the prepare_to_wait() first,
|
|
* then check the condition you're waiting for, and only then sleep. But
|
|
* because of the pipe lock, we can check the condition before being on
|
|
* the wait queue.
|
|
*
|
|
* We use the 'rd_wait' waitqueue for pipe partner waiting.
|
|
*/
|
|
static int wait_for_partner(struct pipe_inode_info *pipe, unsigned int *cnt)
|
|
{
|
|
DEFINE_WAIT(rdwait);
|
|
int cur = *cnt;
|
|
|
|
while (cur == *cnt) {
|
|
prepare_to_wait(&pipe->rd_wait, &rdwait, TASK_INTERRUPTIBLE);
|
|
pipe_unlock(pipe);
|
|
schedule();
|
|
finish_wait(&pipe->rd_wait, &rdwait);
|
|
pipe_lock(pipe);
|
|
if (signal_pending(current))
|
|
break;
|
|
}
|
|
return cur == *cnt ? -ERESTARTSYS : 0;
|
|
}
|
|
|
|
static void wake_up_partner(struct pipe_inode_info *pipe)
|
|
{
|
|
wake_up_interruptible_all(&pipe->rd_wait);
|
|
}
|
|
|
|
static int fifo_open(struct inode *inode, struct file *filp)
|
|
{
|
|
struct pipe_inode_info *pipe;
|
|
bool is_pipe = inode->i_sb->s_magic == PIPEFS_MAGIC;
|
|
int ret;
|
|
|
|
filp->f_version = 0;
|
|
|
|
spin_lock(&inode->i_lock);
|
|
if (inode->i_pipe) {
|
|
pipe = inode->i_pipe;
|
|
pipe->files++;
|
|
spin_unlock(&inode->i_lock);
|
|
} else {
|
|
spin_unlock(&inode->i_lock);
|
|
pipe = alloc_pipe_info();
|
|
if (!pipe)
|
|
return -ENOMEM;
|
|
pipe->files = 1;
|
|
spin_lock(&inode->i_lock);
|
|
if (unlikely(inode->i_pipe)) {
|
|
inode->i_pipe->files++;
|
|
spin_unlock(&inode->i_lock);
|
|
free_pipe_info(pipe);
|
|
pipe = inode->i_pipe;
|
|
} else {
|
|
inode->i_pipe = pipe;
|
|
spin_unlock(&inode->i_lock);
|
|
}
|
|
}
|
|
filp->private_data = pipe;
|
|
/* OK, we have a pipe and it's pinned down */
|
|
|
|
__pipe_lock(pipe);
|
|
|
|
/* We can only do regular read/write on fifos */
|
|
stream_open(inode, filp);
|
|
|
|
switch (filp->f_mode & (FMODE_READ | FMODE_WRITE)) {
|
|
case FMODE_READ:
|
|
/*
|
|
* O_RDONLY
|
|
* POSIX.1 says that O_NONBLOCK means return with the FIFO
|
|
* opened, even when there is no process writing the FIFO.
|
|
*/
|
|
pipe->r_counter++;
|
|
if (pipe->readers++ == 0)
|
|
wake_up_partner(pipe);
|
|
|
|
if (!is_pipe && !pipe->writers) {
|
|
if ((filp->f_flags & O_NONBLOCK)) {
|
|
/* suppress EPOLLHUP until we have
|
|
* seen a writer */
|
|
filp->f_version = pipe->w_counter;
|
|
} else {
|
|
if (wait_for_partner(pipe, &pipe->w_counter))
|
|
goto err_rd;
|
|
}
|
|
}
|
|
break;
|
|
|
|
case FMODE_WRITE:
|
|
/*
|
|
* O_WRONLY
|
|
* POSIX.1 says that O_NONBLOCK means return -1 with
|
|
* errno=ENXIO when there is no process reading the FIFO.
|
|
*/
|
|
ret = -ENXIO;
|
|
if (!is_pipe && (filp->f_flags & O_NONBLOCK) && !pipe->readers)
|
|
goto err;
|
|
|
|
pipe->w_counter++;
|
|
if (!pipe->writers++)
|
|
wake_up_partner(pipe);
|
|
|
|
if (!is_pipe && !pipe->readers) {
|
|
if (wait_for_partner(pipe, &pipe->r_counter))
|
|
goto err_wr;
|
|
}
|
|
break;
|
|
|
|
case FMODE_READ | FMODE_WRITE:
|
|
/*
|
|
* O_RDWR
|
|
* POSIX.1 leaves this case "undefined" when O_NONBLOCK is set.
|
|
* This implementation will NEVER block on a O_RDWR open, since
|
|
* the process can at least talk to itself.
|
|
*/
|
|
|
|
pipe->readers++;
|
|
pipe->writers++;
|
|
pipe->r_counter++;
|
|
pipe->w_counter++;
|
|
if (pipe->readers == 1 || pipe->writers == 1)
|
|
wake_up_partner(pipe);
|
|
break;
|
|
|
|
default:
|
|
ret = -EINVAL;
|
|
goto err;
|
|
}
|
|
|
|
/* Ok! */
|
|
__pipe_unlock(pipe);
|
|
return 0;
|
|
|
|
err_rd:
|
|
if (!--pipe->readers)
|
|
wake_up_interruptible(&pipe->wr_wait);
|
|
ret = -ERESTARTSYS;
|
|
goto err;
|
|
|
|
err_wr:
|
|
if (!--pipe->writers)
|
|
wake_up_interruptible_all(&pipe->rd_wait);
|
|
ret = -ERESTARTSYS;
|
|
goto err;
|
|
|
|
err:
|
|
__pipe_unlock(pipe);
|
|
|
|
put_pipe_info(inode, pipe);
|
|
return ret;
|
|
}
|
|
|
|
const struct file_operations pipefifo_fops = {
|
|
.open = fifo_open,
|
|
.llseek = no_llseek,
|
|
.read_iter = pipe_read,
|
|
.write_iter = pipe_write,
|
|
.poll = pipe_poll,
|
|
.unlocked_ioctl = pipe_ioctl,
|
|
.release = pipe_release,
|
|
.fasync = pipe_fasync,
|
|
.splice_write = iter_file_splice_write,
|
|
};
|
|
|
|
/*
|
|
* Currently we rely on the pipe array holding a power-of-2 number
|
|
* of pages. Returns 0 on error.
|
|
*/
|
|
unsigned int round_pipe_size(unsigned long size)
|
|
{
|
|
if (size > (1U << 31))
|
|
return 0;
|
|
|
|
/* Minimum pipe size, as required by POSIX */
|
|
if (size < PAGE_SIZE)
|
|
return PAGE_SIZE;
|
|
|
|
return roundup_pow_of_two(size);
|
|
}
|
|
|
|
/*
|
|
* Resize the pipe ring to a number of slots.
|
|
*
|
|
* Note the pipe can be reduced in capacity, but only if the current
|
|
* occupancy doesn't exceed nr_slots; if it does, EBUSY will be
|
|
* returned instead.
|
|
*/
|
|
int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
|
|
{
|
|
struct pipe_buffer *bufs;
|
|
unsigned int head, tail, mask, n;
|
|
|
|
bufs = kcalloc(nr_slots, sizeof(*bufs),
|
|
GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
|
|
if (unlikely(!bufs))
|
|
return -ENOMEM;
|
|
|
|
spin_lock_irq(&pipe->rd_wait.lock);
|
|
mask = pipe->ring_size - 1;
|
|
head = pipe->head;
|
|
tail = pipe->tail;
|
|
|
|
n = pipe_occupancy(head, tail);
|
|
if (nr_slots < n) {
|
|
spin_unlock_irq(&pipe->rd_wait.lock);
|
|
kfree(bufs);
|
|
return -EBUSY;
|
|
}
|
|
|
|
/*
|
|
* The pipe array wraps around, so just start the new one at zero
|
|
* and adjust the indices.
|
|
*/
|
|
if (n > 0) {
|
|
unsigned int h = head & mask;
|
|
unsigned int t = tail & mask;
|
|
if (h > t) {
|
|
memcpy(bufs, pipe->bufs + t,
|
|
n * sizeof(struct pipe_buffer));
|
|
} else {
|
|
unsigned int tsize = pipe->ring_size - t;
|
|
if (h > 0)
|
|
memcpy(bufs + tsize, pipe->bufs,
|
|
h * sizeof(struct pipe_buffer));
|
|
memcpy(bufs, pipe->bufs + t,
|
|
tsize * sizeof(struct pipe_buffer));
|
|
}
|
|
}
|
|
|
|
head = n;
|
|
tail = 0;
|
|
|
|
kfree(pipe->bufs);
|
|
pipe->bufs = bufs;
|
|
pipe->ring_size = nr_slots;
|
|
if (pipe->max_usage > nr_slots)
|
|
pipe->max_usage = nr_slots;
|
|
pipe->tail = tail;
|
|
pipe->head = head;
|
|
|
|
if (!pipe_has_watch_queue(pipe)) {
|
|
pipe->max_usage = nr_slots;
|
|
pipe->nr_accounted = nr_slots;
|
|
}
|
|
|
|
spin_unlock_irq(&pipe->rd_wait.lock);
|
|
|
|
/* This might have made more room for writers */
|
|
wake_up_interruptible(&pipe->wr_wait);
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Allocate a new array of pipe buffers and copy the info over. Returns the
|
|
* pipe size if successful, or return -ERROR on error.
|
|
*/
|
|
static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
|
|
{
|
|
unsigned long user_bufs;
|
|
unsigned int nr_slots, size;
|
|
long ret = 0;
|
|
|
|
if (pipe_has_watch_queue(pipe))
|
|
return -EBUSY;
|
|
|
|
size = round_pipe_size(arg);
|
|
nr_slots = size >> PAGE_SHIFT;
|
|
|
|
if (!nr_slots)
|
|
return -EINVAL;
|
|
|
|
/*
|
|
* If trying to increase the pipe capacity, check that an
|
|
* unprivileged user is not trying to exceed various limits
|
|
* (soft limit check here, hard limit check just below).
|
|
* Decreasing the pipe capacity is always permitted, even
|
|
* if the user is currently over a limit.
|
|
*/
|
|
if (nr_slots > pipe->max_usage &&
|
|
size > pipe_max_size && !capable(CAP_SYS_RESOURCE))
|
|
return -EPERM;
|
|
|
|
user_bufs = account_pipe_buffers(pipe->user, pipe->nr_accounted, nr_slots);
|
|
|
|
if (nr_slots > pipe->max_usage &&
|
|
(too_many_pipe_buffers_hard(user_bufs) ||
|
|
too_many_pipe_buffers_soft(user_bufs)) &&
|
|
pipe_is_unprivileged_user()) {
|
|
ret = -EPERM;
|
|
goto out_revert_acct;
|
|
}
|
|
|
|
ret = pipe_resize_ring(pipe, nr_slots);
|
|
if (ret < 0)
|
|
goto out_revert_acct;
|
|
|
|
return pipe->max_usage * PAGE_SIZE;
|
|
|
|
out_revert_acct:
|
|
(void) account_pipe_buffers(pipe->user, nr_slots, pipe->nr_accounted);
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same
|
|
* location, so checking ->i_pipe is not enough to verify that this is a
|
|
* pipe.
|
|
*/
|
|
struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice)
|
|
{
|
|
struct pipe_inode_info *pipe = file->private_data;
|
|
|
|
if (file->f_op != &pipefifo_fops || !pipe)
|
|
return NULL;
|
|
if (for_splice && pipe_has_watch_queue(pipe))
|
|
return NULL;
|
|
return pipe;
|
|
}
|
|
|
|
long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
|
|
{
|
|
struct pipe_inode_info *pipe;
|
|
long ret;
|
|
|
|
pipe = get_pipe_info(file, false);
|
|
if (!pipe)
|
|
return -EBADF;
|
|
|
|
__pipe_lock(pipe);
|
|
|
|
switch (cmd) {
|
|
case F_SETPIPE_SZ:
|
|
ret = pipe_set_size(pipe, arg);
|
|
break;
|
|
case F_GETPIPE_SZ:
|
|
ret = pipe->max_usage * PAGE_SIZE;
|
|
break;
|
|
default:
|
|
ret = -EINVAL;
|
|
break;
|
|
}
|
|
|
|
__pipe_unlock(pipe);
|
|
return ret;
|
|
}
|
|
|
|
static const struct super_operations pipefs_ops = {
|
|
.destroy_inode = free_inode_nonrcu,
|
|
.statfs = simple_statfs,
|
|
};
|
|
|
|
/*
|
|
* pipefs should _never_ be mounted by userland - too much of security hassle,
|
|
* no real gain from having the whole whorehouse mounted. So we don't need
|
|
* any operations on the root directory. However, we need a non-trivial
|
|
* d_name - pipe: will go nicely and kill the special-casing in procfs.
|
|
*/
|
|
|
|
static int pipefs_init_fs_context(struct fs_context *fc)
|
|
{
|
|
struct pseudo_fs_context *ctx = init_pseudo(fc, PIPEFS_MAGIC);
|
|
if (!ctx)
|
|
return -ENOMEM;
|
|
ctx->ops = &pipefs_ops;
|
|
ctx->dops = &pipefs_dentry_operations;
|
|
return 0;
|
|
}
|
|
|
|
static struct file_system_type pipe_fs_type = {
|
|
.name = "pipefs",
|
|
.init_fs_context = pipefs_init_fs_context,
|
|
.kill_sb = kill_anon_super,
|
|
};
|
|
|
|
static int __init init_pipe_fs(void)
|
|
{
|
|
int err = register_filesystem(&pipe_fs_type);
|
|
|
|
if (!err) {
|
|
pipe_mnt = kern_mount(&pipe_fs_type);
|
|
if (IS_ERR(pipe_mnt)) {
|
|
err = PTR_ERR(pipe_mnt);
|
|
unregister_filesystem(&pipe_fs_type);
|
|
}
|
|
}
|
|
return err;
|
|
}
|
|
|
|
fs_initcall(init_pipe_fs);
|