-----BEGIN PGP SIGNATURE-----
iQIzBAABCAAdFiEEZH8oZUiU471FcZm+ONu9yGCSaT4FAl5Cn6wACgkQONu9yGCS
aT789BAAkpzYCCHEL2aqDpnZQdu1kVua2nywEJCY0WqSM1lWLeU1Lk9EvS6uu99B
nHnIgoAGXR1zQy9rlhpKKt62LvCCM94QWlQRDYYeJxbFPn1ogT2/0vmwN7rqNz4M
Jdszd6gfNKtB3zpZZLJ0KXG8q6YRp5kXOHEzOXNjcVsfKRuNTWWIBV0dMmkCzduQ
Y5e62+d1FnnRFj28R7wjJfXiZSRnIGcMHohcQGXsWZsh2rktYOYsL6G37I9lCBwx
RO7/+qVOT+BImqB5fIxB98JOzOlo6uEVqPgXjMHAAZUzzA4KpOkDBn55m5hA9axf
EG67Ft4vZJc6Q3FTtHdSZZ/x6TBAJ2DUzatpKhCTDB3vlWJ6a+CsTFq3dXj4+bFr
hFuyi0u91VeudmWR8IH5Er8QaNaOq8m72XAwK22fZptZz0ZHl+Bf1QZyEY0L0P2Q
DpT/kmZVgSSDusvMtJOwI8Vr4Ibb8o46kFTQN+PCSs0pbPchEJmInHz0mIypK89N
4YIjcDZZu3WUS13pEsgNAi2FEpwZdn32LYxZg8xTYBtovzuvT1pJUEppiVSMXgKS
8vF6oCAd7pX9Fal5fYklA7gyQENnHBFI+LE+bHwMR/qwreH/3wBTLnhRPsGOxyZI
oj57YDdxZCAwEfXGoWA3Le+60lj6bGuRfmCc4VkodaOxMLb1WrE=
=rUtE
-----END PGP SIGNATURE-----
Merge 5.4.19 into android-5.4
Changes in 5.4.19
sparc32: fix struct ipc64_perm type definition
bnxt_en: Move devlink_register before registering netdev
cls_rsvp: fix rsvp_policy
gtp: use __GFP_NOWARN to avoid memalloc warning
l2tp: Allow duplicate session creation with UDP
net: hsr: fix possible NULL deref in hsr_handle_frame()
net_sched: fix an OOB access in cls_tcindex
net: stmmac: Delete txtimer in suspend()
bnxt_en: Fix TC queue mapping.
rxrpc: Fix use-after-free in rxrpc_put_local()
rxrpc: Fix insufficient receive notification generation
rxrpc: Fix missing active use pinning of rxrpc_local object
rxrpc: Fix NULL pointer deref due to call->conn being cleared on disconnect
tcp: clear tp->total_retrans in tcp_disconnect()
tcp: clear tp->delivered in tcp_disconnect()
tcp: clear tp->data_segs{in|out} in tcp_disconnect()
tcp: clear tp->segs_{in|out} in tcp_disconnect()
ionic: fix rxq comp packet type mask
MAINTAINERS: correct entries for ISDN/mISDN section
netdevsim: fix stack-out-of-bounds in nsim_dev_debugfs_init()
bnxt_en: Fix logic that disables Bus Master during firmware reset.
media: uvcvideo: Avoid cyclic entity chains due to malformed USB descriptors
mfd: dln2: More sanity checking for endpoints
netfilter: ipset: fix suspicious RCU usage in find_set_and_id
ipc/msg.c: consolidate all xxxctl_down() functions
tracing/kprobes: Have uname use __get_str() in print_fmt
tracing: Fix sched switch start/stop refcount racy updates
rcu: Use *_ONCE() to protect lockless ->expmask accesses
rcu: Avoid data-race in rcu_gp_fqs_check_wake()
srcu: Apply *_ONCE() to ->srcu_last_gp_end
rcu: Use READ_ONCE() for ->expmask in rcu_read_unlock_special()
nvmet: Fix error print message at nvmet_install_queue function
nvmet: Fix controller use after free
Bluetooth: btusb: fix memory leak on fw
Bluetooth: btusb: Disable runtime suspend on Realtek devices
brcmfmac: Fix memory leak in brcmf_usbdev_qinit
usb: dwc3: gadget: Check END_TRANSFER completion
usb: dwc3: gadget: Delay starting transfer
usb: typec: tcpci: mask event interrupts when remove driver
objtool: Silence build output
usb: gadget: f_fs: set req->num_sgs as 0 for non-sg transfer
usb: gadget: legacy: set max_speed to super-speed
usb: gadget: f_ncm: Use atomic_t to track in-flight request
usb: gadget: f_ecm: Use atomic_t to track in-flight request
ALSA: usb-audio: Fix endianess in descriptor validation
ALSA: usb-audio: Annotate endianess in Scarlett gen2 quirk
ALSA: dummy: Fix PCM format loop in proc output
memcg: fix a crash in wb_workfn when a device disappears
mm/sparse.c: reset section's mem_map when fully deactivated
mmc: sdhci-pci: Make function amd_sdhci_reset static
utimes: Clamp the timestamps in notify_change()
mm/memory_hotplug: fix remove_memory() lockdep splat
mm: thp: don't need care deferred split queue in memcg charge move path
mm: move_pages: report the number of non-attempted pages
media/v4l2-core: set pages dirty upon releasing DMA buffers
media: v4l2-core: compat: ignore native command codes
media: v4l2-rect.h: fix v4l2_rect_map_inside() top/left adjustments
lib/test_kasan.c: fix memory leak in kmalloc_oob_krealloc_more()
irqdomain: Fix a memory leak in irq_domain_push_irq()
x86/cpu: Update cached HLE state on write to TSX_CTRL_CPUID_CLEAR
platform/x86: intel_scu_ipc: Fix interrupt support
ALSA: hda: Apply aligned MMIO access only conditionally
ALSA: hda: Add Clevo W65_67SB the power_save blacklist
ALSA: hda: Add JasperLake PCI ID and codec vid
arm64: acpi: fix DAIF manipulation with pNMI
KVM: arm64: Correct PSTATE on exception entry
KVM: arm/arm64: Correct CPSR on exception entry
KVM: arm/arm64: Correct AArch32 SPSR on exception entry
KVM: arm64: Only sign-extend MMIO up to register width
MIPS: syscalls: fix indentation of the 'SYSNR' message
MIPS: fix indentation of the 'RELOCS' message
MIPS: boot: fix typo in 'vmlinux.lzma.its' target
s390/mm: fix dynamic pagetable upgrade for hugetlbfs
powerpc/mmu_gather: enable RCU_TABLE_FREE even for !SMP case
powerpc/ptdump: Fix W+X verification
powerpc/xmon: don't access ASDR in VMs
powerpc/pseries: Advance pfn if section is not present in lmb_is_removable()
powerpc/32s: Fix bad_kuap_fault()
powerpc/32s: Fix CPU wake-up from sleep mode
tracing: Fix now invalid var_ref_vals assumption in trace action
PCI: tegra: Fix return value check of pm_runtime_get_sync()
PCI: keystone: Fix outbound region mapping
PCI: keystone: Fix link training retries initiation
PCI: keystone: Fix error handling when "num-viewport" DT property is not populated
mmc: spi: Toggle SPI polarity, do not hardcode it
ACPI: video: Do not export a non working backlight interface on MSI MS-7721 boards
ACPI / battery: Deal with design or full capacity being reported as -1
ACPI / battery: Use design-cap for capacity calculations if full-cap is not available
ACPI / battery: Deal better with neither design nor full capacity not being reported
alarmtimer: Unregister wakeup source when module get fails
fscrypt: don't print name of busy file when removing key
ubifs: don't trigger assertion on invalid no-key filename
ubifs: Fix wrong memory allocation
ubifs: Fix FS_IOC_SETFLAGS unexpectedly clearing encrypt flag
ubifs: Fix deadlock in concurrent bulk-read and writepage
mmc: sdhci-of-at91: fix memleak on clk_get failure
ASoC: SOF: core: free trace on errors
hv_balloon: Balloon up according to request page number
mfd: axp20x: Mark AXP20X_VBUS_IPSOUT_MGMT as volatile
nvmem: core: fix memory abort in cleanup path
crypto: api - Check spawn->alg under lock in crypto_drop_spawn
crypto: ccree - fix backlog memory leak
crypto: ccree - fix AEAD decrypt auth fail
crypto: ccree - fix pm wrongful error reporting
crypto: ccree - fix FDE descriptor sequence
crypto: ccree - fix PM race condition
padata: Remove broken queue flushing
fs: allow deduplication of eof block into the end of the destination file
scripts/find-unused-docs: Fix massive false positives
erofs: fix out-of-bound read for shifted uncompressed block
scsi: megaraid_sas: Do not initiate OCR if controller is not in ready state
scsi: qla2xxx: Fix mtcp dump collection failure
cpupower: Revert library ABI changes from commit ae2917093f
power: supply: axp20x_ac_power: Fix reporting online status
power: supply: ltc2941-battery-gauge: fix use-after-free
ovl: fix wrong WARN_ON() in ovl_cache_update_ino()
ovl: fix lseek overflow on 32bit
f2fs: choose hardlimit when softlimit is larger than hardlimit in f2fs_statfs_project()
f2fs: fix miscounted block limit in f2fs_statfs_project()
f2fs: code cleanup for f2fs_statfs_project()
f2fs: fix dcache lookup of !casefolded directories
f2fs: fix race conditions in ->d_compare() and ->d_hash()
PM: core: Fix handling of devices deleted during system-wide resume
cpufreq: Avoid creating excessively large stack frames
of: Add OF_DMA_DEFAULT_COHERENT & select it on powerpc
ARM: dma-api: fix max_pfn off-by-one error in __dma_supported()
dm zoned: support zone sizes smaller than 128MiB
dm space map common: fix to ensure new block isn't already in use
dm writecache: fix incorrect flush sequence when doing SSD mode commit
dm crypt: fix GFP flags passed to skcipher_request_alloc()
dm crypt: fix benbi IV constructor crash if used in authenticated mode
dm thin metadata: use pool locking at end of dm_pool_metadata_close
dm: fix potential for q->make_request_fn NULL pointer
scsi: qla2xxx: Fix stuck login session using prli_pend_timer
ASoC: SOF: Introduce state machine for FW boot
ASoC: SOF: core: release resources on errors in probe_continue
tracing: Annotate ftrace_graph_hash pointer with __rcu
tracing: Annotate ftrace_graph_notrace_hash pointer with __rcu
ftrace: Add comment to why rcu_dereference_sched() is open coded
ftrace: Protect ftrace_graph_hash with ftrace_sync
crypto: pcrypt - Avoid deadlock by using per-instance padata queues
btrfs: fix improper setting of scanned for range cyclic write cache pages
btrfs: Handle another split brain scenario with metadata uuid feature
riscv, bpf: Fix broken BPF tail calls
selftests/bpf: Fix perf_buffer test on systems w/ offline CPUs
bpf, devmap: Pass lockdep expression to RCU lists
libbpf: Fix realloc usage in bpf_core_find_cands
tc-testing: fix eBPF tests failure on linux fresh clones
samples/bpf: Don't try to remove user's homedir on clean
samples/bpf: Xdp_redirect_cpu fix missing tracepoint attach
selftests/bpf: Fix test_attach_probe
selftests/bpf: Skip perf hw events test if the setup disabled it
selftests: bpf: Use a temporary file in test_sockmap
selftests: bpf: Ignore FIN packets for reuseport tests
crypto: api - fix unexpectedly getting generic implementation
crypto: hisilicon - Use the offset fields in sqe to avoid need to split scatterlists
crypto: ccp - set max RSA modulus size for v3 platform devices as well
crypto: arm64/ghash-neon - bump priority to 150
crypto: pcrypt - Do not clear MAY_SLEEP flag in original request
crypto: atmel-aes - Fix counter overflow in CTR mode
crypto: api - Fix race condition in crypto_spawn_alg
crypto: picoxcell - adjust the position of tasklet_init and fix missed tasklet_kill
powerpc/futex: Fix incorrect user access blocking
scsi: qla2xxx: Fix unbound NVME response length
NFS: Fix memory leaks and corruption in readdir
NFS: Directory page cache pages need to be locked when read
nfsd: fix filecache lookup
jbd2_seq_info_next should increase position index
ext4: fix deadlock allocating crypto bounce page from mempool
ext4: fix race conditions in ->d_compare() and ->d_hash()
Btrfs: fix missing hole after hole punching and fsync when using NO_HOLES
Btrfs: make deduplication with range including the last block work
Btrfs: fix infinite loop during fsync after rename operations
btrfs: set trans->drity in btrfs_commit_transaction
btrfs: drop log root for dropped roots
Btrfs: fix race between adding and putting tree mod seq elements and nodes
btrfs: flush write bio if we loop in extent_write_cache_pages
btrfs: Correctly handle empty trees in find_first_clear_extent_bit
ARM: tegra: Enable PLLP bypass during Tegra124 LP1
iwlwifi: don't throw error when trying to remove IGTK
mwifiex: fix unbalanced locking in mwifiex_process_country_ie()
sunrpc: expiry_time should be seconds not timeval
gfs2: fix gfs2_find_jhead that returns uninitialized jhead with seq 0
gfs2: move setting current->backing_dev_info
gfs2: fix O_SYNC write handling
drm: atmel-hlcdc: use double rate for pixel clock only if supported
drm: atmel-hlcdc: enable clock before configuring timing engine
drm: atmel-hlcdc: prefer a lower pixel-clock than requested
drm/rect: Avoid division by zero
media: iguanair: fix endpoint sanity check
media: rc: ensure lirc is initialized before registering input device
tools/kvm_stat: Fix kvm_exit filter name
xen/balloon: Support xend-based toolstack take two
watchdog: fix UAF in reboot notifier handling in watchdog core code
bcache: add readahead cache policy options via sysfs interface
eventfd: track eventfd_signal() recursion depth
aio: prevent potential eventfd recursion on poll
KVM: x86: Refactor picdev_write() to prevent Spectre-v1/L1TF attacks
KVM: x86: Refactor prefix decoding to prevent Spectre-v1/L1TF attacks
KVM: x86: Protect pmu_intel.c from Spectre-v1/L1TF attacks
KVM: x86: Protect DR-based index computations from Spectre-v1/L1TF attacks
KVM: x86: Protect kvm_lapic_reg_write() from Spectre-v1/L1TF attacks
KVM: x86: Protect kvm_hv_msr_[get|set]_crash_data() from Spectre-v1/L1TF attacks
KVM: x86: Protect ioapic_write_indirect() from Spectre-v1/L1TF attacks
KVM: x86: Protect MSR-based index computations in pmu.h from Spectre-v1/L1TF attacks
KVM: x86: Protect ioapic_read_indirect() from Spectre-v1/L1TF attacks
KVM: x86: Protect MSR-based index computations from Spectre-v1/L1TF attacks in x86.c
KVM: x86: Protect x86_decode_insn from Spectre-v1/L1TF attacks
KVM: x86: Protect MSR-based index computations in fixed_msr_to_seg_unit() from Spectre-v1/L1TF attacks
KVM: x86: Fix potential put_fpu() w/o load_fpu() on MPX platform
KVM: PPC: Book3S HV: Uninit vCPU if vcore creation fails
KVM: PPC: Book3S PR: Free shared page if mmu initialization fails
kvm/svm: PKU not currently supported
x86/kvm: Be careful not to clear KVM_VCPU_FLUSH_TLB bit
x86/kvm: Introduce kvm_(un)map_gfn()
x86/KVM: Make sure KVM_VCPU_FLUSH_TLB flag is not missed
x86/kvm: Cache gfn to pfn translation
x86/KVM: Clean up host's steal time structure
KVM: VMX: Add non-canonical check on writes to RTIT address MSRs
KVM: x86: Don't let userspace set host-reserved cr4 bits
KVM: x86: Free wbinvd_dirty_mask if vCPU creation fails
KVM: x86: Handle TIF_NEED_FPU_LOAD in kvm_{load,put}_guest_fpu()
KVM: x86: Ensure guest's FPU state is loaded when accessing for emulation
KVM: x86: Revert "KVM: X86: Fix fpu state crash in kvm guest"
KVM: s390: do not clobber registers during guest reset/store status
ocfs2: fix oops when writing cloned file
mm/page_alloc.c: fix uninitialized memmaps on a partially populated last section
arm64: dts: qcom: qcs404-evb: Set vdd_apc regulator in high power mode
mm/mmu_gather: invalidate TLB correctly on batch allocation failure and flush
clk: tegra: Mark fuse clock as critical
drm/amd/dm/mst: Ignore payload update failures
virtio-balloon: initialize all vq callbacks
virtio-pci: check name when counting MSI-X vectors
fix up iter on short count in fuse_direct_io()
broken ping to ipv6 linklocal addresses on debian buster
percpu: Separate decrypted varaibles anytime encryption can be enabled
ASoC: meson: axg-fifo: fix fifo threshold setup
scsi: qla2xxx: Fix the endianness of the qla82xx_get_fw_size() return type
scsi: csiostor: Adjust indentation in csio_device_reset
scsi: qla4xxx: Adjust indentation in qla4xxx_mem_free
scsi: ufs: Recheck bkops level if bkops is disabled
mtd: spi-nor: Split mt25qu512a (n25q512a) entry into two
phy: qualcomm: Adjust indentation in read_poll_timeout
ext2: Adjust indentation in ext2_fill_super
powerpc/44x: Adjust indentation in ibm4xx_denali_fixup_memsize
drm: msm: mdp4: Adjust indentation in mdp4_dsi_encoder_enable
NFC: pn544: Adjust indentation in pn544_hci_check_presence
ppp: Adjust indentation into ppp_async_input
net: smc911x: Adjust indentation in smc911x_phy_configure
net: tulip: Adjust indentation in {dmfe, uli526x}_init_module
IB/mlx5: Fix outstanding_pi index for GSI qps
IB/core: Fix ODP get user pages flow
nfsd: fix delay timer on 32-bit architectures
nfsd: fix jiffies/time_t mixup in LRU list
nfsd: Return the correct number of bytes written to the file
virtio-balloon: Fix memory leak when unloading while hinting is in progress
virtio_balloon: Fix memory leaks on errors in virtballoon_probe()
ubi: fastmap: Fix inverted logic in seen selfcheck
ubi: Fix an error pointer dereference in error handling code
ubifs: Fix memory leak from c->sup_node
regulator: core: Add regulator_is_equal() helper
ASoC: sgtl5000: Fix VDDA and VDDIO comparison
bonding/alb: properly access headers in bond_alb_xmit()
devlink: report 0 after hitting end in region read
dpaa_eth: support all modes with rate adapting PHYs
net: dsa: b53: Always use dev->vlan_enabled in b53_configure_vlan()
net: dsa: bcm_sf2: Only 7278 supports 2Gb/sec IMP port
net: dsa: microchip: enable module autoprobe
net: mvneta: move rx_dropped and rx_errors in per-cpu stats
net_sched: fix a resource leak in tcindex_set_parms()
net: stmmac: fix a possible endless loop
net: systemport: Avoid RBUF stuck in Wake-on-LAN mode
net/mlx5: IPsec, Fix esp modify function attribute
net/mlx5: IPsec, fix memory leak at mlx5_fpga_ipsec_delete_sa_ctx
net: macb: Remove unnecessary alignment check for TSO
net: macb: Limit maximum GEM TX length in TSO
taprio: Fix enabling offload with wrong number of traffic classes
taprio: Fix still allowing changing the flags during runtime
taprio: Add missing policy validation for flags
taprio: Use taprio_reset_tc() to reset Traffic Classes configuration
taprio: Fix dropping packets when using taprio + ETF offloading
ipv6/addrconf: fix potential NULL deref in inet6_set_link_af()
qed: Fix timestamping issue for L2 unicast ptp packets.
drop_monitor: Do not cancel uninitialized work item
net/mlx5: Fix deadlock in fs_core
net/mlx5: Deprecate usage of generic TLS HW capability bit
ASoC: Intel: skl_hda_dsp_common: Fix global-out-of-bounds bug
mfd: da9062: Fix watchdog compatible string
mfd: rn5t618: Mark ADC control register volatile
mfd: bd70528: Fix hour register mask
x86/timer: Don't skip PIT setup when APIC is disabled or in legacy mode
btrfs: use bool argument in free_root_pointers()
btrfs: free block groups after free'ing fs trees
drm/dp_mst: Remove VCPI while disabling topology mgr
KVM: x86/mmu: Apply max PA check for MMIO sptes to 32-bit KVM
KVM: x86: use CPUID to locate host page table reserved bits
KVM: x86: Use gpa_t for cr2/gpa to fix TDP support on 32-bit KVM
KVM: x86: fix overlap between SPTE_MMIO_MASK and generation
KVM: nVMX: vmread should not set rflags to specify success in case of #PF
KVM: Use vcpu-specific gva->hva translation when querying host page size
KVM: Play nice with read-only memslots when querying host page size
cifs: fail i/o on soft mounts if sessionsetup errors out
x86/apic/msi: Plug non-maskable MSI affinity race
clocksource: Prevent double add_timer_on() for watchdog_timer
perf/core: Fix mlock accounting in perf_mmap()
rxrpc: Fix service call disconnection
regulator fix for "regulator: core: Add regulator_is_equal() helper"
powerpc/kuap: Fix set direction in allow/prevent_user_access()
Linux 5.4.19
Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
Change-Id: Ief6bae336b8e6931810e5b357c0d5e16fbf1c13e
544 lines
14 KiB
C
544 lines
14 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* linux/fs/ext4/page-io.c
|
|
*
|
|
* This contains the new page_io functions for ext4
|
|
*
|
|
* Written by Theodore Ts'o, 2010.
|
|
*/
|
|
|
|
#include <linux/fs.h>
|
|
#include <linux/time.h>
|
|
#include <linux/highuid.h>
|
|
#include <linux/pagemap.h>
|
|
#include <linux/quotaops.h>
|
|
#include <linux/string.h>
|
|
#include <linux/buffer_head.h>
|
|
#include <linux/writeback.h>
|
|
#include <linux/pagevec.h>
|
|
#include <linux/mpage.h>
|
|
#include <linux/namei.h>
|
|
#include <linux/uio.h>
|
|
#include <linux/bio.h>
|
|
#include <linux/workqueue.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/backing-dev.h>
|
|
|
|
#include "ext4_jbd2.h"
|
|
#include "xattr.h"
|
|
#include "acl.h"
|
|
|
|
static struct kmem_cache *io_end_cachep;
|
|
|
|
int __init ext4_init_pageio(void)
|
|
{
|
|
io_end_cachep = KMEM_CACHE(ext4_io_end, SLAB_RECLAIM_ACCOUNT);
|
|
if (io_end_cachep == NULL)
|
|
return -ENOMEM;
|
|
return 0;
|
|
}
|
|
|
|
void ext4_exit_pageio(void)
|
|
{
|
|
kmem_cache_destroy(io_end_cachep);
|
|
}
|
|
|
|
/*
|
|
* Print an buffer I/O error compatible with the fs/buffer.c. This
|
|
* provides compatibility with dmesg scrapers that look for a specific
|
|
* buffer I/O error message. We really need a unified error reporting
|
|
* structure to userspace ala Digital Unix's uerf system, but it's
|
|
* probably not going to happen in my lifetime, due to LKML politics...
|
|
*/
|
|
static void buffer_io_error(struct buffer_head *bh)
|
|
{
|
|
printk_ratelimited(KERN_ERR "Buffer I/O error on device %pg, logical block %llu\n",
|
|
bh->b_bdev,
|
|
(unsigned long long)bh->b_blocknr);
|
|
}
|
|
|
|
static void ext4_finish_bio(struct bio *bio)
|
|
{
|
|
struct bio_vec *bvec;
|
|
struct bvec_iter_all iter_all;
|
|
|
|
bio_for_each_segment_all(bvec, bio, iter_all) {
|
|
struct page *page = bvec->bv_page;
|
|
struct page *bounce_page = NULL;
|
|
struct buffer_head *bh, *head;
|
|
unsigned bio_start = bvec->bv_offset;
|
|
unsigned bio_end = bio_start + bvec->bv_len;
|
|
unsigned under_io = 0;
|
|
unsigned long flags;
|
|
|
|
if (!page)
|
|
continue;
|
|
|
|
if (fscrypt_is_bounce_page(page)) {
|
|
bounce_page = page;
|
|
page = fscrypt_pagecache_page(bounce_page);
|
|
}
|
|
|
|
if (bio->bi_status) {
|
|
SetPageError(page);
|
|
mapping_set_error(page->mapping, -EIO);
|
|
}
|
|
bh = head = page_buffers(page);
|
|
/*
|
|
* We check all buffers in the page under BH_Uptodate_Lock
|
|
* to avoid races with other end io clearing async_write flags
|
|
*/
|
|
local_irq_save(flags);
|
|
bit_spin_lock(BH_Uptodate_Lock, &head->b_state);
|
|
do {
|
|
if (bh_offset(bh) < bio_start ||
|
|
bh_offset(bh) + bh->b_size > bio_end) {
|
|
if (buffer_async_write(bh))
|
|
under_io++;
|
|
continue;
|
|
}
|
|
clear_buffer_async_write(bh);
|
|
if (bio->bi_status)
|
|
buffer_io_error(bh);
|
|
} while ((bh = bh->b_this_page) != head);
|
|
bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
|
|
local_irq_restore(flags);
|
|
if (!under_io) {
|
|
fscrypt_free_bounce_page(bounce_page);
|
|
end_page_writeback(page);
|
|
}
|
|
}
|
|
}
|
|
|
|
static void ext4_release_io_end(ext4_io_end_t *io_end)
|
|
{
|
|
struct bio *bio, *next_bio;
|
|
|
|
BUG_ON(!list_empty(&io_end->list));
|
|
BUG_ON(io_end->flag & EXT4_IO_END_UNWRITTEN);
|
|
WARN_ON(io_end->handle);
|
|
|
|
for (bio = io_end->bio; bio; bio = next_bio) {
|
|
next_bio = bio->bi_private;
|
|
ext4_finish_bio(bio);
|
|
bio_put(bio);
|
|
}
|
|
kmem_cache_free(io_end_cachep, io_end);
|
|
}
|
|
|
|
/*
|
|
* Check a range of space and convert unwritten extents to written. Note that
|
|
* we are protected from truncate touching same part of extent tree by the
|
|
* fact that truncate code waits for all DIO to finish (thus exclusion from
|
|
* direct IO is achieved) and also waits for PageWriteback bits. Thus we
|
|
* cannot get to ext4_ext_truncate() before all IOs overlapping that range are
|
|
* completed (happens from ext4_free_ioend()).
|
|
*/
|
|
static int ext4_end_io(ext4_io_end_t *io)
|
|
{
|
|
struct inode *inode = io->inode;
|
|
loff_t offset = io->offset;
|
|
ssize_t size = io->size;
|
|
handle_t *handle = io->handle;
|
|
int ret = 0;
|
|
|
|
ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
|
|
"list->prev 0x%p\n",
|
|
io, inode->i_ino, io->list.next, io->list.prev);
|
|
|
|
io->handle = NULL; /* Following call will use up the handle */
|
|
ret = ext4_convert_unwritten_extents(handle, inode, offset, size);
|
|
if (ret < 0 && !ext4_forced_shutdown(EXT4_SB(inode->i_sb))) {
|
|
ext4_msg(inode->i_sb, KERN_EMERG,
|
|
"failed to convert unwritten extents to written "
|
|
"extents -- potential data loss! "
|
|
"(inode %lu, offset %llu, size %zd, error %d)",
|
|
inode->i_ino, offset, size, ret);
|
|
}
|
|
ext4_clear_io_unwritten_flag(io);
|
|
ext4_release_io_end(io);
|
|
return ret;
|
|
}
|
|
|
|
static void dump_completed_IO(struct inode *inode, struct list_head *head)
|
|
{
|
|
#ifdef EXT4FS_DEBUG
|
|
struct list_head *cur, *before, *after;
|
|
ext4_io_end_t *io, *io0, *io1;
|
|
|
|
if (list_empty(head))
|
|
return;
|
|
|
|
ext4_debug("Dump inode %lu completed io list\n", inode->i_ino);
|
|
list_for_each_entry(io, head, list) {
|
|
cur = &io->list;
|
|
before = cur->prev;
|
|
io0 = container_of(before, ext4_io_end_t, list);
|
|
after = cur->next;
|
|
io1 = container_of(after, ext4_io_end_t, list);
|
|
|
|
ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n",
|
|
io, inode->i_ino, io0, io1);
|
|
}
|
|
#endif
|
|
}
|
|
|
|
/* Add the io_end to per-inode completed end_io list. */
|
|
static void ext4_add_complete_io(ext4_io_end_t *io_end)
|
|
{
|
|
struct ext4_inode_info *ei = EXT4_I(io_end->inode);
|
|
struct ext4_sb_info *sbi = EXT4_SB(io_end->inode->i_sb);
|
|
struct workqueue_struct *wq;
|
|
unsigned long flags;
|
|
|
|
/* Only reserved conversions from writeback should enter here */
|
|
WARN_ON(!(io_end->flag & EXT4_IO_END_UNWRITTEN));
|
|
WARN_ON(!io_end->handle && sbi->s_journal);
|
|
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
|
|
wq = sbi->rsv_conversion_wq;
|
|
if (list_empty(&ei->i_rsv_conversion_list))
|
|
queue_work(wq, &ei->i_rsv_conversion_work);
|
|
list_add_tail(&io_end->list, &ei->i_rsv_conversion_list);
|
|
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
|
|
}
|
|
|
|
static int ext4_do_flush_completed_IO(struct inode *inode,
|
|
struct list_head *head)
|
|
{
|
|
ext4_io_end_t *io;
|
|
struct list_head unwritten;
|
|
unsigned long flags;
|
|
struct ext4_inode_info *ei = EXT4_I(inode);
|
|
int err, ret = 0;
|
|
|
|
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
|
|
dump_completed_IO(inode, head);
|
|
list_replace_init(head, &unwritten);
|
|
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
|
|
|
|
while (!list_empty(&unwritten)) {
|
|
io = list_entry(unwritten.next, ext4_io_end_t, list);
|
|
BUG_ON(!(io->flag & EXT4_IO_END_UNWRITTEN));
|
|
list_del_init(&io->list);
|
|
|
|
err = ext4_end_io(io);
|
|
if (unlikely(!ret && err))
|
|
ret = err;
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* work on completed IO, to convert unwritten extents to extents
|
|
*/
|
|
void ext4_end_io_rsv_work(struct work_struct *work)
|
|
{
|
|
struct ext4_inode_info *ei = container_of(work, struct ext4_inode_info,
|
|
i_rsv_conversion_work);
|
|
ext4_do_flush_completed_IO(&ei->vfs_inode, &ei->i_rsv_conversion_list);
|
|
}
|
|
|
|
ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
|
|
{
|
|
ext4_io_end_t *io = kmem_cache_zalloc(io_end_cachep, flags);
|
|
if (io) {
|
|
io->inode = inode;
|
|
INIT_LIST_HEAD(&io->list);
|
|
atomic_set(&io->count, 1);
|
|
}
|
|
return io;
|
|
}
|
|
|
|
void ext4_put_io_end_defer(ext4_io_end_t *io_end)
|
|
{
|
|
if (atomic_dec_and_test(&io_end->count)) {
|
|
if (!(io_end->flag & EXT4_IO_END_UNWRITTEN) || !io_end->size) {
|
|
ext4_release_io_end(io_end);
|
|
return;
|
|
}
|
|
ext4_add_complete_io(io_end);
|
|
}
|
|
}
|
|
|
|
int ext4_put_io_end(ext4_io_end_t *io_end)
|
|
{
|
|
int err = 0;
|
|
|
|
if (atomic_dec_and_test(&io_end->count)) {
|
|
if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
|
|
err = ext4_convert_unwritten_extents(io_end->handle,
|
|
io_end->inode, io_end->offset,
|
|
io_end->size);
|
|
io_end->handle = NULL;
|
|
ext4_clear_io_unwritten_flag(io_end);
|
|
}
|
|
ext4_release_io_end(io_end);
|
|
}
|
|
return err;
|
|
}
|
|
|
|
ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end)
|
|
{
|
|
atomic_inc(&io_end->count);
|
|
return io_end;
|
|
}
|
|
|
|
/* BIO completion function for page writeback */
|
|
static void ext4_end_bio(struct bio *bio)
|
|
{
|
|
ext4_io_end_t *io_end = bio->bi_private;
|
|
sector_t bi_sector = bio->bi_iter.bi_sector;
|
|
char b[BDEVNAME_SIZE];
|
|
|
|
if (WARN_ONCE(!io_end, "io_end is NULL: %s: sector %Lu len %u err %d\n",
|
|
bio_devname(bio, b),
|
|
(long long) bio->bi_iter.bi_sector,
|
|
(unsigned) bio_sectors(bio),
|
|
bio->bi_status)) {
|
|
ext4_finish_bio(bio);
|
|
bio_put(bio);
|
|
return;
|
|
}
|
|
bio->bi_end_io = NULL;
|
|
|
|
if (bio->bi_status) {
|
|
struct inode *inode = io_end->inode;
|
|
|
|
ext4_warning(inode->i_sb, "I/O error %d writing to inode %lu "
|
|
"(offset %llu size %ld starting block %llu)",
|
|
bio->bi_status, inode->i_ino,
|
|
(unsigned long long) io_end->offset,
|
|
(long) io_end->size,
|
|
(unsigned long long)
|
|
bi_sector >> (inode->i_blkbits - 9));
|
|
mapping_set_error(inode->i_mapping,
|
|
blk_status_to_errno(bio->bi_status));
|
|
}
|
|
|
|
if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
|
|
/*
|
|
* Link bio into list hanging from io_end. We have to do it
|
|
* atomically as bio completions can be racing against each
|
|
* other.
|
|
*/
|
|
bio->bi_private = xchg(&io_end->bio, bio);
|
|
ext4_put_io_end_defer(io_end);
|
|
} else {
|
|
/*
|
|
* Drop io_end reference early. Inode can get freed once
|
|
* we finish the bio.
|
|
*/
|
|
ext4_put_io_end_defer(io_end);
|
|
ext4_finish_bio(bio);
|
|
bio_put(bio);
|
|
}
|
|
}
|
|
|
|
void ext4_io_submit(struct ext4_io_submit *io)
|
|
{
|
|
struct bio *bio = io->io_bio;
|
|
|
|
if (bio) {
|
|
int io_op_flags = io->io_wbc->sync_mode == WB_SYNC_ALL ?
|
|
REQ_SYNC : 0;
|
|
io->io_bio->bi_write_hint = io->io_end->inode->i_write_hint;
|
|
bio_set_op_attrs(io->io_bio, REQ_OP_WRITE, io_op_flags);
|
|
submit_bio(io->io_bio);
|
|
}
|
|
io->io_bio = NULL;
|
|
}
|
|
|
|
void ext4_io_submit_init(struct ext4_io_submit *io,
|
|
struct writeback_control *wbc)
|
|
{
|
|
io->io_wbc = wbc;
|
|
io->io_bio = NULL;
|
|
io->io_end = NULL;
|
|
}
|
|
|
|
static int io_submit_init_bio(struct ext4_io_submit *io,
|
|
struct buffer_head *bh)
|
|
{
|
|
struct bio *bio;
|
|
|
|
bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
|
|
if (!bio)
|
|
return -ENOMEM;
|
|
fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO);
|
|
bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
|
|
bio_set_dev(bio, bh->b_bdev);
|
|
bio->bi_end_io = ext4_end_bio;
|
|
bio->bi_private = ext4_get_io_end(io->io_end);
|
|
io->io_bio = bio;
|
|
io->io_next_block = bh->b_blocknr;
|
|
wbc_init_bio(io->io_wbc, bio);
|
|
return 0;
|
|
}
|
|
|
|
static int io_submit_add_bh(struct ext4_io_submit *io,
|
|
struct inode *inode,
|
|
struct page *page,
|
|
struct buffer_head *bh)
|
|
{
|
|
int ret;
|
|
|
|
if (io->io_bio && (bh->b_blocknr != io->io_next_block ||
|
|
!fscrypt_mergeable_bio_bh(io->io_bio, bh))) {
|
|
submit_and_retry:
|
|
ext4_io_submit(io);
|
|
}
|
|
if (io->io_bio == NULL) {
|
|
ret = io_submit_init_bio(io, bh);
|
|
if (ret)
|
|
return ret;
|
|
io->io_bio->bi_write_hint = inode->i_write_hint;
|
|
}
|
|
ret = bio_add_page(io->io_bio, page, bh->b_size, bh_offset(bh));
|
|
if (ret != bh->b_size)
|
|
goto submit_and_retry;
|
|
wbc_account_cgroup_owner(io->io_wbc, page, bh->b_size);
|
|
io->io_next_block++;
|
|
return 0;
|
|
}
|
|
|
|
int ext4_bio_write_page(struct ext4_io_submit *io,
|
|
struct page *page,
|
|
int len,
|
|
struct writeback_control *wbc,
|
|
bool keep_towrite)
|
|
{
|
|
struct page *bounce_page = NULL;
|
|
struct inode *inode = page->mapping->host;
|
|
unsigned block_start;
|
|
struct buffer_head *bh, *head;
|
|
int ret = 0;
|
|
int nr_submitted = 0;
|
|
int nr_to_submit = 0;
|
|
|
|
BUG_ON(!PageLocked(page));
|
|
BUG_ON(PageWriteback(page));
|
|
|
|
if (keep_towrite)
|
|
set_page_writeback_keepwrite(page);
|
|
else
|
|
set_page_writeback(page);
|
|
ClearPageError(page);
|
|
|
|
/*
|
|
* Comments copied from block_write_full_page:
|
|
*
|
|
* The page straddles i_size. It must be zeroed out on each and every
|
|
* writepage invocation because it may be mmapped. "A file is mapped
|
|
* in multiples of the page size. For a file that is not a multiple of
|
|
* the page size, the remaining memory is zeroed when mapped, and
|
|
* writes to that region are not written out to the file."
|
|
*/
|
|
if (len < PAGE_SIZE)
|
|
zero_user_segment(page, len, PAGE_SIZE);
|
|
/*
|
|
* In the first loop we prepare and mark buffers to submit. We have to
|
|
* mark all buffers in the page before submitting so that
|
|
* end_page_writeback() cannot be called from ext4_bio_end_io() when IO
|
|
* on the first buffer finishes and we are still working on submitting
|
|
* the second buffer.
|
|
*/
|
|
bh = head = page_buffers(page);
|
|
do {
|
|
block_start = bh_offset(bh);
|
|
if (block_start >= len) {
|
|
clear_buffer_dirty(bh);
|
|
set_buffer_uptodate(bh);
|
|
continue;
|
|
}
|
|
if (!buffer_dirty(bh) || buffer_delay(bh) ||
|
|
!buffer_mapped(bh) || buffer_unwritten(bh)) {
|
|
/* A hole? We can safely clear the dirty bit */
|
|
if (!buffer_mapped(bh))
|
|
clear_buffer_dirty(bh);
|
|
if (io->io_bio)
|
|
ext4_io_submit(io);
|
|
continue;
|
|
}
|
|
if (buffer_new(bh))
|
|
clear_buffer_new(bh);
|
|
set_buffer_async_write(bh);
|
|
nr_to_submit++;
|
|
} while ((bh = bh->b_this_page) != head);
|
|
|
|
bh = head = page_buffers(page);
|
|
|
|
/*
|
|
* If any blocks are being written to an encrypted file, encrypt them
|
|
* into a bounce page. For simplicity, just encrypt until the last
|
|
* block which might be needed. This may cause some unneeded blocks
|
|
* (e.g. holes) to be unnecessarily encrypted, but this is rare and
|
|
* can't happen in the common case of blocksize == PAGE_SIZE.
|
|
*/
|
|
if (fscrypt_inode_uses_fs_layer_crypto(inode) && nr_to_submit) {
|
|
gfp_t gfp_flags = GFP_NOFS;
|
|
unsigned int enc_bytes = round_up(len, i_blocksize(inode));
|
|
|
|
/*
|
|
* Since bounce page allocation uses a mempool, we can only use
|
|
* a waiting mask (i.e. request guaranteed allocation) on the
|
|
* first page of the bio. Otherwise it can deadlock.
|
|
*/
|
|
if (io->io_bio)
|
|
gfp_flags = GFP_NOWAIT | __GFP_NOWARN;
|
|
retry_encrypt:
|
|
bounce_page = fscrypt_encrypt_pagecache_blocks(page, enc_bytes,
|
|
0, gfp_flags);
|
|
if (IS_ERR(bounce_page)) {
|
|
ret = PTR_ERR(bounce_page);
|
|
if (ret == -ENOMEM &&
|
|
(io->io_bio || wbc->sync_mode == WB_SYNC_ALL)) {
|
|
gfp_flags = GFP_NOFS;
|
|
if (io->io_bio)
|
|
ext4_io_submit(io);
|
|
else
|
|
gfp_flags |= __GFP_NOFAIL;
|
|
congestion_wait(BLK_RW_ASYNC, HZ/50);
|
|
goto retry_encrypt;
|
|
}
|
|
bounce_page = NULL;
|
|
goto out;
|
|
}
|
|
}
|
|
|
|
/* Now submit buffers to write */
|
|
do {
|
|
if (!buffer_async_write(bh))
|
|
continue;
|
|
ret = io_submit_add_bh(io, inode, bounce_page ?: page, bh);
|
|
if (ret) {
|
|
/*
|
|
* We only get here on ENOMEM. Not much else
|
|
* we can do but mark the page as dirty, and
|
|
* better luck next time.
|
|
*/
|
|
break;
|
|
}
|
|
nr_submitted++;
|
|
clear_buffer_dirty(bh);
|
|
} while ((bh = bh->b_this_page) != head);
|
|
|
|
/* Error stopped previous loop? Clean up buffers... */
|
|
if (ret) {
|
|
out:
|
|
fscrypt_free_bounce_page(bounce_page);
|
|
printk_ratelimited(KERN_ERR "%s: ret = %d\n", __func__, ret);
|
|
redirty_page_for_writepage(wbc, page);
|
|
do {
|
|
clear_buffer_async_write(bh);
|
|
bh = bh->b_this_page;
|
|
} while (bh != head);
|
|
}
|
|
unlock_page(page);
|
|
/* Nothing submitted - we have to end page writeback */
|
|
if (!nr_submitted)
|
|
end_page_writeback(page);
|
|
return ret;
|
|
}
|