2b3ea8bdef
-----BEGIN PGP SIGNATURE----- iQIzBAABCAAdFiEEZH8oZUiU471FcZm+ONu9yGCSaT4FAmVbOmsACgkQONu9yGCS aT5m1RAAx7hgbFDnLHCGh4YVBbNy8JngItsUBaJcI/67Mk5toNi0x8pqcS8mq7ED GTwRnRcKaIR2bTyco5Ed2OZn4jMCyHC4oiyBZnHWg6AMuQjSCYzIgm7DzlTCVYZ7 2r8uRbt/uXADTILJ2kwR2mtVpGcwrXa+lsHrMqvt+MvNwRoSVHBHVVYCrAc+JXwR GXCopzV/RFGS6w4SBsX0K+8pV7GO+bhpxJ1lPz1T/xeLYfT4C3EwSTWDbUXPbez7 IpJ+5yKJXXT9Xn9m/pekwZ/aOirLqtEbDxneEctsjvw140lCoQiEZn6ZRscgNEns 3H+J3Asgc2zXqPzfZFH02TebPj31B8HZ43Upu0okr0hr4A4/4JL9pjXEhm1bON/Z x3jlTF4dyay4vOGGIEYOAuJSUbn6AqpZ318uBWCd3BSPocihEDMJz2aoazVHcb6k 83MVxfFfEL6s9utcoSXB8VjHa4FQmpMYsozegloUSJJCsizgdzmih0buJYhBB9sI HbEohW+YAh3cACSn6arXUJIMH5F5xsfD89od2Pj+6UrapdlPz5gCaggA1RZplCho bjGc1k61Rp2qSdfMEcx+h4ypgoOdhgqZI0YhYDCgBSRcWOXnGrDjFvnnumatcT+H 6vqyX6zlNt6U1NpE56Jtf7gt1Ds6PeoadD0L6B8vjXrkdeXOlUU= =AZ9s -----END PGP SIGNATURE----- Merge 6.1.63 into android14-6.1-lts Changes in 6.1.63 hwmon: (nct6775) Fix incorrect variable reuse in fan_div calculation sched/fair: Fix cfs_rq_is_decayed() on !SMP iov_iter, x86: Be consistent about the __user tag on copy_mc_to_user() sched/uclamp: Set max_spare_cap_cpu even if max_spare_cap is 0 sched/uclamp: Ignore (util == 0) optimization in feec() when p_util_max = 0 objtool: Propagate early errors sched: Fix stop_one_cpu_nowait() vs hotplug vfs: fix readahead(2) on block devices writeback, cgroup: switch inodes with dirty timestamps to release dying cgwbs x86/srso: Fix SBPB enablement for (possible) future fixed HW futex: Don't include process MM in futex key on no-MMU x86/numa: Introduce numa_fill_memblks() ACPI/NUMA: Apply SRAT proximity domain to entire CFMWS window x86/sev-es: Allow copy_from_kernel_nofault() in earlier boot x86/boot: Fix incorrect startup_gdt_descr.size drivers/clocksource/timer-ti-dm: Don't call clk_get_rate() in stop function pstore/platform: Add check for kstrdup string: Adjust strtomem() logic to allow for smaller sources genirq/matrix: Exclude managed interrupts in irq_matrix_allocated() wifi: cfg80211: add flush 
functions for wiphy work wifi: mac80211: move radar detect work to wiphy work wifi: mac80211: move scan work to wiphy work wifi: mac80211: move offchannel works to wiphy work wifi: mac80211: move sched-scan stop work to wiphy work wifi: mac80211: fix # of MSDU in A-MSDU calculation wifi: iwlwifi: honor the enable_ini value i40e: fix potential memory leaks in i40e_remove() iavf: Fix promiscuous mode configuration flow messages selftests/bpf: Correct map_fd to data_fd in tailcalls udp: add missing WRITE_ONCE() around up->encap_rcv tcp: call tcp_try_undo_recovery when an RTOd TFO SYNACK is ACKed gve: Use size_add() in call to struct_size() mlxsw: Use size_mul() in call to struct_size() tls: Only use data field in crypto completion function tls: Use size_add() in call to struct_size() tipc: Use size_add() in calls to struct_size() net: spider_net: Use size_add() in call to struct_size() net: ethernet: mtk_wed: fix EXT_INT_STATUS_RX_FBUF definitions for MT7986 SoC wifi: rtw88: debug: Fix the NULL vs IS_ERR() bug for debugfs_create_file() wifi: ath11k: fix boot failure with one MSI vector wifi: mt76: mt7603: rework/fix rx pse hang check wifi: mt76: mt7603: improve watchdog reset reliablity wifi: mt76: mt7603: improve stuck beacon handling wifi: mt76: mt7915: fix beamforming availability check wifi: ath: dfs_pattern_detector: Fix a memory initialization issue tcp_metrics: add missing barriers on delete tcp_metrics: properly set tp->snd_ssthresh in tcp_init_metrics() tcp_metrics: do not create an entry from tcp_init_metrics() wifi: rtlwifi: fix EDCA limit set by BT coexistence ACPI: property: Allow _DSD buffer data only for byte accessors ACPI: video: Add acpi_backlight=vendor quirk for Toshiba Portégé R100 wifi: ath11k: fix Tx power value during active CAC can: dev: can_restart(): don't crash kernel if carrier is OK can: dev: can_restart(): fix race condition between controller restart and netif_carrier_on() can: dev: can_put_echo_skb(): don't crash kernel if 
can_priv::echo_skb is accessed out of bounds PM / devfreq: rockchip-dfi: Make pmu regmap mandatory wifi: wfx: fix case where rates are out of order netfilter: nf_tables: Drop pointless memset when dumping rules thermal: core: prevent potential string overflow r8169: use tp_to_dev instead of open code r8169: fix rare issue with broken rx after link-down on RTL8125 selftests: netfilter: test for sctp collision processing in nf_conntrack net: skb_find_text: Ignore patterns extending past 'to' chtls: fix tp->rcv_tstamp initialization tcp: fix cookie_init_timestamp() overflows wifi: iwlwifi: call napi_synchronize() before freeing rx/tx queues wifi: iwlwifi: pcie: synchronize IRQs before NAPI wifi: iwlwifi: empty overflow queue during flush Bluetooth: hci_sync: Fix Opcode prints in bt_dev_dbg/err bpf: Fix unnecessary -EBUSY from htab_lock_bucket ACPI: sysfs: Fix create_pnp_modalias() and create_of_modalias() ipv6: avoid atomic fragment on GSO packets net: add DEV_STATS_READ() helper ipvlan: properly track tx_errors regmap: debugfs: Fix a erroneous check after snprintf() spi: tegra: Fix missing IRQ check in tegra_slink_probe() clk: qcom: gcc-msm8996: Remove RPM bus clocks clk: qcom: clk-rcg2: Fix clock rate overflow for high parent frequencies clk: qcom: mmcc-msm8998: Don't check halt bit on some branch clks clk: qcom: mmcc-msm8998: Fix the SMMU GDSC clk: qcom: gcc-sm8150: Fix gcc_sdcc2_apps_clk_src regulator: mt6358: Fail probe on unknown chip ID clk: imx: Select MXC_CLK for CLK_IMX8QXP clk: imx: imx8mq: correct error handling path clk: imx: imx8qxp: Fix elcdif_pll clock clk: renesas: rcar-gen3: Extend SDnH divider table clk: renesas: rzg2l: Wait for status bit of SD mux before continuing clk: renesas: rzg2l: Lock around writes to mux register clk: renesas: rzg2l: Trust value returned by hardware clk: renesas: rzg2l: Use FIELD_GET() for PLL register fields clk: renesas: rzg2l: Fix computation formula clk: linux/clk-provider.h: fix kernel-doc warnings and typos spi: 
nxp-fspi: use the correct ioremap function clk: keystone: pll: fix a couple NULL vs IS_ERR() checks clk: ti: change ti_clk_register[_omap_hw]() API clk: ti: fix double free in of_ti_divider_clk_setup() clk: npcm7xx: Fix incorrect kfree clk: mediatek: clk-mt6765: Add check for mtk_alloc_clk_data clk: mediatek: clk-mt6779: Add check for mtk_alloc_clk_data clk: mediatek: clk-mt6797: Add check for mtk_alloc_clk_data clk: mediatek: clk-mt7629-eth: Add check for mtk_alloc_clk_data clk: mediatek: clk-mt7629: Add check for mtk_alloc_clk_data clk: mediatek: clk-mt2701: Add check for mtk_alloc_clk_data clk: qcom: config IPQ_APSS_6018 should depend on QCOM_SMEM platform/x86: wmi: Fix probe failure when failing to register WMI devices platform/x86: wmi: Fix opening of char device hwmon: (axi-fan-control) Fix possible NULL pointer dereference hwmon: (coretemp) Fix potentially truncated sysfs attribute name Revert "hwmon: (sch56xx-common) Add DMI override table" Revert "hwmon: (sch56xx-common) Add automatic module loading on supported devices" hwmon: (sch5627) Use bit macros when accessing the control register hwmon: (sch5627) Disallow write access if virtual registers are locked hte: tegra: Fix missing error code in tegra_hte_test_probe() drm/rockchip: vop: Fix reset of state in duplicate state crtc funcs drm/rockchip: vop: Fix call to crtc reset helper drm/rockchip: vop2: Don't crash for invalid duplicate_state drm/rockchip: vop2: Add missing call to crtc reset helper drm/radeon: possible buffer overflow drm: bridge: it66121: Fix invalid connector dereference drm/bridge: lt8912b: Add hot plug detection drm/bridge: lt8912b: Fix bridge_detach drm/bridge: lt8912b: Fix crash on bridge detach drm/bridge: lt8912b: Manually disable HPD only if it was enabled drm/bridge: lt8912b: Add missing drm_bridge_attach call drm/bridge: tc358768: Fix use of uninitialized variable drm/bridge: tc358768: Fix bit updates drm/bridge: tc358768: remove unused variable drm/bridge: tc358768: Use struct 
videomode drm/bridge: tc358768: Print logical values, not raw register values drm/bridge: tc358768: Use dev for dbg prints, not priv->dev drm/bridge: tc358768: Rename dsibclk to hsbyteclk drm/bridge: tc358768: Clean up clock period code drm/bridge: tc358768: Fix tc358768_ns_to_cnt() drm/amdkfd: fix some race conditions in vram buffer alloc/free of svm code drm/amd/display: Check all enabled planes in dm_check_crtc_cursor drm/amd/display: Refactor dm_get_plane_scale helper drm/amd/display: Bail from dm_check_crtc_cursor if no relevant change io_uring/kbuf: Fix check of BID wrapping in provided buffers io_uring/kbuf: Allow the full buffer id space for provided buffers drm/mediatek: Fix iommu fault by swapping FBs after updating plane state drm/mediatek: Fix iommu fault during crtc enabling drm/rockchip: cdn-dp: Fix some error handling paths in cdn_dp_probe() gpu: host1x: Correct allocated size for contexts drm/bridge: lt9611uxc: fix the race in the error path arm64/arm: xen: enlighten: Fix KPTI checks drm/rockchip: Fix type promotion bug in rockchip_gem_iommu_map() xenbus: fix error exit in xenbus_init() xen-pciback: Consider INTx disabled when MSI/MSI-X is enabled drm/msm/dsi: use msm_gem_kernel_put to free TX buffer drm/msm/dsi: free TX buffer in unbind clocksource/drivers/arm_arch_timer: limit XGene-1 workaround drm: mediatek: mtk_dsi: Fix NO_EOT_PACKET settings/handling drivers/perf: hisi: use cpuhp_state_remove_instance_nocalls() for hisi_hns3_pmu uninit process perf/arm-cmn: Revamp model detection perf/arm-cmn: Fix DTC domain detection drivers/perf: hisi_pcie: Check the type first in pmu::event_init() perf: hisi: Fix use-after-free when register pmu fails ARM: dts: renesas: blanche: Fix typo in GP_11_2 pin name arm64: dts: qcom: sdm845: cheza doesn't support LMh node arm64: dts: qcom: sc7280: link usb3_phy_wrapper_gcc_usb30_pipe_clk arm64: dts: qcom: msm8916: Fix iommu local address range arm64: dts: qcom: msm8992-libra: drop duplicated reserved memory arm64: 
dts: qcom: sc7280: Add missing LMH interrupts arm64: dts: qcom: sm8150: add ref clock to PCIe PHYs arm64: dts: qcom: sm8350: fix pinctrl for UART18 arm64: dts: qcom: sdm845-mtp: fix WiFi configuration ARM64: dts: marvell: cn9310: Use appropriate label for spi1 pins arm64: dts: qcom: apq8016-sbc: Add missing ADV7533 regulators ARM: dts: qcom: mdm9615: populate vsdcc fixed regulator soc: qcom: llcc: Handle a second device without data corruption kunit: Fix missed memory release in kunit_free_suite_set() firmware: ti_sci: Mark driver as non removable arm64: dts: ti: k3-am62a7-sk: Drop i2c-1 to 100Khz firmware: arm_ffa: Assign the missing IDR allocation ID to the FFA device firmware: arm_ffa: Allow the FF-A drivers to use 32bit mode of messaging ARM: dts: am3517-evm: Fix LED3/4 pinmux clk: scmi: Free scmi_clk allocated when the clocks with invalid info are skipped arm64: dts: imx8qm-ss-img: Fix jpegenc compatible entry arm64: dts: imx8mm: Add sound-dai-cells to micfil node arm64: dts: imx8mn: Add sound-dai-cells to micfil node arm64: tegra: Use correct interrupts for Tegra234 TKE selftests/pidfd: Fix ksft print formats selftests/resctrl: Ensure the benchmark commands fits to its array module/decompress: use vmalloc() for gzip decompression workspace ASoC: cs35l41: Verify PM runtime resume errors in IRQ handler ASoC: cs35l41: Undo runtime PM changes at driver exit time ALSA: hda: cs35l41: Fix unbalanced pm_runtime_get() ALSA: hda: cs35l41: Undo runtime PM changes at driver exit time KEYS: Include linux/errno.h in linux/verification.h crypto: hisilicon/hpre - Fix a erroneous check after snprintf() hwrng: bcm2835 - Fix hwrng throughput regression hwrng: geode - fix accessing registers RDMA/core: Use size_{add,sub,mul}() in calls to struct_size() crypto: qat - ignore subsequent state up commands crypto: qat - relocate bufferlist logic crypto: qat - rename bufferlist functions crypto: qat - change bufferlist logic interface crypto: qat - generalize crypto request buffers 
crypto: qat - extend buffer list interface crypto: qat - fix unregistration of crypto algorithms scsi: ibmvfc: Fix erroneous use of rtas_busy_delay with hcall return code libnvdimm/of_pmem: Use devm_kstrdup instead of kstrdup and check its return value nd_btt: Make BTT lanes preemptible crypto: caam/qi2 - fix Chacha20 + Poly1305 self test failure crypto: caam/jr - fix Chacha20 + Poly1305 self test failure crypto: qat - increase size of buffers PCI: vmd: Correct PCI Header Type Register's multi-function check hid: cp2112: Fix duplicate workqueue initialization crypto: hisilicon/qm - delete redundant null assignment operations crypto: hisilicon/qm - modify the process of regs dfx crypto: hisilicon/qm - split a debugfs.c from qm crypto: hisilicon/qm - fix PF queue parameter issue ARM: 9321/1: memset: cast the constant byte to unsigned char ext4: move 'ix' sanity check to corrent position ASoC: fsl: mpc5200_dma.c: Fix warning of Function parameter or member not described IB/mlx5: Fix rdma counter binding for RAW QP RDMA/hns: Fix printing level of asynchronous events RDMA/hns: Fix uninitialized ucmd in hns_roce_create_qp_common() RDMA/hns: Fix signed-unsigned mixed comparisons RDMA/hns: Add check for SL RDMA/hns: The UD mode can only be configured with DCQCN ASoC: SOF: core: Ensure sof_ops_free() is still called when probe never ran. 
ASoC: fsl: Fix PM disable depth imbalance in fsl_easrc_probe scsi: ufs: core: Leave space for '\0' in utf8 desc string RDMA/hfi1: Workaround truncation compilation error HID: cp2112: Make irq_chip immutable hid: cp2112: Fix IRQ shutdown stopping polling for all IRQs on chip sh: bios: Revive earlyprintk support Revert "HID: logitech-hidpp: add a module parameter to keep firmware gestures" HID: logitech-hidpp: Remove HIDPP_QUIRK_NO_HIDINPUT quirk HID: logitech-hidpp: Don't restart IO, instead defer hid_connect() only HID: logitech-hidpp: Revert "Don't restart communication if not necessary" HID: logitech-hidpp: Move get_wireless_feature_index() check to hidpp_connect_event() ASoC: Intel: Skylake: Fix mem leak when parsing UUIDs fails padata: Fix refcnt handling in padata_free_shell() crypto: qat - fix deadlock in backlog processing ASoC: ams-delta.c: use component after check IB/mlx5: Fix init stage error handling to avoid double free of same QP and UAF mfd: core: Un-constify mfd_cell.of_reg mfd: core: Ensure disabled devices are skipped without aborting mfd: dln2: Fix double put in dln2_probe dt-bindings: mfd: mt6397: Add binding for MT6357 dt-bindings: mfd: mt6397: Split out compatible for MediaTek MT6366 PMIC mfd: arizona-spi: Set pdata.hpdet_channel for ACPI enumerated devs leds: turris-omnia: Drop unnecessary mutex locking leds: turris-omnia: Do not use SMBUS calls leds: pwm: Don't disable the PWM when the LED should be off leds: trigger: ledtrig-cpu:: Fix 'output may be truncated' issue for 'cpu' kunit: add macro to allow conditionally exposing static symbols to tests apparmor: test: make static symbols visible during kunit testing apparmor: fix invalid reference on profile->disconnected perf stat: Fix aggr mode initialization iio: frequency: adf4350: Use device managed functions and fix power down issue. 
perf kwork: Fix incorrect and missing free atom in work_push_atom() perf kwork: Add the supported subcommands to the document perf kwork: Set ordered_events to true in 'struct perf_tool' filemap: add filemap_get_folios_tag() f2fs: convert f2fs_write_cache_pages() to use filemap_get_folios_tag() f2fs: compress: fix deadloop in f2fs_write_cache_pages() f2fs: compress: fix to avoid use-after-free on dic f2fs: compress: fix to avoid redundant compress extension tty: tty_jobctrl: fix pid memleak in disassociate_ctty() livepatch: Fix missing newline character in klp_resolve_symbols() pinctrl: renesas: rzg2l: Make reverse order of enable() for disable() perf record: Fix BTF type checks in the off-cpu profiling dmaengine: idxd: Register dsa_bus_type before registering idxd sub-drivers usb: dwc2: fix possible NULL pointer dereference caused by driver concurrency usb: chipidea: Fix DMA overwrite for Tegra usb: chipidea: Simplify Tegra DMA alignment code dmaengine: ti: edma: handle irq_of_parse_and_map() errors misc: st_core: Do not call kfree_skb() under spin_lock_irqsave() tools: iio: iio_generic_buffer ensure alignment USB: usbip: fix stub_dev hub disconnect dmaengine: pxa_dma: Remove an erroneous BUG_ON() in pxad_free_desc() f2fs: fix to initialize map.m_pblk in f2fs_precache_extents() interconnect: qcom: sc7180: Retire DEFINE_QBCM interconnect: qcom: sc7180: Set ACV enable_mask interconnect: qcom: sc7280: Set ACV enable_mask interconnect: qcom: sc8180x: Set ACV enable_mask interconnect: qcom: sc8280xp: Set ACV enable_mask interconnect: qcom: sdm845: Retire DEFINE_QBCM interconnect: qcom: sdm845: Set ACV enable_mask interconnect: qcom: sm6350: Retire DEFINE_QBCM interconnect: qcom: sm6350: Set ACV enable_mask interconnect: move ignore_list out of of_count_icc_providers() interconnect: qcom: sm8150: Drop IP0 interconnects interconnect: qcom: sm8150: Retire DEFINE_QBCM interconnect: qcom: sm8150: Set ACV enable_mask interconnect: qcom: sm8350: Retire DEFINE_QBCM 
interconnect: qcom: sm8350: Set ACV enable_mask powerpc: Only define __parse_fpscr() when required modpost: fix tee MODULE_DEVICE_TABLE built on big-endian host modpost: fix ishtp MODULE_DEVICE_TABLE built on big-endian host powerpc/40x: Remove stale PTE_ATOMIC_UPDATES macro powerpc/xive: Fix endian conversion size powerpc/vas: Limit open window failure messages in log bufffer powerpc/imc-pmu: Use the correct spinlock initializer. powerpc/pseries: fix potential memory leak in init_cpu_associativity() xhci: Loosen RPM as default policy to cover for AMD xHC 1.1 usb: host: xhci-plat: fix possible kernel oops while resuming perf machine: Avoid out of bounds LBR memory read perf hist: Add missing puts to hist__account_cycles 9p/net: fix possible memory leak in p9_check_errors() i3c: Fix potential refcount leak in i3c_master_register_new_i3c_devs cxl/mem: Fix shutdown order crypto: ccp - Name -1 return value as SEV_RET_NO_FW_CALL x86/sev: Change snp_guest_issue_request()'s fw_err argument virt: sevguest: Fix passing a stack buffer as a scatterlist target rtc: pcf85363: fix wrong mask/val parameters in regmap_update_bits call pcmcia: cs: fix possible hung task and memory leak pccardd() pcmcia: ds: fix refcount leak in pcmcia_device_add() pcmcia: ds: fix possible name leak in error path in pcmcia_device_add() media: hantro: Check whether reset op is defined before use media: verisilicon: Do not enable G2 postproc downscale if source is narrower than destination media: ov5640: Drop dead code using frame_interval media: ov5640: fix vblank unchange issue when work at dvp mode media: i2c: max9286: Fix some redundant of_node_put() calls media: ov5640: Fix a memory leak when ov5640_probe fails media: bttv: fix use after free error due to btv->timeout timer media: amphion: handle firmware debug message media: mtk-jpegenc: Fix bug in JPEG encode quality selection media: s3c-camif: Avoid inappropriate kfree() media: vidtv: psi: Add check for kstrdup media: vidtv: mux: Add check and 
kfree for kstrdup media: cedrus: Fix clock/reset sequence media: cadence: csi2rx: Unregister v4l2 async notifier media: dvb-usb-v2: af9035: fix missing unlock media: cec: meson: always include meson sub-directory in Makefile regmap: prevent noinc writes from clobbering cache pwm: sti: Reduce number of allocations and drop usage of chip_data pwm: brcmstb: Utilize appropriate clock APIs in suspend/resume Input: synaptics-rmi4 - fix use after free in rmi_unregister_function() watchdog: ixp4xx: Make sure restart always works llc: verify mac len before reading mac header hsr: Prevent use after free in prp_create_tagged_frame() tipc: Change nla_policy for bearer-related names to NLA_NUL_STRING bpf: Check map->usercnt after timer->timer is assigned inet: shrink struct flowi_common octeontx2-pf: Fix error codes octeontx2-pf: Fix holes in error code net: page_pool: add missing free_percpu when page_pool_init fail dccp: Call security_inet_conn_request() after setting IPv4 addresses. dccp/tcp: Call security_inet_conn_request() after setting IPv6 addresses. 
net: r8169: Disable multicast filter for RTL8168H and RTL8107E Fix termination state for idr_for_each_entry_ul() net: stmmac: xgmac: Enable support for multiple Flexible PPS outputs selftests: pmtu.sh: fix result checking octeontx2-pf: Rename tot_tx_queues to non_qos_queues octeontx2-pf: qos send queues management octeontx2-pf: Free pending and dropped SQEs net/smc: fix dangling sock under state SMC_APPFINCLOSEWAIT net/smc: allow cdc msg send rather than drop it with NULL sndbuf_desc net/smc: put sk reference if close work was canceled nvme: fix error-handling for io_uring nvme-passthrough tg3: power down device only on SYSTEM_POWER_OFF nbd: fix uaf in nbd_open blk-core: use pr_warn_ratelimited() in bio_check_ro() virtio/vsock: replace virtio_vsock_pkt with sk_buff vsock/virtio: remove socket from connected/bound list on shutdown r8169: respect userspace disabling IFF_MULTICAST i2c: iproc: handle invalid slave state netfilter: xt_recent: fix (increase) ipv6 literal buffer length netfilter: nft_redir: use `struct nf_nat_range2` throughout and deduplicate eval call-backs netfilter: nat: fix ipv6 nat redirect with mapped and scoped addresses RISC-V: Don't fail in riscv_of_parent_hartid() for disabled HARTs drm/syncobj: fix DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE ASoC: mediatek: mt8186_mt6366_rt1019_rt5682s: trivial: fix error messages ASoC: hdmi-codec: register hpd callback on component probe ASoC: dapm: fix clock get name spi: spi-zynq-qspi: add spi-mem to driver kconfig dependencies fbdev: imsttfb: Fix error path of imsttfb_probe() fbdev: imsttfb: fix a resource leak in probe fbdev: fsl-diu-fb: mark wr_reg_wa() static tracing/kprobes: Fix the order of argument descriptions io_uring/net: ensure socket is marked connected on connect retry x86/amd_nb: Use Family 19h Models 60h-7Fh Function 4 IDs Revert "mmc: core: Capture correct oemid-bits for eMMC cards" btrfs: use u64 for buffer sizes in the tree search ioctls wifi: cfg80211: fix kernel-doc for 
wiphy_delayed_work_flush() virtio/vsock: don't use skbuff state to account credit virtio/vsock: remove redundant 'skb_pull()' call virtio/vsock: don't drop skbuff on copy failure vsock/loopback: use only sk_buff_head.lock to protect the packet queue virtio/vsock: fix leaks due to missing skb owner virtio/vsock: Fix uninit-value in virtio_transport_recv_pkt() virtio/vsock: fix header length on skb merging Linux 6.1.63 Change-Id: I87b7a539b11c90cfaf16edb07d613f74d54458a4 Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
864 lines
26 KiB
C
864 lines
26 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
/*
|
|
* mm/readahead.c - address_space-level file readahead.
|
|
*
|
|
* Copyright (C) 2002, Linus Torvalds
|
|
*
|
|
* 09Apr2002 Andrew Morton
|
|
* Initial version.
|
|
*/
|
|
|
|
/**
|
|
* DOC: Readahead Overview
|
|
*
|
|
* Readahead is used to read content into the page cache before it is
|
|
* explicitly requested by the application. Readahead only ever
|
|
* attempts to read folios that are not yet in the page cache. If a
|
|
* folio is present but not up-to-date, readahead will not try to read
|
|
* it. In that case a simple ->read_folio() will be requested.
|
|
*
|
|
* Readahead is triggered when an application read request (whether a
|
|
* system call or a page fault) finds that the requested folio is not in
|
|
* the page cache, or that it is in the page cache and has the
|
|
* readahead flag set. This flag indicates that the folio was read
|
|
* as part of a previous readahead request and now that it has been
|
|
* accessed, it is time for the next readahead.
|
|
*
|
|
* Each readahead request is partly synchronous read, and partly async
|
|
* readahead. This is reflected in the struct file_ra_state which
|
|
* contains ->size being the total number of pages, and ->async_size
|
|
* which is the number of pages in the async section. The readahead
|
|
* flag will be set on the first folio in this async section to trigger
|
|
* a subsequent readahead. Once a series of sequential reads has been
|
|
* established, there should be no need for a synchronous component and
|
|
* all readahead requests will be fully asynchronous.
|
|
*
|
|
* When either of the triggers causes a readahead, three numbers need
|
|
* to be determined: the start of the region to read, the size of the
|
|
* region, and the size of the async tail.
|
|
*
|
|
* The start of the region is simply the first page address at or after
|
|
* the accessed address, which is not currently populated in the page
|
|
* cache. This is found with a simple search in the page cache.
|
|
*
|
|
* The size of the async tail is determined by subtracting the size that
|
|
* was explicitly requested from the determined request size, unless
|
|
* this would be less than zero - then zero is used. NOTE THIS
|
|
* CALCULATION IS WRONG WHEN THE START OF THE REGION IS NOT THE ACCESSED
|
|
* PAGE. ALSO THIS CALCULATION IS NOT USED CONSISTENTLY.
|
|
*
|
|
* The size of the region is normally determined from the size of the
|
|
* previous readahead which loaded the preceding pages. This may be
|
|
* discovered from the struct file_ra_state for simple sequential reads,
|
|
* or from examining the state of the page cache when multiple
|
|
* sequential reads are interleaved. Specifically: where the readahead
|
|
* was triggered by the readahead flag, the size of the previous
|
|
* readahead is assumed to be the number of pages from the triggering
|
|
* page to the start of the new readahead. In these cases, the size of
|
|
* the previous readahead is scaled, often doubled, for the new
|
|
* readahead, though see get_next_ra_size() for details.
|
|
*
|
|
* If the size of the previous read cannot be determined, the number of
|
|
* preceding pages in the page cache is used to estimate the size of
|
|
* a previous read. This estimate could easily be misled by random
|
|
* reads being coincidentally adjacent, so it is ignored unless it is
|
|
* larger than the current request, and it is not scaled up, unless it
|
|
* is at the start of file.
|
|
*
|
|
* In general readahead is accelerated at the start of the file, as
|
|
* reads from there are often sequential. There are other minor
|
|
* adjustments to the readahead size in various special cases and these
|
|
* are best discovered by reading the code.
|
|
*
|
|
* The above calculation, based on the previous readahead size,
|
|
* determines the size of the readahead, to which any requested read
|
|
* size may be added.
|
|
*
|
|
* Readahead requests are sent to the filesystem using the ->readahead()
|
|
* address space operation, for which mpage_readahead() is a canonical
|
|
* implementation. ->readahead() should normally initiate reads on all
|
|
* folios, but may fail to read any or all folios without causing an I/O
|
|
* error. The page cache reading code will issue a ->read_folio() request
|
|
* for any folio which ->readahead() did not read, and only an error
|
|
* from this will be final.
|
|
*
|
|
* ->readahead() will generally call readahead_folio() repeatedly to get
|
|
* each folio from those prepared for readahead. It may fail to read a
|
|
* folio by:
|
|
*
|
|
* * not calling readahead_folio() sufficiently many times, effectively
|
|
* ignoring some folios, as might be appropriate if the path to
|
|
* storage is congested.
|
|
*
|
|
* * failing to actually submit a read request for a given folio,
|
|
* possibly due to insufficient resources, or
|
|
*
|
|
* * getting an error during subsequent processing of a request.
|
|
*
|
|
* In the last two cases, the folio should be unlocked by the filesystem
|
|
* to indicate that the read attempt has failed. In the first case the
|
|
* folio will be unlocked by the VFS.
|
|
*
|
|
* Those folios not in the final ``async_size`` of the request should be
|
|
* considered to be important and ->readahead() should not fail them due
|
|
* to congestion or temporary resource unavailability, but should wait
|
|
* for necessary resources (e.g. memory or indexing information) to
|
|
* become available. Folios in the final ``async_size`` may be
|
|
* considered less urgent and failure to read them is more acceptable.
|
|
* In this case it is best to use filemap_remove_folio() to remove the
|
|
* folios from the page cache as is automatically done for folios that
|
|
* were not fetched with readahead_folio(). This will allow a
|
|
* subsequent synchronous readahead request to try them again. If they
|
|
* are left in the page cache, then they will be read individually using
|
|
* ->read_folio() which may be less efficient.
|
|
*/
|
|
|
|
#include <linux/blkdev.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/dax.h>
|
|
#include <linux/gfp.h>
|
|
#include <linux/export.h>
|
|
#include <linux/backing-dev.h>
|
|
#include <linux/task_io_accounting_ops.h>
|
|
#include <linux/pagevec.h>
|
|
#include <linux/pagemap.h>
|
|
#include <linux/psi.h>
|
|
#include <linux/syscalls.h>
|
|
#include <linux/file.h>
|
|
#include <linux/mm_inline.h>
|
|
#include <linux/blk-cgroup.h>
|
|
#include <linux/fadvise.h>
|
|
#include <linux/sched/mm.h>
|
|
#include <trace/hooks/mm.h>
|
|
|
|
#include "internal.h"
|
|
|
|
/*
|
|
* Initialise a struct file's readahead state. Assumes that the caller has
|
|
* memset *ra to zero.
|
|
*/
|
|
void
|
|
file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping)
|
|
{
|
|
ra->ra_pages = inode_to_bdi(mapping->host)->ra_pages;
|
|
ra->prev_pos = -1;
|
|
}
|
|
EXPORT_SYMBOL_GPL(file_ra_state_init);
|
|
|
|
/*
 * Return the gfp mask to use when allocating readahead folios for mapping @x.
 *
 * The starting value comes from __readahead_gfp_mask().  The Android
 * restricted vendor hook is then handed a pointer to that value, so the mask
 * returned here is whatever is left in it after the hook has run.
 */
gfp_t readahead_gfp_mask(struct address_space *x)
{
	gfp_t gfp = __readahead_gfp_mask(x);

	trace_android_rvh_set_readahead_gfp_mask(&gfp);

	return gfp;
}
|
|
EXPORT_SYMBOL_GPL(readahead_gfp_mask);
|
|
|
|
static void read_pages(struct readahead_control *rac)
|
|
{
|
|
const struct address_space_operations *aops = rac->mapping->a_ops;
|
|
struct folio *folio;
|
|
struct blk_plug plug;
|
|
|
|
if (!readahead_count(rac))
|
|
return;
|
|
|
|
if (unlikely(rac->_workingset))
|
|
psi_memstall_enter(&rac->_pflags);
|
|
blk_start_plug(&plug);
|
|
|
|
if (aops->readahead) {
|
|
aops->readahead(rac);
|
|
/*
|
|
* Clean up the remaining folios. The sizes in ->ra
|
|
* may be used to size the next readahead, so make sure
|
|
* they accurately reflect what happened.
|
|
*/
|
|
while ((folio = readahead_folio(rac)) != NULL) {
|
|
unsigned long nr = folio_nr_pages(folio);
|
|
|
|
folio_get(folio);
|
|
rac->ra->size -= nr;
|
|
if (rac->ra->async_size >= nr) {
|
|
rac->ra->async_size -= nr;
|
|
filemap_remove_folio(folio);
|
|
}
|
|
folio_unlock(folio);
|
|
folio_put(folio);
|
|
}
|
|
} else {
|
|
while ((folio = readahead_folio(rac)) != NULL)
|
|
aops->read_folio(rac->file, folio);
|
|
}
|
|
|
|
blk_finish_plug(&plug);
|
|
if (unlikely(rac->_workingset))
|
|
psi_memstall_leave(&rac->_pflags);
|
|
rac->_workingset = false;
|
|
|
|
BUG_ON(readahead_count(rac));
|
|
}
|
|
|
|
/**
|
|
* page_cache_ra_unbounded - Start unchecked readahead.
|
|
* @ractl: Readahead control.
|
|
* @nr_to_read: The number of pages to read.
|
|
* @lookahead_size: Where to start the next readahead.
|
|
*
|
|
* This function is for filesystems to call when they want to start
|
|
* readahead beyond a file's stated i_size. This is almost certainly
|
|
* not the function you want to call. Use page_cache_async_readahead()
|
|
* or page_cache_sync_readahead() instead.
|
|
*
|
|
* Context: File is referenced by caller. Mutexes may be held by caller.
|
|
* May sleep, but will not reenter filesystem to reclaim memory.
|
|
*/
|
|
void page_cache_ra_unbounded(struct readahead_control *ractl,
|
|
unsigned long nr_to_read, unsigned long lookahead_size)
|
|
{
|
|
struct address_space *mapping = ractl->mapping;
|
|
unsigned long index = readahead_index(ractl);
|
|
gfp_t gfp_mask = readahead_gfp_mask(mapping);
|
|
unsigned long i;
|
|
|
|
/*
|
|
* Partway through the readahead operation, we will have added
|
|
* locked pages to the page cache, but will not yet have submitted
|
|
* them for I/O. Adding another page may need to allocate memory,
|
|
* which can trigger memory reclaim. Telling the VM we're in
|
|
* the middle of a filesystem operation will cause it to not
|
|
* touch file-backed pages, preventing a deadlock. Most (all?)
|
|
* filesystems already specify __GFP_NOFS in their mapping's
|
|
* gfp_mask, but let's be explicit here.
|
|
*/
|
|
unsigned int nofs = memalloc_nofs_save();
|
|
|
|
filemap_invalidate_lock_shared(mapping);
|
|
/*
|
|
* Preallocate as many pages as we will need.
|
|
*/
|
|
for (i = 0; i < nr_to_read; i++) {
|
|
struct folio *folio = xa_load(&mapping->i_pages, index + i);
|
|
|
|
if (folio && !xa_is_value(folio)) {
|
|
/*
|
|
* Page already present? Kick off the current batch
|
|
* of contiguous pages before continuing with the
|
|
* next batch. This page may be the one we would
|
|
* have intended to mark as Readahead, but we don't
|
|
* have a stable reference to this page, and it's
|
|
* not worth getting one just for that.
|
|
*/
|
|
read_pages(ractl);
|
|
ractl->_index++;
|
|
i = ractl->_index + ractl->_nr_pages - index - 1;
|
|
continue;
|
|
}
|
|
|
|
folio = filemap_alloc_folio(gfp_mask, 0);
|
|
if (!folio)
|
|
break;
|
|
if (filemap_add_folio(mapping, folio, index + i,
|
|
gfp_mask) < 0) {
|
|
folio_put(folio);
|
|
read_pages(ractl);
|
|
ractl->_index++;
|
|
i = ractl->_index + ractl->_nr_pages - index - 1;
|
|
continue;
|
|
}
|
|
if (i == nr_to_read - lookahead_size)
|
|
folio_set_readahead(folio);
|
|
ractl->_workingset |= folio_test_workingset(folio);
|
|
ractl->_nr_pages++;
|
|
}
|
|
|
|
/*
|
|
* Now start the IO. We ignore I/O errors - if the folio is not
|
|
* uptodate then the caller will launch read_folio again, and
|
|
* will then handle the error.
|
|
*/
|
|
read_pages(ractl);
|
|
filemap_invalidate_unlock_shared(mapping);
|
|
memalloc_nofs_restore(nofs);
|
|
}
|
|
EXPORT_SYMBOL_GPL(page_cache_ra_unbounded);
|
|
|
|
/*
|
|
* do_page_cache_ra() actually reads a chunk of disk. It allocates
|
|
* the pages first, then submits them for I/O. This avoids the very bad
|
|
* behaviour which would occur if page allocations are causing VM writeback.
|
|
* We really don't want to intermingle reads and writes like that.
|
|
*/
|
|
static void do_page_cache_ra(struct readahead_control *ractl,
|
|
unsigned long nr_to_read, unsigned long lookahead_size)
|
|
{
|
|
struct inode *inode = ractl->mapping->host;
|
|
unsigned long index = readahead_index(ractl);
|
|
loff_t isize = i_size_read(inode);
|
|
pgoff_t end_index; /* The last page we want to read */
|
|
|
|
if (isize == 0)
|
|
return;
|
|
|
|
end_index = (isize - 1) >> PAGE_SHIFT;
|
|
if (index > end_index)
|
|
return;
|
|
/* Don't read past the page containing the last byte of the file */
|
|
if (nr_to_read > end_index - index)
|
|
nr_to_read = end_index - index + 1;
|
|
|
|
page_cache_ra_unbounded(ractl, nr_to_read, lookahead_size);
|
|
}
|
|
|
|
/*
|
|
* Chunk the readahead into 2 megabyte units, so that we don't pin too much
|
|
* memory at once.
|
|
*/
|
|
void force_page_cache_ra(struct readahead_control *ractl,
|
|
unsigned long nr_to_read)
|
|
{
|
|
struct address_space *mapping = ractl->mapping;
|
|
struct file_ra_state *ra = ractl->ra;
|
|
struct backing_dev_info *bdi = inode_to_bdi(mapping->host);
|
|
unsigned long max_pages, index;
|
|
|
|
if (unlikely(!mapping->a_ops->read_folio && !mapping->a_ops->readahead))
|
|
return;
|
|
|
|
/*
|
|
* If the request exceeds the readahead window, allow the read to
|
|
* be up to the optimal hardware IO size
|
|
*/
|
|
index = readahead_index(ractl);
|
|
max_pages = max_t(unsigned long, bdi->io_pages, ra->ra_pages);
|
|
nr_to_read = min_t(unsigned long, nr_to_read, max_pages);
|
|
while (nr_to_read) {
|
|
unsigned long this_chunk = (2 * 1024 * 1024) / PAGE_SIZE;
|
|
|
|
if (this_chunk > nr_to_read)
|
|
this_chunk = nr_to_read;
|
|
ractl->_index = index;
|
|
do_page_cache_ra(ractl, this_chunk, 0);
|
|
|
|
index += this_chunk;
|
|
nr_to_read -= this_chunk;
|
|
}
|
|
}
|
|
|
|
/*
 * Set the initial window size: round the request up to the next power of
 * two, then scale it x 4 when it is at most max/32, x 2 when it is at most
 * max/4, and otherwise use max itself.
 *
 * For 128k (32 page) max ra:
 * 1-2 page = 16k, 3-4 page 32k, 5-8 page = 64k, > 8 page = 128k initial
 */
static unsigned long get_init_ra_size(unsigned long size, unsigned long max)
{
	unsigned long rounded = roundup_pow_of_two(size);

	if (rounded <= max / 32)
		return rounded * 4;
	if (rounded <= max / 4)
		return rounded * 2;
	return max;
}
|
|
|
|
/*
|
|
* Get the previous window size, ramp it up, and
|
|
* return it as the new window size.
|
|
*/
|
|
static unsigned long get_next_ra_size(struct file_ra_state *ra,
|
|
unsigned long max)
|
|
{
|
|
unsigned long cur = ra->size;
|
|
|
|
if (cur < max / 16)
|
|
return 4 * cur;
|
|
if (cur <= max / 2)
|
|
return 2 * cur;
|
|
return max;
|
|
}
|
|
|
|
/*
|
|
* On-demand readahead design.
|
|
*
|
|
* The fields in struct file_ra_state represent the most-recently-executed
|
|
* readahead attempt:
|
|
*
|
|
*                        |<----- async_size ---------|
*     |------------------- size -------------------->|
*     |==================#===========================|
*     ^start             ^page marked with PG_readahead
|
|
*
|
|
* To overlap application thinking time and disk I/O time, we do
|
|
* `readahead pipelining': Do not wait until the application consumed all
|
|
* readahead pages and stalled on the missing page at readahead_index;
|
|
* Instead, submit an asynchronous readahead I/O as soon as there are
|
|
* only async_size pages left in the readahead window. Normally async_size
|
|
* will be equal to size, for maximum pipelining.
|
|
*
|
|
* In interleaved sequential reads, concurrent streams on the same fd can
|
|
* be invalidating each other's readahead state. So we flag the new readahead
|
|
* page at (start+size-async_size) with PG_readahead, and use it as readahead
|
|
* indicator. The flag won't be set on already cached pages, to avoid the
|
|
* readahead-for-nothing fuss, saving pointless page cache lookups.
|
|
*
|
|
* prev_pos tracks the last visited byte in the _previous_ read request.
|
|
* It should be maintained by the caller, and will be used for detecting
|
|
* small random reads. Note that the readahead algorithm checks loosely
|
|
* for sequential patterns. Hence interleaved reads might be served as
|
|
* sequential ones.
|
|
*
|
|
* There is a special-case: if the first page which the application tries to
|
|
* read happens to be the first page of the file, it is assumed that a linear
|
|
* read is about to happen and the window is immediately set to the initial size
|
|
* based on I/O request size and the max_readahead.
|
|
*
|
|
* The code ramps up the readahead size aggressively at first, but slows down as
|
|
* it approaches max_readahead.
|
|
*/
|
|
|
|
/*
|
|
* Count contiguously cached pages from @index-1 to @index-@max,
|
|
* this count is a conservative estimation of
|
|
* - length of the sequential read sequence, or
|
|
* - thrashing threshold in memory tight systems
|
|
*/
|
|
static pgoff_t count_history_pages(struct address_space *mapping,
|
|
pgoff_t index, unsigned long max)
|
|
{
|
|
pgoff_t head;
|
|
|
|
rcu_read_lock();
|
|
head = page_cache_prev_miss(mapping, index - 1, max);
|
|
rcu_read_unlock();
|
|
|
|
return index - 1 - head;
|
|
}
|
|
|
|
/*
|
|
* page cache context based readahead
|
|
*/
|
|
static int try_context_readahead(struct address_space *mapping,
|
|
struct file_ra_state *ra,
|
|
pgoff_t index,
|
|
unsigned long req_size,
|
|
unsigned long max)
|
|
{
|
|
pgoff_t size;
|
|
|
|
size = count_history_pages(mapping, index, max);
|
|
|
|
/*
|
|
* not enough history pages:
|
|
* it could be a random read
|
|
*/
|
|
if (size <= req_size)
|
|
return 0;
|
|
|
|
/*
|
|
* starts from beginning of file:
|
|
* it is a strong indication of long-run stream (or whole-file-read)
|
|
*/
|
|
if (size >= index)
|
|
size *= 2;
|
|
|
|
ra->start = index;
|
|
ra->size = min(size + req_size, max);
|
|
ra->async_size = 1;
|
|
|
|
return 1;
|
|
}
|
|
|
|
/*
 * Some parts of the kernel assume that PMD entries are exactly
 * HPAGE_PMD_ORDER.  Those should be fixed, but until then the maximum
 * allocation order is limited to PMD size.  No assumptions about the maximum
 * order are known when THP is disabled, but 8 seems like a good order
 * (that's 1MB if you're using 4kB pages).
 */
#if defined(CONFIG_TRANSPARENT_HUGEPAGE)
#define MAX_PAGECACHE_ORDER	HPAGE_PMD_ORDER
#else
#define MAX_PAGECACHE_ORDER	8
#endif
|
|
|
|
/*
 * Allocate one folio of the given @order and insert it into the mapping at
 * @index.  The folio gets the readahead flag when @index equals @mark
 * rounded up to the folio size.  On success the readahead control's page
 * count and workingset state are updated and 0 is returned; otherwise
 * -ENOMEM or the error from filemap_add_folio() is returned.
 */
static inline int ra_alloc_folio(struct readahead_control *ractl, pgoff_t index,
		pgoff_t mark, unsigned int order, gfp_t gfp)
{
	struct folio *folio = filemap_alloc_folio(gfp, order);
	int ret;

	if (!folio)
		return -ENOMEM;

	/* Flag the folio before it becomes visible in the page cache. */
	mark = round_up(mark, 1UL << order);
	if (mark == index)
		folio_set_readahead(folio);

	ret = filemap_add_folio(ractl->mapping, folio, index, gfp);
	if (ret) {
		folio_put(folio);
		return ret;
	}

	ractl->_nr_pages += 1UL << order;
	ractl->_workingset |= folio_test_workingset(folio);
	return 0;
}
|
|
|
|
void page_cache_ra_order(struct readahead_control *ractl,
|
|
struct file_ra_state *ra, unsigned int new_order)
|
|
{
|
|
struct address_space *mapping = ractl->mapping;
|
|
pgoff_t index = readahead_index(ractl);
|
|
pgoff_t limit = (i_size_read(mapping->host) - 1) >> PAGE_SHIFT;
|
|
pgoff_t mark = index + ra->size - ra->async_size;
|
|
int err = 0;
|
|
gfp_t gfp = readahead_gfp_mask(mapping);
|
|
|
|
if (!mapping_large_folio_support(mapping) || ra->size < 4)
|
|
goto fallback;
|
|
|
|
limit = min(limit, index + ra->size - 1);
|
|
|
|
if (new_order < MAX_PAGECACHE_ORDER) {
|
|
new_order += 2;
|
|
if (new_order > MAX_PAGECACHE_ORDER)
|
|
new_order = MAX_PAGECACHE_ORDER;
|
|
while ((1 << new_order) > ra->size)
|
|
new_order--;
|
|
}
|
|
|
|
filemap_invalidate_lock_shared(mapping);
|
|
while (index <= limit) {
|
|
unsigned int order = new_order;
|
|
|
|
/* Align with smaller pages if needed */
|
|
if (index & ((1UL << order) - 1)) {
|
|
order = __ffs(index);
|
|
if (order == 1)
|
|
order = 0;
|
|
}
|
|
/* Don't allocate pages past EOF */
|
|
while (index + (1UL << order) - 1 > limit) {
|
|
if (--order == 1)
|
|
order = 0;
|
|
}
|
|
err = ra_alloc_folio(ractl, index, mark, order, gfp);
|
|
if (err)
|
|
break;
|
|
index += 1UL << order;
|
|
}
|
|
|
|
if (index > limit) {
|
|
ra->size += index - limit - 1;
|
|
ra->async_size += index - limit - 1;
|
|
}
|
|
|
|
read_pages(ractl);
|
|
filemap_invalidate_unlock_shared(mapping);
|
|
|
|
/*
|
|
* If there were already pages in the page cache, then we may have
|
|
* left some gaps. Let the regular readahead code take care of this
|
|
* situation.
|
|
*/
|
|
if (!err)
|
|
return;
|
|
fallback:
|
|
do_page_cache_ra(ractl, ra->size, ra->async_size);
|
|
}
|
|
|
|
/*
|
|
* A minimal readahead algorithm for trivial sequential/random reads.
|
|
*/
|
|
static void ondemand_readahead(struct readahead_control *ractl,
|
|
struct folio *folio, unsigned long req_size)
|
|
{
|
|
struct backing_dev_info *bdi = inode_to_bdi(ractl->mapping->host);
|
|
struct file_ra_state *ra = ractl->ra;
|
|
unsigned long max_pages = ra->ra_pages;
|
|
unsigned long add_pages;
|
|
pgoff_t index = readahead_index(ractl);
|
|
pgoff_t expected, prev_index;
|
|
unsigned int order = folio ? folio_order(folio) : 0;
|
|
|
|
/*
|
|
* If the request exceeds the readahead window, allow the read to
|
|
* be up to the optimal hardware IO size
|
|
*/
|
|
if (req_size > max_pages && bdi->io_pages > max_pages)
|
|
max_pages = min(req_size, bdi->io_pages);
|
|
|
|
trace_android_vh_ra_tuning_max_page(ractl, &max_pages);
|
|
|
|
/*
|
|
* start of file
|
|
*/
|
|
if (!index)
|
|
goto initial_readahead;
|
|
|
|
/*
|
|
* It's the expected callback index, assume sequential access.
|
|
* Ramp up sizes, and push forward the readahead window.
|
|
*/
|
|
expected = round_up(ra->start + ra->size - ra->async_size,
|
|
1UL << order);
|
|
if (index == expected || index == (ra->start + ra->size)) {
|
|
ra->start += ra->size;
|
|
ra->size = get_next_ra_size(ra, max_pages);
|
|
ra->async_size = ra->size;
|
|
goto readit;
|
|
}
|
|
|
|
/*
|
|
* Hit a marked folio without valid readahead state.
|
|
* E.g. interleaved reads.
|
|
* Query the pagecache for async_size, which normally equals to
|
|
* readahead size. Ramp it up and use it as the new readahead size.
|
|
*/
|
|
if (folio) {
|
|
pgoff_t start;
|
|
|
|
rcu_read_lock();
|
|
start = page_cache_next_miss(ractl->mapping, index + 1,
|
|
max_pages);
|
|
rcu_read_unlock();
|
|
|
|
if (!start || start - index > max_pages)
|
|
return;
|
|
|
|
ra->start = start;
|
|
ra->size = start - index; /* old async_size */
|
|
ra->size += req_size;
|
|
ra->size = get_next_ra_size(ra, max_pages);
|
|
ra->async_size = ra->size;
|
|
goto readit;
|
|
}
|
|
|
|
/*
|
|
* oversize read
|
|
*/
|
|
if (req_size > max_pages)
|
|
goto initial_readahead;
|
|
|
|
/*
|
|
* sequential cache miss
|
|
* trivial case: (index - prev_index) == 1
|
|
* unaligned reads: (index - prev_index) == 0
|
|
*/
|
|
prev_index = (unsigned long long)ra->prev_pos >> PAGE_SHIFT;
|
|
if (index - prev_index <= 1UL)
|
|
goto initial_readahead;
|
|
|
|
/*
|
|
* Query the page cache and look for the traces(cached history pages)
|
|
* that a sequential stream would leave behind.
|
|
*/
|
|
if (try_context_readahead(ractl->mapping, ra, index, req_size,
|
|
max_pages))
|
|
goto readit;
|
|
|
|
/*
|
|
* standalone, small random read
|
|
* Read as is, and do not pollute the readahead state.
|
|
*/
|
|
do_page_cache_ra(ractl, req_size, 0);
|
|
return;
|
|
|
|
initial_readahead:
|
|
ra->start = index;
|
|
ra->size = get_init_ra_size(req_size, max_pages);
|
|
ra->async_size = ra->size > req_size ? ra->size - req_size : ra->size;
|
|
|
|
readit:
|
|
/*
|
|
* Will this read hit the readahead marker made by itself?
|
|
* If so, trigger the readahead marker hit now, and merge
|
|
* the resulted next readahead window into the current one.
|
|
* Take care of maximum IO pages as above.
|
|
*/
|
|
if (index == ra->start && ra->size == ra->async_size) {
|
|
add_pages = get_next_ra_size(ra, max_pages);
|
|
if (ra->size + add_pages <= max_pages) {
|
|
ra->async_size = add_pages;
|
|
ra->size += add_pages;
|
|
} else {
|
|
ra->size = max_pages;
|
|
ra->async_size = max_pages >> 1;
|
|
}
|
|
}
|
|
|
|
ractl->_index = ra->start;
|
|
page_cache_ra_order(ractl, ra, order);
|
|
}
|
|
|
|
void page_cache_sync_ra(struct readahead_control *ractl,
|
|
unsigned long req_count)
|
|
{
|
|
bool do_forced_ra = ractl->file && (ractl->file->f_mode & FMODE_RANDOM);
|
|
|
|
/*
|
|
* Even if readahead is disabled, issue this request as readahead
|
|
* as we'll need it to satisfy the requested range. The forced
|
|
* readahead will do the right thing and limit the read to just the
|
|
* requested range, which we'll set to 1 page for this case.
|
|
*/
|
|
if (!ractl->ra->ra_pages || blk_cgroup_congested()) {
|
|
if (!ractl->file)
|
|
return;
|
|
req_count = 1;
|
|
do_forced_ra = true;
|
|
}
|
|
|
|
/* be dumb */
|
|
if (do_forced_ra) {
|
|
force_page_cache_ra(ractl, req_count);
|
|
return;
|
|
}
|
|
|
|
ondemand_readahead(ractl, NULL, req_count);
|
|
}
|
|
EXPORT_SYMBOL_GPL(page_cache_sync_ra);
|
|
|
|
void page_cache_async_ra(struct readahead_control *ractl,
|
|
struct folio *folio, unsigned long req_count)
|
|
{
|
|
/* no readahead */
|
|
if (!ractl->ra->ra_pages)
|
|
return;
|
|
|
|
/*
|
|
* Same bit is used for PG_readahead and PG_reclaim.
|
|
*/
|
|
if (folio_test_writeback(folio))
|
|
return;
|
|
|
|
folio_clear_readahead(folio);
|
|
|
|
if (blk_cgroup_congested())
|
|
return;
|
|
|
|
ondemand_readahead(ractl, folio, req_count);
|
|
}
|
|
EXPORT_SYMBOL_GPL(page_cache_async_ra);
|
|
|
|
/*
 * Kernel-side implementation of readahead(2).
 *
 * Returns -EBADF when @fd is not an open file descriptor readable by the
 * caller, -EINVAL when the file has no mapping/address space operations or
 * is neither a regular file nor a block device, and otherwise the result of
 * vfs_fadvise() with POSIX_FADV_WILLNEED.
 */
ssize_t ksys_readahead(int fd, loff_t offset, size_t count)
{
	ssize_t ret = -EBADF;
	struct fd f = fdget(fd);

	if (!f.file || !(f.file->f_mode & FMODE_READ))
		goto out;

	/*
	 * The readahead() syscall is intended to run only on files
	 * that can execute readahead. If readahead is not possible
	 * on this file, then we must return -EINVAL.
	 */
	ret = -EINVAL;
	if (!f.file->f_mapping || !f.file->f_mapping->a_ops)
		goto out;
	if (!S_ISREG(file_inode(f.file)->i_mode) &&
	    !S_ISBLK(file_inode(f.file)->i_mode))
		goto out;

	ret = vfs_fadvise(f.file, offset, count, POSIX_FADV_WILLNEED);
out:
	/* Reached on every path, including when fdget() found no file. */
	fdput(f);
	return ret;
}
|
|
|
|
/* readahead(2) system call: a thin wrapper around ksys_readahead(). */
SYSCALL_DEFINE3(readahead, int, fd, loff_t, offset, size_t, count)
{
	return ksys_readahead(fd, offset, count);
}
|
|
|
|
#if defined(CONFIG_COMPAT) && defined(__ARCH_WANT_COMPAT_READAHEAD)
/*
 * Compat readahead(2): the 64-bit offset arrives as two arguments and is
 * glued back together before calling ksys_readahead().
 */
COMPAT_SYSCALL_DEFINE4(readahead, int, fd, compat_arg_u64_dual(offset), size_t, count)
{
	return ksys_readahead(fd, compat_arg_u64_glue(offset), count);
}
#endif
|
|
|
|
/**
|
|
* readahead_expand - Expand a readahead request
|
|
* @ractl: The request to be expanded
|
|
* @new_start: The revised start
|
|
* @new_len: The revised size of the request
|
|
*
|
|
* Attempt to expand a readahead request outwards from the current size to the
|
|
* specified size by inserting locked pages before and after the current window
|
|
* to increase the size to the new window. This may involve the insertion of
|
|
* THPs, in which case the window may get expanded even beyond what was
|
|
* requested.
|
|
*
|
|
* The algorithm will stop if it encounters a conflicting page already in the
|
|
* pagecache and leave a smaller expansion than requested.
|
|
*
|
|
* The caller must check for this by examining the revised @ractl object for a
|
|
* different expansion than was requested.
|
|
*/
|
|
void readahead_expand(struct readahead_control *ractl,
|
|
loff_t new_start, size_t new_len)
|
|
{
|
|
struct address_space *mapping = ractl->mapping;
|
|
struct file_ra_state *ra = ractl->ra;
|
|
pgoff_t new_index, new_nr_pages;
|
|
gfp_t gfp_mask = readahead_gfp_mask(mapping);
|
|
|
|
new_index = new_start / PAGE_SIZE;
|
|
|
|
/* Expand the leading edge downwards */
|
|
while (ractl->_index > new_index) {
|
|
unsigned long index = ractl->_index - 1;
|
|
struct page *page = xa_load(&mapping->i_pages, index);
|
|
|
|
if (page && !xa_is_value(page))
|
|
return; /* Page apparently present */
|
|
|
|
page = __page_cache_alloc(gfp_mask);
|
|
if (!page)
|
|
return;
|
|
if (add_to_page_cache_lru(page, mapping, index, gfp_mask) < 0) {
|
|
put_page(page);
|
|
return;
|
|
}
|
|
|
|
ractl->_nr_pages++;
|
|
ractl->_index = page->index;
|
|
}
|
|
|
|
new_len += new_start - readahead_pos(ractl);
|
|
new_nr_pages = DIV_ROUND_UP(new_len, PAGE_SIZE);
|
|
|
|
/* Expand the trailing edge upwards */
|
|
while (ractl->_nr_pages < new_nr_pages) {
|
|
unsigned long index = ractl->_index + ractl->_nr_pages;
|
|
struct page *page = xa_load(&mapping->i_pages, index);
|
|
|
|
if (page && !xa_is_value(page))
|
|
return; /* Page apparently present */
|
|
|
|
page = __page_cache_alloc(gfp_mask);
|
|
if (!page)
|
|
return;
|
|
if (add_to_page_cache_lru(page, mapping, index, gfp_mask) < 0) {
|
|
put_page(page);
|
|
return;
|
|
}
|
|
if (unlikely(PageWorkingset(page)) && !ractl->_workingset) {
|
|
ractl->_workingset = true;
|
|
psi_memstall_enter(&ractl->_pflags);
|
|
}
|
|
ractl->_nr_pages++;
|
|
if (ra) {
|
|
ra->size++;
|
|
ra->async_size++;
|
|
}
|
|
}
|
|
}
|
|
EXPORT_SYMBOL(readahead_expand);
|