android_kernel_samsung_sm8650/mm/readahead.c
Greg Kroah-Hartman 2b3ea8bdef This is the 6.1.63 stable release
-----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEEZH8oZUiU471FcZm+ONu9yGCSaT4FAmVbOmsACgkQONu9yGCS
 aT5m1RAAx7hgbFDnLHCGh4YVBbNy8JngItsUBaJcI/67Mk5toNi0x8pqcS8mq7ED
 GTwRnRcKaIR2bTyco5Ed2OZn4jMCyHC4oiyBZnHWg6AMuQjSCYzIgm7DzlTCVYZ7
 2r8uRbt/uXADTILJ2kwR2mtVpGcwrXa+lsHrMqvt+MvNwRoSVHBHVVYCrAc+JXwR
 GXCopzV/RFGS6w4SBsX0K+8pV7GO+bhpxJ1lPz1T/xeLYfT4C3EwSTWDbUXPbez7
 IpJ+5yKJXXT9Xn9m/pekwZ/aOirLqtEbDxneEctsjvw140lCoQiEZn6ZRscgNEns
 3H+J3Asgc2zXqPzfZFH02TebPj31B8HZ43Upu0okr0hr4A4/4JL9pjXEhm1bON/Z
 x3jlTF4dyay4vOGGIEYOAuJSUbn6AqpZ318uBWCd3BSPocihEDMJz2aoazVHcb6k
 83MVxfFfEL6s9utcoSXB8VjHa4FQmpMYsozegloUSJJCsizgdzmih0buJYhBB9sI
 HbEohW+YAh3cACSn6arXUJIMH5F5xsfD89od2Pj+6UrapdlPz5gCaggA1RZplCho
 bjGc1k61Rp2qSdfMEcx+h4ypgoOdhgqZI0YhYDCgBSRcWOXnGrDjFvnnumatcT+H
 6vqyX6zlNt6U1NpE56Jtf7gt1Ds6PeoadD0L6B8vjXrkdeXOlUU=
 =AZ9s
 -----END PGP SIGNATURE-----

Merge 6.1.63 into android14-6.1-lts

Changes in 6.1.63
	hwmon: (nct6775) Fix incorrect variable reuse in fan_div calculation
	sched/fair: Fix cfs_rq_is_decayed() on !SMP
	iov_iter, x86: Be consistent about the __user tag on copy_mc_to_user()
	sched/uclamp: Set max_spare_cap_cpu even if max_spare_cap is 0
	sched/uclamp: Ignore (util == 0) optimization in feec() when p_util_max = 0
	objtool: Propagate early errors
	sched: Fix stop_one_cpu_nowait() vs hotplug
	vfs: fix readahead(2) on block devices
	writeback, cgroup: switch inodes with dirty timestamps to release dying cgwbs
	x86/srso: Fix SBPB enablement for (possible) future fixed HW
	futex: Don't include process MM in futex key on no-MMU
	x86/numa: Introduce numa_fill_memblks()
	ACPI/NUMA: Apply SRAT proximity domain to entire CFMWS window
	x86/sev-es: Allow copy_from_kernel_nofault() in earlier boot
	x86/boot: Fix incorrect startup_gdt_descr.size
	drivers/clocksource/timer-ti-dm: Don't call clk_get_rate() in stop function
	pstore/platform: Add check for kstrdup
	string: Adjust strtomem() logic to allow for smaller sources
	genirq/matrix: Exclude managed interrupts in irq_matrix_allocated()
	wifi: cfg80211: add flush functions for wiphy work
	wifi: mac80211: move radar detect work to wiphy work
	wifi: mac80211: move scan work to wiphy work
	wifi: mac80211: move offchannel works to wiphy work
	wifi: mac80211: move sched-scan stop work to wiphy work
	wifi: mac80211: fix # of MSDU in A-MSDU calculation
	wifi: iwlwifi: honor the enable_ini value
	i40e: fix potential memory leaks in i40e_remove()
	iavf: Fix promiscuous mode configuration flow messages
	selftests/bpf: Correct map_fd to data_fd in tailcalls
	udp: add missing WRITE_ONCE() around up->encap_rcv
	tcp: call tcp_try_undo_recovery when an RTOd TFO SYNACK is ACKed
	gve: Use size_add() in call to struct_size()
	mlxsw: Use size_mul() in call to struct_size()
	tls: Only use data field in crypto completion function
	tls: Use size_add() in call to struct_size()
	tipc: Use size_add() in calls to struct_size()
	net: spider_net: Use size_add() in call to struct_size()
	net: ethernet: mtk_wed: fix EXT_INT_STATUS_RX_FBUF definitions for MT7986 SoC
	wifi: rtw88: debug: Fix the NULL vs IS_ERR() bug for debugfs_create_file()
	wifi: ath11k: fix boot failure with one MSI vector
	wifi: mt76: mt7603: rework/fix rx pse hang check
	wifi: mt76: mt7603: improve watchdog reset reliablity
	wifi: mt76: mt7603: improve stuck beacon handling
	wifi: mt76: mt7915: fix beamforming availability check
	wifi: ath: dfs_pattern_detector: Fix a memory initialization issue
	tcp_metrics: add missing barriers on delete
	tcp_metrics: properly set tp->snd_ssthresh in tcp_init_metrics()
	tcp_metrics: do not create an entry from tcp_init_metrics()
	wifi: rtlwifi: fix EDCA limit set by BT coexistence
	ACPI: property: Allow _DSD buffer data only for byte accessors
	ACPI: video: Add acpi_backlight=vendor quirk for Toshiba Portégé R100
	wifi: ath11k: fix Tx power value during active CAC
	can: dev: can_restart(): don't crash kernel if carrier is OK
	can: dev: can_restart(): fix race condition between controller restart and netif_carrier_on()
	can: dev: can_put_echo_skb(): don't crash kernel if can_priv::echo_skb is accessed out of bounds
	PM / devfreq: rockchip-dfi: Make pmu regmap mandatory
	wifi: wfx: fix case where rates are out of order
	netfilter: nf_tables: Drop pointless memset when dumping rules
	thermal: core: prevent potential string overflow
	r8169: use tp_to_dev instead of open code
	r8169: fix rare issue with broken rx after link-down on RTL8125
	selftests: netfilter: test for sctp collision processing in nf_conntrack
	net: skb_find_text: Ignore patterns extending past 'to'
	chtls: fix tp->rcv_tstamp initialization
	tcp: fix cookie_init_timestamp() overflows
	wifi: iwlwifi: call napi_synchronize() before freeing rx/tx queues
	wifi: iwlwifi: pcie: synchronize IRQs before NAPI
	wifi: iwlwifi: empty overflow queue during flush
	Bluetooth: hci_sync: Fix Opcode prints in bt_dev_dbg/err
	bpf: Fix unnecessary -EBUSY from htab_lock_bucket
	ACPI: sysfs: Fix create_pnp_modalias() and create_of_modalias()
	ipv6: avoid atomic fragment on GSO packets
	net: add DEV_STATS_READ() helper
	ipvlan: properly track tx_errors
	regmap: debugfs: Fix a erroneous check after snprintf()
	spi: tegra: Fix missing IRQ check in tegra_slink_probe()
	clk: qcom: gcc-msm8996: Remove RPM bus clocks
	clk: qcom: clk-rcg2: Fix clock rate overflow for high parent frequencies
	clk: qcom: mmcc-msm8998: Don't check halt bit on some branch clks
	clk: qcom: mmcc-msm8998: Fix the SMMU GDSC
	clk: qcom: gcc-sm8150: Fix gcc_sdcc2_apps_clk_src
	regulator: mt6358: Fail probe on unknown chip ID
	clk: imx: Select MXC_CLK for CLK_IMX8QXP
	clk: imx: imx8mq: correct error handling path
	clk: imx: imx8qxp: Fix elcdif_pll clock
	clk: renesas: rcar-gen3: Extend SDnH divider table
	clk: renesas: rzg2l: Wait for status bit of SD mux before continuing
	clk: renesas: rzg2l: Lock around writes to mux register
	clk: renesas: rzg2l: Trust value returned by hardware
	clk: renesas: rzg2l: Use FIELD_GET() for PLL register fields
	clk: renesas: rzg2l: Fix computation formula
	clk: linux/clk-provider.h: fix kernel-doc warnings and typos
	spi: nxp-fspi: use the correct ioremap function
	clk: keystone: pll: fix a couple NULL vs IS_ERR() checks
	clk: ti: change ti_clk_register[_omap_hw]() API
	clk: ti: fix double free in of_ti_divider_clk_setup()
	clk: npcm7xx: Fix incorrect kfree
	clk: mediatek: clk-mt6765: Add check for mtk_alloc_clk_data
	clk: mediatek: clk-mt6779: Add check for mtk_alloc_clk_data
	clk: mediatek: clk-mt6797: Add check for mtk_alloc_clk_data
	clk: mediatek: clk-mt7629-eth: Add check for mtk_alloc_clk_data
	clk: mediatek: clk-mt7629: Add check for mtk_alloc_clk_data
	clk: mediatek: clk-mt2701: Add check for mtk_alloc_clk_data
	clk: qcom: config IPQ_APSS_6018 should depend on QCOM_SMEM
	platform/x86: wmi: Fix probe failure when failing to register WMI devices
	platform/x86: wmi: Fix opening of char device
	hwmon: (axi-fan-control) Fix possible NULL pointer dereference
	hwmon: (coretemp) Fix potentially truncated sysfs attribute name
	Revert "hwmon: (sch56xx-common) Add DMI override table"
	Revert "hwmon: (sch56xx-common) Add automatic module loading on supported devices"
	hwmon: (sch5627) Use bit macros when accessing the control register
	hwmon: (sch5627) Disallow write access if virtual registers are locked
	hte: tegra: Fix missing error code in tegra_hte_test_probe()
	drm/rockchip: vop: Fix reset of state in duplicate state crtc funcs
	drm/rockchip: vop: Fix call to crtc reset helper
	drm/rockchip: vop2: Don't crash for invalid duplicate_state
	drm/rockchip: vop2: Add missing call to crtc reset helper
	drm/radeon: possible buffer overflow
	drm: bridge: it66121: Fix invalid connector dereference
	drm/bridge: lt8912b: Add hot plug detection
	drm/bridge: lt8912b: Fix bridge_detach
	drm/bridge: lt8912b: Fix crash on bridge detach
	drm/bridge: lt8912b: Manually disable HPD only if it was enabled
	drm/bridge: lt8912b: Add missing drm_bridge_attach call
	drm/bridge: tc358768: Fix use of uninitialized variable
	drm/bridge: tc358768: Fix bit updates
	drm/bridge: tc358768: remove unused variable
	drm/bridge: tc358768: Use struct videomode
	drm/bridge: tc358768: Print logical values, not raw register values
	drm/bridge: tc358768: Use dev for dbg prints, not priv->dev
	drm/bridge: tc358768: Rename dsibclk to hsbyteclk
	drm/bridge: tc358768: Clean up clock period code
	drm/bridge: tc358768: Fix tc358768_ns_to_cnt()
	drm/amdkfd: fix some race conditions in vram buffer alloc/free of svm code
	drm/amd/display: Check all enabled planes in dm_check_crtc_cursor
	drm/amd/display: Refactor dm_get_plane_scale helper
	drm/amd/display: Bail from dm_check_crtc_cursor if no relevant change
	io_uring/kbuf: Fix check of BID wrapping in provided buffers
	io_uring/kbuf: Allow the full buffer id space for provided buffers
	drm/mediatek: Fix iommu fault by swapping FBs after updating plane state
	drm/mediatek: Fix iommu fault during crtc enabling
	drm/rockchip: cdn-dp: Fix some error handling paths in cdn_dp_probe()
	gpu: host1x: Correct allocated size for contexts
	drm/bridge: lt9611uxc: fix the race in the error path
	arm64/arm: xen: enlighten: Fix KPTI checks
	drm/rockchip: Fix type promotion bug in rockchip_gem_iommu_map()
	xenbus: fix error exit in xenbus_init()
	xen-pciback: Consider INTx disabled when MSI/MSI-X is enabled
	drm/msm/dsi: use msm_gem_kernel_put to free TX buffer
	drm/msm/dsi: free TX buffer in unbind
	clocksource/drivers/arm_arch_timer: limit XGene-1 workaround
	drm: mediatek: mtk_dsi: Fix NO_EOT_PACKET settings/handling
	drivers/perf: hisi: use cpuhp_state_remove_instance_nocalls() for hisi_hns3_pmu uninit process
	perf/arm-cmn: Revamp model detection
	perf/arm-cmn: Fix DTC domain detection
	drivers/perf: hisi_pcie: Check the type first in pmu::event_init()
	perf: hisi: Fix use-after-free when register pmu fails
	ARM: dts: renesas: blanche: Fix typo in GP_11_2 pin name
	arm64: dts: qcom: sdm845: cheza doesn't support LMh node
	arm64: dts: qcom: sc7280: link usb3_phy_wrapper_gcc_usb30_pipe_clk
	arm64: dts: qcom: msm8916: Fix iommu local address range
	arm64: dts: qcom: msm8992-libra: drop duplicated reserved memory
	arm64: dts: qcom: sc7280: Add missing LMH interrupts
	arm64: dts: qcom: sm8150: add ref clock to PCIe PHYs
	arm64: dts: qcom: sm8350: fix pinctrl for UART18
	arm64: dts: qcom: sdm845-mtp: fix WiFi configuration
	ARM64: dts: marvell: cn9310: Use appropriate label for spi1 pins
	arm64: dts: qcom: apq8016-sbc: Add missing ADV7533 regulators
	ARM: dts: qcom: mdm9615: populate vsdcc fixed regulator
	soc: qcom: llcc: Handle a second device without data corruption
	kunit: Fix missed memory release in kunit_free_suite_set()
	firmware: ti_sci: Mark driver as non removable
	arm64: dts: ti: k3-am62a7-sk: Drop i2c-1 to 100Khz
	firmware: arm_ffa: Assign the missing IDR allocation ID to the FFA device
	firmware: arm_ffa: Allow the FF-A drivers to use 32bit mode of messaging
	ARM: dts: am3517-evm: Fix LED3/4 pinmux
	clk: scmi: Free scmi_clk allocated when the clocks with invalid info are skipped
	arm64: dts: imx8qm-ss-img: Fix jpegenc compatible entry
	arm64: dts: imx8mm: Add sound-dai-cells to micfil node
	arm64: dts: imx8mn: Add sound-dai-cells to micfil node
	arm64: tegra: Use correct interrupts for Tegra234 TKE
	selftests/pidfd: Fix ksft print formats
	selftests/resctrl: Ensure the benchmark commands fits to its array
	module/decompress: use vmalloc() for gzip decompression workspace
	ASoC: cs35l41: Verify PM runtime resume errors in IRQ handler
	ASoC: cs35l41: Undo runtime PM changes at driver exit time
	ALSA: hda: cs35l41: Fix unbalanced pm_runtime_get()
	ALSA: hda: cs35l41: Undo runtime PM changes at driver exit time
	KEYS: Include linux/errno.h in linux/verification.h
	crypto: hisilicon/hpre - Fix a erroneous check after snprintf()
	hwrng: bcm2835 - Fix hwrng throughput regression
	hwrng: geode - fix accessing registers
	RDMA/core: Use size_{add,sub,mul}() in calls to struct_size()
	crypto: qat - ignore subsequent state up commands
	crypto: qat - relocate bufferlist logic
	crypto: qat - rename bufferlist functions
	crypto: qat - change bufferlist logic interface
	crypto: qat - generalize crypto request buffers
	crypto: qat - extend buffer list interface
	crypto: qat - fix unregistration of crypto algorithms
	scsi: ibmvfc: Fix erroneous use of rtas_busy_delay with hcall return code
	libnvdimm/of_pmem: Use devm_kstrdup instead of kstrdup and check its return value
	nd_btt: Make BTT lanes preemptible
	crypto: caam/qi2 - fix Chacha20 + Poly1305 self test failure
	crypto: caam/jr - fix Chacha20 + Poly1305 self test failure
	crypto: qat - increase size of buffers
	PCI: vmd: Correct PCI Header Type Register's multi-function check
	hid: cp2112: Fix duplicate workqueue initialization
	crypto: hisilicon/qm - delete redundant null assignment operations
	crypto: hisilicon/qm - modify the process of regs dfx
	crypto: hisilicon/qm - split a debugfs.c from qm
	crypto: hisilicon/qm - fix PF queue parameter issue
	ARM: 9321/1: memset: cast the constant byte to unsigned char
	ext4: move 'ix' sanity check to corrent position
	ASoC: fsl: mpc5200_dma.c: Fix warning of Function parameter or member not described
	IB/mlx5: Fix rdma counter binding for RAW QP
	RDMA/hns: Fix printing level of asynchronous events
	RDMA/hns: Fix uninitialized ucmd in hns_roce_create_qp_common()
	RDMA/hns: Fix signed-unsigned mixed comparisons
	RDMA/hns: Add check for SL
	RDMA/hns: The UD mode can only be configured with DCQCN
	ASoC: SOF: core: Ensure sof_ops_free() is still called when probe never ran.
	ASoC: fsl: Fix PM disable depth imbalance in fsl_easrc_probe
	scsi: ufs: core: Leave space for '\0' in utf8 desc string
	RDMA/hfi1: Workaround truncation compilation error
	HID: cp2112: Make irq_chip immutable
	hid: cp2112: Fix IRQ shutdown stopping polling for all IRQs on chip
	sh: bios: Revive earlyprintk support
	Revert "HID: logitech-hidpp: add a module parameter to keep firmware gestures"
	HID: logitech-hidpp: Remove HIDPP_QUIRK_NO_HIDINPUT quirk
	HID: logitech-hidpp: Don't restart IO, instead defer hid_connect() only
	HID: logitech-hidpp: Revert "Don't restart communication if not necessary"
	HID: logitech-hidpp: Move get_wireless_feature_index() check to hidpp_connect_event()
	ASoC: Intel: Skylake: Fix mem leak when parsing UUIDs fails
	padata: Fix refcnt handling in padata_free_shell()
	crypto: qat - fix deadlock in backlog processing
	ASoC: ams-delta.c: use component after check
	IB/mlx5: Fix init stage error handling to avoid double free of same QP and UAF
	mfd: core: Un-constify mfd_cell.of_reg
	mfd: core: Ensure disabled devices are skipped without aborting
	mfd: dln2: Fix double put in dln2_probe
	dt-bindings: mfd: mt6397: Add binding for MT6357
	dt-bindings: mfd: mt6397: Split out compatible for MediaTek MT6366 PMIC
	mfd: arizona-spi: Set pdata.hpdet_channel for ACPI enumerated devs
	leds: turris-omnia: Drop unnecessary mutex locking
	leds: turris-omnia: Do not use SMBUS calls
	leds: pwm: Don't disable the PWM when the LED should be off
	leds: trigger: ledtrig-cpu:: Fix 'output may be truncated' issue for 'cpu'
	kunit: add macro to allow conditionally exposing static symbols to tests
	apparmor: test: make static symbols visible during kunit testing
	apparmor: fix invalid reference on profile->disconnected
	perf stat: Fix aggr mode initialization
	iio: frequency: adf4350: Use device managed functions and fix power down issue.
	perf kwork: Fix incorrect and missing free atom in work_push_atom()
	perf kwork: Add the supported subcommands to the document
	perf kwork: Set ordered_events to true in 'struct perf_tool'
	filemap: add filemap_get_folios_tag()
	f2fs: convert f2fs_write_cache_pages() to use filemap_get_folios_tag()
	f2fs: compress: fix deadloop in f2fs_write_cache_pages()
	f2fs: compress: fix to avoid use-after-free on dic
	f2fs: compress: fix to avoid redundant compress extension
	tty: tty_jobctrl: fix pid memleak in disassociate_ctty()
	livepatch: Fix missing newline character in klp_resolve_symbols()
	pinctrl: renesas: rzg2l: Make reverse order of enable() for disable()
	perf record: Fix BTF type checks in the off-cpu profiling
	dmaengine: idxd: Register dsa_bus_type before registering idxd sub-drivers
	usb: dwc2: fix possible NULL pointer dereference caused by driver concurrency
	usb: chipidea: Fix DMA overwrite for Tegra
	usb: chipidea: Simplify Tegra DMA alignment code
	dmaengine: ti: edma: handle irq_of_parse_and_map() errors
	misc: st_core: Do not call kfree_skb() under spin_lock_irqsave()
	tools: iio: iio_generic_buffer ensure alignment
	USB: usbip: fix stub_dev hub disconnect
	dmaengine: pxa_dma: Remove an erroneous BUG_ON() in pxad_free_desc()
	f2fs: fix to initialize map.m_pblk in f2fs_precache_extents()
	interconnect: qcom: sc7180: Retire DEFINE_QBCM
	interconnect: qcom: sc7180: Set ACV enable_mask
	interconnect: qcom: sc7280: Set ACV enable_mask
	interconnect: qcom: sc8180x: Set ACV enable_mask
	interconnect: qcom: sc8280xp: Set ACV enable_mask
	interconnect: qcom: sdm845: Retire DEFINE_QBCM
	interconnect: qcom: sdm845: Set ACV enable_mask
	interconnect: qcom: sm6350: Retire DEFINE_QBCM
	interconnect: qcom: sm6350: Set ACV enable_mask
	interconnect: move ignore_list out of of_count_icc_providers()
	interconnect: qcom: sm8150: Drop IP0 interconnects
	interconnect: qcom: sm8150: Retire DEFINE_QBCM
	interconnect: qcom: sm8150: Set ACV enable_mask
	interconnect: qcom: sm8350: Retire DEFINE_QBCM
	interconnect: qcom: sm8350: Set ACV enable_mask
	powerpc: Only define __parse_fpscr() when required
	modpost: fix tee MODULE_DEVICE_TABLE built on big-endian host
	modpost: fix ishtp MODULE_DEVICE_TABLE built on big-endian host
	powerpc/40x: Remove stale PTE_ATOMIC_UPDATES macro
	powerpc/xive: Fix endian conversion size
	powerpc/vas: Limit open window failure messages in log bufffer
	powerpc/imc-pmu: Use the correct spinlock initializer.
	powerpc/pseries: fix potential memory leak in init_cpu_associativity()
	xhci: Loosen RPM as default policy to cover for AMD xHC 1.1
	usb: host: xhci-plat: fix possible kernel oops while resuming
	perf machine: Avoid out of bounds LBR memory read
	perf hist: Add missing puts to hist__account_cycles
	9p/net: fix possible memory leak in p9_check_errors()
	i3c: Fix potential refcount leak in i3c_master_register_new_i3c_devs
	cxl/mem: Fix shutdown order
	crypto: ccp - Name -1 return value as SEV_RET_NO_FW_CALL
	x86/sev: Change snp_guest_issue_request()'s fw_err argument
	virt: sevguest: Fix passing a stack buffer as a scatterlist target
	rtc: pcf85363: fix wrong mask/val parameters in regmap_update_bits call
	pcmcia: cs: fix possible hung task and memory leak pccardd()
	pcmcia: ds: fix refcount leak in pcmcia_device_add()
	pcmcia: ds: fix possible name leak in error path in pcmcia_device_add()
	media: hantro: Check whether reset op is defined before use
	media: verisilicon: Do not enable G2 postproc downscale if source is narrower than destination
	media: ov5640: Drop dead code using frame_interval
	media: ov5640: fix vblank unchange issue when work at dvp mode
	media: i2c: max9286: Fix some redundant of_node_put() calls
	media: ov5640: Fix a memory leak when ov5640_probe fails
	media: bttv: fix use after free error due to btv->timeout timer
	media: amphion: handle firmware debug message
	media: mtk-jpegenc: Fix bug in JPEG encode quality selection
	media: s3c-camif: Avoid inappropriate kfree()
	media: vidtv: psi: Add check for kstrdup
	media: vidtv: mux: Add check and kfree for kstrdup
	media: cedrus: Fix clock/reset sequence
	media: cadence: csi2rx: Unregister v4l2 async notifier
	media: dvb-usb-v2: af9035: fix missing unlock
	media: cec: meson: always include meson sub-directory in Makefile
	regmap: prevent noinc writes from clobbering cache
	pwm: sti: Reduce number of allocations and drop usage of chip_data
	pwm: brcmstb: Utilize appropriate clock APIs in suspend/resume
	Input: synaptics-rmi4 - fix use after free in rmi_unregister_function()
	watchdog: ixp4xx: Make sure restart always works
	llc: verify mac len before reading mac header
	hsr: Prevent use after free in prp_create_tagged_frame()
	tipc: Change nla_policy for bearer-related names to NLA_NUL_STRING
	bpf: Check map->usercnt after timer->timer is assigned
	inet: shrink struct flowi_common
	octeontx2-pf: Fix error codes
	octeontx2-pf: Fix holes in error code
	net: page_pool: add missing free_percpu when page_pool_init fail
	dccp: Call security_inet_conn_request() after setting IPv4 addresses.
	dccp/tcp: Call security_inet_conn_request() after setting IPv6 addresses.
	net: r8169: Disable multicast filter for RTL8168H and RTL8107E
	Fix termination state for idr_for_each_entry_ul()
	net: stmmac: xgmac: Enable support for multiple Flexible PPS outputs
	selftests: pmtu.sh: fix result checking
	octeontx2-pf: Rename tot_tx_queues to non_qos_queues
	octeontx2-pf: qos send queues management
	octeontx2-pf: Free pending and dropped SQEs
	net/smc: fix dangling sock under state SMC_APPFINCLOSEWAIT
	net/smc: allow cdc msg send rather than drop it with NULL sndbuf_desc
	net/smc: put sk reference if close work was canceled
	nvme: fix error-handling for io_uring nvme-passthrough
	tg3: power down device only on SYSTEM_POWER_OFF
	nbd: fix uaf in nbd_open
	blk-core: use pr_warn_ratelimited() in bio_check_ro()
	virtio/vsock: replace virtio_vsock_pkt with sk_buff
	vsock/virtio: remove socket from connected/bound list on shutdown
	r8169: respect userspace disabling IFF_MULTICAST
	i2c: iproc: handle invalid slave state
	netfilter: xt_recent: fix (increase) ipv6 literal buffer length
	netfilter: nft_redir: use `struct nf_nat_range2` throughout and deduplicate eval call-backs
	netfilter: nat: fix ipv6 nat redirect with mapped and scoped addresses
	RISC-V: Don't fail in riscv_of_parent_hartid() for disabled HARTs
	drm/syncobj: fix DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE
	ASoC: mediatek: mt8186_mt6366_rt1019_rt5682s: trivial: fix error messages
	ASoC: hdmi-codec: register hpd callback on component probe
	ASoC: dapm: fix clock get name
	spi: spi-zynq-qspi: add spi-mem to driver kconfig dependencies
	fbdev: imsttfb: Fix error path of imsttfb_probe()
	fbdev: imsttfb: fix a resource leak in probe
	fbdev: fsl-diu-fb: mark wr_reg_wa() static
	tracing/kprobes: Fix the order of argument descriptions
	io_uring/net: ensure socket is marked connected on connect retry
	x86/amd_nb: Use Family 19h Models 60h-7Fh Function 4 IDs
	Revert "mmc: core: Capture correct oemid-bits for eMMC cards"
	btrfs: use u64 for buffer sizes in the tree search ioctls
	wifi: cfg80211: fix kernel-doc for wiphy_delayed_work_flush()
	virtio/vsock: don't use skbuff state to account credit
	virtio/vsock: remove redundant 'skb_pull()' call
	virtio/vsock: don't drop skbuff on copy failure
	vsock/loopback: use only sk_buff_head.lock to protect the packet queue
	virtio/vsock: fix leaks due to missing skb owner
	virtio/vsock: Fix uninit-value in virtio_transport_recv_pkt()
	virtio/vsock: fix header length on skb merging
	Linux 6.1.63

Change-Id: I87b7a539b11c90cfaf16edb07d613f74d54458a4
Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
2023-11-27 16:59:46 +00:00

864 lines
26 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/*
* mm/readahead.c - address_space-level file readahead.
*
* Copyright (C) 2002, Linus Torvalds
*
* 09Apr2002 Andrew Morton
* Initial version.
*/
/**
* DOC: Readahead Overview
*
* Readahead is used to read content into the page cache before it is
* explicitly requested by the application. Readahead only ever
* attempts to read folios that are not yet in the page cache. If a
* folio is present but not up-to-date, readahead will not try to read
* it. In that case a simple ->read_folio() will be requested.
*
* Readahead is triggered when an application read request (whether a
* system call or a page fault) finds that the requested folio is not in
* the page cache, or that it is in the page cache and has the
* readahead flag set. This flag indicates that the folio was read
* as part of a previous readahead request and now that it has been
* accessed, it is time for the next readahead.
*
* Each readahead request is partly synchronous read, and partly async
* readahead. This is reflected in the struct file_ra_state which
* contains ->size being the total number of pages, and ->async_size
* which is the number of pages in the async section. The readahead
* flag will be set on the first folio in this async section to trigger
* a subsequent readahead. Once a series of sequential reads has been
* established, there should be no need for a synchronous component and
* all readahead request will be fully asynchronous.
*
* When either of the triggers causes a readahead, three numbers need
* to be determined: the start of the region to read, the size of the
* region, and the size of the async tail.
*
* The start of the region is simply the first page address at or after
* the accessed address, which is not currently populated in the page
* cache. This is found with a simple search in the page cache.
*
* The size of the async tail is determined by subtracting the size that
* was explicitly requested from the determined request size, unless
* this would be less than zero - then zero is used. NOTE THIS
* CALCULATION IS WRONG WHEN THE START OF THE REGION IS NOT THE ACCESSED
* PAGE. ALSO THIS CALCULATION IS NOT USED CONSISTENTLY.
*
* The size of the region is normally determined from the size of the
* previous readahead which loaded the preceding pages. This may be
* discovered from the struct file_ra_state for simple sequential reads,
* or from examining the state of the page cache when multiple
* sequential reads are interleaved. Specifically: where the readahead
* was triggered by the readahead flag, the size of the previous
* readahead is assumed to be the number of pages from the triggering
* page to the start of the new readahead. In these cases, the size of
* the previous readahead is scaled, often doubled, for the new
* readahead, though see get_next_ra_size() for details.
*
* If the size of the previous read cannot be determined, the number of
* preceding pages in the page cache is used to estimate the size of
* a previous read. This estimate could easily be misled by random
* reads being coincidentally adjacent, so it is ignored unless it is
* larger than the current request, and it is not scaled up, unless it
* is at the start of file.
*
* In general readahead is accelerated at the start of the file, as
* reads from there are often sequential. There are other minor
* adjustments to the readahead size in various special cases and these
* are best discovered by reading the code.
*
* The above calculation, based on the previous readahead size,
* determines the size of the readahead, to which any requested read
* size may be added.
*
* Readahead requests are sent to the filesystem using the ->readahead()
* address space operation, for which mpage_readahead() is a canonical
* implementation. ->readahead() should normally initiate reads on all
* folios, but may fail to read any or all folios without causing an I/O
* error. The page cache reading code will issue a ->read_folio() request
* for any folio which ->readahead() did not read, and only an error
* from this will be final.
*
* ->readahead() will generally call readahead_folio() repeatedly to get
* each folio from those prepared for readahead. It may fail to read a
* folio by:
*
* * not calling readahead_folio() sufficiently many times, effectively
* ignoring some folios, as might be appropriate if the path to
* storage is congested.
*
* * failing to actually submit a read request for a given folio,
* possibly due to insufficient resources, or
*
* * getting an error during subsequent processing of a request.
*
* In the last two cases, the folio should be unlocked by the filesystem
* to indicate that the read attempt has failed. In the first case the
* folio will be unlocked by the VFS.
*
* Those folios not in the final ``async_size`` of the request should be
* considered to be important and ->readahead() should not fail them due
* to congestion or temporary resource unavailability, but should wait
* for necessary resources (e.g. memory or indexing information) to
* become available. Folios in the final ``async_size`` may be
* considered less urgent and failure to read them is more acceptable.
* In this case it is best to use filemap_remove_folio() to remove the
* folios from the page cache as is automatically done for folios that
* were not fetched with readahead_folio(). This will allow a
* subsequent synchronous readahead request to try them again. If they
* are left in the page cache, then they will be read individually using
* ->read_folio() which may be less efficient.
*/
#include <linux/blkdev.h>
#include <linux/kernel.h>
#include <linux/dax.h>
#include <linux/gfp.h>
#include <linux/export.h>
#include <linux/backing-dev.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/pagevec.h>
#include <linux/pagemap.h>
#include <linux/psi.h>
#include <linux/syscalls.h>
#include <linux/file.h>
#include <linux/mm_inline.h>
#include <linux/blk-cgroup.h>
#include <linux/fadvise.h>
#include <linux/sched/mm.h>
#include <trace/hooks/mm.h>
#include "internal.h"
/*
* Initialise a struct file's readahead state. Assumes that the caller has
* memset *ra to zero.
*/
void
file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping)
{
ra->ra_pages = inode_to_bdi(mapping->host)->ra_pages;
ra->prev_pos = -1;
}
EXPORT_SYMBOL_GPL(file_ra_state_init);
gfp_t readahead_gfp_mask(struct address_space *x)
{
gfp_t mask = __readahead_gfp_mask(x);
trace_android_rvh_set_readahead_gfp_mask(&mask);
return mask;
}
EXPORT_SYMBOL_GPL(readahead_gfp_mask);
static void read_pages(struct readahead_control *rac)
{
const struct address_space_operations *aops = rac->mapping->a_ops;
struct folio *folio;
struct blk_plug plug;
if (!readahead_count(rac))
return;
if (unlikely(rac->_workingset))
psi_memstall_enter(&rac->_pflags);
blk_start_plug(&plug);
if (aops->readahead) {
aops->readahead(rac);
/*
* Clean up the remaining folios. The sizes in ->ra
* may be used to size the next readahead, so make sure
* they accurately reflect what happened.
*/
while ((folio = readahead_folio(rac)) != NULL) {
unsigned long nr = folio_nr_pages(folio);
folio_get(folio);
rac->ra->size -= nr;
if (rac->ra->async_size >= nr) {
rac->ra->async_size -= nr;
filemap_remove_folio(folio);
}
folio_unlock(folio);
folio_put(folio);
}
} else {
while ((folio = readahead_folio(rac)) != NULL)
aops->read_folio(rac->file, folio);
}
blk_finish_plug(&plug);
if (unlikely(rac->_workingset))
psi_memstall_leave(&rac->_pflags);
rac->_workingset = false;
BUG_ON(readahead_count(rac));
}
/**
* page_cache_ra_unbounded - Start unchecked readahead.
* @ractl: Readahead control.
* @nr_to_read: The number of pages to read.
* @lookahead_size: Where to start the next readahead.
*
* This function is for filesystems to call when they want to start
* readahead beyond a file's stated i_size. This is almost certainly
* not the function you want to call. Use page_cache_async_readahead()
* or page_cache_sync_readahead() instead.
*
* Context: File is referenced by caller. Mutexes may be held by caller.
* May sleep, but will not reenter filesystem to reclaim memory.
*/
void page_cache_ra_unbounded(struct readahead_control *ractl,
unsigned long nr_to_read, unsigned long lookahead_size)
{
struct address_space *mapping = ractl->mapping;
unsigned long index = readahead_index(ractl);
gfp_t gfp_mask = readahead_gfp_mask(mapping);
unsigned long i;
/*
* Partway through the readahead operation, we will have added
* locked pages to the page cache, but will not yet have submitted
* them for I/O. Adding another page may need to allocate memory,
* which can trigger memory reclaim. Telling the VM we're in
* the middle of a filesystem operation will cause it to not
* touch file-backed pages, preventing a deadlock. Most (all?)
* filesystems already specify __GFP_NOFS in their mapping's
* gfp_mask, but let's be explicit here.
*/
unsigned int nofs = memalloc_nofs_save();
filemap_invalidate_lock_shared(mapping);
/*
* Preallocate as many pages as we will need.
*/
for (i = 0; i < nr_to_read; i++) {
struct folio *folio = xa_load(&mapping->i_pages, index + i);
if (folio && !xa_is_value(folio)) {
/*
* Page already present? Kick off the current batch
* of contiguous pages before continuing with the
* next batch. This page may be the one we would
* have intended to mark as Readahead, but we don't
* have a stable reference to this page, and it's
* not worth getting one just for that.
*/
read_pages(ractl);
ractl->_index++;
i = ractl->_index + ractl->_nr_pages - index - 1;
continue;
}
folio = filemap_alloc_folio(gfp_mask, 0);
if (!folio)
break;
if (filemap_add_folio(mapping, folio, index + i,
gfp_mask) < 0) {
folio_put(folio);
read_pages(ractl);
ractl->_index++;
i = ractl->_index + ractl->_nr_pages - index - 1;
continue;
}
if (i == nr_to_read - lookahead_size)
folio_set_readahead(folio);
ractl->_workingset |= folio_test_workingset(folio);
ractl->_nr_pages++;
}
/*
* Now start the IO. We ignore I/O errors - if the folio is not
* uptodate then the caller will launch read_folio again, and
* will then handle the error.
*/
read_pages(ractl);
filemap_invalidate_unlock_shared(mapping);
memalloc_nofs_restore(nofs);
}
EXPORT_SYMBOL_GPL(page_cache_ra_unbounded);
/*
* do_page_cache_ra() actually reads a chunk of disk. It allocates
* the pages first, then submits them for I/O. This avoids the very bad
* behaviour which would occur if page allocations are causing VM writeback.
* We really don't want to intermingle reads and writes like that.
*/
static void do_page_cache_ra(struct readahead_control *ractl,
unsigned long nr_to_read, unsigned long lookahead_size)
{
struct inode *inode = ractl->mapping->host;
unsigned long index = readahead_index(ractl);
loff_t isize = i_size_read(inode);
pgoff_t end_index; /* The last page we want to read */
if (isize == 0)
return;
end_index = (isize - 1) >> PAGE_SHIFT;
if (index > end_index)
return;
/* Don't read past the page containing the last byte of the file */
if (nr_to_read > end_index - index)
nr_to_read = end_index - index + 1;
page_cache_ra_unbounded(ractl, nr_to_read, lookahead_size);
}
/*
* Chunk the readahead into 2 megabyte units, so that we don't pin too much
* memory at once.
*/
void force_page_cache_ra(struct readahead_control *ractl,
unsigned long nr_to_read)
{
struct address_space *mapping = ractl->mapping;
struct file_ra_state *ra = ractl->ra;
struct backing_dev_info *bdi = inode_to_bdi(mapping->host);
unsigned long max_pages, index;
if (unlikely(!mapping->a_ops->read_folio && !mapping->a_ops->readahead))
return;
/*
* If the request exceeds the readahead window, allow the read to
* be up to the optimal hardware IO size
*/
index = readahead_index(ractl);
max_pages = max_t(unsigned long, bdi->io_pages, ra->ra_pages);
nr_to_read = min_t(unsigned long, nr_to_read, max_pages);
while (nr_to_read) {
unsigned long this_chunk = (2 * 1024 * 1024) / PAGE_SIZE;
if (this_chunk > nr_to_read)
this_chunk = nr_to_read;
ractl->_index = index;
do_page_cache_ra(ractl, this_chunk, 0);
index += this_chunk;
nr_to_read -= this_chunk;
}
}
/*
* Set the initial window size, round to next power of 2 and square
* for small size, x 4 for medium, and x 2 for large
* for 128k (32 page) max ra
* 1-2 page = 16k, 3-4 page 32k, 5-8 page = 64k, > 8 page = 128k initial
*/
static unsigned long get_init_ra_size(unsigned long size, unsigned long max)
{
unsigned long newsize = roundup_pow_of_two(size);
if (newsize <= max / 32)
newsize = newsize * 4;
else if (newsize <= max / 4)
newsize = newsize * 2;
else
newsize = max;
return newsize;
}
/*
* Get the previous window size, ramp it up, and
* return it as the new window size.
*/
static unsigned long get_next_ra_size(struct file_ra_state *ra,
unsigned long max)
{
unsigned long cur = ra->size;
if (cur < max / 16)
return 4 * cur;
if (cur <= max / 2)
return 2 * cur;
return max;
}
/*
* On-demand readahead design.
*
* The fields in struct file_ra_state represent the most-recently-executed
* readahead attempt:
*
* |<----- async_size ---------|
* |------------------- size -------------------->|
* |==================#===========================|
* ^start ^page marked with PG_readahead
*
* To overlap application thinking time and disk I/O time, we do
* `readahead pipelining': Do not wait until the application consumed all
* readahead pages and stalled on the missing page at readahead_index;
* Instead, submit an asynchronous readahead I/O as soon as there are
* only async_size pages left in the readahead window. Normally async_size
* will be equal to size, for maximum pipelining.
*
* In interleaved sequential reads, concurrent streams on the same fd can
* be invalidating each other's readahead state. So we flag the new readahead
* page at (start+size-async_size) with PG_readahead, and use it as readahead
* indicator. The flag won't be set on already cached pages, to avoid the
* readahead-for-nothing fuss, saving pointless page cache lookups.
*
* prev_pos tracks the last visited byte in the _previous_ read request.
* It should be maintained by the caller, and will be used for detecting
* small random reads. Note that the readahead algorithm checks loosely
* for sequential patterns. Hence interleaved reads might be served as
* sequential ones.
*
* There is a special-case: if the first page which the application tries to
* read happens to be the first page of the file, it is assumed that a linear
* read is about to happen and the window is immediately set to the initial size
* based on I/O request size and the max_readahead.
*
* The code ramps up the readahead size aggressively at first, but slow down as
* it approaches max_readhead.
*/
/*
* Count contiguously cached pages from @index-1 to @index-@max,
* this count is a conservative estimation of
* - length of the sequential read sequence, or
* - thrashing threshold in memory tight systems
*/
static pgoff_t count_history_pages(struct address_space *mapping,
pgoff_t index, unsigned long max)
{
pgoff_t head;
rcu_read_lock();
head = page_cache_prev_miss(mapping, index - 1, max);
rcu_read_unlock();
return index - 1 - head;
}
/*
* page cache context based readahead
*/
static int try_context_readahead(struct address_space *mapping,
struct file_ra_state *ra,
pgoff_t index,
unsigned long req_size,
unsigned long max)
{
pgoff_t size;
size = count_history_pages(mapping, index, max);
/*
* not enough history pages:
* it could be a random read
*/
if (size <= req_size)
return 0;
/*
* starts from beginning of file:
* it is a strong indication of long-run stream (or whole-file-read)
*/
if (size >= index)
size *= 2;
ra->start = index;
ra->size = min(size + req_size, max);
ra->async_size = 1;
return 1;
}
/*
* There are some parts of the kernel which assume that PMD entries
* are exactly HPAGE_PMD_ORDER. Those should be fixed, but until then,
* limit the maximum allocation order to PMD size. I'm not aware of any
* assumptions about maximum order if THP are disabled, but 8 seems like
* a good order (that's 1MB if you're using 4kB pages)
*/
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define MAX_PAGECACHE_ORDER HPAGE_PMD_ORDER
#else
#define MAX_PAGECACHE_ORDER 8
#endif
static inline int ra_alloc_folio(struct readahead_control *ractl, pgoff_t index,
pgoff_t mark, unsigned int order, gfp_t gfp)
{
int err;
struct folio *folio = filemap_alloc_folio(gfp, order);
if (!folio)
return -ENOMEM;
mark = round_up(mark, 1UL << order);
if (index == mark)
folio_set_readahead(folio);
err = filemap_add_folio(ractl->mapping, folio, index, gfp);
if (err) {
folio_put(folio);
return err;
}
ractl->_nr_pages += 1UL << order;
ractl->_workingset |= folio_test_workingset(folio);
return 0;
}
void page_cache_ra_order(struct readahead_control *ractl,
struct file_ra_state *ra, unsigned int new_order)
{
struct address_space *mapping = ractl->mapping;
pgoff_t index = readahead_index(ractl);
pgoff_t limit = (i_size_read(mapping->host) - 1) >> PAGE_SHIFT;
pgoff_t mark = index + ra->size - ra->async_size;
int err = 0;
gfp_t gfp = readahead_gfp_mask(mapping);
if (!mapping_large_folio_support(mapping) || ra->size < 4)
goto fallback;
limit = min(limit, index + ra->size - 1);
if (new_order < MAX_PAGECACHE_ORDER) {
new_order += 2;
if (new_order > MAX_PAGECACHE_ORDER)
new_order = MAX_PAGECACHE_ORDER;
while ((1 << new_order) > ra->size)
new_order--;
}
filemap_invalidate_lock_shared(mapping);
while (index <= limit) {
unsigned int order = new_order;
/* Align with smaller pages if needed */
if (index & ((1UL << order) - 1)) {
order = __ffs(index);
if (order == 1)
order = 0;
}
/* Don't allocate pages past EOF */
while (index + (1UL << order) - 1 > limit) {
if (--order == 1)
order = 0;
}
err = ra_alloc_folio(ractl, index, mark, order, gfp);
if (err)
break;
index += 1UL << order;
}
if (index > limit) {
ra->size += index - limit - 1;
ra->async_size += index - limit - 1;
}
read_pages(ractl);
filemap_invalidate_unlock_shared(mapping);
/*
* If there were already pages in the page cache, then we may have
* left some gaps. Let the regular readahead code take care of this
* situation.
*/
if (!err)
return;
fallback:
do_page_cache_ra(ractl, ra->size, ra->async_size);
}
/*
* A minimal readahead algorithm for trivial sequential/random reads.
*/
static void ondemand_readahead(struct readahead_control *ractl,
struct folio *folio, unsigned long req_size)
{
struct backing_dev_info *bdi = inode_to_bdi(ractl->mapping->host);
struct file_ra_state *ra = ractl->ra;
unsigned long max_pages = ra->ra_pages;
unsigned long add_pages;
pgoff_t index = readahead_index(ractl);
pgoff_t expected, prev_index;
unsigned int order = folio ? folio_order(folio) : 0;
/*
* If the request exceeds the readahead window, allow the read to
* be up to the optimal hardware IO size
*/
if (req_size > max_pages && bdi->io_pages > max_pages)
max_pages = min(req_size, bdi->io_pages);
trace_android_vh_ra_tuning_max_page(ractl, &max_pages);
/*
* start of file
*/
if (!index)
goto initial_readahead;
/*
* It's the expected callback index, assume sequential access.
* Ramp up sizes, and push forward the readahead window.
*/
expected = round_up(ra->start + ra->size - ra->async_size,
1UL << order);
if (index == expected || index == (ra->start + ra->size)) {
ra->start += ra->size;
ra->size = get_next_ra_size(ra, max_pages);
ra->async_size = ra->size;
goto readit;
}
/*
* Hit a marked folio without valid readahead state.
* E.g. interleaved reads.
* Query the pagecache for async_size, which normally equals to
* readahead size. Ramp it up and use it as the new readahead size.
*/
if (folio) {
pgoff_t start;
rcu_read_lock();
start = page_cache_next_miss(ractl->mapping, index + 1,
max_pages);
rcu_read_unlock();
if (!start || start - index > max_pages)
return;
ra->start = start;
ra->size = start - index; /* old async_size */
ra->size += req_size;
ra->size = get_next_ra_size(ra, max_pages);
ra->async_size = ra->size;
goto readit;
}
/*
* oversize read
*/
if (req_size > max_pages)
goto initial_readahead;
/*
* sequential cache miss
* trivial case: (index - prev_index) == 1
* unaligned reads: (index - prev_index) == 0
*/
prev_index = (unsigned long long)ra->prev_pos >> PAGE_SHIFT;
if (index - prev_index <= 1UL)
goto initial_readahead;
/*
* Query the page cache and look for the traces(cached history pages)
* that a sequential stream would leave behind.
*/
if (try_context_readahead(ractl->mapping, ra, index, req_size,
max_pages))
goto readit;
/*
* standalone, small random read
* Read as is, and do not pollute the readahead state.
*/
do_page_cache_ra(ractl, req_size, 0);
return;
initial_readahead:
ra->start = index;
ra->size = get_init_ra_size(req_size, max_pages);
ra->async_size = ra->size > req_size ? ra->size - req_size : ra->size;
readit:
/*
* Will this read hit the readahead marker made by itself?
* If so, trigger the readahead marker hit now, and merge
* the resulted next readahead window into the current one.
* Take care of maximum IO pages as above.
*/
if (index == ra->start && ra->size == ra->async_size) {
add_pages = get_next_ra_size(ra, max_pages);
if (ra->size + add_pages <= max_pages) {
ra->async_size = add_pages;
ra->size += add_pages;
} else {
ra->size = max_pages;
ra->async_size = max_pages >> 1;
}
}
ractl->_index = ra->start;
page_cache_ra_order(ractl, ra, order);
}
void page_cache_sync_ra(struct readahead_control *ractl,
unsigned long req_count)
{
bool do_forced_ra = ractl->file && (ractl->file->f_mode & FMODE_RANDOM);
/*
* Even if readahead is disabled, issue this request as readahead
* as we'll need it to satisfy the requested range. The forced
* readahead will do the right thing and limit the read to just the
* requested range, which we'll set to 1 page for this case.
*/
if (!ractl->ra->ra_pages || blk_cgroup_congested()) {
if (!ractl->file)
return;
req_count = 1;
do_forced_ra = true;
}
/* be dumb */
if (do_forced_ra) {
force_page_cache_ra(ractl, req_count);
return;
}
ondemand_readahead(ractl, NULL, req_count);
}
EXPORT_SYMBOL_GPL(page_cache_sync_ra);
void page_cache_async_ra(struct readahead_control *ractl,
struct folio *folio, unsigned long req_count)
{
/* no readahead */
if (!ractl->ra->ra_pages)
return;
/*
* Same bit is used for PG_readahead and PG_reclaim.
*/
if (folio_test_writeback(folio))
return;
folio_clear_readahead(folio);
if (blk_cgroup_congested())
return;
ondemand_readahead(ractl, folio, req_count);
}
EXPORT_SYMBOL_GPL(page_cache_async_ra);
ssize_t ksys_readahead(int fd, loff_t offset, size_t count)
{
ssize_t ret;
struct fd f;
ret = -EBADF;
f = fdget(fd);
if (!f.file || !(f.file->f_mode & FMODE_READ))
goto out;
/*
* The readahead() syscall is intended to run only on files
* that can execute readahead. If readahead is not possible
* on this file, then we must return -EINVAL.
*/
ret = -EINVAL;
if (!f.file->f_mapping || !f.file->f_mapping->a_ops ||
(!S_ISREG(file_inode(f.file)->i_mode) &&
!S_ISBLK(file_inode(f.file)->i_mode)))
goto out;
ret = vfs_fadvise(f.file, offset, count, POSIX_FADV_WILLNEED);
out:
fdput(f);
return ret;
}
SYSCALL_DEFINE3(readahead, int, fd, loff_t, offset, size_t, count)
{
return ksys_readahead(fd, offset, count);
}
#if defined(CONFIG_COMPAT) && defined(__ARCH_WANT_COMPAT_READAHEAD)
COMPAT_SYSCALL_DEFINE4(readahead, int, fd, compat_arg_u64_dual(offset), size_t, count)
{
return ksys_readahead(fd, compat_arg_u64_glue(offset), count);
}
#endif
/**
* readahead_expand - Expand a readahead request
* @ractl: The request to be expanded
* @new_start: The revised start
* @new_len: The revised size of the request
*
* Attempt to expand a readahead request outwards from the current size to the
* specified size by inserting locked pages before and after the current window
* to increase the size to the new window. This may involve the insertion of
* THPs, in which case the window may get expanded even beyond what was
* requested.
*
* The algorithm will stop if it encounters a conflicting page already in the
* pagecache and leave a smaller expansion than requested.
*
* The caller must check for this by examining the revised @ractl object for a
* different expansion than was requested.
*/
void readahead_expand(struct readahead_control *ractl,
loff_t new_start, size_t new_len)
{
struct address_space *mapping = ractl->mapping;
struct file_ra_state *ra = ractl->ra;
pgoff_t new_index, new_nr_pages;
gfp_t gfp_mask = readahead_gfp_mask(mapping);
new_index = new_start / PAGE_SIZE;
/* Expand the leading edge downwards */
while (ractl->_index > new_index) {
unsigned long index = ractl->_index - 1;
struct page *page = xa_load(&mapping->i_pages, index);
if (page && !xa_is_value(page))
return; /* Page apparently present */
page = __page_cache_alloc(gfp_mask);
if (!page)
return;
if (add_to_page_cache_lru(page, mapping, index, gfp_mask) < 0) {
put_page(page);
return;
}
ractl->_nr_pages++;
ractl->_index = page->index;
}
new_len += new_start - readahead_pos(ractl);
new_nr_pages = DIV_ROUND_UP(new_len, PAGE_SIZE);
/* Expand the trailing edge upwards */
while (ractl->_nr_pages < new_nr_pages) {
unsigned long index = ractl->_index + ractl->_nr_pages;
struct page *page = xa_load(&mapping->i_pages, index);
if (page && !xa_is_value(page))
return; /* Page apparently present */
page = __page_cache_alloc(gfp_mask);
if (!page)
return;
if (add_to_page_cache_lru(page, mapping, index, gfp_mask) < 0) {
put_page(page);
return;
}
if (unlikely(PageWorkingset(page)) && !ractl->_workingset) {
ractl->_workingset = true;
psi_memstall_enter(&ractl->_pflags);
}
ractl->_nr_pages++;
if (ra) {
ra->size++;
ra->async_size++;
}
}
}
EXPORT_SYMBOL(readahead_expand);