android_kernel_xiaomi_sm8450/block/blk-mq-sched.c
Greg Kroah-Hartman 0847230e9b This is the 5.10.173 stable release
-----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEEZH8oZUiU471FcZm+ONu9yGCSaT4FAmQMoPMACgkQONu9yGCS
 aT4a1Q//WHnQOEgEykqbHMree6UQD5F6crB0kUcJTSDB5lblviYGxpOadw2j+670
 AGsFg00cm8Sb8p78v3SA+X2UzScGnY5Cwhe+B/JucUSr+4rDlZ9FjOGXbKdlYFc2
 sOTp9j/9KrETf0K/VVuCa48rKBPUFvrT7pZUAblZ0vVmk6cSzPW/1iBa4W6Ho6Ec
 LxqNzCDtyTWX0JCzdv5DvjW7WALvPiEiw8CX8+psZTD8RHdAjtnW2DKp8ZnznzJS
 YVBf2ulsD1g3zKEqDm5nMcUyN3fSEWci97bmbEzIeMTULfsj+aQF5a7JoXIkj7Yb
 QIvZ1fG6RSviVplt5SoT5ucDN2cGqLt7+4b3v6DKQX1dMTDrAPdU+T1VU0LRxB6h
 5M3ZZ925ktJu2YTmKi4QvgP01ZVJv0dNWytbmAnIVvJRGY3gHQt5tx0W2lnQdHE9
 mJeW2MXcLKeho7d5p3wRl6yEWAJuAoioZCd95NPyNnVZMMhYRu6iTIIzY8EdNegQ
 5ve9Rsda9uobvWRCWefyS0pHvuJ2HJrJONnU92MHKSojEC9oAjURvRGWpXYcQFM/
 EiywE1oBRA6NrGI7BomAH6khVoTi01yBsQ0QEt30mTOuyxa6j/oR9iEsNv7bmjZC
 SoApcWDPNy6RpHX7SDtch0Qj1l7YfYDNNj66Y94o5E70eHebU9Q=
 =vHOd
 -----END PGP SIGNATURE-----

Merge 5.10.173 into android12-5.10-lts

Changes in 5.10.173
	HID: asus: Remove check for same LED brightness on set
	HID: asus: use spinlock to protect concurrent accesses
	HID: asus: use spinlock to safely schedule workers
	powerpc/mm: Rearrange if-else block to avoid clang warning
	ARM: OMAP2+: Fix memory leak in realtime_counter_init()
	arm64: dts: qcom: qcs404: use symbol names for PCIe resets
	ARM: zynq: Fix refcount leak in zynq_early_slcr_init
	arm64: dts: mediatek: mt8183: Fix systimer 13 MHz clock description
	arm64: dts: qcom: sdm845-db845c: fix audio codec interrupt pin name
	arm64: dts: qcom: sc7180: correct SPMI bus address cells
	arm64: dts: meson-gx: Fix Ethernet MAC address unit name
	arm64: dts: meson-g12a: Fix internal Ethernet PHY unit name
	arm64: dts: meson-gx: Fix the SCPI DVFS node name and unit address
	arm64: dts: qcom: ipq8074: correct USB3 QMP PHY-s clock output names
	arm64: dts: qcom: Fix IPQ8074 PCIe PHY nodes
	arm64: dts: qcom: ipq8074: fix PCIe PHY serdes size
	arm64: dts: qcom: ipq8074: fix Gen3 PCIe QMP PHY
	arm64: dts: qcom: ipq8074: correct Gen2 PCIe ranges
	arm64: dts: qcom: ipq8074: fix Gen3 PCIe node
	arm64: dts: qcom: ipq8074: correct PCIe QMP PHY output clock names
	arm64: dts: meson: remove CPU opps below 1GHz for G12A boards
	ARM: OMAP1: call platform_device_put() in error case in omap1_dm_timer_init()
	ARM: s3c: fix s3c64xx_set_timer_source prototype
	arm64: dts: ti: k3-j7200: Fix wakeup pinmux range
	ARM: dts: exynos: correct wr-active property in Exynos3250 Rinato
	ARM: imx: Call ida_simple_remove() for ida_simple_get
	arm64: dts: amlogic: meson-gx: fix SCPI clock dvfs node name
	arm64: dts: amlogic: meson-axg: fix SCPI clock dvfs node name
	arm64: dts: amlogic: meson-gx: add missing SCPI sensors compatible
	arm64: dts: amlogic: meson-gxl-s905d-sml5442tw: drop invalid clock-names property
	arm64: dts: amlogic: meson-gx: add missing unit address to rng node name
	arm64: dts: amlogic: meson-gxl: add missing unit address to eth-phy-mux node name
	arm64: dts: amlogic: meson-gx-libretech-pc: fix update button name
	arm64: dts: amlogic: meson-gxl-s905d-phicomm-n1: fix led node name
	arm64: dts: amlogic: meson-gxbb-kii-pro: fix led node name
	arm64: dts: renesas: beacon-renesom: Fix gpio expander reference
	ARM: dts: sun8i: nanopi-duo2: Fix regulator GPIO reference
	ARM: dts: imx7s: correct iomuxc gpr mux controller cells
	arm64: dts: mediatek: mt7622: Add missing pwm-cells to pwm node
	blk-mq: avoid sleep in blk_mq_alloc_request_hctx
	blk-mq: remove stale comment for blk_mq_sched_mark_restart_hctx
	blk-mq: correct stale comment of .get_budget
	s390/dasd: Prepare for additional path event handling
	s390/dasd: Fix potential memleak in dasd_eckd_init()
	sched/deadline,rt: Remove unused parameter from pick_next_[rt|dl]_entity()
	sched/rt: pick_next_rt_entity(): check list_entry
	x86/perf/zhaoxin: Add stepping check for ZXC
	block: bio-integrity: Copy flags when bio_integrity_payload is cloned
	wifi: rsi: Fix memory leak in rsi_coex_attach()
	wifi: rtlwifi: rtl8821ae: don't call kfree_skb() under spin_lock_irqsave()
	wifi: rtlwifi: rtl8188ee: don't call kfree_skb() under spin_lock_irqsave()
	wifi: rtlwifi: rtl8723be: don't call kfree_skb() under spin_lock_irqsave()
	wifi: iwlegacy: common: don't call dev_kfree_skb() under spin_lock_irqsave()
	wifi: libertas: fix memory leak in lbs_init_adapter()
	wifi: rtl8xxxu: don't call dev_kfree_skb() under spin_lock_irqsave()
	rtlwifi: fix -Wpointer-sign warning
	wifi: rtlwifi: Fix global-out-of-bounds bug in _rtl8812ae_phy_set_txpower_limit()
	libbpf: Fix btf__align_of() by taking into account field offsets
	wifi: ipw2x00: don't call dev_kfree_skb() under spin_lock_irqsave()
	wifi: ipw2200: fix memory leak in ipw_wdev_init()
	wifi: wilc1000: fix potential memory leak in wilc_mac_xmit()
	wifi: brcmfmac: fix potential memory leak in brcmf_netdev_start_xmit()
	wifi: brcmfmac: unmap dma buffer in brcmf_msgbuf_alloc_pktid()
	wifi: libertas_tf: don't call kfree_skb() under spin_lock_irqsave()
	wifi: libertas: if_usb: don't call kfree_skb() under spin_lock_irqsave()
	wifi: libertas: main: don't call kfree_skb() under spin_lock_irqsave()
	wifi: libertas: cmdresp: don't call kfree_skb() under spin_lock_irqsave()
	wifi: wl3501_cs: don't call kfree_skb() under spin_lock_irqsave()
	crypto: x86/ghash - fix unaligned access in ghash_setkey()
	ACPICA: Drop port I/O validation for some regions
	genirq: Fix the return type of kstat_cpu_irqs_sum()
	rcu-tasks: Improve comments explaining tasks_rcu_exit_srcu purpose
	rcu-tasks: Remove preemption disablement around srcu_read_[un]lock() calls
	rcu-tasks: Fix synchronize_rcu_tasks() VS zap_pid_ns_processes()
	lib/mpi: Fix buffer overrun when SG is too long
	crypto: ccp: Use the stack for small SEV command buffers
	crypto: ccp: Use the stack and common buffer for status commands
	crypto: ccp - Use kzalloc for sev ioctl interfaces to prevent kernel memory leak
	crypto: ccp - Avoid page allocation failure warning for SEV_GET_ID2
	ACPICA: nsrepair: handle cases without a return value correctly
	thermal/drivers/tsens: Drop msm8976-specific defines
	thermal/drivers/qcom/tsens_v1: Enable sensor 3 on MSM8976
	thermal/drivers/tsens: Add compat string for the qcom,msm8960
	thermal/drivers/tsens: Sort out msm8976 vs msm8956 data
	wifi: rtl8xxxu: Fix memory leaks with RTL8723BU, RTL8192EU
	wifi: orinoco: check return value of hermes_write_wordrec()
	wifi: ath9k: htc_hst: free skb in ath9k_htc_rx_msg() if there is no callback function
	ath9k: hif_usb: simplify if-if to if-else
	ath9k: htc: clean up statistics macros
	wifi: ath9k: hif_usb: clean up skbs if ath9k_hif_usb_rx_stream() fails
	wifi: ath9k: Fix potential stack-out-of-bounds write in ath9k_wmi_rsp_callback()
	wifi: ath11k: Fix memory leak in ath11k_peer_rx_frag_setup
	wifi: cfg80211: Fix extended KCK key length check in nl80211_set_rekey_data()
	ACPI: battery: Fix missing NUL-termination with large strings
	crypto: ccp - Failure on re-initialization due to duplicate sysfs filename
	crypto: essiv - Handle EBUSY correctly
	crypto: seqiv - Handle EBUSY correctly
	powercap: fix possible name leak in powercap_register_zone()
	x86/cpu: Init AP exception handling from cpu_init_secondary()
	x86/microcode: Replace deprecated CPU-hotplug functions.
	x86: Mark stop_this_cpu() __noreturn
	x86/microcode: Rip out the OLD_INTERFACE
	x86/microcode: Default-disable late loading
	x86/microcode: Print previous version of microcode after reload
	x86/microcode: Add a parameter to microcode_check() to store CPU capabilities
	x86/microcode: Check CPU capabilities after late microcode update correctly
	x86/microcode: Adjust late loading result reporting message
	net: ethernet: ti: am65-cpsw: fix tx csum offload for multi mac mode
	net: ethernet: ti: am65-cpsw: handle deferred probe with dev_err_probe()
	net: ethernet: ti: add missing of_node_put before return
	crypto: xts - Handle EBUSY correctly
	leds: led-class: Add missing put_device() to led_put()
	crypto: ccp - Refactor out sev_fw_alloc()
	crypto: ccp - Flush the SEV-ES TMR memory before giving it to firmware
	bpftool: profile online CPUs instead of possible
	net/mlx5: Enhance debug print in page allocation failure
	irqchip: Fix refcount leak in platform_irqchip_probe
	irqchip/alpine-msi: Fix refcount leak in alpine_msix_init_domains
	irqchip/irq-mvebu-gicp: Fix refcount leak in mvebu_gicp_probe
	irqchip/ti-sci: Fix refcount leak in ti_sci_intr_irq_domain_probe
	s390/vmem: fix empty page tables cleanup under KASAN
	net: add sock_init_data_uid()
	tun: tun_chr_open(): correctly initialize socket uid
	tap: tap_open(): correctly initialize socket uid
	OPP: fix error checking in opp_migrate_dentry()
	Bluetooth: L2CAP: Fix potential user-after-free
	libbpf: Fix alen calculation in libbpf_nla_dump_errormsg()
	rds: rds_rm_zerocopy_callback() correct order for list_add_tail()
	crypto: rsa-pkcs1pad - Use akcipher_request_complete
	m68k: /proc/hardware should depend on PROC_FS
	RISC-V: time: initialize hrtimer based broadcast clock event device
	wifi: iwl3945: Add missing check for create_singlethread_workqueue
	wifi: iwl4965: Add missing check for create_singlethread_workqueue()
	wifi: mwifiex: fix loop iterator in mwifiex_update_ampdu_txwinsize()
	selftests/bpf: Fix out-of-srctree build
	crypto: crypto4xx - Call dma_unmap_page when done
	wifi: mac80211: make rate u32 in sta_set_rate_info_rx()
	thermal/drivers/hisi: Drop second sensor hi3660
	can: esd_usb: Move mislocated storage of SJA1000_ECC_SEG bits in case of a bus error
	bpf: Fix global subprog context argument resolution logic
	irqchip/irq-brcmstb-l2: Set IRQ_LEVEL for level triggered interrupts
	irqchip/irq-bcm7120-l2: Set IRQ_LEVEL for level triggered interrupts
	selftests/net: Interpret UDP_GRO cmsg data as an int value
	l2tp: Avoid possible recursive deadlock in l2tp_tunnel_register()
	net: bcmgenet: fix MoCA LED control
	selftest: fib_tests: Always cleanup before exit
	sefltests: netdevsim: wait for devlink instance after netns removal
	drm: Fix potential null-ptr-deref due to drmm_mode_config_init()
	drm/fourcc: Add missing big-endian XRGB1555 and RGB565 formats
	drm: mxsfb: DRM_MXSFB should depend on ARCH_MXS || ARCH_MXC
	drm/bridge: megachips: Fix error handling in i2c_register_driver()
	drm/vkms: Fix null-ptr-deref in vkms_release()
	drm/vc4: dpi: Add option for inverting pixel clock and output enable
	drm/vc4: dpi: Fix format mapping for RGB565
	drm: tidss: Fix pixel format definition
	gpu: ipu-v3: common: Add of_node_put() for reference returned by of_graph_get_port_by_id()
	drm/msm/hdmi: Add missing check for alloc_ordered_workqueue
	pinctrl: qcom: pinctrl-msm8976: Correct function names for wcss pins
	pinctrl: stm32: Fix refcount leak in stm32_pctrl_get_irq_domain
	pinctrl: rockchip: add support for rk3568
	pinctrl: rockchip: do coding style for mux route struct
	pinctrl: rockchip: Fix refcount leak in rockchip_pinctrl_parse_groups
	drm/vc4: hvs: Set AXI panic modes
	drm/vc4: hvs: Fix colour order for xRGB1555 on HVS5
	drm/vc4: hdmi: Correct interlaced timings again
	ASoC: fsl_sai: initialize is_dsp_mode flag
	drm/msm/adreno: Fix null ptr access in adreno_gpu_cleanup()
	ALSA: hda/ca0132: minor fix for allocation size
	drm/msm/dpu: Disallow unallocated resources to be returned
	drm/bridge: lt9611: fix sleep mode setup
	drm/bridge: lt9611: fix HPD reenablement
	drm/bridge: lt9611: fix polarity programming
	drm/bridge: lt9611: fix programming of video modes
	drm/bridge: lt9611: fix clock calculation
	drm/bridge: lt9611: pass a pointer to the of node
	drm/mipi-dsi: Fix byte order of 16-bit DCS set/get brightness
	drm/msm: use strscpy instead of strncpy
	drm/msm/dpu: Add check for cstate
	drm/msm/dpu: Add check for pstates
	drm/msm/mdp5: Add check for kzalloc
	pinctrl: bcm2835: Remove of_node_put() in bcm2835_of_gpio_ranges_fallback()
	pinctrl: mediatek: Initialize variable pullen and pullup to zero
	pinctrl: mediatek: Initialize variable *buf to zero
	gpu: host1x: Don't skip assigning syncpoints to channels
	drm/mediatek: dsi: Reduce the time of dsi from LP11 to sending cmd
	drm/mediatek: Use NULL instead of 0 for NULL pointer
	drm/mediatek: Drop unbalanced obj unref
	drm/mediatek: mtk_drm_crtc: Add checks for devm_kcalloc
	drm/mediatek: Clean dangling pointer on bind error path
	ASoC: soc-compress.c: fixup private_data on snd_soc_new_compress()
	gpio: vf610: connect GPIO label to dev name
	spi: dw_bt1: fix MUX_MMIO dependencies
	ASoC: mchp-spdifrx: fix controls which rely on rsr register
	ASoC: atmel: fix spelling mistakes
	ASoC: mchp-spdifrx: fix return value in case completion times out
	ASoC: mchp-spdifrx: fix controls that works with completion mechanism
	ASoC: mchp-spdifrx: disable all interrupts in mchp_spdifrx_dai_remove()
	ASoC: mchp-spdifrx: Fix uninitialized use of mr in mchp_spdifrx_hw_params()
	ASoC: dt-bindings: meson: fix gx-card codec node regex
	hwmon: (ltc2945) Handle error case in ltc2945_value_store
	drm/amdgpu: fix enum odm_combine_mode mismatch
	scsi: mpt3sas: Fix a memory leak
	scsi: aic94xx: Add missing check for dma_map_single()
	spi: bcm63xx-hsspi: fix pm_runtime
	spi: bcm63xx-hsspi: Fix multi-bit mode setting
	hwmon: (mlxreg-fan) Return zero speed for broken fan
	ASoC: tlv320adcx140: fix 'ti,gpio-config' DT property init
	dm: remove flush_scheduled_work() during local_exit()
	NFS: Fix up handling of outstanding layoutcommit in nfs_update_inode()
	NFSv4: keep state manager thread active if swap is enabled
	nfs4trace: fix state manager flag printing
	NFS: fix disabling of swap
	spi: synquacer: Fix timeout handling in synquacer_spi_transfer_one()
	ASoC: soc-dapm.h: fixup warning struct snd_pcm_substream not declared
	HID: bigben: use spinlock to protect concurrent accesses
	HID: bigben_worker() remove unneeded check on report_field
	HID: bigben: use spinlock to safely schedule workers
	hid: bigben_probe(): validate report count
	nfsd: fix race to check ls_layouts
	cifs: Fix lost destroy smbd connection when MR allocate failed
	cifs: Fix warning and UAF when destroy the MR list
	gfs2: jdata writepage fix
	perf llvm: Fix inadvertent file creation
	leds: led-core: Fix refcount leak in of_led_get()
	perf tools: Fix auto-complete on aarch64
	sparc: allow PM configs for sparc32 COMPILE_TEST
	selftests/ftrace: Fix bash specific "==" operator
	printf: fix errname.c list
	objtool: add UACCESS exceptions for __tsan_volatile_read/write
	mfd: pcf50633-adc: Fix potential memleak in pcf50633_adc_async_read()
	clk: qcom: gcc-qcs404: disable gpll[04]_out_aux parents
	clk: qcom: gcc-qcs404: fix names of the DSI clocks used as parents
	RISC-V: fix funct4 definition for c.jalr in parse_asm.h
	mtd: rawnand: sunxi: Fix the size of the last OOB region
	Input: iqs269a - drop unused device node references
	Input: iqs269a - increase interrupt handler return delay
	Input: iqs269a - configure device with a single block write
	linux/kconfig.h: replace IF_ENABLED() with PTR_IF() in <linux/kernel.h>
	clk: renesas: cpg-mssr: Fix use after free if cpg_mssr_common_init() failed
	clk: renesas: cpg-mssr: Remove superfluous check in resume code
	clk: imx: avoid memory leak
	Input: ads7846 - don't report pressure for ads7845
	Input: ads7846 - convert to full duplex
	Input: ads7846 - convert to one message
	Input: ads7846 - always set last command to PWRDOWN
	Input: ads7846 - don't check penirq immediately for 7845
	clk: qcom: gpucc-sc7180: fix clk_dis_wait being programmed for CX GDSC
	clk: qcom: gpucc-sdm845: fix clk_dis_wait being programmed for CX GDSC
	powerpc/powernv/ioda: Skip unallocated resources when mapping to PE
	clk: Honor CLK_OPS_PARENT_ENABLE in clk_core_is_enabled()
	powerpc/perf/hv-24x7: add missing RTAS retry status handling
	powerpc/pseries/lpar: add missing RTAS retry status handling
	powerpc/pseries/lparcfg: add missing RTAS retry status handling
	powerpc/rtas: make all exports GPL
	powerpc/rtas: ensure 4KB alignment for rtas_data_buf
	powerpc/eeh: Small refactor of eeh_handle_normal_event()
	powerpc/eeh: Set channel state after notifying the drivers
	MIPS: SMP-CPS: fix build error when HOTPLUG_CPU not set
	MIPS: vpe-mt: drop physical_memsize
	vdpa/mlx5: Don't clear mr struct on destroy MR
	alpha/boot/tools/objstrip: fix the check for ELF header
	Input: iqs269a - do not poll during suspend or resume
	Input: iqs269a - do not poll during ATI
	remoteproc: qcom_q6v5_mss: Use a carveout to authenticate modem headers
	media: ti: cal: fix possible memory leak in cal_ctx_create()
	media: platform: ti: Add missing check for devm_regulator_get
	powerpc: Remove linker flag from KBUILD_AFLAGS
	builddeb: clean generated package content
	media: max9286: Fix memleak in max9286_v4l2_register()
	media: ov2740: Fix memleak in ov2740_init_controls()
	media: ov5675: Fix memleak in ov5675_init_controls()
	media: i2c: ov772x: Fix memleak in ov772x_probe()
	media: i2c: imx219: remove redundant writes
	media: i2c: imx219: Split common registers from mode tables
	media: i2c: imx219: Fix binning for RAW8 capture
	media: rc: Fix use-after-free bugs caused by ene_tx_irqsim()
	media: i2c: ov7670: 0 instead of -EINVAL was returned
	media: usb: siano: Fix use after free bugs caused by do_submit_urb
	media: saa7134: Use video_unregister_device for radio_dev
	rpmsg: glink: Avoid infinite loop on intent for missing channel
	udf: Define EFSCORRUPTED error code
	ARM: dts: exynos: Use Exynos5420 compatible for the MIPI video phy
	blk-iocost: fix divide by 0 error in calc_lcoefs()
	wifi: ath9k: Fix use-after-free in ath9k_hif_usb_disconnect()
	wifi: brcmfmac: Fix potential stack-out-of-bounds in brcmf_c_preinit_dcmds()
	rcu: Make RCU_LOCKDEP_WARN() avoid early lockdep checks
	rcu: Suppress smp_processor_id() complaint in synchronize_rcu_expedited_wait()
	rcu-tasks: Make rude RCU-Tasks work well with CPU hotplug
	wifi: ath11k: debugfs: fix to work with multiple PCI devices
	thermal: intel: Fix unsigned comparison with less than zero
	timers: Prevent union confusion from unexpected restart_syscall()
	x86/bugs: Reset speculation control settings on init
	wifi: brcmfmac: ensure CLM version is null-terminated to prevent stack-out-of-bounds
	wifi: mt7601u: fix an integer underflow
	inet: fix fast path in __inet_hash_connect()
	ice: add missing checks for PF vsi type
	ACPI: Don't build ACPICA with '-Os'
	clocksource: Suspend the watchdog temporarily when high read latency detected
	crypto: hisilicon: Wipe entire pool on error
	net: bcmgenet: Add a check for oversized packets
	m68k: Check syscall_trace_enter() return code
	wifi: mt76: dma: free rx_head in mt76_dma_rx_cleanup
	ACPI: video: Fix Lenovo Ideapad Z570 DMI match
	net/mlx5: fw_tracer: Fix debug print
	coda: Avoid partial allocation of sig_inputArgs
	uaccess: Add minimum bounds check on kernel buffer size
	PM: EM: fix memory leak with using debugfs_lookup()
	Bluetooth: btusb: Add VID:PID 13d3:3529 for Realtek RTL8821CE
	drm/amd/display: Fix potential null-deref in dm_resume
	drm/omap: dsi: Fix excessive stack usage
	HID: Add Mapping for System Microphone Mute
	drm/tiny: ili9486: Do not assume 8-bit only SPI controllers
	drm/radeon: free iio for atombios when driver shutdown
	drm: amd: display: Fix memory leakage
	drm/msm/dsi: Add missing check for alloc_ordered_workqueue
	docs/scripts/gdb: add necessary make scripts_gdb step
	ASoC: kirkwood: Iterate over array indexes instead of using pointer math
	regulator: max77802: Bounds check regulator id against opmode
	regulator: s5m8767: Bounds check id indexing into arrays
	gfs2: Improve gfs2_make_fs_rw error handling
	hwmon: (coretemp) Simplify platform device handling
	pinctrl: at91: use devm_kasprintf() to avoid potential leaks
	HID: logitech-hidpp: Don't restart communication if not necessary
	drm: panel-orientation-quirks: Add quirk for Lenovo IdeaPad Duet 3 10IGL5
	dm thin: add cond_resched() to various workqueue loops
	dm cache: add cond_resched() to various workqueue loops
	nfsd: zero out pointers after putting nfsd_files on COPY setup error
	wifi: rtl8xxxu: fixing transmisison failure for rtl8192eu
	firmware: coreboot: framebuffer: Ignore reserved pixel color bits
	rtc: pm8xxx: fix set-alarm race
	ipmi_ssif: Rename idle state and check
	s390/extmem: return correct segment type in __segment_load()
	s390: discard .interp section
	s390/kprobes: fix irq mask clobbering on kprobe reenter from post_handler
	s390/kprobes: fix current_kprobe never cleared after kprobes reenter
	cifs: Fix uninitialized memory read in smb3_qfs_tcon()
	ARM: dts: exynos: correct HDMI phy compatible in Exynos4
	hfs: fix missing hfs_bnode_get() in __hfs_bnode_create
	fs: hfsplus: fix UAF issue in hfsplus_put_super
	exfat: fix reporting fs error when reading dir beyond EOF
	exfat: fix unexpected EOF while reading dir
	exfat: redefine DIR_DELETED as the bad cluster number
	exfat: fix inode->i_blocks for non-512 byte sector size device
	f2fs: fix information leak in f2fs_move_inline_dirents()
	f2fs: fix cgroup writeback accounting with fs-layer encryption
	ocfs2: fix defrag path triggering jbd2 ASSERT
	ocfs2: fix non-auto defrag path not working issue
	udf: Truncate added extents on failed expansion
	udf: Do not bother merging very long extents
	udf: Do not update file length for failed writes to inline files
	udf: Preserve link count of system files
	udf: Detect system inodes linked into directory hierarchy
	udf: Fix file corruption when appending just after end of preallocated extent
	KVM: Destroy target device if coalesced MMIO unregistration fails
	KVM: x86: Inject #GP if WRMSR sets reserved bits in APIC Self-IPI
	KVM: s390: disable migration mode when dirty tracking is disabled
	x86/virt: Force GIF=1 prior to disabling SVM (for reboot flows)
	x86/crash: Disable virt in core NMI crash handler to avoid double shootdown
	x86/reboot: Disable virtualization in an emergency if SVM is supported
	x86/reboot: Disable SVM, not just VMX, when stopping CPUs
	x86/kprobes: Fix __recover_optprobed_insn check optimizing logic
	x86/kprobes: Fix arch_check_optimized_kprobe check within optimized_kprobe range
	x86/microcode/amd: Remove load_microcode_amd()'s bsp parameter
	x86/microcode/AMD: Add a @cpu parameter to the reloading functions
	x86/microcode/AMD: Fix mixed steppings support
	x86/speculation: Allow enabling STIBP with legacy IBRS
	Documentation/hw-vuln: Document the interaction between IBRS and STIBP
	brd: return 0/-error from brd_insert_page()
	ima: Align ima_file_mmap() parameters with mmap_file LSM hook
	irqdomain: Fix association race
	irqdomain: Fix disassociation race
	irqdomain: Drop bogus fwspec-mapping error handling
	io_uring: handle TIF_NOTIFY_RESUME when checking for task_work
	io_uring: mark task TASK_RUNNING before handling resume/task work
	io_uring: add a conditional reschedule to the IOPOLL cancelation loop
	io_uring/rsrc: disallow multi-source reg buffers
	io_uring: remove MSG_NOSIGNAL from recvmsg
	io_uring/poll: allow some retries for poll triggering spuriously
	ALSA: ice1712: Do not left ice->gpio_mutex locked in aureon_add_controls()
	ALSA: hda/realtek: Add quirk for HP EliteDesk 800 G6 Tower PC
	jbd2: fix data missing when reusing bh which is ready to be checkpointed
	ext4: optimize ea_inode block expansion
	ext4: refuse to create ea block when umounted
	mtd: spi-nor: Fix shift-out-of-bounds in spi_nor_set_erase_type
	dm: add cond_resched() to dm_wq_work()
	wifi: rtl8xxxu: Use a longer retry limit of 48
	wifi: cfg80211: Fix use after free for wext
	thermal: intel: powerclamp: Fix cur_state for multi package system
	dm flakey: fix logic when corrupting a bio
	dm flakey: don't corrupt the zero page
	ARM: dts: exynos: correct TMU phandle in Exynos4210
	ARM: dts: exynos: correct TMU phandle in Exynos4
	ARM: dts: exynos: correct TMU phandle in Odroid XU3 family
	ARM: dts: exynos: correct TMU phandle in Exynos5250
	ARM: dts: exynos: correct TMU phandle in Odroid XU
	ARM: dts: exynos: correct TMU phandle in Odroid HC1
	rbd: avoid use-after-free in do_rbd_add() when rbd_dev_create() fails
	alpha: fix FEN fault handling
	dax/kmem: Fix leak of memory-hotplug resources
	mips: fix syscall_get_nr
	media: ipu3-cio2: Fix PM runtime usage_count in driver unbind
	remoteproc/mtk_scp: Move clk ops outside send_lock
	docs: gdbmacros: print newest record
	mm: memcontrol: deprecate charge moving
	mm/thp: check and bail out if page in deferred queue already
	ktest.pl: Give back console on Ctrt^C on monitor
	ktest.pl: Fix missing "end_monitor" when machine check fails
	ktest.pl: Add RUN_TIMEOUT option with default unlimited
	ring-buffer: Handle race between rb_move_tail and rb_check_pages
	scsi: qla2xxx: Fix link failure in NPIV environment
	scsi: qla2xxx: Fix DMA-API call trace on NVMe LS requests
	scsi: qla2xxx: Fix erroneous link down
	scsi: ses: Don't attach if enclosure has no components
	scsi: ses: Fix slab-out-of-bounds in ses_enclosure_data_process()
	scsi: ses: Fix possible addl_desc_ptr out-of-bounds accesses
	scsi: ses: Fix possible desc_ptr out-of-bounds accesses
	scsi: ses: Fix slab-out-of-bounds in ses_intf_remove()
	riscv: jump_label: Fixup unaligned arch_static_branch function
	PCI/PM: Observe reset delay irrespective of bridge_d3
	PCI: hotplug: Allow marking devices as disconnected during bind/unbind
	PCI: Avoid FLR for AMD FCH AHCI adapters
	vfio/type1: prevent underflow of locked_vm via exec()
	drm/i915/quirks: Add inverted backlight quirk for HP 14-r206nv
	drm/radeon: Fix eDP for single-display iMac11,2
	drm/edid: fix AVI infoframe aspect ratio handling
	arm64: dts: qcom: ipq8074: fix Gen2 PCIe QMP PHY
	wifi: ath9k: use proper statements in conditionals
	pinctrl: rockchip: fix mux route data for rk3568
	pinctrl: rockchip: fix reading pull type on rk3568
	kbuild: Port silent mode detection to future gnu make.
	net/sched: Retire tcindex classifier
	fs/jfs: fix shift exponent db_agl2size negative
	objtool: Fix memory leak in create_static_call_sections()
	pwm: sifive: Reduce time the controller lock is held
	pwm: sifive: Always let the first pwm_apply_state succeed
	pwm: stm32-lp: fix the check on arr and cmp registers update
	f2fs: use memcpy_{to,from}_page() where possible
	fs: f2fs: initialize fsdata in pagecache_write()
	um: vector: Fix memory leak in vector_config
	ubi: ensure that VID header offset + VID header size <= alloc, size
	ubifs: Fix build errors as symbol undefined
	ubifs: Rectify space budget for ubifs_symlink() if symlink is encrypted
	ubifs: Rectify space budget for ubifs_xrename()
	ubifs: Fix wrong dirty space budget for dirty inode
	ubifs: do_rename: Fix wrong space budget when target inode's nlink > 1
	ubifs: Reserve one leb for each journal head while doing budget
	ubi: Fix use-after-free when volume resizing failed
	ubi: Fix unreferenced object reported by kmemleak in ubi_resize_volume()
	ubifs: Fix memory leak in alloc_wbufs()
	ubi: Fix possible null-ptr-deref in ubi_free_volume()
	ubifs: Re-statistic cleaned znode count if commit failed
	ubifs: dirty_cow_znode: Fix memleak in error handling path
	ubifs: ubifs_writepage: Mark page dirty after writing inode failed
	ubi: fastmap: Fix missed fm_anchor PEB in wear-leveling after disabling fastmap
	ubi: Fix UAF wear-leveling entry in eraseblk_count_seq_show()
	ubi: ubi_wl_put_peb: Fix infinite loop when wear-leveling work failed
	x86: um: vdso: Add '%rcx' and '%r11' to the syscall clobber list
	watchdog: at91sam9_wdt: use devm_request_irq to avoid missing free_irq() in error path
	watchdog: Fix kmemleak in watchdog_cdev_register
	watchdog: pcwd_usb: Fix attempting to access uninitialized memory
	netfilter: ctnetlink: fix possible refcount leak in ctnetlink_create_conntrack()
	netfilter: ebtables: fix table blob use-after-free
	ipv6: Add lwtunnel encap size of all siblings in nexthop calculation
	sctp: add a refcnt in sctp_stream_priorities to avoid a nested loop
	net: fix __dev_kfree_skb_any() vs drop monitor
	9p/xen: fix version parsing
	9p/xen: fix connection sequence
	9p/rdma: unmap receive dma buffer in rdma_request()/post_recv()
	net/mlx5: Geneve, Fix handling of Geneve object id as error code
	nfc: fix memory leak of se_io context in nfc_genl_se_io
	net/sched: act_sample: fix action bind logic
	ARM: dts: spear320-hmi: correct STMPE GPIO compatible
	tcp: tcp_check_req() can be called from process context
	vc_screen: modify vcs_size() handling in vcs_read()
	rtc: sun6i: Always export the internal oscillator
	scsi: ipr: Work around fortify-string warning
	loop: loop_set_status_from_info() check before assignment
	ASoC: adau7118: don't disable regulators on device unbind
	ASoC: zl38060: Remove spurious gpiolib select
	ASoC: zl38060 add gpiolib dependency
	thermal: intel: quark_dts: fix error pointer dereference
	thermal: intel: BXT_PMIC: select REGMAP instead of depending on it
	tracing: Add NULL checks for buffer in ring_buffer_free_read_page()
	firmware/efi sysfb_efi: Add quirk for Lenovo IdeaPad Duet 3
	bootconfig: Increase max nodes of bootconfig from 1024 to 8192 for DCC support
	mfd: arizona: Use pm_runtime_resume_and_get() to prevent refcnt leak
	IB/hfi1: Update RMT size calculation
	media: uvcvideo: Handle cameras with invalid descriptors
	media: uvcvideo: Handle errors from calls to usb_string
	media: uvcvideo: Quirk for autosuspend in Logitech B910 and C910
	media: uvcvideo: Silence memcpy() run-time false positive warnings
	staging: emxx_udc: Add checks for dma_alloc_coherent()
	tty: fix out-of-bounds access in tty_driver_lookup_tty()
	tty: serial: fsl_lpuart: disable the CTS when send break signal
	serial: sc16is7xx: setup GPIO controller later in probe
	mei: bus-fixup:upon error print return values of send and receive
	tools/iio/iio_utils:fix memory leak
	iio: accel: mma9551_core: Prevent uninitialized variable in mma9551_read_status_word()
	iio: accel: mma9551_core: Prevent uninitialized variable in mma9551_read_config_word()
	PCI: loongson: Prevent LS7A MRRS increases
	usb: host: xhci: mvebu: Iterate over array indexes instead of using pointer math
	USB: ene_usb6250: Allocate enough memory for full object
	usb: uvc: Enumerate valid values for color matching
	usb: gadget: uvc: Make bSourceID read/write
	PCI: Align extra resources for hotplug bridges properly
	PCI: Take other bus devices into account when distributing resources
	kernel/fail_function: fix memory leak with using debugfs_lookup()
	PCI: loongson: Add more devices that need MRRS quirk
	PCI: Add ACS quirk for Wangxun NICs
	phy: rockchip-typec: Fix unsigned comparison with less than zero
	soundwire: cadence: Remove wasted space in response_buf
	soundwire: cadence: Drain the RX FIFO after an IO timeout
	net: tls: avoid hanging tasks on the tx_lock
	x86/resctrl: Apply READ_ONCE/WRITE_ONCE to task_struct.{rmid,closid}
	x86/resctl: fix scheduler confusion with 'current'
	drm/display/dp_mst: Fix down/up message handling after sink disconnect
	drm/display/dp_mst: Fix down message handling after a packet reception error
	Bluetooth: hci_sock: purge socket queues in the destruct() callback
	tcp: Fix listen() regression in 5.10.163
	drm/virtio: Fix error code in virtio_gpu_object_shmem_init()
	media: uvcvideo: Provide sync and async uvc_ctrl_status_event
	media: uvcvideo: Fix race condition with usb_kill_urb
	Revert "scsi: mpt3sas: Fix return value check of dma_get_required_mask()"
	scsi: mpt3sas: Don't change DMA mask while reallocating pools
	scsi: mpt3sas: re-do lost mpt3sas DMA mask fix
	scsi: mpt3sas: Remove usage of dma_get_required_mask() API
	malidp: Fix NULL vs IS_ERR() checking
	usb: gadget: uvc: fix missing mutex_unlock() if kstrtou8() fails
	Linux 5.10.173

Change-Id: Iedcbc093feb171d48c70976d0aa99e972fac3ad1
Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
2023-03-22 11:21:35 +00:00

654 lines
17 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* blk-mq scheduling framework
*
* Copyright (C) 2016 Jens Axboe
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/blk-mq.h>
#include <linux/list_sort.h>
#include <trace/events/block.h>
#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-debugfs.h"
#include "blk-mq-sched.h"
#include "blk-mq-tag.h"
#include "blk-wbt.h"
void blk_mq_sched_assign_ioc(struct request *rq)
{
struct request_queue *q = rq->q;
struct io_context *ioc;
struct io_cq *icq;
/*
* May not have an IO context if it's a passthrough request
*/
ioc = current->io_context;
if (!ioc)
return;
spin_lock_irq(&q->queue_lock);
icq = ioc_lookup_icq(ioc, q);
spin_unlock_irq(&q->queue_lock);
if (!icq) {
icq = ioc_create_icq(ioc, q, GFP_ATOMIC);
if (!icq)
return;
}
get_io_context(icq->ioc);
rq->elv.icq = icq;
}
/*
* Mark a hardware queue as needing a restart.
*/
void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
{
if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
return;
set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
}
EXPORT_SYMBOL_GPL(blk_mq_sched_mark_restart_hctx);
void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx)
{
if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
return;
clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
/*
* Order clearing SCHED_RESTART and list_empty_careful(&hctx->dispatch)
* in blk_mq_run_hw_queue(). Its pair is the barrier in
* blk_mq_dispatch_rq_list(). So dispatch code won't see SCHED_RESTART,
* meantime new request added to hctx->dispatch is missed to check in
* blk_mq_run_hw_queue().
*/
smp_mb();
blk_mq_run_hw_queue(hctx, true);
}
static int sched_rq_cmp(void *priv, struct list_head *a, struct list_head *b)
{
struct request *rqa = container_of(a, struct request, queuelist);
struct request *rqb = container_of(b, struct request, queuelist);
return rqa->mq_hctx > rqb->mq_hctx;
}
static bool blk_mq_dispatch_hctx_list(struct list_head *rq_list)
{
struct blk_mq_hw_ctx *hctx =
list_first_entry(rq_list, struct request, queuelist)->mq_hctx;
struct request *rq;
LIST_HEAD(hctx_list);
unsigned int count = 0;
list_for_each_entry(rq, rq_list, queuelist) {
if (rq->mq_hctx != hctx) {
list_cut_before(&hctx_list, rq_list, &rq->queuelist);
goto dispatch;
}
count++;
}
list_splice_tail_init(rq_list, &hctx_list);
dispatch:
return blk_mq_dispatch_rq_list(hctx, &hctx_list, count);
}
#define BLK_MQ_BUDGET_DELAY 3 /* ms units */
/*
* Only SCSI implements .get_budget and .put_budget, and SCSI restarts
* its queue by itself in its completion handler, so we don't need to
* restart queue if .get_budget() fails to get the budget.
*
* Returns -EAGAIN if hctx->dispatch was found non-empty and run_work has to
* be run again. This is necessary to avoid starving flushes.
*/
static int __blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
{
struct request_queue *q = hctx->queue;
struct elevator_queue *e = q->elevator;
bool multi_hctxs = false, run_queue = false;
bool dispatched = false, busy = false;
unsigned int max_dispatch;
LIST_HEAD(rq_list);
int count = 0;
if (hctx->dispatch_busy)
max_dispatch = 1;
else
max_dispatch = hctx->queue->nr_requests;
do {
struct request *rq;
if (e->type->ops.has_work && !e->type->ops.has_work(hctx))
break;
if (!list_empty_careful(&hctx->dispatch)) {
busy = true;
break;
}
if (!blk_mq_get_dispatch_budget(q))
break;
rq = e->type->ops.dispatch_request(hctx);
if (!rq) {
blk_mq_put_dispatch_budget(q);
/*
* We're releasing without dispatching. Holding the
* budget could have blocked any "hctx"s with the
* same queue and if we didn't dispatch then there's
* no guarantee anyone will kick the queue. Kick it
* ourselves.
*/
run_queue = true;
break;
}
/*
* Now this rq owns the budget which has to be released
* if this rq won't be queued to driver via .queue_rq()
* in blk_mq_dispatch_rq_list().
*/
list_add_tail(&rq->queuelist, &rq_list);
if (rq->mq_hctx != hctx)
multi_hctxs = true;
} while (++count < max_dispatch);
if (!count) {
if (run_queue)
blk_mq_delay_run_hw_queues(q, BLK_MQ_BUDGET_DELAY);
} else if (multi_hctxs) {
/*
* Requests from different hctx may be dequeued from some
* schedulers, such as bfq and deadline.
*
* Sort the requests in the list according to their hctx,
* dispatch batching requests from same hctx at a time.
*/
list_sort(NULL, &rq_list, sched_rq_cmp);
do {
dispatched |= blk_mq_dispatch_hctx_list(&rq_list);
} while (!list_empty(&rq_list));
} else {
dispatched = blk_mq_dispatch_rq_list(hctx, &rq_list, count);
}
if (busy)
return -EAGAIN;
return !!dispatched;
}
static int blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
{
unsigned long end = jiffies + HZ;
int ret;
do {
ret = __blk_mq_do_dispatch_sched(hctx);
if (ret != 1)
break;
if (need_resched() || time_is_before_jiffies(end)) {
blk_mq_delay_run_hw_queue(hctx, 0);
break;
}
} while (1);
return ret;
}
static struct blk_mq_ctx *blk_mq_next_ctx(struct blk_mq_hw_ctx *hctx,
struct blk_mq_ctx *ctx)
{
unsigned short idx = ctx->index_hw[hctx->type];
if (++idx == hctx->nr_ctx)
idx = 0;
return hctx->ctxs[idx];
}
/*
* Only SCSI implements .get_budget and .put_budget, and SCSI restarts
* its queue by itself in its completion handler, so we don't need to
* restart queue if .get_budget() fails to get the budget.
*
* Returns -EAGAIN if hctx->dispatch was found non-empty and run_work has to
* be run again. This is necessary to avoid starving flushes.
*/
static int blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
{
struct request_queue *q = hctx->queue;
LIST_HEAD(rq_list);
struct blk_mq_ctx *ctx = READ_ONCE(hctx->dispatch_from);
int ret = 0;
struct request *rq;
do {
if (!list_empty_careful(&hctx->dispatch)) {
ret = -EAGAIN;
break;
}
if (!sbitmap_any_bit_set(&hctx->ctx_map))
break;
if (!blk_mq_get_dispatch_budget(q))
break;
rq = blk_mq_dequeue_from_ctx(hctx, ctx);
if (!rq) {
blk_mq_put_dispatch_budget(q);
/*
* We're releasing without dispatching. Holding the
* budget could have blocked any "hctx"s with the
* same queue and if we didn't dispatch then there's
* no guarantee anyone will kick the queue. Kick it
* ourselves.
*/
blk_mq_delay_run_hw_queues(q, BLK_MQ_BUDGET_DELAY);
break;
}
/*
* Now this rq owns the budget which has to be released
* if this rq won't be queued to driver via .queue_rq()
* in blk_mq_dispatch_rq_list().
*/
list_add(&rq->queuelist, &rq_list);
/* round robin for fair dispatch */
ctx = blk_mq_next_ctx(hctx, rq->mq_ctx);
} while (blk_mq_dispatch_rq_list(rq->mq_hctx, &rq_list, 1));
WRITE_ONCE(hctx->dispatch_from, ctx);
return ret;
}
static int __blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
{
struct request_queue *q = hctx->queue;
struct elevator_queue *e = q->elevator;
const bool has_sched_dispatch = e && e->type->ops.dispatch_request;
int ret = 0;
LIST_HEAD(rq_list);
/*
* If we have previous entries on our dispatch list, grab them first for
* more fair dispatch.
*/
if (!list_empty_careful(&hctx->dispatch)) {
spin_lock(&hctx->lock);
if (!list_empty(&hctx->dispatch))
list_splice_init(&hctx->dispatch, &rq_list);
spin_unlock(&hctx->lock);
}
/*
* Only ask the scheduler for requests, if we didn't have residual
* requests from the dispatch list. This is to avoid the case where
* we only ever dispatch a fraction of the requests available because
* of low device queue depth. Once we pull requests out of the IO
* scheduler, we can no longer merge or sort them. So it's best to
* leave them there for as long as we can. Mark the hw queue as
* needing a restart in that case.
*
* We want to dispatch from the scheduler if there was nothing
* on the dispatch list or we were able to dispatch from the
* dispatch list.
*/
if (!list_empty(&rq_list)) {
blk_mq_sched_mark_restart_hctx(hctx);
if (blk_mq_dispatch_rq_list(hctx, &rq_list, 0)) {
if (has_sched_dispatch)
ret = blk_mq_do_dispatch_sched(hctx);
else
ret = blk_mq_do_dispatch_ctx(hctx);
}
} else if (has_sched_dispatch) {
ret = blk_mq_do_dispatch_sched(hctx);
} else if (hctx->dispatch_busy) {
/* dequeue request one by one from sw queue if queue is busy */
ret = blk_mq_do_dispatch_ctx(hctx);
} else {
blk_mq_flush_busy_ctxs(hctx, &rq_list);
blk_mq_dispatch_rq_list(hctx, &rq_list, 0);
}
return ret;
}
void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
{
struct request_queue *q = hctx->queue;
/* RCU or SRCU read lock is needed before checking quiesced flag */
if (unlikely(blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)))
return;
hctx->run++;
/*
* A return of -EAGAIN is an indication that hctx->dispatch is not
* empty and we must run again in order to avoid starving flushes.
*/
if (__blk_mq_sched_dispatch_requests(hctx) == -EAGAIN) {
if (__blk_mq_sched_dispatch_requests(hctx) == -EAGAIN)
blk_mq_run_hw_queue(hctx, true);
}
}
bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio,
unsigned int nr_segs)
{
struct elevator_queue *e = q->elevator;
struct blk_mq_ctx *ctx;
struct blk_mq_hw_ctx *hctx;
bool ret = false;
enum hctx_type type;
if (e && e->type->ops.bio_merge)
return e->type->ops.bio_merge(q, bio, nr_segs);
ctx = blk_mq_get_ctx(q);
hctx = blk_mq_map_queue(q, bio->bi_opf, ctx);
type = hctx->type;
if (!(hctx->flags & BLK_MQ_F_SHOULD_MERGE) ||
list_empty_careful(&ctx->rq_lists[type]))
return false;
/* default per sw-queue merge */
spin_lock(&ctx->lock);
/*
* Reverse check our software queue for entries that we could
* potentially merge with. Currently includes a hand-wavy stop
* count of 8, to not spend too much time checking for merges.
*/
if (blk_bio_list_merge(q, &ctx->rq_lists[type], bio, nr_segs)) {
ctx->rq_merged++;
ret = true;
}
spin_unlock(&ctx->lock);
return ret;
}
bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq)
{
return rq_mergeable(rq) && elv_attempt_insert_merge(q, rq);
}
EXPORT_SYMBOL_GPL(blk_mq_sched_try_insert_merge);
void blk_mq_sched_request_inserted(struct request *rq)
{
trace_block_rq_insert(rq->q, rq);
}
EXPORT_SYMBOL_GPL(blk_mq_sched_request_inserted);
static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
bool has_sched,
struct request *rq)
{
/*
* dispatch flush and passthrough rq directly
*
* passthrough request has to be added to hctx->dispatch directly.
* For some reason, device may be in one situation which can't
* handle FS request, so STS_RESOURCE is always returned and the
* FS request will be added to hctx->dispatch. However passthrough
* request may be required at that time for fixing the problem. If
* passthrough request is added to scheduler queue, there isn't any
* chance to dispatch it given we prioritize requests in hctx->dispatch.
*/
if ((rq->rq_flags & RQF_FLUSH_SEQ) || blk_rq_is_passthrough(rq))
return true;
if (has_sched)
rq->rq_flags |= RQF_SORTED;
return false;
}
void blk_mq_sched_insert_request(struct request *rq, bool at_head,
bool run_queue, bool async)
{
struct request_queue *q = rq->q;
struct elevator_queue *e = q->elevator;
struct blk_mq_ctx *ctx = rq->mq_ctx;
struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
WARN_ON(e && (rq->tag != BLK_MQ_NO_TAG));
if (blk_mq_sched_bypass_insert(hctx, !!e, rq)) {
/*
* Firstly normal IO request is inserted to scheduler queue or
* sw queue, meantime we add flush request to dispatch queue(
* hctx->dispatch) directly and there is at most one in-flight
* flush request for each hw queue, so it doesn't matter to add
* flush request to tail or front of the dispatch queue.
*
* Secondly in case of NCQ, flush request belongs to non-NCQ
* command, and queueing it will fail when there is any
* in-flight normal IO request(NCQ command). When adding flush
* rq to the front of hctx->dispatch, it is easier to introduce
* extra time to flush rq's latency because of S_SCHED_RESTART
* compared with adding to the tail of dispatch queue, then
* chance of flush merge is increased, and less flush requests
* will be issued to controller. It is observed that ~10% time
* is saved in blktests block/004 on disk attached to AHCI/NCQ
* drive when adding flush rq to the front of hctx->dispatch.
*
* Simply queue flush rq to the front of hctx->dispatch so that
* intensive flush workloads can benefit in case of NCQ HW.
*/
at_head = (rq->rq_flags & RQF_FLUSH_SEQ) ? true : at_head;
blk_mq_request_bypass_insert(rq, at_head, false);
goto run;
}
if (e && e->type->ops.insert_requests) {
LIST_HEAD(list);
list_add(&rq->queuelist, &list);
e->type->ops.insert_requests(hctx, &list, at_head);
} else {
spin_lock(&ctx->lock);
__blk_mq_insert_request(hctx, rq, at_head);
spin_unlock(&ctx->lock);
}
run:
if (run_queue)
blk_mq_run_hw_queue(hctx, async);
}
void blk_mq_sched_insert_requests(struct blk_mq_hw_ctx *hctx,
struct blk_mq_ctx *ctx,
struct list_head *list, bool run_queue_async)
{
struct elevator_queue *e;
struct request_queue *q = hctx->queue;
/*
* blk_mq_sched_insert_requests() is called from flush plug
* context only, and hold one usage counter to prevent queue
* from being released.
*/
percpu_ref_get(&q->q_usage_counter);
e = hctx->queue->elevator;
if (e && e->type->ops.insert_requests)
e->type->ops.insert_requests(hctx, list, false);
else {
/*
* try to issue requests directly if the hw queue isn't
* busy in case of 'none' scheduler, and this way may save
* us one extra enqueue & dequeue to sw queue.
*/
if (!hctx->dispatch_busy && !e && !run_queue_async) {
blk_mq_try_issue_list_directly(hctx, list);
if (list_empty(list))
goto out;
}
blk_mq_insert_requests(hctx, ctx, list);
}
blk_mq_run_hw_queue(hctx, run_queue_async);
out:
percpu_ref_put(&q->q_usage_counter);
}
static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
struct blk_mq_hw_ctx *hctx,
unsigned int hctx_idx)
{
unsigned int flags = set->flags & ~BLK_MQ_F_TAG_HCTX_SHARED;
if (hctx->sched_tags) {
blk_mq_free_rqs(set, hctx->sched_tags, hctx_idx);
blk_mq_free_rq_map(hctx->sched_tags, flags);
hctx->sched_tags = NULL;
}
}
static int blk_mq_sched_alloc_tags(struct request_queue *q,
struct blk_mq_hw_ctx *hctx,
unsigned int hctx_idx)
{
struct blk_mq_tag_set *set = q->tag_set;
/* Clear HCTX_SHARED so tags are init'ed */
unsigned int flags = set->flags & ~BLK_MQ_F_TAG_HCTX_SHARED;
int ret;
hctx->sched_tags = blk_mq_alloc_rq_map(set, hctx_idx, q->nr_requests,
set->reserved_tags, flags);
if (!hctx->sched_tags)
return -ENOMEM;
ret = blk_mq_alloc_rqs(set, hctx->sched_tags, hctx_idx, q->nr_requests);
if (ret)
blk_mq_sched_free_tags(set, hctx, hctx_idx);
return ret;
}
/* called in queue's release handler, tagset has gone away */
static void blk_mq_sched_tags_teardown(struct request_queue *q)
{
struct blk_mq_hw_ctx *hctx;
int i;
queue_for_each_hw_ctx(q, hctx, i) {
/* Clear HCTX_SHARED so tags are freed */
unsigned int flags = hctx->flags & ~BLK_MQ_F_TAG_HCTX_SHARED;
if (hctx->sched_tags) {
blk_mq_free_rq_map(hctx->sched_tags, flags);
hctx->sched_tags = NULL;
}
}
}
int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
{
struct blk_mq_hw_ctx *hctx;
struct elevator_queue *eq;
unsigned int i;
int ret;
if (!e) {
q->elevator = NULL;
q->nr_requests = q->tag_set->queue_depth;
return 0;
}
/*
* Default to double of smaller one between hw queue_depth and 128,
* since we don't split into sync/async like the old code did.
* Additionally, this is a per-hw queue depth.
*/
q->nr_requests = 2 * min_t(unsigned int, q->tag_set->queue_depth,
BLKDEV_MAX_RQ);
queue_for_each_hw_ctx(q, hctx, i) {
ret = blk_mq_sched_alloc_tags(q, hctx, i);
if (ret)
goto err;
}
ret = e->ops.init_sched(q, e);
if (ret)
goto err;
blk_mq_debugfs_register_sched(q);
queue_for_each_hw_ctx(q, hctx, i) {
if (e->ops.init_hctx) {
ret = e->ops.init_hctx(hctx, i);
if (ret) {
eq = q->elevator;
blk_mq_sched_free_requests(q);
blk_mq_exit_sched(q, eq);
kobject_put(&eq->kobj);
return ret;
}
}
blk_mq_debugfs_register_sched_hctx(q, hctx);
}
return 0;
err:
blk_mq_sched_free_requests(q);
blk_mq_sched_tags_teardown(q);
q->elevator = NULL;
return ret;
}
/*
* called in either blk_queue_cleanup or elevator_switch, tagset
* is required for freeing requests
*/
void blk_mq_sched_free_requests(struct request_queue *q)
{
struct blk_mq_hw_ctx *hctx;
int i;
queue_for_each_hw_ctx(q, hctx, i) {
if (hctx->sched_tags)
blk_mq_free_rqs(q->tag_set, hctx->sched_tags, i);
}
}
void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e)
{
struct blk_mq_hw_ctx *hctx;
unsigned int i;
queue_for_each_hw_ctx(q, hctx, i) {
blk_mq_debugfs_unregister_sched_hctx(hctx);
if (e->type->ops.exit_hctx && hctx->sched_data) {
e->type->ops.exit_hctx(hctx, i);
hctx->sched_data = NULL;
}
}
blk_mq_debugfs_unregister_sched(q);
if (e->type->ops.exit_sched)
e->type->ops.exit_sched(e);
blk_mq_sched_tags_teardown(q);
q->elevator = NULL;
}