android_kernel_xiaomi_sm8450/block/blk-mq-sched.c
Greg Kroah-Hartman 95f4203fc9 This is the 5.10.110 stable release
-----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEEZH8oZUiU471FcZm+ONu9yGCSaT4FAmJQLWwACgkQONu9yGCS
 aT4R2BAAr/cGnf2/BQ6+zNPW+LlfGn75803yd+oWNL8WzjNiQGrTsQavE1jL0LXP
 45iPxvY6eOlP9oEoJGYyNYhzQfUM92Unysa/KemB/xUBsb2If0ZdWk1WB9Lnw0xq
 m65kACXovbcg4LsZGpgCv7ln1ykogo+bNMES9P6CLxwKR/DMKUeJxbRNKE/AkD5l
 DxF7IJEP+YRbKAtoLM2Xj4KdjVSfRIfs+Pf0A1t43GqAw6tt3beqmzeCwDzuzz5a
 DHpXS6PeJjTZOjz4LkuBSbyK5cKGFv1C6o7JVjWSZhDyI5E4OLdNDpNKqcjsXAN+
 wMqS1eh4gYUBXmPE44BGwkkugPyaR0/KHUebfkFZG2/H/8DfvrGqlbvsGSFNXxsV
 jH2/AV/rOxAFeM/U0c1I4Ve42MU18kdf1MRBo0Dq5xSoN9HFQhNp+HE5jpppgsvi
 FYpMqZoQzH31GIjOq7g0zLdj4NTBrkO9dh7kbpH0Xay1yBmigvD2PA4qpsL1+VMI
 v73Iq/RJVGUJFAeiYFjn9IGs9EsiKNG08v9uoKS+1m1VLrpVdgwtzo+RjJ/E51Mt
 Nk4WK94MyoivkRFKulDasv9yBWdcZCfljc91271UCKCERlyO/bmsTqhffeATGGRh
 N/7oxa71BHvxp0VYqvKD6xFUs+jFt9DQmIX7Pl1/yLpaz+sN0no=
 =31mv
 -----END PGP SIGNATURE-----

Merge 5.10.110 into android12-5.10-lts

Changes in 5.10.110
	swiotlb: fix info leak with DMA_FROM_DEVICE
	USB: serial: pl2303: add IBM device IDs
	USB: serial: simple: add Nokia phone driver
	hv: utils: add PTP_1588_CLOCK to Kconfig to fix build
	netdevice: add the case if dev is NULL
	HID: logitech-dj: add new lightspeed receiver id
	xfrm: fix tunnel model fragmentation behavior
	ARM: mstar: Select HAVE_ARM_ARCH_TIMER
	virtio_console: break out of buf poll on remove
	vdpa/mlx5: should verify CTRL_VQ feature exists for MQ
	tools/virtio: fix virtio_test execution
	ethernet: sun: Free the coherent when failing in probing
	gpio: Revert regression in sysfs-gpio (gpiolib.c)
	spi: Fix invalid sgs value
	net:mcf8390: Use platform_get_irq() to get the interrupt
	Revert "gpio: Revert regression in sysfs-gpio (gpiolib.c)"
	spi: Fix erroneous sgs value with min_t()
	Input: zinitix - do not report shadow fingers
	af_key: add __GFP_ZERO flag for compose_sadb_supported in function pfkey_register
	net: dsa: microchip: add spi_device_id tables
	locking/lockdep: Avoid potential access of invalid memory in lock_class
	iommu/iova: Improve 32-bit free space estimate
	tpm: fix reference counting for struct tpm_chip
	virtio-blk: Use blk_validate_block_size() to validate block size
	USB: usb-storage: Fix use of bitfields for hardware data in ene_ub6250.c
	xhci: fix garbage USBSTS being logged in some cases
	xhci: fix runtime PM imbalance in USB2 resume
	xhci: make xhci_handshake timeout for xhci_reset() adjustable
	xhci: fix uninitialized string returned by xhci_decode_ctrl_ctx()
	mei: me: add Alder Lake N device id.
	mei: avoid iterator usage outside of list_for_each_entry
	coresight: Fix TRCCONFIGR.QE sysfs interface
	iio: afe: rescale: use s64 for temporary scale calculations
	iio: inkern: apply consumer scale on IIO_VAL_INT cases
	iio: inkern: apply consumer scale when no channel scale is available
	iio: inkern: make a best effort on offset calculation
	greybus: svc: fix an error handling bug in gb_svc_hello()
	clk: uniphier: Fix fixed-rate initialization
	ptrace: Check PTRACE_O_SUSPEND_SECCOMP permission on PTRACE_SEIZE
	KEYS: fix length validation in keyctl_pkey_params_get_2()
	Documentation: add link to stable release candidate tree
	Documentation: update stable tree link
	firmware: stratix10-svc: add missing callback parameter on RSU
	HID: intel-ish-hid: Use dma_alloc_coherent for firmware update
	SUNRPC: avoid race between mod_timer() and del_timer_sync()
	NFSD: prevent underflow in nfssvc_decode_writeargs()
	NFSD: prevent integer overflow on 32 bit systems
	f2fs: fix to unlock page correctly in error path of is_alive()
	f2fs: quota: fix loop condition at f2fs_quota_sync()
	f2fs: fix to do sanity check on .cp_pack_total_block_count
	remoteproc: Fix count check in rproc_coredump_write()
	pinctrl: samsung: drop pin banks references on error paths
	spi: mxic: Fix the transmit path
	mtd: rawnand: protect access to rawnand devices while in suspend
	can: ems_usb: ems_usb_start_xmit(): fix double dev_kfree_skb() in error path
	jffs2: fix use-after-free in jffs2_clear_xattr_subsystem
	jffs2: fix memory leak in jffs2_do_mount_fs
	jffs2: fix memory leak in jffs2_scan_medium
	mm/pages_alloc.c: don't create ZONE_MOVABLE beyond the end of a node
	mm: invalidate hwpoison page cache page in fault path
	mempolicy: mbind_range() set_policy() after vma_merge()
	scsi: libsas: Fix sas_ata_qc_issue() handling of NCQ NON DATA commands
	qed: display VF trust config
	qed: validate and restrict untrusted VFs vlan promisc mode
	riscv: Fix fill_callchain return value
	riscv: Increase stack size under KASAN
	Revert "Input: clear BTN_RIGHT/MIDDLE on buttonpads"
	cifs: prevent bad output lengths in smb2_ioctl_query_info()
	cifs: fix NULL ptr dereference in smb2_ioctl_query_info()
	ALSA: cs4236: fix an incorrect NULL check on list iterator
	ALSA: hda: Avoid unsol event during RPM suspending
	ALSA: pcm: Fix potential AB/BA lock with buffer_mutex and mmap_lock
	ALSA: hda/realtek: Fix audio regression on Mi Notebook Pro 2020
	mm: madvise: skip unmapped vma holes passed to process_madvise
	mm: madvise: return correct bytes advised with process_madvise
	Revert "mm: madvise: skip unmapped vma holes passed to process_madvise"
	mm,hwpoison: unmap poisoned page before invalidation
	mm/kmemleak: reset tag when compare object pointer
	dm integrity: set journal entry unused when shrinking device
	drbd: fix potential silent data corruption
	can: isotp: sanitize CAN ID checks in isotp_bind()
	powerpc/kvm: Fix kvm_use_magic_page
	udp: call udp_encap_enable for v6 sockets when enabling encap
	arm64: signal: nofpsimd: Do not allocate fp/simd context when not available
	arm64: dts: ti: k3-am65: Fix gic-v3 compatible regs
	arm64: dts: ti: k3-j721e: Fix gic-v3 compatible regs
	arm64: dts: ti: k3-j7200: Fix gic-v3 compatible regs
	ACPI: properties: Consistently return -ENOENT if there are no more references
	coredump: Also dump first pages of non-executable ELF libraries
	ext4: fix ext4_fc_stats trace point
	ext4: fix fs corruption when tring to remove a non-empty directory with IO error
	drivers: hamradio: 6pack: fix UAF bug caused by mod_timer()
	mailbox: tegra-hsp: Flush whole channel
	block: limit request dispatch loop duration
	block: don't merge across cgroup boundaries if blkcg is enabled
	drm/edid: check basic audio support on CEA extension block
	video: fbdev: sm712fb: Fix crash in smtcfb_read()
	video: fbdev: atari: Atari 2 bpp (STe) palette bugfix
	ARM: dts: at91: sama5d2: Fix PMERRLOC resource size
	ARM: dts: exynos: fix UART3 pins configuration in Exynos5250
	ARM: dts: exynos: add missing HDMI supplies on SMDK5250
	ARM: dts: exynos: add missing HDMI supplies on SMDK5420
	mgag200 fix memmapsl configuration in GCTL6 register
	carl9170: fix missing bit-wise or operator for tx_params
	pstore: Don't use semaphores in always-atomic-context code
	thermal: int340x: Increase bitmap size
	lib/raid6/test: fix multiple definition linking error
	exec: Force single empty string when argv is empty
	crypto: rsa-pkcs1pad - only allow with rsa
	crypto: rsa-pkcs1pad - correctly get hash from source scatterlist
	crypto: rsa-pkcs1pad - restore signature length check
	crypto: rsa-pkcs1pad - fix buffer overread in pkcs1pad_verify_complete()
	bcache: fixup multiple threads crash
	DEC: Limit PMAX memory probing to R3k systems
	media: gpio-ir-tx: fix transmit with long spaces on Orange Pi PC
	media: davinci: vpif: fix unbalanced runtime PM get
	media: davinci: vpif: fix unbalanced runtime PM enable
	xtensa: fix stop_machine_cpuslocked call in patch_text
	xtensa: fix xtensa_wsr always writing 0
	brcmfmac: firmware: Allocate space for default boardrev in nvram
	brcmfmac: pcie: Release firmwares in the brcmf_pcie_setup error path
	brcmfmac: pcie: Replace brcmf_pcie_copy_mem_todev with memcpy_toio
	brcmfmac: pcie: Fix crashes due to early IRQs
	drm/i915/opregion: check port number bounds for SWSCI display power state
	drm/i915/gem: add missing boundary check in vm_access
	PCI: pciehp: Clear cmd_busy bit in polling mode
	PCI: xgene: Revert "PCI: xgene: Fix IB window setup"
	regulator: qcom_smd: fix for_each_child.cocci warnings
	selinux: check return value of sel_make_avc_files
	hwrng: cavium - Check health status while reading random data
	hwrng: cavium - HW_RANDOM_CAVIUM should depend on ARCH_THUNDER
	crypto: sun8i-ss - really disable hash on A80
	crypto: authenc - Fix sleep in atomic context in decrypt_tail
	crypto: mxs-dcp - Fix scatterlist processing
	thermal: int340x: Check for NULL after calling kmemdup()
	spi: tegra114: Add missing IRQ check in tegra_spi_probe
	arm64/mm: avoid fixmap race condition when create pud mapping
	selftests/x86: Add validity check and allow field splitting
	crypto: rockchip - ECB does not need IV
	audit: log AUDIT_TIME_* records only from rules
	EVM: fix the evm= __setup handler return value
	crypto: ccree - don't attempt 0 len DMA mappings
	spi: pxa2xx-pci: Balance reference count for PCI DMA device
	hwmon: (pmbus) Add mutex to regulator ops
	hwmon: (sch56xx-common) Replace WDOG_ACTIVE with WDOG_HW_RUNNING
	nvme: cleanup __nvme_check_ids
	block: don't delete queue kobject before its children
	PM: hibernate: fix __setup handler error handling
	PM: suspend: fix return value of __setup handler
	spi: spi-zynqmp-gqspi: Handle error for dma_set_mask
	hwrng: atmel - disable trng on failure path
	crypto: sun8i-ss - call finalize with bh disabled
	crypto: sun8i-ce - call finalize with bh disabled
	crypto: amlogic - call finalize with bh disabled
	crypto: vmx - add missing dependencies
	clocksource/drivers/timer-ti-dm: Fix regression from errata i940 fix
	clocksource/drivers/exynos_mct: Refactor resources allocation
	clocksource/drivers/exynos_mct: Handle DTS with higher number of interrupts
	clocksource/drivers/timer-microchip-pit64b: Use notrace
	clocksource/drivers/timer-of: Check return value of of_iomap in timer_of_base_init()
	ACPI: APEI: fix return value of __setup handlers
	crypto: ccp - ccp_dmaengine_unregister release dma channels
	crypto: ccree - Fix use after free in cc_cipher_exit()
	vfio: platform: simplify device removal
	amba: Make the remove callback return void
	hwrng: nomadik - Change clk_disable to clk_disable_unprepare
	hwmon: (pmbus) Add Vin unit off handling
	clocksource: acpi_pm: fix return value of __setup handler
	io_uring: terminate manual loop iterator loop correctly for non-vecs
	watch_queue: Fix NULL dereference in error cleanup
	watch_queue: Actually free the watch
	f2fs: fix to enable ATGC correctly via gc_idle sysfs interface
	sched/debug: Remove mpol_get/put and task_lock/unlock from sched_show_numa
	sched/core: Export pelt_thermal_tp
	rseq: Optimise rseq_get_rseq_cs() and clear_rseq_cs()
	rseq: Remove broken uapi field layout on 32-bit little endian
	perf/core: Fix address filter parser for multiple filters
	perf/x86/intel/pt: Fix address filter config for 32-bit kernel
	f2fs: fix missing free nid in f2fs_handle_failed_inode
	nfsd: more robust allocation failure handling in nfsd_file_cache_init
	f2fs: fix to avoid potential deadlock
	btrfs: fix unexpected error path when reflinking an inline extent
	f2fs: compress: remove unneeded read when rewrite whole cluster
	f2fs: fix compressed file start atomic write may cause data corruption
	selftests, x86: fix how check_cc.sh is being invoked
	kunit: make kunit_test_timeout compatible with comment
	media: staging: media: zoran: fix usage of vb2_dma_contig_set_max_seg_size
	media: v4l2-mem2mem: Apply DST_QUEUE_OFF_BASE on MMAP buffers across ioctls
	media: mtk-vcodec: potential dereference of null pointer
	media: bttv: fix WARNING regression on tunerless devices
	ASoC: xilinx: xlnx_formatter_pcm: Handle sysclk setting
	ASoC: generic: simple-card-utils: remove useless assignment
	media: coda: Fix missing put_device() call in coda_get_vdoa_data
	media: meson: vdec: potential dereference of null pointer
	media: hantro: Fix overfill bottom register field name
	media: aspeed: Correct value for h-total-pixels
	video: fbdev: matroxfb: set maxvram of vbG200eW to the same as vbG200 to avoid black screen
	video: fbdev: controlfb: Fix set but not used warnings
	video: fbdev: controlfb: Fix COMPILE_TEST build
	video: fbdev: smscufx: Fix null-ptr-deref in ufx_usb_probe()
	video: fbdev: atmel_lcdfb: fix an error code in atmel_lcdfb_probe()
	video: fbdev: fbcvt.c: fix printing in fb_cvt_print_name()
	firmware: qcom: scm: Remove reassignment to desc following initializer
	ARM: dts: qcom: ipq4019: fix sleep clock
	soc: qcom: rpmpd: Check for null return of devm_kcalloc
	soc: qcom: ocmem: Fix missing put_device() call in of_get_ocmem
	soc: qcom: aoss: remove spurious IRQF_ONESHOT flags
	arm64: dts: qcom: sdm845: fix microphone bias properties and values
	arm64: dts: qcom: sm8150: Correct TCS configuration for apps rsc
	firmware: ti_sci: Fix compilation failure when CONFIG_TI_SCI_PROTOCOL is not defined
	soc: ti: wkup_m3_ipc: Fix IRQ check in wkup_m3_ipc_probe
	ARM: dts: sun8i: v3s: Move the csi1 block to follow address order
	ARM: dts: imx: Add missing LVDS decoder on M53Menlo
	media: video/hdmi: handle short reads of hdmi info frame.
	media: em28xx: initialize refcount before kref_get
	media: usb: go7007: s2250-board: fix leak in probe()
	media: cedrus: H265: Fix neighbour info buffer size
	media: cedrus: h264: Fix neighbour info buffer size
	ASoC: codecs: wcd934x: fix return value of wcd934x_rx_hph_mode_put
	uaccess: fix nios2 and microblaze get_user_8()
	ASoC: rt5663: check the return value of devm_kzalloc() in rt5663_parse_dp()
	ASoC: ti: davinci-i2s: Add check for clk_enable()
	ALSA: spi: Add check for clk_enable()
	arm64: dts: ns2: Fix spi-cpol and spi-cpha property
	arm64: dts: broadcom: Fix sata nodename
	printk: fix return value of printk.devkmsg __setup handler
	ASoC: mxs-saif: Handle errors for clk_enable
	ASoC: atmel_ssc_dai: Handle errors for clk_enable
	ASoC: dwc-i2s: Handle errors for clk_enable
	ASoC: soc-compress: prevent the potentially use of null pointer
	memory: emif: Add check for setup_interrupts
	memory: emif: check the pointer temp in get_device_details()
	ALSA: firewire-lib: fix uninitialized flag for AV/C deferred transaction
	arm64: dts: rockchip: Fix SDIO regulator supply properties on rk3399-firefly
	m68k: coldfire/device.c: only build for MCF_EDMA when h/w macros are defined
	media: stk1160: If start stream fails, return buffers with VB2_BUF_STATE_QUEUED
	media: vidtv: Check for null return of vzalloc
	ASoC: atmel: Add missing of_node_put() in at91sam9g20ek_audio_probe
	ASoC: wm8350: Handle error for wm8350_register_irq
	ASoC: fsi: Add check for clk_enable
	video: fbdev: omapfb: Add missing of_node_put() in dvic_probe_of
	media: saa7134: convert list_for_each to entry variant
	media: saa7134: fix incorrect use to determine if list is empty
	ivtv: fix incorrect device_caps for ivtvfb
	ASoC: rockchip: i2s: Use devm_platform_get_and_ioremap_resource()
	ASoC: rockchip: i2s: Fix missing clk_disable_unprepare() in rockchip_i2s_probe
	ASoC: SOF: Add missing of_node_put() in imx8m_probe
	ASoC: dmaengine: do not use a NULL prepare_slave_config() callback
	ASoC: mxs: Fix error handling in mxs_sgtl5000_probe
	ASoC: fsl_spdif: Disable TX clock when stop
	ASoC: imx-es8328: Fix error return code in imx_es8328_probe()
	ASoC: msm8916-wcd-digital: Fix missing clk_disable_unprepare() in msm8916_wcd_digital_probe
	mmc: davinci_mmc: Handle error for clk_enable
	ASoC: atmel: sam9x5_wm8731: use devm_snd_soc_register_card()
	ASoC: atmel: Fix error handling in sam9x5_wm8731_driver_probe
	ASoC: msm8916-wcd-analog: Fix error handling in pm8916_wcd_analog_spmi_probe
	ASoC: codecs: wcd934x: Add missing of_node_put() in wcd934x_codec_parse_data
	ARM: configs: multi_v5_defconfig: re-enable CONFIG_V4L_PLATFORM_DRIVERS
	drm/meson: osd_afbcd: Add an exit callback to struct meson_afbcd_ops
	drm/bridge: Fix free wrong object in sii8620_init_rcp_input_dev
	drm/bridge: Add missing pm_runtime_disable() in __dw_mipi_dsi_probe
	drm/bridge: nwl-dsi: Fix PM disable depth imbalance in nwl_dsi_probe
	drm: bridge: adv7511: Fix ADV7535 HPD enablement
	ath10k: fix memory overwrite of the WoWLAN wakeup packet pattern
	drm/panfrost: Check for error num after setting mask
	libbpf: Fix possible NULL pointer dereference when destroying skeleton
	udmabuf: validate ubuf->pagecount
	Bluetooth: hci_serdev: call init_rwsem() before p->open()
	mtd: onenand: Check for error irq
	mtd: rawnand: gpmi: fix controller timings setting
	drm/edid: Don't clear formats if using deep color
	ionic: fix type complaint in ionic_dev_cmd_clean()
	drm/nouveau/acr: Fix undefined behavior in nvkm_acr_hsfw_load_bl()
	drm/amd/display: Fix a NULL pointer dereference in amdgpu_dm_connector_add_common_modes()
	drm/amd/pm: return -ENOTSUPP if there is no get_dpm_ultimate_freq function
	ath9k_htc: fix uninit value bugs
	RDMA/core: Set MR type in ib_reg_user_mr
	KVM: PPC: Fix vmx/vsx mixup in mmio emulation
	i40e: don't reserve excessive XDP_PACKET_HEADROOM on XSK Rx to skb
	i40e: respect metadata on XSK Rx to skb
	power: reset: gemini-poweroff: Fix IRQ check in gemini_poweroff_probe
	ray_cs: Check ioremap return value
	powerpc: dts: t1040rdb: fix ports names for Seville Ethernet switch
	KVM: PPC: Book3S HV: Check return value of kvmppc_radix_init
	powerpc/perf: Don't use perf_hw_context for trace IMC PMU
	mt76: mt7915: use proper aid value in mt7915_mcu_wtbl_generic_tlv in sta mode
	mt76: mt7915: use proper aid value in mt7915_mcu_sta_basic_tlv
	mt76: mt7603: check sta_rates pointer in mt7603_sta_rate_tbl_update
	mt76: mt7615: check sta_rates pointer in mt7615_sta_rate_tbl_update
	net: dsa: mv88e6xxx: Enable port policy support on 6097
	scripts/dtc: Call pkg-config POSIXly correct
	livepatch: Fix build failure on 32 bits processors
	PCI: aardvark: Fix reading PCI_EXP_RTSTA_PME bit on emulated bridge
	drm/bridge: dw-hdmi: use safe format when first in bridge chain
	power: supply: ab8500: Fix memory leak in ab8500_fg_sysfs_init
	HID: i2c-hid: fix GET/SET_REPORT for unnumbered reports
	iommu/ipmmu-vmsa: Check for error num after setting mask
	drm/amd/pm: enable pm sysfs write for one VF mode
	drm/amd/display: Add affected crtcs to atomic state for dsc mst unplug
	IB/cma: Allow XRC INI QPs to set their local ACK timeout
	dax: make sure inodes are flushed before destroy cache
	iwlwifi: Fix -EIO error code that is never returned
	iwlwifi: mvm: Fix an error code in iwl_mvm_up()
	drm/msm/dp: populate connector of struct dp_panel
	drm/msm/dpu: add DSPP blocks teardown
	drm/msm/dpu: fix dp audio condition
	dm crypt: fix get_key_size compiler warning if !CONFIG_KEYS
	scsi: pm8001: Fix command initialization in pm80XX_send_read_log()
	scsi: pm8001: Fix command initialization in pm8001_chip_ssp_tm_req()
	scsi: pm8001: Fix payload initialization in pm80xx_set_thermal_config()
	scsi: pm8001: Fix le32 values handling in pm80xx_set_sas_protocol_timer_config()
	scsi: pm8001: Fix payload initialization in pm80xx_encrypt_update()
	scsi: pm8001: Fix le32 values handling in pm80xx_chip_ssp_io_req()
	scsi: pm8001: Fix le32 values handling in pm80xx_chip_sata_req()
	scsi: pm8001: Fix NCQ NON DATA command task initialization
	scsi: pm8001: Fix NCQ NON DATA command completion handling
	scsi: pm8001: Fix abort all task initialization
	RDMA/mlx5: Fix the flow of a miss in the allocation of a cache ODP MR
	drm/amd/display: Remove vupdate_int_entry definition
	TOMOYO: fix __setup handlers return values
	ext2: correct max file size computing
	drm/tegra: Fix reference leak in tegra_dsi_ganged_probe
	power: supply: bq24190_charger: Fix bq24190_vbus_is_enabled() wrong false return
	scsi: hisi_sas: Change permission of parameter prot_mask
	drm/bridge: cdns-dsi: Make sure to to create proper aliases for dt
	bpf, arm64: Call build_prologue() first in first JIT pass
	bpf, arm64: Feed byte-offset into bpf line info
	gpu: host1x: Fix a memory leak in 'host1x_remove()'
	libbpf: Skip forward declaration when counting duplicated type names
	powerpc/mm/numa: skip NUMA_NO_NODE onlining in parse_numa_properties()
	powerpc/Makefile: Don't pass -mcpu=powerpc64 when building 32-bit
	KVM: x86: Fix emulation in writing cr8
	KVM: x86/emulator: Defer not-present segment check in __load_segment_descriptor()
	hv_balloon: rate-limit "Unhandled message" warning
	i2c: xiic: Make bus names unique
	power: supply: wm8350-power: Handle error for wm8350_register_irq
	power: supply: wm8350-power: Add missing free in free_charger_irq
	IB/hfi1: Allow larger MTU without AIP
	PCI: Reduce warnings on possible RW1C corruption
	net: axienet: fix RX ring refill allocation failure handling
	mips: DEC: honor CONFIG_MIPS_FP_SUPPORT=n
	powerpc/sysdev: fix incorrect use to determine if list is empty
	mfd: mc13xxx: Add check for mc13xxx_irq_request
	libbpf: Unmap rings when umem deleted
	selftests/bpf: Make test_lwt_ip_encap more stable and faster
	platform/x86: huawei-wmi: check the return value of device_create_file()
	powerpc: 8xx: fix a return value error in mpc8xx_pic_init
	vxcan: enable local echo for sent CAN frames
	ath10k: Fix error handling in ath10k_setup_msa_resources
	mips: cdmm: Fix refcount leak in mips_cdmm_phys_base
	MIPS: RB532: fix return value of __setup handler
	MIPS: pgalloc: fix memory leak caused by pgd_free()
	mtd: rawnand: atmel: fix refcount issue in atmel_nand_controller_init
	RDMA/mlx5: Fix memory leak in error flow for subscribe event routine
	bpf, sockmap: Fix memleak in tcp_bpf_sendmsg while sk msg is full
	bpf, sockmap: Fix more uncharged while msg has more_data
	bpf, sockmap: Fix double uncharge the mem of sk_msg
	samples/bpf, xdpsock: Fix race when running for fix duration of time
	USB: storage: ums-realtek: fix error code in rts51x_read_mem()
	can: isotp: return -EADDRNOTAVAIL when reading from unbound socket
	can: isotp: support MSG_TRUNC flag when reading from socket
	bareudp: use ipv6_mod_enabled to check if IPv6 enabled
	selftests/bpf: Fix error reporting from sock_fields programs
	Bluetooth: call hci_le_conn_failed with hdev lock in hci_le_conn_failed
	Bluetooth: btmtksdio: Fix kernel oops in btmtksdio_interrupt
	ipv4: Fix route lookups when handling ICMP redirects and PMTU updates
	af_netlink: Fix shift out of bounds in group mask calculation
	i2c: meson: Fix wrong speed use from probe
	i2c: mux: demux-pinctrl: do not deactivate a master that is not active
	selftests/bpf/test_lirc_mode2.sh: Exit with proper code
	PCI: Avoid broken MSI on SB600 USB devices
	net: bcmgenet: Use stronger register read/writes to assure ordering
	tcp: ensure PMTU updates are processed during fastopen
	openvswitch: always update flow key after nat
	tipc: fix the timer expires after interval 100ms
	mfd: asic3: Add missing iounmap() on error asic3_mfd_probe
	mxser: fix xmit_buf leak in activate when LSR == 0xff
	pwm: lpc18xx-sct: Initialize driver data and hardware before pwmchip_add()
	fsi: aspeed: convert to devm_platform_ioremap_resource
	fsi: Aspeed: Fix a potential double free
	misc: alcor_pci: Fix an error handling path
	cpufreq: qcom-cpufreq-nvmem: fix reading of PVS Valid fuse
	soundwire: intel: fix wrong register name in intel_shim_wake
	clk: qcom: ipq8074: fix PCI-E clock oops
	iio: mma8452: Fix probe failing when an i2c_device_id is used
	staging:iio:adc:ad7280a: Fix handing of device address bit reversing.
	pinctrl: renesas: r8a77470: Reduce size for narrow VIN1 channel
	pinctrl: renesas: checker: Fix miscalculation of number of states
	clk: qcom: ipq8074: Use floor ops for SDCC1 clock
	phy: dphy: Correct lpx parameter and its derivatives(ta_{get,go,sure})
	serial: 8250_mid: Balance reference count for PCI DMA device
	serial: 8250_lpss: Balance reference count for PCI DMA device
	NFS: Use of mapping_set_error() results in spurious errors
	serial: 8250: Fix race condition in RTS-after-send handling
	iio: adc: Add check for devm_request_threaded_irq
	habanalabs: Add check for pci_enable_device
	NFS: Return valid errors from nfs2/3_decode_dirent()
	dma-debug: fix return value of __setup handlers
	clk: imx7d: Remove audio_mclk_root_clk
	clk: at91: sama7g5: fix parents of PDMCs' GCLK
	clk: qcom: clk-rcg2: Update logic to calculate D value for RCG
	clk: qcom: clk-rcg2: Update the frac table for pixel clock
	dmaengine: hisi_dma: fix MSI allocate fail when reload hisi_dma
	remoteproc: qcom: Fix missing of_node_put in adsp_alloc_memory_region
	remoteproc: qcom_wcnss: Add missing of_node_put() in wcnss_alloc_memory_region
	remoteproc: qcom_q6v5_mss: Fix some leaks in q6v5_alloc_memory_region
	nvdimm/region: Fix default alignment for small regions
	clk: actions: Terminate clk_div_table with sentinel element
	clk: loongson1: Terminate clk_div_table with sentinel element
	clk: clps711x: Terminate clk_div_table with sentinel element
	clk: tegra: tegra124-emc: Fix missing put_device() call in emc_ensure_emc_driver
	NFS: remove unneeded check in decode_devicenotify_args()
	staging: mt7621-dts: fix LEDs and pinctrl on GB-PC1 devicetree
	staging: mt7621-dts: fix formatting
	staging: mt7621-dts: fix pinctrl properties for ethernet
	staging: mt7621-dts: fix GB-PC2 devicetree
	pinctrl: mediatek: Fix missing of_node_put() in mtk_pctrl_init
	pinctrl: mediatek: paris: Fix PIN_CONFIG_BIAS_* readback
	pinctrl: mediatek: paris: Fix "argument" argument type for mtk_pinconf_get()
	pinctrl: mediatek: paris: Fix pingroup pin config state readback
	pinctrl: mediatek: paris: Skip custom extra pin config dump for virtual GPIOs
	pinctrl: nomadik: Add missing of_node_put() in nmk_pinctrl_probe
	pinctrl/rockchip: Add missing of_node_put() in rockchip_pinctrl_probe
	tty: hvc: fix return value of __setup handler
	kgdboc: fix return value of __setup handler
	serial: 8250: fix XOFF/XON sending when DMA is used
	kgdbts: fix return value of __setup handler
	firmware: google: Properly state IOMEM dependency
	driver core: dd: fix return value of __setup handler
	jfs: fix divide error in dbNextAG
	netfilter: nf_conntrack_tcp: preserve liberal flag in tcp options
	NFSv4.1: don't retry BIND_CONN_TO_SESSION on session error
	kdb: Fix the putarea helper function
	clk: qcom: gcc-msm8994: Fix gpll4 width
	clk: Initialize orphan req_rate
	xen: fix is_xen_pmu()
	net: enetc: report software timestamping via SO_TIMESTAMPING
	net: hns3: fix bug when PF set the duplicate MAC address for VFs
	net: phy: broadcom: Fix brcm_fet_config_init()
	selftests: test_vxlan_under_vrf: Fix broken test case
	qlcnic: dcb: default to returning -EOPNOTSUPP
	net/x25: Fix null-ptr-deref caused by x25_disconnect
	NFSv4/pNFS: Fix another issue with a list iterator pointing to the head
	net: dsa: bcm_sf2_cfp: fix an incorrect NULL check on list iterator
	fs: fd tables have to be multiples of BITS_PER_LONG
	lib/test: use after free in register_test_dev_kmod()
	fs: fix fd table size alignment properly
	LSM: general protection fault in legacy_parse_param
	regulator: rpi-panel: Handle I2C errors/timing to the Atmel
	gcc-plugins/stackleak: Exactly match strings instead of prefixes
	pinctrl: npcm: Fix broken references to chip->parent_device
	block, bfq: don't move oom_bfqq
	selinux: use correct type for context length
	selinux: allow FIOCLEX and FIONCLEX with policy capability
	loop: use sysfs_emit() in the sysfs xxx show()
	Fix incorrect type in assignment of ipv6 port for audit
	irqchip/qcom-pdc: Fix broken locking
	irqchip/nvic: Release nvic_base upon failure
	fs/binfmt_elf: Fix AT_PHDR for unusual ELF files
	bfq: fix use-after-free in bfq_dispatch_request
	ACPICA: Avoid walking the ACPI Namespace if it is not there
	lib/raid6/test/Makefile: Use $(pound) instead of \# for Make 4.3
	Revert "Revert "block, bfq: honor already-setup queue merges""
	ACPI/APEI: Limit printable size of BERT table data
	PM: core: keep irq flags in device_pm_check_callbacks()
	parisc: Fix handling off probe non-access faults
	nvme-tcp: lockdep: annotate in-kernel sockets
	spi: tegra20: Use of_device_get_match_data()
	locking/lockdep: Iterate lock_classes directly when reading lockdep files
	ext4: correct cluster len and clusters changed accounting in ext4_mb_mark_bb
	ext4: fix ext4_mb_mark_bb() with flex_bg with fast_commit
	ext4: don't BUG if someone dirty pages without asking ext4 first
	f2fs: fix to do sanity check on curseg->alloc_type
	NFSD: Fix nfsd_breaker_owns_lease() return values
	f2fs: compress: fix to print raw data size in error path of lz4 decompression
	ntfs: add sanity check on allocation size
	media: staging: media: zoran: move videodev alloc
	media: staging: media: zoran: calculate the right buffer number for zoran_reap_stat_com
	media: staging: media: zoran: fix various V4L2 compliance errors
	media: ir_toy: free before error exiting
	video: fbdev: nvidiafb: Use strscpy() to prevent buffer overflow
	video: fbdev: w100fb: Reset global state
	video: fbdev: cirrusfb: check pixclock to avoid divide by zero
	video: fbdev: omapfb: acx565akm: replace snprintf with sysfs_emit
	ARM: dts: qcom: fix gic_irq_domain_translate warnings for msm8960
	ARM: dts: bcm2837: Add the missing L1/L2 cache information
	ASoC: madera: Add dependencies on MFD
	media: atomisp_gmin_platform: Add DMI quirk to not turn AXP ELDO2 regulator off on some boards
	media: atomisp: fix dummy_ptr check to avoid duplicate active_bo
	ARM: ftrace: avoid redundant loads or clobbering IP
	ARM: dts: imx7: Use audio_mclk_post_div instead audio_mclk_root_clk
	arm64: defconfig: build imx-sdma as a module
	video: fbdev: omapfb: panel-dsi-cm: Use sysfs_emit() instead of snprintf()
	video: fbdev: omapfb: panel-tpo-td043mtea1: Use sysfs_emit() instead of snprintf()
	video: fbdev: udlfb: replace snprintf in show functions with sysfs_emit
	ARM: dts: bcm2711: Add the missing L1/L2 cache information
	ASoC: soc-core: skip zero num_dai component in searching dai name
	media: cx88-mpeg: clear interrupt status register before streaming video
	uaccess: fix type mismatch warnings from access_ok()
	lib/test_lockup: fix kernel pointer check for separate address spaces
	ARM: tegra: tamonten: Fix I2C3 pad setting
	ARM: mmp: Fix failure to remove sram device
	video: fbdev: sm712fb: Fix crash in smtcfb_write()
	media: Revert "media: em28xx: add missing em28xx_close_extension"
	media: hdpvr: initialize dev->worker at hdpvr_register_videodev
	mmc: host: Return an error when ->enable_sdio_irq() ops is missing
	media: atomisp: fix bad usage at error handling logic
	ALSA: hda/realtek: Add alc256-samsung-headphone fixup
	KVM: x86/mmu: Check for present SPTE when clearing dirty bit in TDP MMU
	powerpc/kasan: Fix early region not updated correctly
	powerpc/lib/sstep: Fix 'sthcx' instruction
	powerpc/lib/sstep: Fix build errors with newer binutils
	powerpc: Fix build errors with newer binutils
	scsi: qla2xxx: Fix stuck session in gpdb
	scsi: qla2xxx: Fix scheduling while atomic
	scsi: qla2xxx: Fix wrong FDMI data for 64G adapter
	scsi: qla2xxx: Fix warning for missing error code
	scsi: qla2xxx: Fix device reconnect in loop topology
	scsi: qla2xxx: Add devids and conditionals for 28xx
	scsi: qla2xxx: Check for firmware dump already collected
	scsi: qla2xxx: Suppress a kernel complaint in qla_create_qpair()
	scsi: qla2xxx: Fix disk failure to rediscover
	scsi: qla2xxx: Fix incorrect reporting of task management failure
	scsi: qla2xxx: Fix hang due to session stuck
	scsi: qla2xxx: Fix missed DMA unmap for NVMe ls requests
	scsi: qla2xxx: Fix N2N inconsistent PLOGI
	scsi: qla2xxx: Reduce false trigger to login
	scsi: qla2xxx: Use correct feature type field during RFF_ID processing
	platform: chrome: Split trace include file
	KVM: x86: Forbid VMM to set SYNIC/STIMER MSRs when SynIC wasn't activated
	KVM: Prevent module exit until all VMs are freed
	KVM: x86: fix sending PV IPI
	KVM: SVM: fix panic on out-of-bounds guest IRQ
	ASoC: SOF: Intel: Fix NULL ptr dereference when ENOMEM
	ubifs: rename_whiteout: Fix double free for whiteout_ui->data
	ubifs: Fix deadlock in concurrent rename whiteout and inode writeback
	ubifs: Add missing iput if do_tmpfile() failed in rename whiteout
	ubifs: setflags: Make dirtied_ino_d 8 bytes aligned
	ubifs: Fix read out-of-bounds in ubifs_wbuf_write_nolock()
	ubifs: Fix to add refcount once page is set private
	ubifs: rename_whiteout: correct old_dir size computing
	wireguard: queueing: use CFI-safe ptr_ring cleanup function
	wireguard: socket: free skb in send6 when ipv6 is disabled
	wireguard: socket: ignore v6 endpoints when ipv6 is disabled
	XArray: Fix xas_create_range() when multi-order entry present
	can: mcba_usb: mcba_usb_start_xmit(): fix double dev_kfree_skb in error path
	can: mcba_usb: properly check endpoint type
	can: mcp251xfd: mcp251xfd_register_get_dev_id(): fix return of error value
	XArray: Update the LRU list in xas_split()
	rtc: check if __rtc_read_time was successful
	gfs2: Make sure FITRIM minlen is rounded up to fs block size
	net: hns3: fix software vlan talbe of vlan 0 inconsistent with hardware
	rxrpc: Fix call timer start racing with call destruction
	mailbox: imx: fix wakeup failure from freeze mode
	crypto: arm/aes-neonbs-cbc - Select generic cbc and aes
	watch_queue: Free the page array when watch_queue is dismantled
	pinctrl: pinconf-generic: Print arguments for bias-pull-*
	watchdog: rti-wdt: Add missing pm_runtime_disable() in probe function
	pinctrl: nuvoton: npcm7xx: Rename DS() macro to DSTR()
	pinctrl: nuvoton: npcm7xx: Use %zu printk format for ARRAY_SIZE()
	ASoC: mediatek: mt6358: add missing EXPORT_SYMBOLs
	ubi: Fix race condition between ctrl_cdev_ioctl and ubi_cdev_ioctl
	ARM: iop32x: offset IRQ numbers by 1
	io_uring: fix memory leak of uid in files registration
	riscv module: remove (NOLOAD)
	ACPI: CPPC: Avoid out of bounds access when parsing _CPC data
	platform/chrome: cros_ec_typec: Check for EC device
	can: isotp: restore accidentally removed MSG_PEEK feature
	proc: bootconfig: Add null pointer check
	staging: mt7621-dts: fix pinctrl-0 items to be size-1 items on ethernet
	ASoC: soc-compress: Change the check for codec_dai
	batman-adv: Check ptr for NULL before reducing its refcnt
	mm/mmap: return 1 from stack_guard_gap __setup() handler
	ARM: 9187/1: JIVE: fix return value of __setup handler
	mm/memcontrol: return 1 from cgroup.memory __setup() handler
	mm/usercopy: return 1 from hardened_usercopy __setup() handler
	bpf: Adjust BPF stack helper functions to accommodate skip > 0
	bpf: Fix comment for helper bpf_current_task_under_cgroup()
	dt-bindings: mtd: nand-controller: Fix the reg property description
	dt-bindings: mtd: nand-controller: Fix a comment in the examples
	dt-bindings: spi: mxic: The interrupt property is not mandatory
	ubi: fastmap: Return error code if memory allocation fails in add_aeb()
	ASoC: topology: Allow TLV control to be either read or write
	ARM: dts: spear1340: Update serial node properties
	ARM: dts: spear13xx: Update SPI dma properties
	um: Fix uml_mconsole stop/go
	docs: sysctl/kernel: add missing bit to panic_print
	openvswitch: Fixed nd target mask field in the flow dump.
	KVM: x86/mmu: do compare-and-exchange of gPTE via the user address
	can: m_can: m_can_tx_handler(): fix use after free of skb
	can: usb_8dev: usb_8dev_start_xmit(): fix double dev_kfree_skb() in error path
	coredump: Snapshot the vmas in do_coredump
	coredump: Remove the WARN_ON in dump_vma_snapshot
	coredump/elf: Pass coredump_params into fill_note_info
	coredump: Use the vma snapshot in fill_files_note
	arm64: Do not defer reserve_crashkernel() for platforms with no DMA memory zones
	PCI: xgene: Revert "PCI: xgene: Use inbound resources for setup"
	Linux 5.10.110

Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
Change-Id: I12fbe227793dd40c0582588e1700cf88cafd0ac6
2022-04-18 17:41:18 +02:00

655 lines
17 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* blk-mq scheduling framework
*
* Copyright (C) 2016 Jens Axboe
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/blk-mq.h>
#include <linux/list_sort.h>
#include <trace/events/block.h>
#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-debugfs.h"
#include "blk-mq-sched.h"
#include "blk-mq-tag.h"
#include "blk-wbt.h"
void blk_mq_sched_assign_ioc(struct request *rq)
{
struct request_queue *q = rq->q;
struct io_context *ioc;
struct io_cq *icq;
/*
* May not have an IO context if it's a passthrough request
*/
ioc = current->io_context;
if (!ioc)
return;
spin_lock_irq(&q->queue_lock);
icq = ioc_lookup_icq(ioc, q);
spin_unlock_irq(&q->queue_lock);
if (!icq) {
icq = ioc_create_icq(ioc, q, GFP_ATOMIC);
if (!icq)
return;
}
get_io_context(icq->ioc);
rq->elv.icq = icq;
}
/*
* Mark a hardware queue as needing a restart. For shared queues, maintain
* a count of how many hardware queues are marked for restart.
*/
void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
{
if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
return;
set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
}
EXPORT_SYMBOL_GPL(blk_mq_sched_mark_restart_hctx);
void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx)
{
if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
return;
clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
/*
* Order clearing SCHED_RESTART and list_empty_careful(&hctx->dispatch)
* in blk_mq_run_hw_queue(). Its pair is the barrier in
* blk_mq_dispatch_rq_list(). So dispatch code won't see SCHED_RESTART,
* meantime new request added to hctx->dispatch is missed to check in
* blk_mq_run_hw_queue().
*/
smp_mb();
blk_mq_run_hw_queue(hctx, true);
}
static int sched_rq_cmp(void *priv, struct list_head *a, struct list_head *b)
{
struct request *rqa = container_of(a, struct request, queuelist);
struct request *rqb = container_of(b, struct request, queuelist);
return rqa->mq_hctx > rqb->mq_hctx;
}
static bool blk_mq_dispatch_hctx_list(struct list_head *rq_list)
{
struct blk_mq_hw_ctx *hctx =
list_first_entry(rq_list, struct request, queuelist)->mq_hctx;
struct request *rq;
LIST_HEAD(hctx_list);
unsigned int count = 0;
list_for_each_entry(rq, rq_list, queuelist) {
if (rq->mq_hctx != hctx) {
list_cut_before(&hctx_list, rq_list, &rq->queuelist);
goto dispatch;
}
count++;
}
list_splice_tail_init(rq_list, &hctx_list);
dispatch:
return blk_mq_dispatch_rq_list(hctx, &hctx_list, count);
}
#define BLK_MQ_BUDGET_DELAY 3 /* ms units */
/*
* Only SCSI implements .get_budget and .put_budget, and SCSI restarts
* its queue by itself in its completion handler, so we don't need to
* restart queue if .get_budget() returns BLK_STS_NO_RESOURCE.
*
* Returns -EAGAIN if hctx->dispatch was found non-empty and run_work has to
* be run again. This is necessary to avoid starving flushes.
*/
static int __blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
{
struct request_queue *q = hctx->queue;
struct elevator_queue *e = q->elevator;
bool multi_hctxs = false, run_queue = false;
bool dispatched = false, busy = false;
unsigned int max_dispatch;
LIST_HEAD(rq_list);
int count = 0;
if (hctx->dispatch_busy)
max_dispatch = 1;
else
max_dispatch = hctx->queue->nr_requests;
do {
struct request *rq;
if (e->type->ops.has_work && !e->type->ops.has_work(hctx))
break;
if (!list_empty_careful(&hctx->dispatch)) {
busy = true;
break;
}
if (!blk_mq_get_dispatch_budget(q))
break;
rq = e->type->ops.dispatch_request(hctx);
if (!rq) {
blk_mq_put_dispatch_budget(q);
/*
* We're releasing without dispatching. Holding the
* budget could have blocked any "hctx"s with the
* same queue and if we didn't dispatch then there's
* no guarantee anyone will kick the queue. Kick it
* ourselves.
*/
run_queue = true;
break;
}
/*
* Now this rq owns the budget which has to be released
* if this rq won't be queued to driver via .queue_rq()
* in blk_mq_dispatch_rq_list().
*/
list_add_tail(&rq->queuelist, &rq_list);
if (rq->mq_hctx != hctx)
multi_hctxs = true;
} while (++count < max_dispatch);
if (!count) {
if (run_queue)
blk_mq_delay_run_hw_queues(q, BLK_MQ_BUDGET_DELAY);
} else if (multi_hctxs) {
/*
* Requests from different hctx may be dequeued from some
* schedulers, such as bfq and deadline.
*
* Sort the requests in the list according to their hctx,
* dispatch batching requests from same hctx at a time.
*/
list_sort(NULL, &rq_list, sched_rq_cmp);
do {
dispatched |= blk_mq_dispatch_hctx_list(&rq_list);
} while (!list_empty(&rq_list));
} else {
dispatched = blk_mq_dispatch_rq_list(hctx, &rq_list, count);
}
if (busy)
return -EAGAIN;
return !!dispatched;
}
static int blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
{
unsigned long end = jiffies + HZ;
int ret;
do {
ret = __blk_mq_do_dispatch_sched(hctx);
if (ret != 1)
break;
if (need_resched() || time_is_before_jiffies(end)) {
blk_mq_delay_run_hw_queue(hctx, 0);
break;
}
} while (1);
return ret;
}
static struct blk_mq_ctx *blk_mq_next_ctx(struct blk_mq_hw_ctx *hctx,
struct blk_mq_ctx *ctx)
{
unsigned short idx = ctx->index_hw[hctx->type];
if (++idx == hctx->nr_ctx)
idx = 0;
return hctx->ctxs[idx];
}
/*
* Only SCSI implements .get_budget and .put_budget, and SCSI restarts
* its queue by itself in its completion handler, so we don't need to
* restart queue if .get_budget() returns BLK_STS_NO_RESOURCE.
*
* Returns -EAGAIN if hctx->dispatch was found non-empty and run_work has to
* be run again. This is necessary to avoid starving flushes.
*/
static int blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
{
struct request_queue *q = hctx->queue;
LIST_HEAD(rq_list);
struct blk_mq_ctx *ctx = READ_ONCE(hctx->dispatch_from);
int ret = 0;
struct request *rq;
do {
if (!list_empty_careful(&hctx->dispatch)) {
ret = -EAGAIN;
break;
}
if (!sbitmap_any_bit_set(&hctx->ctx_map))
break;
if (!blk_mq_get_dispatch_budget(q))
break;
rq = blk_mq_dequeue_from_ctx(hctx, ctx);
if (!rq) {
blk_mq_put_dispatch_budget(q);
/*
* We're releasing without dispatching. Holding the
* budget could have blocked any "hctx"s with the
* same queue and if we didn't dispatch then there's
* no guarantee anyone will kick the queue. Kick it
* ourselves.
*/
blk_mq_delay_run_hw_queues(q, BLK_MQ_BUDGET_DELAY);
break;
}
/*
* Now this rq owns the budget which has to be released
* if this rq won't be queued to driver via .queue_rq()
* in blk_mq_dispatch_rq_list().
*/
list_add(&rq->queuelist, &rq_list);
/* round robin for fair dispatch */
ctx = blk_mq_next_ctx(hctx, rq->mq_ctx);
} while (blk_mq_dispatch_rq_list(rq->mq_hctx, &rq_list, 1));
WRITE_ONCE(hctx->dispatch_from, ctx);
return ret;
}
static int __blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
{
struct request_queue *q = hctx->queue;
struct elevator_queue *e = q->elevator;
const bool has_sched_dispatch = e && e->type->ops.dispatch_request;
int ret = 0;
LIST_HEAD(rq_list);
/*
* If we have previous entries on our dispatch list, grab them first for
* more fair dispatch.
*/
if (!list_empty_careful(&hctx->dispatch)) {
spin_lock(&hctx->lock);
if (!list_empty(&hctx->dispatch))
list_splice_init(&hctx->dispatch, &rq_list);
spin_unlock(&hctx->lock);
}
/*
* Only ask the scheduler for requests, if we didn't have residual
* requests from the dispatch list. This is to avoid the case where
* we only ever dispatch a fraction of the requests available because
* of low device queue depth. Once we pull requests out of the IO
* scheduler, we can no longer merge or sort them. So it's best to
* leave them there for as long as we can. Mark the hw queue as
* needing a restart in that case.
*
* We want to dispatch from the scheduler if there was nothing
* on the dispatch list or we were able to dispatch from the
* dispatch list.
*/
if (!list_empty(&rq_list)) {
blk_mq_sched_mark_restart_hctx(hctx);
if (blk_mq_dispatch_rq_list(hctx, &rq_list, 0)) {
if (has_sched_dispatch)
ret = blk_mq_do_dispatch_sched(hctx);
else
ret = blk_mq_do_dispatch_ctx(hctx);
}
} else if (has_sched_dispatch) {
ret = blk_mq_do_dispatch_sched(hctx);
} else if (hctx->dispatch_busy) {
/* dequeue request one by one from sw queue if queue is busy */
ret = blk_mq_do_dispatch_ctx(hctx);
} else {
blk_mq_flush_busy_ctxs(hctx, &rq_list);
blk_mq_dispatch_rq_list(hctx, &rq_list, 0);
}
return ret;
}
void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
{
struct request_queue *q = hctx->queue;
/* RCU or SRCU read lock is needed before checking quiesced flag */
if (unlikely(blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)))
return;
hctx->run++;
/*
* A return of -EAGAIN is an indication that hctx->dispatch is not
* empty and we must run again in order to avoid starving flushes.
*/
if (__blk_mq_sched_dispatch_requests(hctx) == -EAGAIN) {
if (__blk_mq_sched_dispatch_requests(hctx) == -EAGAIN)
blk_mq_run_hw_queue(hctx, true);
}
}
bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio,
unsigned int nr_segs)
{
struct elevator_queue *e = q->elevator;
struct blk_mq_ctx *ctx;
struct blk_mq_hw_ctx *hctx;
bool ret = false;
enum hctx_type type;
if (e && e->type->ops.bio_merge)
return e->type->ops.bio_merge(q, bio, nr_segs);
ctx = blk_mq_get_ctx(q);
hctx = blk_mq_map_queue(q, bio->bi_opf, ctx);
type = hctx->type;
if (!(hctx->flags & BLK_MQ_F_SHOULD_MERGE) ||
list_empty_careful(&ctx->rq_lists[type]))
return false;
/* default per sw-queue merge */
spin_lock(&ctx->lock);
/*
* Reverse check our software queue for entries that we could
* potentially merge with. Currently includes a hand-wavy stop
* count of 8, to not spend too much time checking for merges.
*/
if (blk_bio_list_merge(q, &ctx->rq_lists[type], bio, nr_segs)) {
ctx->rq_merged++;
ret = true;
}
spin_unlock(&ctx->lock);
return ret;
}
bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq)
{
return rq_mergeable(rq) && elv_attempt_insert_merge(q, rq);
}
EXPORT_SYMBOL_GPL(blk_mq_sched_try_insert_merge);
void blk_mq_sched_request_inserted(struct request *rq)
{
trace_block_rq_insert(rq->q, rq);
}
EXPORT_SYMBOL_GPL(blk_mq_sched_request_inserted);
static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
bool has_sched,
struct request *rq)
{
/*
* dispatch flush and passthrough rq directly
*
* passthrough request has to be added to hctx->dispatch directly.
* For some reason, device may be in one situation which can't
* handle FS request, so STS_RESOURCE is always returned and the
* FS request will be added to hctx->dispatch. However passthrough
* request may be required at that time for fixing the problem. If
* passthrough request is added to scheduler queue, there isn't any
* chance to dispatch it given we prioritize requests in hctx->dispatch.
*/
if ((rq->rq_flags & RQF_FLUSH_SEQ) || blk_rq_is_passthrough(rq))
return true;
if (has_sched)
rq->rq_flags |= RQF_SORTED;
return false;
}
void blk_mq_sched_insert_request(struct request *rq, bool at_head,
bool run_queue, bool async)
{
struct request_queue *q = rq->q;
struct elevator_queue *e = q->elevator;
struct blk_mq_ctx *ctx = rq->mq_ctx;
struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
WARN_ON(e && (rq->tag != BLK_MQ_NO_TAG));
if (blk_mq_sched_bypass_insert(hctx, !!e, rq)) {
/*
* Firstly normal IO request is inserted to scheduler queue or
* sw queue, meantime we add flush request to dispatch queue(
* hctx->dispatch) directly and there is at most one in-flight
* flush request for each hw queue, so it doesn't matter to add
* flush request to tail or front of the dispatch queue.
*
* Secondly in case of NCQ, flush request belongs to non-NCQ
* command, and queueing it will fail when there is any
* in-flight normal IO request(NCQ command). When adding flush
* rq to the front of hctx->dispatch, it is easier to introduce
* extra time to flush rq's latency because of S_SCHED_RESTART
* compared with adding to the tail of dispatch queue, then
* chance of flush merge is increased, and less flush requests
* will be issued to controller. It is observed that ~10% time
* is saved in blktests block/004 on disk attached to AHCI/NCQ
* drive when adding flush rq to the front of hctx->dispatch.
*
* Simply queue flush rq to the front of hctx->dispatch so that
* intensive flush workloads can benefit in case of NCQ HW.
*/
at_head = (rq->rq_flags & RQF_FLUSH_SEQ) ? true : at_head;
blk_mq_request_bypass_insert(rq, at_head, false);
goto run;
}
if (e && e->type->ops.insert_requests) {
LIST_HEAD(list);
list_add(&rq->queuelist, &list);
e->type->ops.insert_requests(hctx, &list, at_head);
} else {
spin_lock(&ctx->lock);
__blk_mq_insert_request(hctx, rq, at_head);
spin_unlock(&ctx->lock);
}
run:
if (run_queue)
blk_mq_run_hw_queue(hctx, async);
}
void blk_mq_sched_insert_requests(struct blk_mq_hw_ctx *hctx,
struct blk_mq_ctx *ctx,
struct list_head *list, bool run_queue_async)
{
struct elevator_queue *e;
struct request_queue *q = hctx->queue;
/*
* blk_mq_sched_insert_requests() is called from flush plug
* context only, and hold one usage counter to prevent queue
* from being released.
*/
percpu_ref_get(&q->q_usage_counter);
e = hctx->queue->elevator;
if (e && e->type->ops.insert_requests)
e->type->ops.insert_requests(hctx, list, false);
else {
/*
* try to issue requests directly if the hw queue isn't
* busy in case of 'none' scheduler, and this way may save
* us one extra enqueue & dequeue to sw queue.
*/
if (!hctx->dispatch_busy && !e && !run_queue_async) {
blk_mq_try_issue_list_directly(hctx, list);
if (list_empty(list))
goto out;
}
blk_mq_insert_requests(hctx, ctx, list);
}
blk_mq_run_hw_queue(hctx, run_queue_async);
out:
percpu_ref_put(&q->q_usage_counter);
}
static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
struct blk_mq_hw_ctx *hctx,
unsigned int hctx_idx)
{
unsigned int flags = set->flags & ~BLK_MQ_F_TAG_HCTX_SHARED;
if (hctx->sched_tags) {
blk_mq_free_rqs(set, hctx->sched_tags, hctx_idx);
blk_mq_free_rq_map(hctx->sched_tags, flags);
hctx->sched_tags = NULL;
}
}
static int blk_mq_sched_alloc_tags(struct request_queue *q,
struct blk_mq_hw_ctx *hctx,
unsigned int hctx_idx)
{
struct blk_mq_tag_set *set = q->tag_set;
/* Clear HCTX_SHARED so tags are init'ed */
unsigned int flags = set->flags & ~BLK_MQ_F_TAG_HCTX_SHARED;
int ret;
hctx->sched_tags = blk_mq_alloc_rq_map(set, hctx_idx, q->nr_requests,
set->reserved_tags, flags);
if (!hctx->sched_tags)
return -ENOMEM;
ret = blk_mq_alloc_rqs(set, hctx->sched_tags, hctx_idx, q->nr_requests);
if (ret)
blk_mq_sched_free_tags(set, hctx, hctx_idx);
return ret;
}
/* called in queue's release handler, tagset has gone away */
static void blk_mq_sched_tags_teardown(struct request_queue *q)
{
struct blk_mq_hw_ctx *hctx;
int i;
queue_for_each_hw_ctx(q, hctx, i) {
/* Clear HCTX_SHARED so tags are freed */
unsigned int flags = hctx->flags & ~BLK_MQ_F_TAG_HCTX_SHARED;
if (hctx->sched_tags) {
blk_mq_free_rq_map(hctx->sched_tags, flags);
hctx->sched_tags = NULL;
}
}
}
int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
{
struct blk_mq_hw_ctx *hctx;
struct elevator_queue *eq;
unsigned int i;
int ret;
if (!e) {
q->elevator = NULL;
q->nr_requests = q->tag_set->queue_depth;
return 0;
}
/*
* Default to double of smaller one between hw queue_depth and 128,
* since we don't split into sync/async like the old code did.
* Additionally, this is a per-hw queue depth.
*/
q->nr_requests = 2 * min_t(unsigned int, q->tag_set->queue_depth,
BLKDEV_MAX_RQ);
queue_for_each_hw_ctx(q, hctx, i) {
ret = blk_mq_sched_alloc_tags(q, hctx, i);
if (ret)
goto err;
}
ret = e->ops.init_sched(q, e);
if (ret)
goto err;
blk_mq_debugfs_register_sched(q);
queue_for_each_hw_ctx(q, hctx, i) {
if (e->ops.init_hctx) {
ret = e->ops.init_hctx(hctx, i);
if (ret) {
eq = q->elevator;
blk_mq_sched_free_requests(q);
blk_mq_exit_sched(q, eq);
kobject_put(&eq->kobj);
return ret;
}
}
blk_mq_debugfs_register_sched_hctx(q, hctx);
}
return 0;
err:
blk_mq_sched_free_requests(q);
blk_mq_sched_tags_teardown(q);
q->elevator = NULL;
return ret;
}
/*
* called in either blk_queue_cleanup or elevator_switch, tagset
* is required for freeing requests
*/
void blk_mq_sched_free_requests(struct request_queue *q)
{
struct blk_mq_hw_ctx *hctx;
int i;
queue_for_each_hw_ctx(q, hctx, i) {
if (hctx->sched_tags)
blk_mq_free_rqs(q->tag_set, hctx->sched_tags, i);
}
}
void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e)
{
struct blk_mq_hw_ctx *hctx;
unsigned int i;
queue_for_each_hw_ctx(q, hctx, i) {
blk_mq_debugfs_unregister_sched_hctx(hctx);
if (e->type->ops.exit_hctx && hctx->sched_data) {
e->type->ops.exit_hctx(hctx, i);
hctx->sched_data = NULL;
}
}
blk_mq_debugfs_unregister_sched(q);
if (e->type->ops.exit_sched)
e->type->ops.exit_sched(e);
blk_mq_sched_tags_teardown(q);
q->elevator = NULL;
}