-----BEGIN PGP SIGNATURE----- iQIzBAABCAAdFiEEZH8oZUiU471FcZm+ONu9yGCSaT4FAmQMoPMACgkQONu9yGCS aT4a1Q//WHnQOEgEykqbHMree6UQD5F6crB0kUcJTSDB5lblviYGxpOadw2j+670 AGsFg00cm8Sb8p78v3SA+X2UzScGnY5Cwhe+B/JucUSr+4rDlZ9FjOGXbKdlYFc2 sOTp9j/9KrETf0K/VVuCa48rKBPUFvrT7pZUAblZ0vVmk6cSzPW/1iBa4W6Ho6Ec LxqNzCDtyTWX0JCzdv5DvjW7WALvPiEiw8CX8+psZTD8RHdAjtnW2DKp8ZnznzJS YVBf2ulsD1g3zKEqDm5nMcUyN3fSEWci97bmbEzIeMTULfsj+aQF5a7JoXIkj7Yb QIvZ1fG6RSviVplt5SoT5ucDN2cGqLt7+4b3v6DKQX1dMTDrAPdU+T1VU0LRxB6h 5M3ZZ925ktJu2YTmKi4QvgP01ZVJv0dNWytbmAnIVvJRGY3gHQt5tx0W2lnQdHE9 mJeW2MXcLKeho7d5p3wRl6yEWAJuAoioZCd95NPyNnVZMMhYRu6iTIIzY8EdNegQ 5ve9Rsda9uobvWRCWefyS0pHvuJ2HJrJONnU92MHKSojEC9oAjURvRGWpXYcQFM/ EiywE1oBRA6NrGI7BomAH6khVoTi01yBsQ0QEt30mTOuyxa6j/oR9iEsNv7bmjZC SoApcWDPNy6RpHX7SDtch0Qj1l7YfYDNNj66Y94o5E70eHebU9Q= =vHOd -----END PGP SIGNATURE----- Merge 5.10.173 into android12-5.10-lts Changes in 5.10.173 HID: asus: Remove check for same LED brightness on set HID: asus: use spinlock to protect concurrent accesses HID: asus: use spinlock to safely schedule workers powerpc/mm: Rearrange if-else block to avoid clang warning ARM: OMAP2+: Fix memory leak in realtime_counter_init() arm64: dts: qcom: qcs404: use symbol names for PCIe resets ARM: zynq: Fix refcount leak in zynq_early_slcr_init arm64: dts: mediatek: mt8183: Fix systimer 13 MHz clock description arm64: dts: qcom: sdm845-db845c: fix audio codec interrupt pin name arm64: dts: qcom: sc7180: correct SPMI bus address cells arm64: dts: meson-gx: Fix Ethernet MAC address unit name arm64: dts: meson-g12a: Fix internal Ethernet PHY unit name arm64: dts: meson-gx: Fix the SCPI DVFS node name and unit address arm64: dts: qcom: ipq8074: correct USB3 QMP PHY-s clock output names arm64: dts: qcom: Fix IPQ8074 PCIe PHY nodes arm64: dts: qcom: ipq8074: fix PCIe PHY serdes size arm64: dts: qcom: ipq8074: fix Gen3 PCIe QMP PHY arm64: dts: qcom: ipq8074: correct Gen2 PCIe ranges arm64: dts: qcom: ipq8074: fix Gen3 PCIe node arm64: dts: qcom: ipq8074: 
correct PCIe QMP PHY output clock names arm64: dts: meson: remove CPU opps below 1GHz for G12A boards ARM: OMAP1: call platform_device_put() in error case in omap1_dm_timer_init() ARM: s3c: fix s3c64xx_set_timer_source prototype arm64: dts: ti: k3-j7200: Fix wakeup pinmux range ARM: dts: exynos: correct wr-active property in Exynos3250 Rinato ARM: imx: Call ida_simple_remove() for ida_simple_get arm64: dts: amlogic: meson-gx: fix SCPI clock dvfs node name arm64: dts: amlogic: meson-axg: fix SCPI clock dvfs node name arm64: dts: amlogic: meson-gx: add missing SCPI sensors compatible arm64: dts: amlogic: meson-gxl-s905d-sml5442tw: drop invalid clock-names property arm64: dts: amlogic: meson-gx: add missing unit address to rng node name arm64: dts: amlogic: meson-gxl: add missing unit address to eth-phy-mux node name arm64: dts: amlogic: meson-gx-libretech-pc: fix update button name arm64: dts: amlogic: meson-gxl-s905d-phicomm-n1: fix led node name arm64: dts: amlogic: meson-gxbb-kii-pro: fix led node name arm64: dts: renesas: beacon-renesom: Fix gpio expander reference ARM: dts: sun8i: nanopi-duo2: Fix regulator GPIO reference ARM: dts: imx7s: correct iomuxc gpr mux controller cells arm64: dts: mediatek: mt7622: Add missing pwm-cells to pwm node blk-mq: avoid sleep in blk_mq_alloc_request_hctx blk-mq: remove stale comment for blk_mq_sched_mark_restart_hctx blk-mq: correct stale comment of .get_budget s390/dasd: Prepare for additional path event handling s390/dasd: Fix potential memleak in dasd_eckd_init() sched/deadline,rt: Remove unused parameter from pick_next_[rt|dl]_entity() sched/rt: pick_next_rt_entity(): check list_entry x86/perf/zhaoxin: Add stepping check for ZXC block: bio-integrity: Copy flags when bio_integrity_payload is cloned wifi: rsi: Fix memory leak in rsi_coex_attach() wifi: rtlwifi: rtl8821ae: don't call kfree_skb() under spin_lock_irqsave() wifi: rtlwifi: rtl8188ee: don't call kfree_skb() under spin_lock_irqsave() wifi: rtlwifi: rtl8723be: don't 
call kfree_skb() under spin_lock_irqsave() wifi: iwlegacy: common: don't call dev_kfree_skb() under spin_lock_irqsave() wifi: libertas: fix memory leak in lbs_init_adapter() wifi: rtl8xxxu: don't call dev_kfree_skb() under spin_lock_irqsave() rtlwifi: fix -Wpointer-sign warning wifi: rtlwifi: Fix global-out-of-bounds bug in _rtl8812ae_phy_set_txpower_limit() libbpf: Fix btf__align_of() by taking into account field offsets wifi: ipw2x00: don't call dev_kfree_skb() under spin_lock_irqsave() wifi: ipw2200: fix memory leak in ipw_wdev_init() wifi: wilc1000: fix potential memory leak in wilc_mac_xmit() wifi: brcmfmac: fix potential memory leak in brcmf_netdev_start_xmit() wifi: brcmfmac: unmap dma buffer in brcmf_msgbuf_alloc_pktid() wifi: libertas_tf: don't call kfree_skb() under spin_lock_irqsave() wifi: libertas: if_usb: don't call kfree_skb() under spin_lock_irqsave() wifi: libertas: main: don't call kfree_skb() under spin_lock_irqsave() wifi: libertas: cmdresp: don't call kfree_skb() under spin_lock_irqsave() wifi: wl3501_cs: don't call kfree_skb() under spin_lock_irqsave() crypto: x86/ghash - fix unaligned access in ghash_setkey() ACPICA: Drop port I/O validation for some regions genirq: Fix the return type of kstat_cpu_irqs_sum() rcu-tasks: Improve comments explaining tasks_rcu_exit_srcu purpose rcu-tasks: Remove preemption disablement around srcu_read_[un]lock() calls rcu-tasks: Fix synchronize_rcu_tasks() VS zap_pid_ns_processes() lib/mpi: Fix buffer overrun when SG is too long crypto: ccp: Use the stack for small SEV command buffers crypto: ccp: Use the stack and common buffer for status commands crypto: ccp - Use kzalloc for sev ioctl interfaces to prevent kernel memory leak crypto: ccp - Avoid page allocation failure warning for SEV_GET_ID2 ACPICA: nsrepair: handle cases without a return value correctly thermal/drivers/tsens: Drop msm8976-specific defines thermal/drivers/qcom/tsens_v1: Enable sensor 3 on MSM8976 thermal/drivers/tsens: Add compat string for 
the qcom,msm8960 thermal/drivers/tsens: Sort out msm8976 vs msm8956 data wifi: rtl8xxxu: Fix memory leaks with RTL8723BU, RTL8192EU wifi: orinoco: check return value of hermes_write_wordrec() wifi: ath9k: htc_hst: free skb in ath9k_htc_rx_msg() if there is no callback function ath9k: hif_usb: simplify if-if to if-else ath9k: htc: clean up statistics macros wifi: ath9k: hif_usb: clean up skbs if ath9k_hif_usb_rx_stream() fails wifi: ath9k: Fix potential stack-out-of-bounds write in ath9k_wmi_rsp_callback() wifi: ath11k: Fix memory leak in ath11k_peer_rx_frag_setup wifi: cfg80211: Fix extended KCK key length check in nl80211_set_rekey_data() ACPI: battery: Fix missing NUL-termination with large strings crypto: ccp - Failure on re-initialization due to duplicate sysfs filename crypto: essiv - Handle EBUSY correctly crypto: seqiv - Handle EBUSY correctly powercap: fix possible name leak in powercap_register_zone() x86/cpu: Init AP exception handling from cpu_init_secondary() x86/microcode: Replace deprecated CPU-hotplug functions. 
x86: Mark stop_this_cpu() __noreturn x86/microcode: Rip out the OLD_INTERFACE x86/microcode: Default-disable late loading x86/microcode: Print previous version of microcode after reload x86/microcode: Add a parameter to microcode_check() to store CPU capabilities x86/microcode: Check CPU capabilities after late microcode update correctly x86/microcode: Adjust late loading result reporting message net: ethernet: ti: am65-cpsw: fix tx csum offload for multi mac mode net: ethernet: ti: am65-cpsw: handle deferred probe with dev_err_probe() net: ethernet: ti: add missing of_node_put before return crypto: xts - Handle EBUSY correctly leds: led-class: Add missing put_device() to led_put() crypto: ccp - Refactor out sev_fw_alloc() crypto: ccp - Flush the SEV-ES TMR memory before giving it to firmware bpftool: profile online CPUs instead of possible net/mlx5: Enhance debug print in page allocation failure irqchip: Fix refcount leak in platform_irqchip_probe irqchip/alpine-msi: Fix refcount leak in alpine_msix_init_domains irqchip/irq-mvebu-gicp: Fix refcount leak in mvebu_gicp_probe irqchip/ti-sci: Fix refcount leak in ti_sci_intr_irq_domain_probe s390/vmem: fix empty page tables cleanup under KASAN net: add sock_init_data_uid() tun: tun_chr_open(): correctly initialize socket uid tap: tap_open(): correctly initialize socket uid OPP: fix error checking in opp_migrate_dentry() Bluetooth: L2CAP: Fix potential user-after-free libbpf: Fix alen calculation in libbpf_nla_dump_errormsg() rds: rds_rm_zerocopy_callback() correct order for list_add_tail() crypto: rsa-pkcs1pad - Use akcipher_request_complete m68k: /proc/hardware should depend on PROC_FS RISC-V: time: initialize hrtimer based broadcast clock event device wifi: iwl3945: Add missing check for create_singlethread_workqueue wifi: iwl4965: Add missing check for create_singlethread_workqueue() wifi: mwifiex: fix loop iterator in mwifiex_update_ampdu_txwinsize() selftests/bpf: Fix out-of-srctree build crypto: crypto4xx - Call 
dma_unmap_page when done wifi: mac80211: make rate u32 in sta_set_rate_info_rx() thermal/drivers/hisi: Drop second sensor hi3660 can: esd_usb: Move mislocated storage of SJA1000_ECC_SEG bits in case of a bus error bpf: Fix global subprog context argument resolution logic irqchip/irq-brcmstb-l2: Set IRQ_LEVEL for level triggered interrupts irqchip/irq-bcm7120-l2: Set IRQ_LEVEL for level triggered interrupts selftests/net: Interpret UDP_GRO cmsg data as an int value l2tp: Avoid possible recursive deadlock in l2tp_tunnel_register() net: bcmgenet: fix MoCA LED control selftest: fib_tests: Always cleanup before exit sefltests: netdevsim: wait for devlink instance after netns removal drm: Fix potential null-ptr-deref due to drmm_mode_config_init() drm/fourcc: Add missing big-endian XRGB1555 and RGB565 formats drm: mxsfb: DRM_MXSFB should depend on ARCH_MXS || ARCH_MXC drm/bridge: megachips: Fix error handling in i2c_register_driver() drm/vkms: Fix null-ptr-deref in vkms_release() drm/vc4: dpi: Add option for inverting pixel clock and output enable drm/vc4: dpi: Fix format mapping for RGB565 drm: tidss: Fix pixel format definition gpu: ipu-v3: common: Add of_node_put() for reference returned by of_graph_get_port_by_id() drm/msm/hdmi: Add missing check for alloc_ordered_workqueue pinctrl: qcom: pinctrl-msm8976: Correct function names for wcss pins pinctrl: stm32: Fix refcount leak in stm32_pctrl_get_irq_domain pinctrl: rockchip: add support for rk3568 pinctrl: rockchip: do coding style for mux route struct pinctrl: rockchip: Fix refcount leak in rockchip_pinctrl_parse_groups drm/vc4: hvs: Set AXI panic modes drm/vc4: hvs: Fix colour order for xRGB1555 on HVS5 drm/vc4: hdmi: Correct interlaced timings again ASoC: fsl_sai: initialize is_dsp_mode flag drm/msm/adreno: Fix null ptr access in adreno_gpu_cleanup() ALSA: hda/ca0132: minor fix for allocation size drm/msm/dpu: Disallow unallocated resources to be returned drm/bridge: lt9611: fix sleep mode setup drm/bridge: lt9611: 
fix HPD reenablement drm/bridge: lt9611: fix polarity programming drm/bridge: lt9611: fix programming of video modes drm/bridge: lt9611: fix clock calculation drm/bridge: lt9611: pass a pointer to the of node drm/mipi-dsi: Fix byte order of 16-bit DCS set/get brightness drm/msm: use strscpy instead of strncpy drm/msm/dpu: Add check for cstate drm/msm/dpu: Add check for pstates drm/msm/mdp5: Add check for kzalloc pinctrl: bcm2835: Remove of_node_put() in bcm2835_of_gpio_ranges_fallback() pinctrl: mediatek: Initialize variable pullen and pullup to zero pinctrl: mediatek: Initialize variable *buf to zero gpu: host1x: Don't skip assigning syncpoints to channels drm/mediatek: dsi: Reduce the time of dsi from LP11 to sending cmd drm/mediatek: Use NULL instead of 0 for NULL pointer drm/mediatek: Drop unbalanced obj unref drm/mediatek: mtk_drm_crtc: Add checks for devm_kcalloc drm/mediatek: Clean dangling pointer on bind error path ASoC: soc-compress.c: fixup private_data on snd_soc_new_compress() gpio: vf610: connect GPIO label to dev name spi: dw_bt1: fix MUX_MMIO dependencies ASoC: mchp-spdifrx: fix controls which rely on rsr register ASoC: atmel: fix spelling mistakes ASoC: mchp-spdifrx: fix return value in case completion times out ASoC: mchp-spdifrx: fix controls that works with completion mechanism ASoC: mchp-spdifrx: disable all interrupts in mchp_spdifrx_dai_remove() ASoC: mchp-spdifrx: Fix uninitialized use of mr in mchp_spdifrx_hw_params() ASoC: dt-bindings: meson: fix gx-card codec node regex hwmon: (ltc2945) Handle error case in ltc2945_value_store drm/amdgpu: fix enum odm_combine_mode mismatch scsi: mpt3sas: Fix a memory leak scsi: aic94xx: Add missing check for dma_map_single() spi: bcm63xx-hsspi: fix pm_runtime spi: bcm63xx-hsspi: Fix multi-bit mode setting hwmon: (mlxreg-fan) Return zero speed for broken fan ASoC: tlv320adcx140: fix 'ti,gpio-config' DT property init dm: remove flush_scheduled_work() during local_exit() NFS: Fix up handling of outstanding 
layoutcommit in nfs_update_inode() NFSv4: keep state manager thread active if swap is enabled nfs4trace: fix state manager flag printing NFS: fix disabling of swap spi: synquacer: Fix timeout handling in synquacer_spi_transfer_one() ASoC: soc-dapm.h: fixup warning struct snd_pcm_substream not declared HID: bigben: use spinlock to protect concurrent accesses HID: bigben_worker() remove unneeded check on report_field HID: bigben: use spinlock to safely schedule workers hid: bigben_probe(): validate report count nfsd: fix race to check ls_layouts cifs: Fix lost destroy smbd connection when MR allocate failed cifs: Fix warning and UAF when destroy the MR list gfs2: jdata writepage fix perf llvm: Fix inadvertent file creation leds: led-core: Fix refcount leak in of_led_get() perf tools: Fix auto-complete on aarch64 sparc: allow PM configs for sparc32 COMPILE_TEST selftests/ftrace: Fix bash specific "==" operator printf: fix errname.c list objtool: add UACCESS exceptions for __tsan_volatile_read/write mfd: pcf50633-adc: Fix potential memleak in pcf50633_adc_async_read() clk: qcom: gcc-qcs404: disable gpll[04]_out_aux parents clk: qcom: gcc-qcs404: fix names of the DSI clocks used as parents RISC-V: fix funct4 definition for c.jalr in parse_asm.h mtd: rawnand: sunxi: Fix the size of the last OOB region Input: iqs269a - drop unused device node references Input: iqs269a - increase interrupt handler return delay Input: iqs269a - configure device with a single block write linux/kconfig.h: replace IF_ENABLED() with PTR_IF() in <linux/kernel.h> clk: renesas: cpg-mssr: Fix use after free if cpg_mssr_common_init() failed clk: renesas: cpg-mssr: Remove superfluous check in resume code clk: imx: avoid memory leak Input: ads7846 - don't report pressure for ads7845 Input: ads7846 - convert to full duplex Input: ads7846 - convert to one message Input: ads7846 - always set last command to PWRDOWN Input: ads7846 - don't check penirq immediately for 7845 clk: qcom: gpucc-sc7180: fix 
clk_dis_wait being programmed for CX GDSC clk: qcom: gpucc-sdm845: fix clk_dis_wait being programmed for CX GDSC powerpc/powernv/ioda: Skip unallocated resources when mapping to PE clk: Honor CLK_OPS_PARENT_ENABLE in clk_core_is_enabled() powerpc/perf/hv-24x7: add missing RTAS retry status handling powerpc/pseries/lpar: add missing RTAS retry status handling powerpc/pseries/lparcfg: add missing RTAS retry status handling powerpc/rtas: make all exports GPL powerpc/rtas: ensure 4KB alignment for rtas_data_buf powerpc/eeh: Small refactor of eeh_handle_normal_event() powerpc/eeh: Set channel state after notifying the drivers MIPS: SMP-CPS: fix build error when HOTPLUG_CPU not set MIPS: vpe-mt: drop physical_memsize vdpa/mlx5: Don't clear mr struct on destroy MR alpha/boot/tools/objstrip: fix the check for ELF header Input: iqs269a - do not poll during suspend or resume Input: iqs269a - do not poll during ATI remoteproc: qcom_q6v5_mss: Use a carveout to authenticate modem headers media: ti: cal: fix possible memory leak in cal_ctx_create() media: platform: ti: Add missing check for devm_regulator_get powerpc: Remove linker flag from KBUILD_AFLAGS builddeb: clean generated package content media: max9286: Fix memleak in max9286_v4l2_register() media: ov2740: Fix memleak in ov2740_init_controls() media: ov5675: Fix memleak in ov5675_init_controls() media: i2c: ov772x: Fix memleak in ov772x_probe() media: i2c: imx219: remove redundant writes media: i2c: imx219: Split common registers from mode tables media: i2c: imx219: Fix binning for RAW8 capture media: rc: Fix use-after-free bugs caused by ene_tx_irqsim() media: i2c: ov7670: 0 instead of -EINVAL was returned media: usb: siano: Fix use after free bugs caused by do_submit_urb media: saa7134: Use video_unregister_device for radio_dev rpmsg: glink: Avoid infinite loop on intent for missing channel udf: Define EFSCORRUPTED error code ARM: dts: exynos: Use Exynos5420 compatible for the MIPI video phy blk-iocost: fix divide by 
0 error in calc_lcoefs() wifi: ath9k: Fix use-after-free in ath9k_hif_usb_disconnect() wifi: brcmfmac: Fix potential stack-out-of-bounds in brcmf_c_preinit_dcmds() rcu: Make RCU_LOCKDEP_WARN() avoid early lockdep checks rcu: Suppress smp_processor_id() complaint in synchronize_rcu_expedited_wait() rcu-tasks: Make rude RCU-Tasks work well with CPU hotplug wifi: ath11k: debugfs: fix to work with multiple PCI devices thermal: intel: Fix unsigned comparison with less than zero timers: Prevent union confusion from unexpected restart_syscall() x86/bugs: Reset speculation control settings on init wifi: brcmfmac: ensure CLM version is null-terminated to prevent stack-out-of-bounds wifi: mt7601u: fix an integer underflow inet: fix fast path in __inet_hash_connect() ice: add missing checks for PF vsi type ACPI: Don't build ACPICA with '-Os' clocksource: Suspend the watchdog temporarily when high read latency detected crypto: hisilicon: Wipe entire pool on error net: bcmgenet: Add a check for oversized packets m68k: Check syscall_trace_enter() return code wifi: mt76: dma: free rx_head in mt76_dma_rx_cleanup ACPI: video: Fix Lenovo Ideapad Z570 DMI match net/mlx5: fw_tracer: Fix debug print coda: Avoid partial allocation of sig_inputArgs uaccess: Add minimum bounds check on kernel buffer size PM: EM: fix memory leak with using debugfs_lookup() Bluetooth: btusb: Add VID:PID 13d3:3529 for Realtek RTL8821CE drm/amd/display: Fix potential null-deref in dm_resume drm/omap: dsi: Fix excessive stack usage HID: Add Mapping for System Microphone Mute drm/tiny: ili9486: Do not assume 8-bit only SPI controllers drm/radeon: free iio for atombios when driver shutdown drm: amd: display: Fix memory leakage drm/msm/dsi: Add missing check for alloc_ordered_workqueue docs/scripts/gdb: add necessary make scripts_gdb step ASoC: kirkwood: Iterate over array indexes instead of using pointer math regulator: max77802: Bounds check regulator id against opmode regulator: s5m8767: Bounds check id 
indexing into arrays gfs2: Improve gfs2_make_fs_rw error handling hwmon: (coretemp) Simplify platform device handling pinctrl: at91: use devm_kasprintf() to avoid potential leaks HID: logitech-hidpp: Don't restart communication if not necessary drm: panel-orientation-quirks: Add quirk for Lenovo IdeaPad Duet 3 10IGL5 dm thin: add cond_resched() to various workqueue loops dm cache: add cond_resched() to various workqueue loops nfsd: zero out pointers after putting nfsd_files on COPY setup error wifi: rtl8xxxu: fixing transmisison failure for rtl8192eu firmware: coreboot: framebuffer: Ignore reserved pixel color bits rtc: pm8xxx: fix set-alarm race ipmi_ssif: Rename idle state and check s390/extmem: return correct segment type in __segment_load() s390: discard .interp section s390/kprobes: fix irq mask clobbering on kprobe reenter from post_handler s390/kprobes: fix current_kprobe never cleared after kprobes reenter cifs: Fix uninitialized memory read in smb3_qfs_tcon() ARM: dts: exynos: correct HDMI phy compatible in Exynos4 hfs: fix missing hfs_bnode_get() in __hfs_bnode_create fs: hfsplus: fix UAF issue in hfsplus_put_super exfat: fix reporting fs error when reading dir beyond EOF exfat: fix unexpected EOF while reading dir exfat: redefine DIR_DELETED as the bad cluster number exfat: fix inode->i_blocks for non-512 byte sector size device f2fs: fix information leak in f2fs_move_inline_dirents() f2fs: fix cgroup writeback accounting with fs-layer encryption ocfs2: fix defrag path triggering jbd2 ASSERT ocfs2: fix non-auto defrag path not working issue udf: Truncate added extents on failed expansion udf: Do not bother merging very long extents udf: Do not update file length for failed writes to inline files udf: Preserve link count of system files udf: Detect system inodes linked into directory hierarchy udf: Fix file corruption when appending just after end of preallocated extent KVM: Destroy target device if coalesced MMIO unregistration fails KVM: x86: Inject #GP 
if WRMSR sets reserved bits in APIC Self-IPI KVM: s390: disable migration mode when dirty tracking is disabled x86/virt: Force GIF=1 prior to disabling SVM (for reboot flows) x86/crash: Disable virt in core NMI crash handler to avoid double shootdown x86/reboot: Disable virtualization in an emergency if SVM is supported x86/reboot: Disable SVM, not just VMX, when stopping CPUs x86/kprobes: Fix __recover_optprobed_insn check optimizing logic x86/kprobes: Fix arch_check_optimized_kprobe check within optimized_kprobe range x86/microcode/amd: Remove load_microcode_amd()'s bsp parameter x86/microcode/AMD: Add a @cpu parameter to the reloading functions x86/microcode/AMD: Fix mixed steppings support x86/speculation: Allow enabling STIBP with legacy IBRS Documentation/hw-vuln: Document the interaction between IBRS and STIBP brd: return 0/-error from brd_insert_page() ima: Align ima_file_mmap() parameters with mmap_file LSM hook irqdomain: Fix association race irqdomain: Fix disassociation race irqdomain: Drop bogus fwspec-mapping error handling io_uring: handle TIF_NOTIFY_RESUME when checking for task_work io_uring: mark task TASK_RUNNING before handling resume/task work io_uring: add a conditional reschedule to the IOPOLL cancelation loop io_uring/rsrc: disallow multi-source reg buffers io_uring: remove MSG_NOSIGNAL from recvmsg io_uring/poll: allow some retries for poll triggering spuriously ALSA: ice1712: Do not left ice->gpio_mutex locked in aureon_add_controls() ALSA: hda/realtek: Add quirk for HP EliteDesk 800 G6 Tower PC jbd2: fix data missing when reusing bh which is ready to be checkpointed ext4: optimize ea_inode block expansion ext4: refuse to create ea block when umounted mtd: spi-nor: Fix shift-out-of-bounds in spi_nor_set_erase_type dm: add cond_resched() to dm_wq_work() wifi: rtl8xxxu: Use a longer retry limit of 48 wifi: cfg80211: Fix use after free for wext thermal: intel: powerclamp: Fix cur_state for multi package system dm flakey: fix logic when 
corrupting a bio dm flakey: don't corrupt the zero page ARM: dts: exynos: correct TMU phandle in Exynos4210 ARM: dts: exynos: correct TMU phandle in Exynos4 ARM: dts: exynos: correct TMU phandle in Odroid XU3 family ARM: dts: exynos: correct TMU phandle in Exynos5250 ARM: dts: exynos: correct TMU phandle in Odroid XU ARM: dts: exynos: correct TMU phandle in Odroid HC1 rbd: avoid use-after-free in do_rbd_add() when rbd_dev_create() fails alpha: fix FEN fault handling dax/kmem: Fix leak of memory-hotplug resources mips: fix syscall_get_nr media: ipu3-cio2: Fix PM runtime usage_count in driver unbind remoteproc/mtk_scp: Move clk ops outside send_lock docs: gdbmacros: print newest record mm: memcontrol: deprecate charge moving mm/thp: check and bail out if page in deferred queue already ktest.pl: Give back console on Ctrt^C on monitor ktest.pl: Fix missing "end_monitor" when machine check fails ktest.pl: Add RUN_TIMEOUT option with default unlimited ring-buffer: Handle race between rb_move_tail and rb_check_pages scsi: qla2xxx: Fix link failure in NPIV environment scsi: qla2xxx: Fix DMA-API call trace on NVMe LS requests scsi: qla2xxx: Fix erroneous link down scsi: ses: Don't attach if enclosure has no components scsi: ses: Fix slab-out-of-bounds in ses_enclosure_data_process() scsi: ses: Fix possible addl_desc_ptr out-of-bounds accesses scsi: ses: Fix possible desc_ptr out-of-bounds accesses scsi: ses: Fix slab-out-of-bounds in ses_intf_remove() riscv: jump_label: Fixup unaligned arch_static_branch function PCI/PM: Observe reset delay irrespective of bridge_d3 PCI: hotplug: Allow marking devices as disconnected during bind/unbind PCI: Avoid FLR for AMD FCH AHCI adapters vfio/type1: prevent underflow of locked_vm via exec() drm/i915/quirks: Add inverted backlight quirk for HP 14-r206nv drm/radeon: Fix eDP for single-display iMac11,2 drm/edid: fix AVI infoframe aspect ratio handling arm64: dts: qcom: ipq8074: fix Gen2 PCIe QMP PHY wifi: ath9k: use proper statements in 
conditionals pinctrl: rockchip: fix mux route data for rk3568 pinctrl: rockchip: fix reading pull type on rk3568 kbuild: Port silent mode detection to future gnu make. net/sched: Retire tcindex classifier fs/jfs: fix shift exponent db_agl2size negative objtool: Fix memory leak in create_static_call_sections() pwm: sifive: Reduce time the controller lock is held pwm: sifive: Always let the first pwm_apply_state succeed pwm: stm32-lp: fix the check on arr and cmp registers update f2fs: use memcpy_{to,from}_page() where possible fs: f2fs: initialize fsdata in pagecache_write() um: vector: Fix memory leak in vector_config ubi: ensure that VID header offset + VID header size <= alloc, size ubifs: Fix build errors as symbol undefined ubifs: Rectify space budget for ubifs_symlink() if symlink is encrypted ubifs: Rectify space budget for ubifs_xrename() ubifs: Fix wrong dirty space budget for dirty inode ubifs: do_rename: Fix wrong space budget when target inode's nlink > 1 ubifs: Reserve one leb for each journal head while doing budget ubi: Fix use-after-free when volume resizing failed ubi: Fix unreferenced object reported by kmemleak in ubi_resize_volume() ubifs: Fix memory leak in alloc_wbufs() ubi: Fix possible null-ptr-deref in ubi_free_volume() ubifs: Re-statistic cleaned znode count if commit failed ubifs: dirty_cow_znode: Fix memleak in error handling path ubifs: ubifs_writepage: Mark page dirty after writing inode failed ubi: fastmap: Fix missed fm_anchor PEB in wear-leveling after disabling fastmap ubi: Fix UAF wear-leveling entry in eraseblk_count_seq_show() ubi: ubi_wl_put_peb: Fix infinite loop when wear-leveling work failed x86: um: vdso: Add '%rcx' and '%r11' to the syscall clobber list watchdog: at91sam9_wdt: use devm_request_irq to avoid missing free_irq() in error path watchdog: Fix kmemleak in watchdog_cdev_register watchdog: pcwd_usb: Fix attempting to access uninitialized memory netfilter: ctnetlink: fix possible refcount leak in 
ctnetlink_create_conntrack() netfilter: ebtables: fix table blob use-after-free ipv6: Add lwtunnel encap size of all siblings in nexthop calculation sctp: add a refcnt in sctp_stream_priorities to avoid a nested loop net: fix __dev_kfree_skb_any() vs drop monitor 9p/xen: fix version parsing 9p/xen: fix connection sequence 9p/rdma: unmap receive dma buffer in rdma_request()/post_recv() net/mlx5: Geneve, Fix handling of Geneve object id as error code nfc: fix memory leak of se_io context in nfc_genl_se_io net/sched: act_sample: fix action bind logic ARM: dts: spear320-hmi: correct STMPE GPIO compatible tcp: tcp_check_req() can be called from process context vc_screen: modify vcs_size() handling in vcs_read() rtc: sun6i: Always export the internal oscillator scsi: ipr: Work around fortify-string warning loop: loop_set_status_from_info() check before assignment ASoC: adau7118: don't disable regulators on device unbind ASoC: zl38060: Remove spurious gpiolib select ASoC: zl38060 add gpiolib dependency thermal: intel: quark_dts: fix error pointer dereference thermal: intel: BXT_PMIC: select REGMAP instead of depending on it tracing: Add NULL checks for buffer in ring_buffer_free_read_page() firmware/efi sysfb_efi: Add quirk for Lenovo IdeaPad Duet 3 bootconfig: Increase max nodes of bootconfig from 1024 to 8192 for DCC support mfd: arizona: Use pm_runtime_resume_and_get() to prevent refcnt leak IB/hfi1: Update RMT size calculation media: uvcvideo: Handle cameras with invalid descriptors media: uvcvideo: Handle errors from calls to usb_string media: uvcvideo: Quirk for autosuspend in Logitech B910 and C910 media: uvcvideo: Silence memcpy() run-time false positive warnings staging: emxx_udc: Add checks for dma_alloc_coherent() tty: fix out-of-bounds access in tty_driver_lookup_tty() tty: serial: fsl_lpuart: disable the CTS when send break signal serial: sc16is7xx: setup GPIO controller later in probe mei: bus-fixup:upon error print return values of send and receive 
tools/iio/iio_utils:fix memory leak iio: accel: mma9551_core: Prevent uninitialized variable in mma9551_read_status_word() iio: accel: mma9551_core: Prevent uninitialized variable in mma9551_read_config_word() PCI: loongson: Prevent LS7A MRRS increases usb: host: xhci: mvebu: Iterate over array indexes instead of using pointer math USB: ene_usb6250: Allocate enough memory for full object usb: uvc: Enumerate valid values for color matching usb: gadget: uvc: Make bSourceID read/write PCI: Align extra resources for hotplug bridges properly PCI: Take other bus devices into account when distributing resources kernel/fail_function: fix memory leak with using debugfs_lookup() PCI: loongson: Add more devices that need MRRS quirk PCI: Add ACS quirk for Wangxun NICs phy: rockchip-typec: Fix unsigned comparison with less than zero soundwire: cadence: Remove wasted space in response_buf soundwire: cadence: Drain the RX FIFO after an IO timeout net: tls: avoid hanging tasks on the tx_lock x86/resctrl: Apply READ_ONCE/WRITE_ONCE to task_struct.{rmid,closid} x86/resctl: fix scheduler confusion with 'current' drm/display/dp_mst: Fix down/up message handling after sink disconnect drm/display/dp_mst: Fix down message handling after a packet reception error Bluetooth: hci_sock: purge socket queues in the destruct() callback tcp: Fix listen() regression in 5.10.163 drm/virtio: Fix error code in virtio_gpu_object_shmem_init() media: uvcvideo: Provide sync and async uvc_ctrl_status_event media: uvcvideo: Fix race condition with usb_kill_urb Revert "scsi: mpt3sas: Fix return value check of dma_get_required_mask()" scsi: mpt3sas: Don't change DMA mask while reallocating pools scsi: mpt3sas: re-do lost mpt3sas DMA mask fix scsi: mpt3sas: Remove usage of dma_get_required_mask() API malidp: Fix NULL vs IS_ERR() checking usb: gadget: uvc: fix missing mutex_unlock() if kstrtou8() fails Linux 5.10.173 Change-Id: Iedcbc093feb171d48c70976d0aa99e972fac3ad1 Signed-off-by: Greg Kroah-Hartman 
<gregkh@google.com>
654 lines
17 KiB
C
654 lines
17 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* blk-mq scheduling framework
|
|
*
|
|
* Copyright (C) 2016 Jens Axboe
|
|
*/
|
|
#include <linux/kernel.h>
|
|
#include <linux/module.h>
|
|
#include <linux/blk-mq.h>
|
|
#include <linux/list_sort.h>
|
|
|
|
#include <trace/events/block.h>
|
|
|
|
#include "blk.h"
|
|
#include "blk-mq.h"
|
|
#include "blk-mq-debugfs.h"
|
|
#include "blk-mq-sched.h"
|
|
#include "blk-mq-tag.h"
|
|
#include "blk-wbt.h"
|
|
|
|
void blk_mq_sched_assign_ioc(struct request *rq)
|
|
{
|
|
struct request_queue *q = rq->q;
|
|
struct io_context *ioc;
|
|
struct io_cq *icq;
|
|
|
|
/*
|
|
* May not have an IO context if it's a passthrough request
|
|
*/
|
|
ioc = current->io_context;
|
|
if (!ioc)
|
|
return;
|
|
|
|
spin_lock_irq(&q->queue_lock);
|
|
icq = ioc_lookup_icq(ioc, q);
|
|
spin_unlock_irq(&q->queue_lock);
|
|
|
|
if (!icq) {
|
|
icq = ioc_create_icq(ioc, q, GFP_ATOMIC);
|
|
if (!icq)
|
|
return;
|
|
}
|
|
get_io_context(icq->ioc);
|
|
rq->elv.icq = icq;
|
|
}
|
|
|
|
/*
|
|
* Mark a hardware queue as needing a restart.
|
|
*/
|
|
void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
|
|
{
|
|
if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
|
|
return;
|
|
|
|
set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
|
|
}
|
|
EXPORT_SYMBOL_GPL(blk_mq_sched_mark_restart_hctx);
|
|
|
|
void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx)
|
|
{
|
|
if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
|
|
return;
|
|
clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
|
|
|
|
/*
|
|
* Order clearing SCHED_RESTART and list_empty_careful(&hctx->dispatch)
|
|
* in blk_mq_run_hw_queue(). Its pair is the barrier in
|
|
* blk_mq_dispatch_rq_list(). So dispatch code won't see SCHED_RESTART,
|
|
* meantime new request added to hctx->dispatch is missed to check in
|
|
* blk_mq_run_hw_queue().
|
|
*/
|
|
smp_mb();
|
|
|
|
blk_mq_run_hw_queue(hctx, true);
|
|
}
|
|
|
|
static int sched_rq_cmp(void *priv, struct list_head *a, struct list_head *b)
|
|
{
|
|
struct request *rqa = container_of(a, struct request, queuelist);
|
|
struct request *rqb = container_of(b, struct request, queuelist);
|
|
|
|
return rqa->mq_hctx > rqb->mq_hctx;
|
|
}
|
|
|
|
static bool blk_mq_dispatch_hctx_list(struct list_head *rq_list)
|
|
{
|
|
struct blk_mq_hw_ctx *hctx =
|
|
list_first_entry(rq_list, struct request, queuelist)->mq_hctx;
|
|
struct request *rq;
|
|
LIST_HEAD(hctx_list);
|
|
unsigned int count = 0;
|
|
|
|
list_for_each_entry(rq, rq_list, queuelist) {
|
|
if (rq->mq_hctx != hctx) {
|
|
list_cut_before(&hctx_list, rq_list, &rq->queuelist);
|
|
goto dispatch;
|
|
}
|
|
count++;
|
|
}
|
|
list_splice_tail_init(rq_list, &hctx_list);
|
|
|
|
dispatch:
|
|
return blk_mq_dispatch_rq_list(hctx, &hctx_list, count);
|
|
}
|
|
|
|
#define BLK_MQ_BUDGET_DELAY 3 /* ms units */
|
|
|
|
/*
|
|
* Only SCSI implements .get_budget and .put_budget, and SCSI restarts
|
|
* its queue by itself in its completion handler, so we don't need to
|
|
* restart queue if .get_budget() fails to get the budget.
|
|
*
|
|
* Returns -EAGAIN if hctx->dispatch was found non-empty and run_work has to
|
|
* be run again. This is necessary to avoid starving flushes.
|
|
*/
|
|
static int __blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
|
|
{
|
|
struct request_queue *q = hctx->queue;
|
|
struct elevator_queue *e = q->elevator;
|
|
bool multi_hctxs = false, run_queue = false;
|
|
bool dispatched = false, busy = false;
|
|
unsigned int max_dispatch;
|
|
LIST_HEAD(rq_list);
|
|
int count = 0;
|
|
|
|
if (hctx->dispatch_busy)
|
|
max_dispatch = 1;
|
|
else
|
|
max_dispatch = hctx->queue->nr_requests;
|
|
|
|
do {
|
|
struct request *rq;
|
|
|
|
if (e->type->ops.has_work && !e->type->ops.has_work(hctx))
|
|
break;
|
|
|
|
if (!list_empty_careful(&hctx->dispatch)) {
|
|
busy = true;
|
|
break;
|
|
}
|
|
|
|
if (!blk_mq_get_dispatch_budget(q))
|
|
break;
|
|
|
|
rq = e->type->ops.dispatch_request(hctx);
|
|
if (!rq) {
|
|
blk_mq_put_dispatch_budget(q);
|
|
/*
|
|
* We're releasing without dispatching. Holding the
|
|
* budget could have blocked any "hctx"s with the
|
|
* same queue and if we didn't dispatch then there's
|
|
* no guarantee anyone will kick the queue. Kick it
|
|
* ourselves.
|
|
*/
|
|
run_queue = true;
|
|
break;
|
|
}
|
|
|
|
/*
|
|
* Now this rq owns the budget which has to be released
|
|
* if this rq won't be queued to driver via .queue_rq()
|
|
* in blk_mq_dispatch_rq_list().
|
|
*/
|
|
list_add_tail(&rq->queuelist, &rq_list);
|
|
if (rq->mq_hctx != hctx)
|
|
multi_hctxs = true;
|
|
} while (++count < max_dispatch);
|
|
|
|
if (!count) {
|
|
if (run_queue)
|
|
blk_mq_delay_run_hw_queues(q, BLK_MQ_BUDGET_DELAY);
|
|
} else if (multi_hctxs) {
|
|
/*
|
|
* Requests from different hctx may be dequeued from some
|
|
* schedulers, such as bfq and deadline.
|
|
*
|
|
* Sort the requests in the list according to their hctx,
|
|
* dispatch batching requests from same hctx at a time.
|
|
*/
|
|
list_sort(NULL, &rq_list, sched_rq_cmp);
|
|
do {
|
|
dispatched |= blk_mq_dispatch_hctx_list(&rq_list);
|
|
} while (!list_empty(&rq_list));
|
|
} else {
|
|
dispatched = blk_mq_dispatch_rq_list(hctx, &rq_list, count);
|
|
}
|
|
|
|
if (busy)
|
|
return -EAGAIN;
|
|
return !!dispatched;
|
|
}
|
|
|
|
static int blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
|
|
{
|
|
unsigned long end = jiffies + HZ;
|
|
int ret;
|
|
|
|
do {
|
|
ret = __blk_mq_do_dispatch_sched(hctx);
|
|
if (ret != 1)
|
|
break;
|
|
if (need_resched() || time_is_before_jiffies(end)) {
|
|
blk_mq_delay_run_hw_queue(hctx, 0);
|
|
break;
|
|
}
|
|
} while (1);
|
|
|
|
return ret;
|
|
}
|
|
|
|
static struct blk_mq_ctx *blk_mq_next_ctx(struct blk_mq_hw_ctx *hctx,
|
|
struct blk_mq_ctx *ctx)
|
|
{
|
|
unsigned short idx = ctx->index_hw[hctx->type];
|
|
|
|
if (++idx == hctx->nr_ctx)
|
|
idx = 0;
|
|
|
|
return hctx->ctxs[idx];
|
|
}
|
|
|
|
/*
|
|
* Only SCSI implements .get_budget and .put_budget, and SCSI restarts
|
|
* its queue by itself in its completion handler, so we don't need to
|
|
* restart queue if .get_budget() fails to get the budget.
|
|
*
|
|
* Returns -EAGAIN if hctx->dispatch was found non-empty and run_work has to
|
|
* be run again. This is necessary to avoid starving flushes.
|
|
*/
|
|
static int blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
|
|
{
|
|
struct request_queue *q = hctx->queue;
|
|
LIST_HEAD(rq_list);
|
|
struct blk_mq_ctx *ctx = READ_ONCE(hctx->dispatch_from);
|
|
int ret = 0;
|
|
struct request *rq;
|
|
|
|
do {
|
|
if (!list_empty_careful(&hctx->dispatch)) {
|
|
ret = -EAGAIN;
|
|
break;
|
|
}
|
|
|
|
if (!sbitmap_any_bit_set(&hctx->ctx_map))
|
|
break;
|
|
|
|
if (!blk_mq_get_dispatch_budget(q))
|
|
break;
|
|
|
|
rq = blk_mq_dequeue_from_ctx(hctx, ctx);
|
|
if (!rq) {
|
|
blk_mq_put_dispatch_budget(q);
|
|
/*
|
|
* We're releasing without dispatching. Holding the
|
|
* budget could have blocked any "hctx"s with the
|
|
* same queue and if we didn't dispatch then there's
|
|
* no guarantee anyone will kick the queue. Kick it
|
|
* ourselves.
|
|
*/
|
|
blk_mq_delay_run_hw_queues(q, BLK_MQ_BUDGET_DELAY);
|
|
break;
|
|
}
|
|
|
|
/*
|
|
* Now this rq owns the budget which has to be released
|
|
* if this rq won't be queued to driver via .queue_rq()
|
|
* in blk_mq_dispatch_rq_list().
|
|
*/
|
|
list_add(&rq->queuelist, &rq_list);
|
|
|
|
/* round robin for fair dispatch */
|
|
ctx = blk_mq_next_ctx(hctx, rq->mq_ctx);
|
|
|
|
} while (blk_mq_dispatch_rq_list(rq->mq_hctx, &rq_list, 1));
|
|
|
|
WRITE_ONCE(hctx->dispatch_from, ctx);
|
|
return ret;
|
|
}
|
|
|
|
static int __blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
|
|
{
|
|
struct request_queue *q = hctx->queue;
|
|
struct elevator_queue *e = q->elevator;
|
|
const bool has_sched_dispatch = e && e->type->ops.dispatch_request;
|
|
int ret = 0;
|
|
LIST_HEAD(rq_list);
|
|
|
|
/*
|
|
* If we have previous entries on our dispatch list, grab them first for
|
|
* more fair dispatch.
|
|
*/
|
|
if (!list_empty_careful(&hctx->dispatch)) {
|
|
spin_lock(&hctx->lock);
|
|
if (!list_empty(&hctx->dispatch))
|
|
list_splice_init(&hctx->dispatch, &rq_list);
|
|
spin_unlock(&hctx->lock);
|
|
}
|
|
|
|
/*
|
|
* Only ask the scheduler for requests, if we didn't have residual
|
|
* requests from the dispatch list. This is to avoid the case where
|
|
* we only ever dispatch a fraction of the requests available because
|
|
* of low device queue depth. Once we pull requests out of the IO
|
|
* scheduler, we can no longer merge or sort them. So it's best to
|
|
* leave them there for as long as we can. Mark the hw queue as
|
|
* needing a restart in that case.
|
|
*
|
|
* We want to dispatch from the scheduler if there was nothing
|
|
* on the dispatch list or we were able to dispatch from the
|
|
* dispatch list.
|
|
*/
|
|
if (!list_empty(&rq_list)) {
|
|
blk_mq_sched_mark_restart_hctx(hctx);
|
|
if (blk_mq_dispatch_rq_list(hctx, &rq_list, 0)) {
|
|
if (has_sched_dispatch)
|
|
ret = blk_mq_do_dispatch_sched(hctx);
|
|
else
|
|
ret = blk_mq_do_dispatch_ctx(hctx);
|
|
}
|
|
} else if (has_sched_dispatch) {
|
|
ret = blk_mq_do_dispatch_sched(hctx);
|
|
} else if (hctx->dispatch_busy) {
|
|
/* dequeue request one by one from sw queue if queue is busy */
|
|
ret = blk_mq_do_dispatch_ctx(hctx);
|
|
} else {
|
|
blk_mq_flush_busy_ctxs(hctx, &rq_list);
|
|
blk_mq_dispatch_rq_list(hctx, &rq_list, 0);
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
|
|
{
|
|
struct request_queue *q = hctx->queue;
|
|
|
|
/* RCU or SRCU read lock is needed before checking quiesced flag */
|
|
if (unlikely(blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)))
|
|
return;
|
|
|
|
hctx->run++;
|
|
|
|
/*
|
|
* A return of -EAGAIN is an indication that hctx->dispatch is not
|
|
* empty and we must run again in order to avoid starving flushes.
|
|
*/
|
|
if (__blk_mq_sched_dispatch_requests(hctx) == -EAGAIN) {
|
|
if (__blk_mq_sched_dispatch_requests(hctx) == -EAGAIN)
|
|
blk_mq_run_hw_queue(hctx, true);
|
|
}
|
|
}
|
|
|
|
bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio,
|
|
unsigned int nr_segs)
|
|
{
|
|
struct elevator_queue *e = q->elevator;
|
|
struct blk_mq_ctx *ctx;
|
|
struct blk_mq_hw_ctx *hctx;
|
|
bool ret = false;
|
|
enum hctx_type type;
|
|
|
|
if (e && e->type->ops.bio_merge)
|
|
return e->type->ops.bio_merge(q, bio, nr_segs);
|
|
|
|
ctx = blk_mq_get_ctx(q);
|
|
hctx = blk_mq_map_queue(q, bio->bi_opf, ctx);
|
|
type = hctx->type;
|
|
if (!(hctx->flags & BLK_MQ_F_SHOULD_MERGE) ||
|
|
list_empty_careful(&ctx->rq_lists[type]))
|
|
return false;
|
|
|
|
/* default per sw-queue merge */
|
|
spin_lock(&ctx->lock);
|
|
/*
|
|
* Reverse check our software queue for entries that we could
|
|
* potentially merge with. Currently includes a hand-wavy stop
|
|
* count of 8, to not spend too much time checking for merges.
|
|
*/
|
|
if (blk_bio_list_merge(q, &ctx->rq_lists[type], bio, nr_segs)) {
|
|
ctx->rq_merged++;
|
|
ret = true;
|
|
}
|
|
|
|
spin_unlock(&ctx->lock);
|
|
|
|
return ret;
|
|
}
|
|
|
|
bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq)
|
|
{
|
|
return rq_mergeable(rq) && elv_attempt_insert_merge(q, rq);
|
|
}
|
|
EXPORT_SYMBOL_GPL(blk_mq_sched_try_insert_merge);
|
|
|
|
void blk_mq_sched_request_inserted(struct request *rq)
|
|
{
|
|
trace_block_rq_insert(rq->q, rq);
|
|
}
|
|
EXPORT_SYMBOL_GPL(blk_mq_sched_request_inserted);
|
|
|
|
/*
 * Decide whether rq has to skip the scheduler / software queue.
 *
 * Returns true for flush-sequence and passthrough requests.  For any other
 * request RQF_SORTED is set when has_sched is true, and false is returned.
 * hctx is not used.
 */
static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
				       bool has_sched,
				       struct request *rq)
{
	/*
	 * Flush and passthrough requests are dispatched directly.
	 *
	 * A passthrough request has to be added to hctx->dispatch directly.
	 * For some reason, the device may be in a situation in which it
	 * can't handle FS requests, so STS_RESOURCE is always returned and
	 * the FS request will be added to hctx->dispatch.  However, a
	 * passthrough request may be required at that time for fixing the
	 * problem.  If the passthrough request were added to the scheduler
	 * queue, there would be no chance to dispatch it, given that
	 * requests in hctx->dispatch are prioritized.
	 */
	const bool bypass = (rq->rq_flags & RQF_FLUSH_SEQ) ||
			    blk_rq_is_passthrough(rq);

	if (!bypass && has_sched)
		rq->rq_flags |= RQF_SORTED;

	return bypass;
}
|
|
|
|
/*
 * Insert a single request.
 *
 * Requests selected by blk_mq_sched_bypass_insert() go straight to the
 * dispatch list; otherwise the request is given to the elevator's
 * insert_requests hook or, without one, to the software queue under
 * ctx->lock.  When run_queue is true the hardware queue is run afterwards,
 * asynchronously if async is true.
 */
void blk_mq_sched_insert_request(struct request *rq, bool at_head,
				 bool run_queue, bool async)
{
	struct request_queue *q = rq->q;
	struct elevator_queue *e = q->elevator;
	struct blk_mq_ctx *ctx = rq->mq_ctx;
	struct blk_mq_hw_ctx *hctx = rq->mq_hctx;

	WARN_ON(e && (rq->tag != BLK_MQ_NO_TAG));

	if (blk_mq_sched_bypass_insert(hctx, e != NULL, rq)) {
		/*
		 * Firstly, normal IO requests are inserted into the scheduler
		 * queue or sw queue, while flush requests are added to the
		 * dispatch queue (hctx->dispatch) directly, and there is at
		 * most one in-flight flush request per hw queue, so it does
		 * not matter whether a flush request is added to the tail or
		 * the front of the dispatch queue.
		 *
		 * Secondly, in case of NCQ, a flush request is a non-NCQ
		 * command, and queueing it will fail while any normal IO
		 * request (NCQ command) is in flight.  Adding the flush rq to
		 * the front of hctx->dispatch makes it easier to introduce
		 * extra time into the flush rq's latency because of
		 * S_SCHED_RESTART, compared with adding it to the tail; the
		 * chance of flush merge is then increased and fewer flush
		 * requests are issued to the controller.  It is observed that
		 * ~10% time is saved in blktests block/004 on a disk attached
		 * to an AHCI/NCQ drive when adding the flush rq to the front
		 * of hctx->dispatch.
		 *
		 * Simply queue the flush rq to the front of hctx->dispatch so
		 * that intensive flush workloads can benefit in case of NCQ
		 * HW.
		 */
		if (rq->rq_flags & RQF_FLUSH_SEQ)
			at_head = true;
		blk_mq_request_bypass_insert(rq, at_head, false);
	} else if (e && e->type->ops.insert_requests) {
		LIST_HEAD(list);

		/* The hook takes a list, so wrap rq in a one-entry list. */
		list_add(&rq->queuelist, &list);
		e->type->ops.insert_requests(hctx, &list, at_head);
	} else {
		spin_lock(&ctx->lock);
		__blk_mq_insert_request(hctx, rq, at_head);
		spin_unlock(&ctx->lock);
	}

	if (run_queue)
		blk_mq_run_hw_queue(hctx, async);
}
|
|
|
|
/*
 * Insert a list of requests for hctx.
 *
 * The list is given to the elevator's insert_requests hook when there is
 * one.  Otherwise, direct issue is tried first when the hardware queue is
 * not busy, no elevator is attached and run_queue_async is false; whatever
 * is left is inserted into the software queue of ctx.  The hardware queue
 * is then run, except when direct issue emptied the list.
 */
void blk_mq_sched_insert_requests(struct blk_mq_hw_ctx *hctx,
				  struct blk_mq_ctx *ctx,
				  struct list_head *list, bool run_queue_async)
{
	struct request_queue *q = hctx->queue;
	struct elevator_queue *e;
	bool need_run = true;

	/*
	 * blk_mq_sched_insert_requests() is called from flush plug
	 * context only, and holds one usage counter to prevent the queue
	 * from being released.
	 */
	percpu_ref_get(&q->q_usage_counter);

	e = q->elevator;
	if (e && e->type->ops.insert_requests) {
		e->type->ops.insert_requests(hctx, list, false);
	} else {
		/*
		 * Try to issue requests directly if the hw queue isn't
		 * busy in case of the 'none' scheduler; this may save one
		 * extra enqueue & dequeue to the sw queue.
		 */
		if (!hctx->dispatch_busy && !e && !run_queue_async) {
			blk_mq_try_issue_list_directly(hctx, list);
			need_run = !list_empty(list);
		}
		if (need_run)
			blk_mq_insert_requests(hctx, ctx, list);
	}

	if (need_run)
		blk_mq_run_hw_queue(hctx, run_queue_async);

	percpu_ref_put(&q->q_usage_counter);
}
|
|
|
|
static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
|
|
struct blk_mq_hw_ctx *hctx,
|
|
unsigned int hctx_idx)
|
|
{
|
|
unsigned int flags = set->flags & ~BLK_MQ_F_TAG_HCTX_SHARED;
|
|
|
|
if (hctx->sched_tags) {
|
|
blk_mq_free_rqs(set, hctx->sched_tags, hctx_idx);
|
|
blk_mq_free_rq_map(hctx->sched_tags, flags);
|
|
hctx->sched_tags = NULL;
|
|
}
|
|
}
|
|
|
|
static int blk_mq_sched_alloc_tags(struct request_queue *q,
|
|
struct blk_mq_hw_ctx *hctx,
|
|
unsigned int hctx_idx)
|
|
{
|
|
struct blk_mq_tag_set *set = q->tag_set;
|
|
/* Clear HCTX_SHARED so tags are init'ed */
|
|
unsigned int flags = set->flags & ~BLK_MQ_F_TAG_HCTX_SHARED;
|
|
int ret;
|
|
|
|
hctx->sched_tags = blk_mq_alloc_rq_map(set, hctx_idx, q->nr_requests,
|
|
set->reserved_tags, flags);
|
|
if (!hctx->sched_tags)
|
|
return -ENOMEM;
|
|
|
|
ret = blk_mq_alloc_rqs(set, hctx->sched_tags, hctx_idx, q->nr_requests);
|
|
if (ret)
|
|
blk_mq_sched_free_tags(set, hctx, hctx_idx);
|
|
|
|
return ret;
|
|
}
|
|
|
|
/* called in queue's release handler, tagset has gone away */
|
|
static void blk_mq_sched_tags_teardown(struct request_queue *q)
|
|
{
|
|
struct blk_mq_hw_ctx *hctx;
|
|
int i;
|
|
|
|
queue_for_each_hw_ctx(q, hctx, i) {
|
|
/* Clear HCTX_SHARED so tags are freed */
|
|
unsigned int flags = hctx->flags & ~BLK_MQ_F_TAG_HCTX_SHARED;
|
|
|
|
if (hctx->sched_tags) {
|
|
blk_mq_free_rq_map(hctx->sched_tags, flags);
|
|
hctx->sched_tags = NULL;
|
|
}
|
|
}
|
|
}
|
|
|
|
int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
|
|
{
|
|
struct blk_mq_hw_ctx *hctx;
|
|
struct elevator_queue *eq;
|
|
unsigned int i;
|
|
int ret;
|
|
|
|
if (!e) {
|
|
q->elevator = NULL;
|
|
q->nr_requests = q->tag_set->queue_depth;
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Default to double of smaller one between hw queue_depth and 128,
|
|
* since we don't split into sync/async like the old code did.
|
|
* Additionally, this is a per-hw queue depth.
|
|
*/
|
|
q->nr_requests = 2 * min_t(unsigned int, q->tag_set->queue_depth,
|
|
BLKDEV_MAX_RQ);
|
|
|
|
queue_for_each_hw_ctx(q, hctx, i) {
|
|
ret = blk_mq_sched_alloc_tags(q, hctx, i);
|
|
if (ret)
|
|
goto err;
|
|
}
|
|
|
|
ret = e->ops.init_sched(q, e);
|
|
if (ret)
|
|
goto err;
|
|
|
|
blk_mq_debugfs_register_sched(q);
|
|
|
|
queue_for_each_hw_ctx(q, hctx, i) {
|
|
if (e->ops.init_hctx) {
|
|
ret = e->ops.init_hctx(hctx, i);
|
|
if (ret) {
|
|
eq = q->elevator;
|
|
blk_mq_sched_free_requests(q);
|
|
blk_mq_exit_sched(q, eq);
|
|
kobject_put(&eq->kobj);
|
|
return ret;
|
|
}
|
|
}
|
|
blk_mq_debugfs_register_sched_hctx(q, hctx);
|
|
}
|
|
|
|
return 0;
|
|
|
|
err:
|
|
blk_mq_sched_free_requests(q);
|
|
blk_mq_sched_tags_teardown(q);
|
|
q->elevator = NULL;
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* called in either blk_queue_cleanup or elevator_switch, tagset
|
|
* is required for freeing requests
|
|
*/
|
|
void blk_mq_sched_free_requests(struct request_queue *q)
|
|
{
|
|
struct blk_mq_hw_ctx *hctx;
|
|
int i;
|
|
|
|
queue_for_each_hw_ctx(q, hctx, i) {
|
|
if (hctx->sched_tags)
|
|
blk_mq_free_rqs(q->tag_set, hctx->sched_tags, i);
|
|
}
|
|
}
|
|
|
|
void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e)
|
|
{
|
|
struct blk_mq_hw_ctx *hctx;
|
|
unsigned int i;
|
|
|
|
queue_for_each_hw_ctx(q, hctx, i) {
|
|
blk_mq_debugfs_unregister_sched_hctx(hctx);
|
|
if (e->type->ops.exit_hctx && hctx->sched_data) {
|
|
e->type->ops.exit_hctx(hctx, i);
|
|
hctx->sched_data = NULL;
|
|
}
|
|
}
|
|
blk_mq_debugfs_unregister_sched(q);
|
|
if (e->type->ops.exit_sched)
|
|
e->type->ops.exit_sched(e);
|
|
blk_mq_sched_tags_teardown(q);
|
|
q->elevator = NULL;
|
|
}
|