android_kernel_asus_sm8350/mm/vmscan.c
Srinivasarao P b403cd66bd Merge android11-5.4.86+ (75c93eb) into msm-5.4
* refs/heads/tmp-75c93eb:
  Revert one chunk from 37432a83fa commit
  Revert "rpmsg: glink: Use complete_all for open states"
  ANDROID: Incremental fs: Fix selinux issues
  ANDROID: Incremental fs: Set credentials before reading/writing
  ANDROID: Incremental fs: Fix memory leak on closing file
  ANDROID: GKI: update Sony KMI symbol list
  ANDROID: ABI updates for db845c (enabling wifi)
  ANDROID: db845c_gki.fragment: Enable wifi on db845c w/ android-5.4
  UPSTREAM: arm64: dts: qcom: sdm845: Add APSS watchdog node
  UPSTREAM: arm64: dts: qcom: db845c: Move remoteproc firmware to sdm845
  UPSTREAM: ath10k: qmi: Sleep for a while before assigning MSA memory
  UPSTREAM: soc: qcom: qmi: Return EPROBE_DEFER if no address family
  ANDROID: ABI: Update allowed list for QCOM
  ANDROID: GKI: update Sony symbol list for texfat
  ANDROID: GKI: update KMI for db845c with idr_alloc_u32 added
  ANDROID: ABI: Update allowed list for QCOM
  ANDROID: ABI: update allowed list for QCOM
  Revert "media: v4l2-fwnode: Return -EINVAL for invalid bus-type"
  Revert "seq_buf: Avoid type mismatch for seq_buf_init"
  Linux 5.4.86
  x86/CPU/AMD: Save AMD NodeId as cpu_die_id
  Revert: "ring-buffer: Remove HAVE_64BIT_ALIGNED_ACCESS"
  rtc: ep93xx: Fix NULL pointer dereference in ep93xx_rtc_read_time
  regulator: axp20x: Fix DLDO2 voltage control register mask for AXP22x
  PCI: Fix pci_slot_release() NULL pointer dereference
  platform/x86: intel-vbtn: Allow switch events on Acer Switch Alpha 12
  libnvdimm/namespace: Fix reaping of invalidated block-window-namespace labels
  xenbus/xenbus_backend: Disallow pending watch messages
  xen/xenbus: Count pending messages for each watch
  xen/xenbus/xen_bus_type: Support will_handle watch callback
  xen/xenbus: Add 'will_handle' callback support in xenbus_watch_path()
  xen/xenbus: Allow watches discard events before queueing
  xen-blkback: set ring->xenblkd to NULL after kthread_stop()
  dma-buf/dma-resv: Respect num_fences when initializing the shared fence list.
  device-dax/core: Fix memory leak when rmmod dax.ko
  clk: tegra: Do not return 0 on failure
  clk: mvebu: a3700: fix the XTAL MODE pin to MPP1_9
  clk: ingenic: Fix divider calculation with div tables
  pinctrl: sunxi: Always call chained_irq_{enter, exit} in sunxi_pinctrl_irq_handler
  md/cluster: fix deadlock when node is doing resync job
  md/cluster: block reshape with remote resync job
  iio:adc:ti-ads124s08: Fix alignment and data leak issues.
  iio:adc:ti-ads124s08: Fix buffer being too long.
  iio:imu:bmi160: Fix too large a buffer.
  iio:pressure:mpl3115: Force alignment of buffer
  iio:magnetometer:mag3110: Fix alignment and data leak issues.
  iio:light:st_uvis25: Fix timestamp alignment and prevent data leak.
  iio:light:rpr0521: Fix timestamp alignment and prevent data leak.
  iio: adc: rockchip_saradc: fix missing clk_disable_unprepare() on error in rockchip_saradc_resume
  iio: buffer: Fix demux update
  scsi: lpfc: Re-fix use after free in lpfc_rq_buf_free()
  scsi: lpfc: Fix invalid sleeping context in lpfc_sli4_nvmet_alloc()
  scsi: qla2xxx: Fix crash during driver load on big endian machines
  mtd: rawnand: meson: fix meson_nfc_dma_buffer_release() arguments
  mtd: rawnand: qcom: Fix DMA sync on FLASH_STATUS register read
  mtd: parser: cmdline: Fix parsing of part-names with colons
  mtd: spinand: Fix OOB read
  soc: qcom: smp2p: Safely acquire spinlock without IRQs
  spi: atmel-quadspi: Fix AHB memory accesses
  spi: atmel-quadspi: Disable clock in probe error path
  spi: mt7621: Don't leak SPI master in probe error path
  spi: mt7621: Disable clock in probe error path
  spi: synquacer: Disable clock in probe error path
  spi: st-ssc4: Fix unbalanced pm_runtime_disable() in probe error path
  spi: sc18is602: Don't leak SPI master in probe error path
  spi: rb4xx: Don't leak SPI master in probe error path
  spi: pic32: Don't leak DMA channels in probe error path
  spi: mxic: Don't leak SPI master in probe error path
  spi: gpio: Don't leak SPI master in probe error path
  spi: fsl: fix use of spisel_boot signal on MPC8309
  spi: davinci: Fix use-after-free on unbind
  spi: atmel-quadspi: Fix use-after-free on unbind
  spi: spi-sh: Fix use-after-free on unbind
  spi: pxa2xx: Fix use-after-free on unbind
  drm/i915: Fix mismatch between misplaced vma check and vma insert
  drm/dp_aux_dev: check aux_dev before use in drm_dp_aux_dev_get_by_minor()
  drm/amd/display: Fix memory leaks in S3 resume
  platform/x86: mlx-platform: remove an unused variable
  jfs: Fix array index bounds check in dbAdjTree
  jffs2: Fix ignoring mounting options problem during remounting
  jffs2: Fix GC exit abnormally
  ubifs: wbuf: Don't leak kernel memory to flash
  SMB3: avoid confusing warning message on mount to Azure
  ceph: fix race in concurrent __ceph_remove_cap invocations
  um: Remove use of asprintf in umid.c
  ima: Don't modify file descriptor mode on the fly
  powerpc/powernv/memtrace: Fix crashing the kernel when enabling concurrently
  powerpc/powernv/memtrace: Don't leak kernel memory to user space
  powerpc/powernv/npu: Do not attempt NPU2 setup on POWER8NVL NPU
  powerpc/mm: Fix verification of MMU_FTR_TYPE_44x
  powerpc/8xx: Fix early debug when SMC1 is relocated
  powerpc/xmon: Change printk() to pr_cont()
  powerpc/feature: Add CPU_FTR_NOEXECUTE to G2_LE
  powerpc/rtas: Fix typo of ibm,open-errinjct in RTAS filter
  powerpc: Fix incorrect stw{, ux, u, x} instructions in __set_pte_at
  xprtrdma: Fix XDRBUF_SPARSE_PAGES support
  ARM: dts: at91: sama5d2: fix CAN message ram offset and size
  ARM: dts: pandaboard: fix pinmux for gpio user button of Pandaboard ES
  KVM: arm64: Introduce handling of AArch32 TTBCR2 traps
  ext4: fix deadlock with fs freezing and EA inodes
  ext4: fix a memory leak of ext4_free_data
  btrfs: trim: fix underflow in trim length to prevent access beyond device boundary
  btrfs: do not shorten unpin len for caching block groups
  USB: serial: keyspan_pda: fix write unthrottling
  USB: serial: keyspan_pda: fix tx-unthrottle use-after-free
  USB: serial: keyspan_pda: fix write-wakeup use-after-free
  USB: serial: keyspan_pda: fix stalled writes
  USB: serial: keyspan_pda: fix write deadlock
  USB: serial: keyspan_pda: fix dropped unthrottle interrupts
  USB: serial: digi_acceleport: fix write-wakeup deadlocks
  USB: serial: mos7720: fix parallel-port state restore
  cpuset: fix race between hotplug work and later CPU offline
  EDAC/amd64: Fix PCI component registration
  EDAC/i10nm: Use readl() to access MMIO registers
  crypto: arm/aes-ce - work around Cortex-A57/A72 silicon errata
  crypto: ecdh - avoid unaligned accesses in ecdh_set_secret()
  powerpc/perf: Exclude kernel samples while counting events in user space.
  perf/x86/intel: Fix rtm_abort_event encoding on Ice Lake
  perf/x86/intel: Add event constraint for CYCLE_ACTIVITY.STALLS_MEM_ANY
  staging: comedi: mf6x4: Fix AI end-of-conversion detection
  ASoC: cx2072x: Fix double definitions of Playback and Capture streams
  binder: add flag to clear buffer on txn complete
  s390/dasd: fix list corruption of lcu list
  s390/dasd: fix list corruption of pavgroup group list
  s390/dasd: prevent inconsistent LCU device data
  s390/dasd: fix hanging device offline processing
  s390/kexec_file: fix diag308 subcode when loading crash kernel
  s390/smp: perform initial CPU reset also for SMT siblings
  ALSA: core: memalloc: add page alignment for iram
  ALSA: usb-audio: Disable sample read check if firmware doesn't give back
  ALSA: usb-audio: Add VID to support native DSD reproduction on FiiO devices
  ALSA: hda/realtek: Apply jack fixup for Quanta NL3
  ALSA: hda/realtek: Add quirk for MSI-GP73
  ALSA/hda: apply jack fixup for the Acer Veriton N4640G/N6640G/N2510G
  ALSA: pcm: oss: Fix a few more UBSAN fixes
  ALSA: hda/realtek - Add support for more Lenovo ALC285 Headset Button
  ALSA: hda/realtek - Enable headset mic of ASUS Q524UQK with ALC255
  ALSA: hda/realtek - Enable headset mic of ASUS X430UN with ALC256
  ALSA: hda/realtek: make bass spk volume adjustable on a yoga laptop
  ALSA: hda/ca0132 - Fix AE-5 rear headphone pincfg.
  ALSA: hda: Fix regressions on clear and reconfig sysfs
  ACPI: PNP: compare the string length in the matching_id()
  Revert "ACPI / resources: Use AE_CTRL_TERMINATE to terminate resources walks"
  PM: ACPI: PCI: Drop acpi_pm_set_bridge_wakeup()
  ALSA: hda/ca0132 - Change Input Source enum strings.
  Input: cyapa_gen6 - fix out-of-bounds stack access
  media: ipu3-cio2: Make the field on subdev format V4L2_FIELD_NONE
  media: ipu3-cio2: Validate mbus format in setting subdev format
  media: ipu3-cio2: Serialise access to pad format
  media: ipu3-cio2: Return actual subdev format
  media: ipu3-cio2: Remove traces of returned buffers
  media: netup_unidvb: Don't leak SPI master in probe error path
  media: sunxi-cir: ensure IR is handled when it is continuous
  media: gspca: Fix memory leak in probe
  vfio/pci/nvlink2: Do not attempt NPU2 setup on POWER8NVL NPU
  Input: goodix - add upside-down quirk for Teclast X98 Pro tablet
  initramfs: fix clang build failure
  Input: cros_ec_keyb - send 'scancodes' in addition to key events
  drm/amdkfd: Fix leak in dmabuf import
  drm/amd/display: Prevent bandwidth overflow
  lwt: Disable BH too in run_lwt_bpf()
  fix namespaced fscaps when !CONFIG_SECURITY
  cfg80211: initialize rekey_data
  ARM: sunxi: Add machine match for the Allwinner V3 SoC
  perf probe: Fix memory leak when synthesizing SDT probes
  kconfig: fix return value of do_error_if()
  clk: sunxi-ng: Make sure divider tables have sentinel
  clk: s2mps11: Fix a resource leak in error handling paths in the probe function
  clk: at91: sam9x60: remove atmel,osc-bypass support
  virtio_ring: Fix two use after free bugs
  virtio_net: Fix error code in probe()
  virtio_ring: Cut and paste bugs in vring_create_virtqueue_packed()
  qlcnic: Fix error code in probe
  perf record: Fix memory leak when using '--user-regs=?' to list registers
  pwm: lp3943: Dynamically allocate PWM chip base
  pwm: zx: Add missing cleanup in error path
  clk: ti: Fix memleak in ti_fapll_synth_setup
  watchdog: coh901327: add COMMON_CLK dependency
  watchdog: qcom: Avoid context switch in restart handler
  libnvdimm/label: Return -ENXIO for no slot in __blk_label_update
  net: korina: fix return value
  net: allwinner: Fix some resources leak in the error handling path of the probe and in the remove function
  net: bcmgenet: Fix a resource leak in an error handling path in the probe function
  lan743x: fix rx_napi_poll/interrupt ping-pong
  checkpatch: fix unescaped left brace
  mm: don't wake kswapd prematurely when watermark boosting is disabled
  sparc: fix handling of page table constructor failure
  powerpc/ps3: use dma_mapping_error()
  nfc: s3fwrn5: Release the nfc firmware
  RDMA/cma: Don't overwrite sgid_attr after device is released
  sunrpc: fix xs_read_xdr_buf for partial pages receive
  um: chan_xterm: Fix fd leak
  um: tty: Fix handling of close in tty lines
  um: Monitor error events in IRQ controller
  ubifs: Fix error return code in ubifs_init_authentication()
  watchdog: Fix potential dereferencing of null pointer
  watchdog: sprd: check busy bit before new loading rather than after that
  watchdog: sprd: remove watchdog disable from resume fail path
  watchdog: sirfsoc: Add missing dependency on HAS_IOMEM
  watchdog: armada_37xx: Add missing dependency on HAS_IOMEM
  irqchip/alpine-msi: Fix freeing of interrupts on allocation error path
  ASoC: wm_adsp: remove "ctl" from list on error in wm_adsp_create_control()
  mac80211: don't set TDLS STA bandwidth wider than possible
  crypto: atmel-i2c - select CONFIG_BITREVERSE
  extcon: max77693: Fix modalias string
  mtd: rawnand: gpmi: Fix the random DMA timeout issue
  mtd: rawnand: meson: Fix a resource leak in init
  mtd: rawnand: gpmi: fix reference count leak in gpmi ops
  clk: tegra: Fix duplicated SE clock entry
  remoteproc: qcom: Fix potential NULL dereference in adsp_init_mmio()
  remoteproc: qcom: fix reference leak in adsp_start
  remoteproc: q6v5-mss: fix error handling in q6v5_pds_enable
  RDMA/core: Do not indicate device ready when device enablement fails
  can: m_can: m_can_config_endisable(): remove double clearing of clock stop request bit
  erofs: avoid using generic_block_bmap
  iwlwifi: mvm: hook up missing RX handlers
  s390/cio: fix use-after-free in ccw_device_destroy_console
  bus: fsl-mc: fix error return code in fsl_mc_object_allocate()
  platform/chrome: cros_ec_spi: Don't overwrite spi::mode
  x86/kprobes: Restore BTF if the single-stepping is cancelled
  nfs_common: need lock during iterate through the list
  nfsd: Fix message level for normal termination
  speakup: fix uninitialized flush_lock
  usb: oxu210hp-hcd: Fix memory leak in oxu_create
  usb: ehci-omap: Fix PM disable depth imbalance in ehci_hcd_omap_probe
  powerpc/mm: sanity_check_fault() should work for all, not only BOOK3S
  ASoC: amd: change clk_get() to devm_clk_get() and add missed checks
  drm/mediatek: avoid dereferencing a null hdmi_phy on an error message
  powerpc/pseries/hibernation: remove redundant cacheinfo update
  powerpc/pseries/hibernation: drop pseries_suspend_begin() from suspend ops
  platform/x86: mlx-platform: Fix item counter assignment for MSN2700, MSN24xx systems
  scsi: fnic: Fix error return code in fnic_probe()
  seq_buf: Avoid type mismatch for seq_buf_init
  scsi: pm80xx: Fix error return in pm8001_pci_probe()
  scsi: qedi: Fix missing destroy_workqueue() on error in __qedi_probe
  arm64: dts: meson: g12a: x96-max: fix PHY deassert timing requirements
  ARM: dts: meson: fix PHY deassert timing requirements
  arm64: dts: meson: fix PHY deassert timing requirements
  Bluetooth: btmtksdio: Add the missed release_firmware() in mtk_setup_firmware()
  Bluetooth: btusb: Add the missed release_firmware() in btusb_mtk_setup_firmware()
  cpufreq: scpi: Add missing MODULE_ALIAS
  cpufreq: loongson1: Add missing MODULE_ALIAS
  cpufreq: sun50i: Add missing MODULE_DEVICE_TABLE
  cpufreq: st: Add missing MODULE_DEVICE_TABLE
  cpufreq: qcom: Add missing MODULE_DEVICE_TABLE
  cpufreq: mediatek: Add missing MODULE_DEVICE_TABLE
  cpufreq: highbank: Add missing MODULE_DEVICE_TABLE
  cpufreq: ap806: Add missing MODULE_DEVICE_TABLE
  clocksource/drivers/arm_arch_timer: Correct fault programming of CNTKCTL_EL1.EVNTI
  clocksource/drivers/arm_arch_timer: Use stable count reader in erratum sne
  phy: renesas: rcar-gen3-usb2: disable runtime pm in case of failure
  dm ioctl: fix error return code in target_message
  ASoC: jz4740-i2s: add missed checks for clk_get()
  net/mlx5: Properly convey driver version to firmware
  MIPS: Don't round up kernel sections size for memblock_add()
  memstick: r592: Fix error return in r592_probe()
  arm64: dts: rockchip: Fix UART pull-ups on rk3328
  pinctrl: falcon: add missing put_device() call in pinctrl_falcon_probe()
  bpf: Fix bpf_put_raw_tracepoint()'s use of __module_address()
  ARM: dts: at91: sama5d2: map securam as device
  iio: hrtimer-trigger: Mark hrtimer to expire in hard interrupt context
  clocksource/drivers/cadence_ttc: Fix memory leak in ttc_setup_clockevent()
  clocksource/drivers/orion: Add missing clk_disable_unprepare() on error path
  powerpc/64: Fix an EMIT_BUG_ENTRY in head_64.S
  powerpc/perf: Fix crash with is_sier_available when pmu is not set
  media: saa7146: fix array overflow in vidioc_s_audio()
  hwmon: (ina3221) Fix PM usage counter unbalance in ina3221_write_enable
  vfio-pci: Use io_remap_pfn_range() for PCI IO memory
  selftests/seccomp: Update kernel config
  NFS: switch nfsiod to be an UNBOUND workqueue.
  lockd: don't use interval-based rebinding over TCP
  net: sunrpc: Fix 'snprintf' return value check in 'do_xprt_debugfs'
  NFSv4: Fix the alignment of page data in the getdeviceinfo reply
  SUNRPC: xprt_load_transport() needs to support the netid "rdma6"
  NFSv4.2: condition READDIR's mask for security label based on LSM state
  SUNRPC: rpc_wake_up() should wake up tasks in the correct order
  ath10k: Release some resources in an error handling path
  ath10k: Fix an error handling path
  ath10k: Fix the parsing error in service available event
  platform/x86: dell-smbios-base: Fix error return code in dell_smbios_init
  ARM: dts: at91: at91sam9rl: fix ADC triggers
  soc: amlogic: canvas: add missing put_device() call in meson_canvas_get()
  arm64: dts: meson-sm1: fix typo in opp table
  arm64: dts: meson: fix spi-max-frequency on Khadas VIM2
  PCI: iproc: Fix out-of-bound array accesses
  PCI: Fix overflow in command-line resource alignment requests
  PCI: Bounds-check command-line resource alignment requests
  arm64: dts: qcom: c630: Polish i2c-hid devices
  arm64: dts: ls1028a: fix ENETC PTP clock input
  genirq/irqdomain: Don't try to free an interrupt that has no mapping
  power: supply: bq24190_charger: fix reference leak
  power: supply: axp288_charger: Fix HP Pavilion x2 10 DMI matching
  arm64: dts: rockchip: Set dr_mode to "host" for OTG on rk3328-roc-cc
  arm64: dts: armada-3720-turris-mox: update ethernet-phy handle name
  ARM: dts: Remove non-existent i2c1 from 98dx3236
  HSI: omap_ssi: Don't jump to free ID in ssi_add_controller()
  slimbus: qcom-ngd-ctrl: Avoid sending power requests without QMI
  media: max2175: fix max2175_set_csm_mode() error code
  mips: cdmm: fix use-after-free in mips_cdmm_bus_discover
  media: imx214: Fix stop streaming
  samples: bpf: Fix lwt_len_hist reusing previous BPF map
  platform/x86: mlx-platform: Remove PSU EEPROM from MSN274x platform configuration
  platform/x86: mlx-platform: Remove PSU EEPROM from default platform configuration
  media: siano: fix memory leak of debugfs members in smsdvb_hotplug
  arm64: tegra: Fix DT binding for IO High Voltage entry
  dmaengine: mv_xor_v2: Fix error return code in mv_xor_v2_probe()
  cw1200: fix missing destroy_workqueue() on error in cw1200_init_common
  rsi: fix error return code in rsi_reset_card()
  qtnfmac: fix error return code in qtnf_pcie_probe()
  orinoco: Move context allocation after processing the skb
  mmc: pxamci: Fix error return code in pxamci_probe
  ARM: dts: at91: sama5d3_xplained: add pincontrol for USB Host
  ARM: dts: at91: sama5d4_xplained: add pincontrol for USB Host
  memstick: fix a double-free bug in memstick_check
  RDMA/cxgb4: Validate the number of CQEs
  clk: meson: Kconfig: fix dependency for G12A
  Input: omap4-keypad - fix runtime PM error handling
  drivers: soc: ti: knav_qmss_queue: Fix error return code in knav_queue_probe
  soc: ti: Fix reference imbalance in knav_dma_probe
  soc: ti: knav_qmss: fix reference leak in knav_queue_probe
  spi: fix resource leak for drivers without .remove callback
  crypto: omap-aes - Fix PM disable depth imbalance in omap_aes_probe
  crypto: crypto4xx - Replace bitwise OR with logical OR in crypto4xx_build_pd
  EDAC/mce_amd: Use struct cpuinfo_x86.cpu_die_id for AMD NodeId
  powerpc/feature: Fix CPU_FTRS_ALWAYS by removing CPU_FTRS_GENERIC_32
  powerpc: Avoid broken GCC __attribute__((optimize))
  selftests/bpf: Fix broken riscv build
  spi: mxs: fix reference leak in mxs_spi_probe
  usb/max3421: fix return error code in max3421_probe()
  Input: ads7846 - fix unaligned access on 7845
  Input: ads7846 - fix integer overflow on Rt calculation
  Input: ads7846 - fix race that causes missing releases
  drm/omap: dmm_tiler: fix return error code in omap_dmm_probe()
  video: fbdev: atmel_lcdfb: fix return error code in atmel_lcdfb_of_init()
  media: solo6x10: fix missing snd_card_free in error handling case
  scsi: core: Fix VPD LUN ID designator priorities
  ASoC: meson: fix COMPILE_TEST error
  media: v4l2-fwnode: Return -EINVAL for invalid bus-type
  media: mtk-vcodec: add missing put_device() call in mtk_vcodec_init_enc_pm()
  media: mtk-vcodec: add missing put_device() call in mtk_vcodec_release_dec_pm()
  media: mtk-vcodec: add missing put_device() call in mtk_vcodec_init_dec_pm()
  media: tm6000: Fix sizeof() mismatches
  staging: gasket: interrupt: fix the missed eventfd_ctx_put() in gasket_interrupt.c
  staging: greybus: codecs: Fix reference counter leak in error handling
  crypto: qat - fix status check in qat_hal_put_rel_rd_xfer()
  MIPS: BCM47XX: fix kconfig dependency bug for BCM47XX_BCMA
  RDMA/mthca: Work around -Wenum-conversion warning
  ASoC: arizona: Fix a wrong free in wm8997_probe
  spi: sprd: fix reference leak in sprd_spi_remove
  ASoC: wm8998: Fix PM disable depth imbalance on error
  selftest/bpf: Add missed ip6ip6 test back
  mwifiex: fix mwifiex_shutdown_sw() causing sw reset failure
  spi: bcm63xx-hsspi: fix missing clk_disable_unprepare() on error in bcm63xx_hsspi_resume
  spi: tegra114: fix reference leak in tegra spi ops
  spi: tegra20-sflash: fix reference leak in tegra_sflash_resume
  spi: tegra20-slink: fix reference leak in slink ops of tegra20
  spi: mt7621: fix missing clk_disable_unprepare() on error in mt7621_spi_probe
  spi: spi-ti-qspi: fix reference leak in ti_qspi_setup
  Bluetooth: hci_h5: fix memory leak in h5_close
  Bluetooth: Fix null pointer dereference in hci_event_packet()
  arm64: dts: exynos: Correct psci compatible used on Exynos7
  arm64: dts: exynos: Include common syscon restart/poweroff for Exynos7
  brcmfmac: Fix memory leak for unpaired brcmf_{alloc/free}
  spi: stm32: fix reference leak in stm32_spi_resume
  selinux: fix inode_doinit_with_dentry() LABEL_INVALID error handling
  ASoC: pcm: DRAIN support reactivation
  spi: spi-mem: fix reference leak in spi_mem_access_start
  drm/msm/dsi_pll_10nm: restore VCO rate during restore_state
  f2fs: call f2fs_get_meta_page_retry for nat page
  spi: img-spfi: fix reference leak in img_spfi_resume
  powerpc/64: Set up a kernel stack for secondaries before cpu_restore()
  drm/amdgpu: fix build_coefficients() argument
  ARM: dts: aspeed: tiogapass: Remove vuart
  ASoC: sun4i-i2s: Fix lrck_period computation for I2S justified mode
  crypto: inside-secure - Fix sizeof() mismatch
  crypto: talitos - Fix return type of current_desc_hdr()
  crypto: talitos - Endianness in current_desc_hdr()
  drm/amdgpu: fix incorrect enum type
  sched: Reenable interrupts in do_sched_yield()
  sched/deadline: Fix sched_dl_global_validate()
  x86/apic: Fix x2apic enablement without interrupt remapping
  ARM: p2v: fix handling of LPAE translation in BE mode
  x86/mm/ident_map: Check for errors from ident_pud_init()
  RDMA/rxe: Compute PSN windows correctly
  ARM: dts: aspeed: s2600wf: Fix VGA memory region location
  selinux: fix error initialization in inode_doinit_with_dentry()
  rtc: pcf2127: fix pcf2127_nvmem_read/write() returns
  RDMA/bnxt_re: Set queue pair state when being queried
  Revert "i2c: i2c-qcom-geni: Fix DMA transfer race"
  soc: qcom: geni: More properly switch to DMA mode
  soc: mediatek: Check if power domains can be powered on at boot time
  soc: renesas: rmobile-sysc: Fix some leaks in rmobile_init_pm_domains()
  arm64: dts: renesas: cat875: Remove rxc-skew-ps from ethernet-phy node
  arm64: dts: renesas: hihope-rzg2-ex: Drop rxc-skew-ps from ethernet-phy node
  drm/tve200: Fix handling of platform_get_irq() error
  drm/mcde: Fix handling of platform_get_irq() error
  drm/aspeed: Fix Kconfig warning & subsequent build errors
  drm/gma500: fix double free of gma_connector
  md: fix a warning caused by a race between concurrent md_ioctl()s
  crypto: af_alg - avoid undefined behavior accessing salg_name
  media: msi2500: assign SPI bus number dynamically
  quota: Sanity-check quota file headers on load
  Bluetooth: Fix slab-out-of-bounds read in hci_le_direct_adv_report_evt()
  serial_core: Check for port state when tty is in error state
  HID: i2c-hid: add Vero K147 to descriptor override
  scsi: megaraid_sas: Check user-provided offsets
  coresight: etb10: Fix possible NULL ptr dereference in etb_enable_perf()
  coresight: tmc-etr: Fix barrier packet insertion for perf buffer
  coresight: tmc-etr: Check if page is valid before dma_map_page()
  coresight: tmc-etf: Fix NULL ptr dereference in tmc_enable_etf_sink_perf()
  ARM: dts: exynos: fix USB 3.0 pins supply being turned off on Odroid XU
  ARM: dts: exynos: fix USB 3.0 VBUS control and over-current pins on Exynos5410
  ARM: dts: exynos: fix roles of USB 3.0 ports on Odroid XU
  usb: chipidea: ci_hdrc_imx: Pass DISABLE_DEVICE_STREAMING flag to imx6ul
  USB: gadget: f_rndis: fix bitrate for SuperSpeed and above
  usb: gadget: f_fs: Re-use SS descriptors for SuperSpeedPlus
  USB: gadget: f_midi: setup SuperSpeed Plus descriptors
  USB: gadget: f_acm: add support for SuperSpeed Plus
  USB: serial: option: add interface-number sanity check to flag handling
  usb: mtu3: fix memory corruption in mtu3_debugfs_regset()
  soc/tegra: fuse: Fix index bug in get_process_id
  kbuild: avoid split lines in .mod files
  perf/x86/intel: Check PEBS status correctly
  drm/amd/display: Init clock value by current vbios CLKs
  iwlwifi: pcie: add one missing entry for AX210
  dm table: Remove BUG_ON(in_interrupt())
  scsi: mpt3sas: Increase IOCInit request timeout to 30s
  vxlan: Copy needed_tailroom from lowerdev
  vxlan: Add needed_headroom for lower device
  arm64: syscall: exit userspace before unmasking exceptions
  habanalabs: put devices before driver removal
  drm/tegra: sor: Disable clocks on error in tegra_sor_init()
  kernel/cpu: add arch override for clear_tasks_mm_cpumask() mm handling
  drm/tegra: replace idr_init() by idr_init_base()
  net: mvpp2: add mvpp2_phylink_to_port() helper
  selftests: fix poll error in udpgro.sh
  ixgbe: avoid premature Rx buffer reuse
  i40e: avoid premature Rx buffer reuse
  i40e: optimise prefetch page refcount
  i40e: Refactor rx_bi accesses
  RDMA/cm: Fix an attempt to use non-valid pointer when cleaning timewait
  selftests/bpf/test_offload.py: Reset ethtool features after failed setting
  netfilter: nft_ct: Remove confirmation check for NFT_CT_ID
  gpio: eic-sprd: break loop when getting NULL device resource
  Revert "gpio: eic-sprd: Use devm_platform_ioremap_resource()"
  afs: Fix memory leak when mounting with multiple source parameters
  netfilter: nft_dynset: fix timeouts later than 23 days
  netfilter: nft_compat: make sure xtables destructors have run
  netfilter: x_tables: Switch synchronization to RCU
  pinctrl: aspeed: Fix GPIO requests on pass-through banks
  blk-mq: In blk_mq_dispatch_rq_list() "no budget" is a reason to kick
  block: factor out requeue handling from dispatch code
  block: Simplify REQ_OP_ZONE_RESET_ALL handling
  clk: renesas: r9a06g032: Drop __packed for portability
  can: softing: softing_netdev_open(): fix error handling
  xsk: Replace datagram_poll by sock_poll_wait
  xsk: Fix xsk_poll()'s return type
  scsi: bnx2i: Requires MMU
  gpio: mvebu: fix potential use-after-free on probe
  gpio: zynq: fix reference leak in zynq_gpio functions
  PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter
  ARM: dts: imx6qdl-kontron-samx6i: fix I2C_PM scl pin
  ARM: dts: imx6qdl-wandboard-revd1: Remove PAD_GPIO_6 from enetgrp
  ARM: dts: sun7i: pcduino3-nano: enable RGMII RX/TX delay on PHY
  ARM: dts: sun8i: v3s: fix GIC node memory range
  pinctrl: baytrail: Avoid clearing debounce value when turning it off
  pinctrl: merrifield: Set default bias in case no particular value given
  ARM: dts: sun8i: v40: bananapi-m2-berry: Fix ethernet node
  ARM: dts: sun8i: r40: bananapi-m2-berry: Fix dcdc1 regulator
  ARM: dts: sun7i: bananapi: Enable RGMII RX/TX delay on Ethernet PHY
  Linux 5.4.85
  x86/resctrl: Fix incorrect local bandwidth when mba_sc is enabled
  x86/resctrl: Remove unused struct mbm_state::chunks_bw
  membarrier: Explicitly sync remote cores when SYNC_CORE is requested
  Revert "selftests/ftrace: check for do_sys_openat2 in user-memory test"
  KVM: mmu: Fix SPTE encoding of MMIO generation upper half
  serial: 8250_omap: Avoid FIFO corruption caused by MDR1 access
  ALSA: pcm: oss: Fix potential out-of-bounds shift
  USB: sisusbvga: Make console support depend on BROKEN
  USB: UAS: introduce a quirk to set no_write_same
  xhci-pci: Allow host runtime PM as default for Intel Alpine Ridge LP
  xhci: Give USB2 ports time to enter U3 in bus suspend
  ALSA: usb-audio: Fix control 'access overflow' errors from chmap
  ALSA: usb-audio: Fix potential out-of-bounds shift
  USB: add RESET_RESUME quirk for Snapscan 1212
  USB: dummy-hcd: Fix uninitialized array use in init()
  ktest.pl: If size of log is too big to email, email error message
  net: stmmac: delete the eee_ctrl_timer after napi disabled
  net: stmmac: dwmac-meson8b: fix mask definition of the m250_sel mux
  net: ll_temac: Fix potential NULL dereference in temac_probe()
  net/mlx4_en: Handle TX error CQE
  lan743x: fix for potential NULL pointer dereference with bare card
  net/mlx4_en: Avoid scheduling restart task if it is already running
  tcp: fix cwnd-limited bug for TSO deferral where we send nothing
  tcp: select sane initial rcvq_space.space for big MSS
  net: stmmac: free tx skb buffer in stmmac_resume()
  bridge: Fix a deadlock when enabling multicast snooping
  enetc: Fix reporting of h/w packet counters
  udp: fix the proto value passed to ip_protocol_deliver_rcu for the segments
  net: hns3: remove a misused pragma packed
  vrf: packets with lladdr src needs dst at input with orig_iif when needs strict
  net: bridge: vlan: fix error return code in __vlan_add()
  mac80211: mesh: fix mesh_pathtbl_init() error path
  ipv4: fix error return code in rtm_to_fib_config()
  ptrace: Prevent kernel-infoleak in ptrace_get_syscall_info()
  Linux 5.4.84
  compiler.h: fix barrier_data() on clang
  mm/zsmalloc.c: drop ZSMALLOC_PGTABLE_MAPPING
  x86/apic/vector: Fix ordering in vector assignment
  x86/membarrier: Get rid of a dubious optimization
  x86/mm/mem_encrypt: Fix definition of PMD_FLAGS_DEC_WP
  scsi: be2iscsi: Revert "Fix a theoretical leak in beiscsi_create_eqs()"
  proc: use untagged_addr() for pagemap_read addresses
  kbuild: avoid static_assert for genksyms
  drm/i915/display/dp: Compute the correct slice count for VDSC on DP
  mmc: block: Fixup condition for CMD13 polling for RPMB requests
  pinctrl: amd: remove debounce filter setting in IRQ type setting
  Input: i8042 - add Acer laptops to the i8042 reset list
  Input: cm109 - do not stomp on control URB
  ktest.pl: Fix incorrect reboot for grub2bls
  can: m_can: m_can_dev_setup(): add support for bosch mcan version 3.3.0
  platform/x86: touchscreen_dmi: Add info for the Irbis TW118 tablet
  platform/x86: intel-vbtn: Support for tablet mode on HP Pavilion 13 x360 PC
  platform/x86: acer-wmi: add automatic keyboard background light toggle key as KEY_LIGHTS_TOGGLE
  platform/x86: thinkpad_acpi: Add BAT1 is primary battery quirk for Thinkpad Yoga 11e 4th gen
  platform/x86: thinkpad_acpi: Do not report SW_TABLET_MODE on Yoga 11e
  arm64: tegra: Disable the ACONNECT for Jetson TX2
  soc: fsl: dpio: Get the cpumask through cpumask_of(cpu)
  spi: spi-nxp-fspi: fix fspi panic by unexpected interrupts
  irqchip/gic-v3-its: Unconditionally save/restore the ITS state on suspend
  ibmvnic: skip tx timeout reset while in resetting
  interconnect: qcom: qcs404: Remove GPU and display RPM IDs
  scsi: ufs: Make sure clk scaling happens only when HBA is runtime ACTIVE
  ARC: stack unwinding: don't assume non-current task is sleeping
  arm64: dts: broadcom: clear the warnings caused by empty dma-ranges
  powerpc: Drop -me200 addition to build flags
  iwlwifi: mvm: fix kernel panic in case of assert during CSA
  iwlwifi: pcie: set LTR to avoid completion timeout
  arm64: dts: rockchip: Assign a fixed index to mmc devices on rk3399 boards.
  iwlwifi: pcie: limit memory read spin time
  x86/lib: Change .weak to SYM_FUNC_START_WEAK for arch/x86/lib/mem*_64.S
  Kbuild: do not emit debug info for assembly with LLVM_IAS=1
  ANDROID: GKI: bring back irq_create_mapping()
  Linux 5.4.83
  Revert "geneve: pull IP header before ECN decapsulation"
  x86/insn-eval: Use new for_each_insn_prefix() macro to loop over prefixes bytes
  netfilter: nftables_offload: set address type in control dissector
  netfilter: nf_tables: avoid false-positive lockdep splat
  Input: i8042 - fix error return code in i8042_setup_aux()
  dm writecache: remove BUG() and fail gracefully instead
  i2c: qup: Fix error return code in qup_i2c_bam_schedule_desc()
  rtw88: debug: Fix uninitialized memory in debugfs code
  ASoC: wm_adsp: fix error return code in wm_adsp_load()
  tipc: fix a deadlock when flushing scheduled work
  netfilter: ipset: prevent uninit-value in hash_ip6_add
  gfs2: check for empty rgrp tree in gfs2_ri_update
  can: af_can: can_rx_unregister(): remove WARN() statement from list operation sanity check
  lib/syscall: fix syscall registers retrieval on 32-bit platforms
  tracing: Fix userstacktrace option for instances
  iommu/amd: Set DTE[IntTabLen] to represent 512 IRTEs
  spi: bcm2835: Release the DMA channel if probe fails after dma_init
  i2c: imx: Check for I2SR_IAL after every byte
  i2c: imx: Fix reset of I2SR_IAL flag
  speakup: Reject setting the speakup line discipline outside of speakup
  mm/swapfile: do not sleep with a spin lock held
  mm: list_lru: set shrinker map bit when child nr_items is not zero
  coredump: fix core_pattern parse error
  x86/uprobes: Do not use prefixes.nbytes when looping over prefixes.bytes
  dm: remove invalid sparse __acquires and __releases annotations
  dm: fix bug with RCU locking in dm_blk_report_zones
  powerpc/pseries: Pass MSI affinity to irq_create_mapping()
  genirq/irqdomain: Add an irq_create_mapping_affinity() function
  powerpc/64s/powernv: Fix memory corruption when saving SLB entries on MCE
  dm writecache: fix the maximum number of arguments
  scsi: mpt3sas: Fix ioctl timeout
  drm/i915/gt: Program mocs:63 for cache eviction on gen9
  thunderbolt: Fix use-after-free in remove_unplugged_switch()
  i2c: imx: Don't generate STOP condition if arbitration has been lost
  cifs: fix potential use-after-free in cifs_echo_request()
  cifs: allow syscalls to be restarted in __smb_send_rqst()
  ftrace: Fix updating FTRACE_FL_TRAMP
  ALSA: hda/generic: Add option to enforce preferred_dacs pairs
  ALSA: hda/realtek - Add new codec supported for ALC897
  ALSA: hda/realtek: Enable headset of ASUS UX482EG & B9400CEA with ALC294
  ALSA: hda/realtek: Add mute LED quirk to yet another HP x360 model
  ALSA: hda/realtek: Fix bass speaker DAC assignment on Asus Zephyrus G14
  tty: Fix ->session locking
  tty: Fix ->pgrp locking in tiocspgrp()
  USB: serial: option: fix Quectel BG96 matching
  USB: serial: option: add support for Thales Cinterion EXS82
  USB: serial: option: add Fibocom NL668 variants
  USB: serial: ch341: sort device-id entries
  USB: serial: ch341: add new Product ID for CH341A
  USB: serial: kl5kusb105: fix memleak on open
  usb: gadget: f_fs: Use local copy of descriptors for userspace copy
  Partially revert bpf: Zero-fill re-used per-cpu map element
  pinctrl: baytrail: Fix pin being driven low for a while on gpiod_get(..., GPIOD_OUT_HIGH)
  pinctrl: baytrail: Replace WARN with dev_info_once when setting direct-irq pin to output
  Linux 5.4.82
  RDMA/i40iw: Address an mmap handler exploit in i40iw
  tracing: Remove WARN_ON in start_thread()
  Input: i8042 - add ByteSpeed touchpad to noloop table
  Input: xpad - support Ardwiino Controllers
  ALSA: usb-audio: US16x08: fix value count for level meters
  net/mlx5: Fix wrong address reclaim when command interface is down
  net/mlx5: DR, Proper handling of unsupported Connect-X6DX SW steering
  net/sched: act_mpls: ensure LSE is pullable before reading it
  net: openvswitch: ensure LSE is pullable before reading it
  net: skbuff: ensure LSE is pullable before decrementing the MPLS ttl
  net: mvpp2: Fix error return code in mvpp2_open()
  chelsio/chtls: fix a double free in chtls_setkey()
  vxlan: fix error return code in __vxlan_dev_create()
  net: pasemi: fix error return code in pasemi_mac_open()
  cxgb3: fix error return code in t3_sge_alloc_qset()
  net/x25: prevent a couple of overflows
  net: ip6_gre: set dev->hard_header_len when using header_ops
  geneve: pull IP header before ECN decapsulation
  inet_ecn: Fix endianness of checksum update when setting ECT(1)
  ibmvnic: Fix TX completion error handling
  ibmvnic: Ensure that SCRQ entry reads are correctly ordered
  chelsio/chtls: fix panic during unload reload chtls
  dt-bindings: net: correct interrupt flags in examples
  ipv4: Fix tos mask in inet_rtm_getroute()
  netfilter: bridge: reset skb->pkt_type after NF_INET_POST_ROUTING traversal
  sched/fair: Fix unthrottle_cfs_rq() for leaf_cfs_rq list
  ima: extend boot_aggregate with kernel measurements
  staging/octeon: fix up merge error
  bonding: wait for sysfs kobject destruction before freeing struct slave
  usbnet: ipheth: fix connectivity with iOS 14
  tun: honor IOCB_NOWAIT flag
  tcp: Set INET_ECN_xmit configuration in tcp_reinit_congestion_control
  sock: set sk_err to ee_errno on dequeue from errq
  rose: Fix Null pointer dereference in rose_send_frame()
  net/tls: Protect from calling tls_dev_del for TLS RX twice
  net/tls: missing received data after fast remote close
  net/af_iucv: set correct sk_protocol for child sockets
  ipv6: addrlabel: fix possible memory leak in ip6addrlbl_net_init
  devlink: Hold rtnl lock while reading netdev attributes
  Linux 5.4.81
  ASoC: Intel: Skylake: Automatic DMIC format configuration according to information from NHLT
  ASoC: Intel: Multiple I/O PCM format support for pipe
  ASoC: Intel: Skylake: Await purge request ack on CNL
  ASoC: Intel: Allow for ROM init retry on CNL platforms
  ASoC: Intel: Skylake: Shield against no-NHLT configurations
  ASoC: Intel: Skylake: Enable codec wakeup during chip init
  ASoC: Intel: Skylake: Select hda configuration permissively
  ASoC: Intel: Skylake: Remove superfluous chip initialization
  USB: core: Fix regression in Hercules audio card
  x86/resctrl: Add necessary kernfs_put() calls to prevent refcount leak
  x86/resctrl: Remove superfluous kernfs_get() calls to prevent refcount leak
  x86/speculation: Fix prctl() when spectre_v2_user={seccomp,prctl},ibpb
  x86/mce: Do not overwrite no_way_out if mce_end() fails
  irqchip/exiu: Fix the index of fwspec for IRQ type
  usb: gadget: Fix memleak in gadgetfs_fill_super
  USB: quirks: Add USB_QUIRK_DISCONNECT_SUSPEND quirk for Lenovo A630Z TIO built-in usb-audio card
  usb: gadget: f_midi: Fix memleak in f_midi_alloc
  USB: core: Change %pK for __user pointers to %px
  spi: bcm2835aux: Restore err assignment in bcm2835aux_spi_probe
  perf probe: Fix to die_entrypc() returns error correctly
  perf stat: Use proper cpu for shadow stats
  can: m_can: fix nominal bittiming tseg2 min for version >= 3.1
  can: m_can: m_can_open(): remove IRQF_TRIGGER_FALLING from request_threaded_irq()'s flags
  RDMA/hns: Bugfix for memory window mtpt configuration
  RDMA/hns: Fix retry_cnt and rnr_cnt when querying QP
  platform/x86: toshiba_acpi: Fix the wrong variable assignment
  platform/x86: thinkpad_acpi: Send tablet mode switch at wakeup time
  can: gs_usb: fix endianness problem with candleLight firmware
  efi: EFI_EARLYCON should depend on EFI
  efivarfs: revert "fix memory leak in efivarfs_create()"
  arm64: tegra: Wrong AON HSP reg property size
  optee: add writeback to valid memory type
  ibmvnic: fix NULL pointer dereference in ibmvic_reset_crq
  ibmvnic: fix NULL pointer dereference in reset_sub_crq_queues
  net: ena: set initial DMA width to avoid intel iommu issue
  nfc: s3fwrn5: use signed integer for parsing GPIO numbers
  i40e: Fix removing driver while bare-metal VFs pass traffic
  IB/mthca: fix return value of error branch in mthca_init_cq()
  powerpc/64s: Fix allnoconfig build since uaccess flush
  ibmvnic: notify peers when failover and migration happen
  ibmvnic: fix call_netdevice_notifiers in do_reset
  s390/qeth: fix tear down of async TX buffers
  s390/qeth: fix af_iucv notification race
  s390/qeth: make af_iucv TX notification call more robust
  cxgb4: fix the panic caused by non smac rewrite
  bnxt_en: Release PCI regions when DMA mask setup fails during probe.
  video: hyperv_fb: Fix the cache type when mapping the VRAM
  bnxt_en: fix error return code in bnxt_init_board()
  bnxt_en: fix error return code in bnxt_init_one()
  scsi: ufs: Fix race between shutdown and runtime resume flow
  ARM: dts: dra76x: m_can: fix order of clocks
  arch: pgtable: define MAX_POSSIBLE_PHYSMEM_BITS where needed
  batman-adv: set .owner to THIS_MODULE
  iwlwifi: mvm: write queue_sync_state only for sync
  phy: tegra: xusb: Fix dangling pointer on probe failure
  ARM: OMAP2+: Manage MPU state properly for omap_enter_idle_coupled()
  bus: ti-sysc: Fix bogus resetdone warning on enable for cpsw
  net: dsa: mv88e6xxx: Wait for EEPROM done after HW reset
  xtensa: uaccess: Add missing __user to strncpy_from_user() prototype
  perf/x86: fix sysfs type mismatches
  scsi: target: iscsi: Fix cmd abort fabric stop race
  scsi: libiscsi: Fix NOP race condition
  dmaengine: pl330: _prep_dma_memcpy: Fix wrong burst size
  vhost scsi: fix cmd completion race
  nvme: free sq/cq dbbuf pointers when dbbuf set fails
  proc: don't allow async path resolution of /proc/self components
  HID: Add Logitech Dinovo Edge battery quirk
  HID: logitech-hidpp: Add HIDPP_CONSUMER_VENDOR_KEYS quirk for the Dinovo Edge
  x86/xen: don't unbind uninitialized lock_kicker_irq
  dmaengine: xilinx_dma: use readl_poll_timeout_atomic variant
  HID: add HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE for Gamevice devices
  staging: ralink-gdma: fix kconfig dependency bug for DMA_RALINK
  HID: hid-sensor-hub: Fix issue with devices with no report ID
  Input: i8042 - allow insmod to succeed on devices without an i8042 controller
  HID: add support for Sega Saturn
  HID: cypress: Support Varmilo Keyboards' media hotkeys
  HID: ite: Replace ABS_MISC 120/121 events with touchpad on/off keypresses
  HID: uclogic: Add ID for Trust Flex Design Tablet
  arm64: pgtable: Ensure dirty bit is preserved across pte_wrprotect()
  arm64: pgtable: Fix pte_accessible()
  trace: fix potential dangerous pointer
  KVM: x86: Fix split-irqchip vs interrupt injection window request
  KVM: x86: handle !lapic_in_kernel case in kvm_cpu_*_extint
  KVM: arm64: vgic-v3: Drop the reporting of GICR_TYPER.Last for userspace
  KVM: PPC: Book3S HV: XIVE: Fix possible oops when accessing ESB page
  cifs: fix a memleak with modefromsid
  smb3: Handle error case during offload read path
  smb3: Avoid Mid pending list corruption
  smb3: Call cifs reconnect from demultiplex thread
  wireless: Use linux/stddef.h instead of stddef.h
  btrfs: fix lockdep splat when reading qgroup config on mount
  btrfs: don't access possibly stale fs_info data for printing duplicate device
  btrfs: tree-checker: add missing returns after data_ref alignment checks
  btrfs: tree-checker: add missing return after error in root_item
  netfilter: clear skb->next in NF_HOOK_LIST()
  ipv4: use IS_ENABLED instead of ifdef
  spi: bcm2835: Fix use-after-free on unbind
  spi: bcm-qspi: Fix use-after-free on unbind
  Revert "Exempt multicast addresses from five-second neighbor lifetime"
  Linux 5.4.80
  sched/fair: Fix overutilized update in enqueue_task_fair()
  mm, page_alloc: skip ->watermark_boost for atomic order-0 allocations
  mm/userfaultfd: do not access vma->vm_mm after calling handle_userfault()
  mm: memcg/slab: fix root memcg vmstats
  x86/microcode/intel: Check patch signature before saving microcode for early loading
  seccomp: Set PF_SUPERPRIV when checking capability
  ptrace: Set PF_SUPERPRIV when checking capability
  mmc: sdhci-pci: Prefer SDR25 timing for High Speed mode for BYT-based Intel controllers
  drm/i915: Handle max_bpc==16
  drm/amd/display: Add missing pflip irq for dcn2.0
  Drivers: hv: vmbus: Allow cleanup of VMBUS_CONNECT_CPU if disconnected
  s390/dasd: fix null pointer dereference for ERP requests
  s390/cpum_sf.c: fix file permission for cpum_sfb_size
  mac80211: free sta in sta_info_insert_finish() on errors
  mac80211: minstrel: fix tx status processing corner case
  mac80211: minstrel: remove deferred sampling code
  xtensa: disable preemption around cache alias management calls
  xtensa: fix TLBTEMP area placement
  regulator: workaround self-referent regulators
  regulator: avoid resolve_supply() infinite recursion
  regulator: fix memory leak with repeated set_machine_constraints()
  regulator: pfuze100: limit pfuze-support-disable-sw to pfuze{100,200}
  spi: bcm2835aux: Fix use-after-free on unbind
  spi: npcm-fiu: Don't leak SPI master in probe error path
  spi: Introduce device-managed SPI controller allocation
  spi: lpspi: Fix use-after-free on unbind
  iio: adc: mediatek: fix unset field
  iio: accel: kxcjk1013: Add support for KIOX010A ACPI DSM for setting tablet-mode
  iio: accel: kxcjk1013: Replace is_smo8500_device with an acpi_type enum
  ext4: fix bogus warning in ext4_update_dx_flag()
  iio: light: fix kconfig dependency bug for VCNL4035
  staging: rtl8723bs: Add 024c:0627 to the list of SDIO device-ids
  efivarfs: fix memory leak in efivarfs_create()
  HID: logitech-dj: Fix an error in mse_bluetooth_descriptor
  tty: serial: imx: keep console clocks always on
  tty: serial: imx: fix potential deadlock
  ALSA: hda/realtek: Add some Clove SSID in the ALC293(ALC1220)
  ALSA: hda/realtek - Add supported for Lenovo ThinkPad Headset Button
  ALSA: mixart: Fix mutex deadlock
  ALSA: ctl: fix error path at adding user-defined element set
  ALSA: usb-audio: Add delay quirk for all Logitech USB devices
  ALSA: firewire: Clean up a locking issue in copy_resp_to_buf()
  speakup: Do not let the line discipline be used several times
  HID: logitech-dj: Fix Dinovo Mini when paired with a MX5x00 receiver
  HID: logitech-dj: Handle quad/bluetooth keyboards with a builtin trackpad
  HID: logitech-hidpp: Add PID for MX Anywhere 2
  libfs: fix error cast of negative value in simple_attr_write()
  efi/x86: Free efi_pgd with free_pages()
  bpf, sockmap: Avoid returning unneeded EAGAIN when redirecting to self
  bpf, sockmap: Use truesize with sk_rmem_schedule()
  bpf, sockmap: On receive programs try to fast track SK_PASS ingress
  bpf, sockmap: Skb verdict SK_PASS to self already checked rmem limits
  xfs: revert "xfs: fix rmap key and record comparison functions"
  fail_function: Remove a redundant mutex unlock
  regulator: ti-abb: Fix array out of bound read access on the first transition
  xfs: return corresponding errcode if xfs_initialize_perag() fail
  xfs: strengthen rmap record flags checking
  xfs: fix the minrecs logic when dealing with inode root child blocks
  can: m_can: process interrupt only when not runtime suspended
  can: flexcan: flexcan_chip_start(): fix erroneous flexcan_transceiver_enable() during bus-off recovery
  iommu/vt-d: Avoid panic if iommu init fails in tboot system
  iommu/vt-d: Move intel_iommu_gfx_mapped to Intel IOMMU header
  can: kvaser_usb: kvaser_usb_hydra: Fix KCAN bittiming limits
  can: kvaser_pciefd: Fix KCAN bittiming limits
  bpf, sockmap: Ensure SO_RCVBUF memory is observed on ingress redirect
  bpf, sockmap: Fix partial copy_page_to_iter so progress can still be made
  net/mlx5: E-Switch, Fail mlx5_esw_modify_vport_rate if qos disabled
  drm/sun4i: dw-hdmi: fix error return code in sun8i_dw_hdmi_bind()
  MIPS: Alchemy: Fix memleak in alchemy_clk_setup_cpu
  selftests/bpf: Fix error return code in run_getsockopt_test()
  ASoC: qcom: lpass-platform: Fix memory leak
  can: m_can: m_can_stop(): set device to software init mode before closing
  can: m_can: m_can_class_free_dev(): introduce new function
  can: m_can: m_can_handle_state_change(): fix state change
  can: tcan4x5x: tcan4x5x_can_remove(): fix order of deregistration
  can: tcan4x5x: tcan4x5x_can_probe(): add missing error checking for devm_regmap_init()
  can: tcan4x5x: replace depends on REGMAP_SPI with depends on SPI
  can: flexcan: fix failure handling of pm_runtime_get_sync()
  can: peak_usb: fix potential integer overflow on shift of a int
  can: mcba_usb: mcba_usb_start_xmit(): first fill skb, then pass to can_put_echo_skb()
  can: ti_hecc: Fix memleak in ti_hecc_probe
  can: dev: can_restart(): post buffer from the right context
  can: af_can: prevent potential access of uninitialized member in canfd_rcv()
  can: af_can: prevent potential access of uninitialized member in can_rcv()
  ip_tunnels: Set tunnel option flag when tunnel metadata is present
  tools, bpftool: Add missing close before bpftool net attach exit
  perf lock: Don't free "lock_seq_stat" if read_count isn't zero
  RDMA/sw: Don't allow drivers using dma_virt_ops on highmem configs
  RDMA/pvrdma: Fix missing kfree() in pvrdma_register_device()
  rfkill: Fix use-after-free in rfkill_resume()
  Input: resistive-adc-touch - fix kconfig dependency on IIO_BUFFER
  ARM: dts: imx50-evk: Fix the chip select 1 IOMUX
  arm64: dts: imx8mm: fix voltage for 1.6GHz CPU operating point
  swiotlb: using SIZE_MAX needs limits.h included
  arm: dts: imx6qdl-udoo: fix rgmii phy-mode for ksz9031 phy
  arm64: dts imx8mn: Remove non-existent USB OTG2
  arm64: dts: allwinner: h5: OrangePi Prime: Fix ethernet node
  MIPS: export has_transparent_hugepage() for modules
  Input: adxl34x - clean up a data type in adxl34x_probe()
  arm64: dts: allwinner: a64: bananapi-m64: Enable RGMII RX/TX delay on PHY
  ARM: dts: sunxi: bananapi-m2-plus: Enable RGMII RX/TX delay on Ethernet PHY
  ARM: dts: sun9i: Enable both RGMII RX/TX delay on Ethernet PHY
  ARM: dts: sun8i: a83t: Enable both RGMII RX/TX delay on Ethernet PHY
  ARM: dts: sun8i: h3: orangepi-plus2e: Enable RGMII RX/TX delay on Ethernet PHY
  ARM: dts: sun7i: bananapi-m1-plus: Enable RGMII RX/TX delay on Ethernet PHY
  ARM: dts: sun7i: cubietruck: Enable RGMII RX/TX delay on Ethernet PHY
  ARM: dts: sun6i: a31-hummingbird: Enable RGMII RX/TX delay on Ethernet PHY
  Revert "arm: sun8i: orangepi-pc-plus: Set EMAC activity LEDs to active high"
  ARM: dts: sun8i: r40: bananapi-m2-ultra: Fix ethernet node
  arm64: dts: allwinner: h5: OrangePi PC2: Fix ethernet node
  arm64: dts: allwinner: a64: Pine64 Plus: Fix ethernet node
  arm64: dts: allwinner: a64: OrangePi Win: Fix ethernet node
  arm64: dts: allwinner: Pine H64: Enable both RGMII RX/TX delay
  arm64: dts: allwinner: beelink-gs1: Enable both RGMII RX/TX delay
  hwmon: (pwm-fan) Fix RPM calculation
  gfs2: fix possible reference leak in gfs2_check_blk_type
  vfs: remove lockdep bogosity in __sb_start_write
  arm64: smp: Tell RCU about CPUs that fail to come online
  arm64: psci: Avoid printing in cpu_psci_cpu_die()
  arm64: errata: Fix handling of 1418040 with late CPU onlining
  ACPI: button: Add DMI quirk for Medion Akoya E2228T
  selftests: kvm: Fix the segment descriptor layout to match the actual layout
  scsi: ufs: Fix unbalanced scsi_block_reqs_cnt caused by ufshcd_hold()
  pinctrl: rockchip: enable gpio pclk for rockchip_gpio_to_irq
  net: ftgmac100: Fix crash when removing driver
  net/ncsi: Fix netlink registration
  net: usb: qmi_wwan: Set DTR quirk for MR400
  net/mlx5: Disable QoS when min_rates on all VFs are zero
  net/mlx5: Add handling of port type in rule deletion
  tcp: only postpone PROBE_RTT if RTT is < current min_rtt estimate
  sctp: change to hold/put transport for proto_unreach_timer
  qlcnic: fix error return code in qlcnic_83xx_restart_hw()
  qed: fix error return code in qed_iwarp_ll2_start()
  page_frag: Recover from memory pressure
  net: x25: Increase refcnt of "struct x25_neigh" in x25_rx_call_request
  net/tls: fix corrupted data in recvmsg
  net/smc: fix direct access to ib_gid_addr->ndev in smc_ib_determine_gid()
  net: qualcomm: rmnet: Fix incorrect receive packet handling during cleanup
  net/mlx4_core: Fix init_hca fields offset
  net: lantiq: Wait for the GPHY firmware to be ready
  netlabel: fix an uninitialized warning in netlbl_unlabel_staticlist()
  netlabel: fix our progress tracking in netlbl_unlabel_staticlist()
  net: Have netpoll bring-up DSA management interface
  net: ethernet: ti: cpsw: fix error return code in cpsw_probe()
  net: dsa: mv88e6xxx: Avoid VTU corruption on 6097
  net: bridge: add missing counters to ndo_get_stats64 callback
  net: b44: fix error return code in b44_init_one()
  mlxsw: core: Use variable timeout for EMAD retries
  lan743x: prevent entire kernel HANG on open, for some platforms
  lan743x: fix issue causing intermittent kernel log warnings
  ipv6: Fix error path to cancel the message
  inet_diag: Fix error path to cancel the message in inet_req_diag_fill()
  Exempt multicast addresses from five-second neighbor lifetime
  devlink: Add missing genlmsg_cancel() in devlink_nl_sb_port_pool_fill()
  bnxt_en: read EEPROM A2h address using page 0
  atm: nicstar: Unmap DMA on send error
  ah6: fix error return code in ah6_input()
  Linux 5.4.79
  ACPI: GED: fix -Wformat
  KVM: x86: clflushopt should be treated as a no-op by emulation
  can: proc: can_remove_proc(): silence remove_proc_entry warning
  mac80211: always wind down STA state
  Input: sunkbd - avoid use-after-free in teardown paths
  net: lantiq: Add locking for TX DMA channel
  powerpc/8xx: Always fault when _PAGE_ACCESSED is not set
  net/mlx5: Add retry mechanism to the command entry index allocation
  net/mlx5: Fix a race when moving command interface to events mode
  net/mlx5: poll cmd EQ in case of command timeout
  net/mlx5: Use async EQ setup cleanup helpers for multiple EQs
  MIPS: PCI: Fix MIPS build
  selftests/powerpc: entry flush test
  powerpc: Only include kup-radix.h for 64-bit Book3S
  powerpc/64s: flush L1D after user accesses
  powerpc/64s: flush L1D on kernel entry
  selftests/powerpc: rfi_flush: disable entry flush if present
  Linux 5.4.78
  Convert trailing spaces and periods in path components
  net: sch_generic: fix the missing new qdisc assignment bug
  perf/core: Fix race in the perf_mmap_close() function
  perf scripting python: Avoid declaring function pointers with a visibility attribute
  x86/speculation: Allow IBPB to be conditionally enabled on CPUs with always-on STIBP
  powerpc/603: Always fault when _PAGE_ACCESSED is not set
  drm/i915: Correctly set SFC capability for video engines
  r8169: fix potential skb double free in an error path
  tipc: fix memory leak in tipc_topsrv_start()
  net/x25: Fix null-ptr-deref in x25_connect
  net: Update window_clamp if SOCK_RCVBUF is set
  net: udp: fix UDP header access on Fast/frag0 UDP GRO
  net/af_iucv: fix null pointer dereference on shutdown
  IPv6: Set SIT tunnel hard_header_len to zero
  swiotlb: fix "x86: Don't panic if can not alloc buffer for swiotlb"
  pinctrl: amd: fix incorrect way to disable debounce filter
  pinctrl: amd: use higher precision for 512 RtcClk
  drm/gma500: Fix out-of-bounds access to struct drm_device.vblank[]
  don't dump the threads that had been already exiting when zapped.
  mmc: renesas_sdhi_core: Add missing tmio_mmc_host_free() at remove
  mmc: sdhci-of-esdhc: Handle pulse width detection erratum for more SoCs
  gpio: pcie-idio-24: Enable PEX8311 interrupts
  gpio: pcie-idio-24: Fix IRQ Enable Register value
  gpio: pcie-idio-24: Fix irq mask when masking
  selinux: Fix error return code in sel_ib_pkey_sid_slow()
  btrfs: fix potential overflow in cluster_pages_for_defrag on 32bit arch
  ocfs2: initialize ip_next_orphan
  reboot: fix overflow parsing reboot cpu number
  Revert "kernel/reboot.c: convert simple_strtoul to kstrtoint"
  mm/slub: fix panic in slab_alloc_node()
  jbd2: fix up sparse warnings in checkpoint code
  futex: Don't enable IRQs unconditionally in put_pi_state()
  mei: protect mei_cl_mtu from null dereference
  virtio: virtio_console: fix DMA memory allocation for rproc serial
  xhci: hisilicon: fix reference leak in xhci_histb_probe
  usb: cdc-acm: Add DISABLE_ECHO for Renesas USB Download mode
  uio: Fix use-after-free in uio_unregister_device()
  thunderbolt: Add the missed ida_simple_remove() in ring_request_msix()
  thunderbolt: Fix memory leak if ida_simple_get() fails in enumerate_services()
  KVM: arm64: Don't hide ID registers from userspace
  btrfs: dev-replace: fail mount if we don't have replace item with target device
  btrfs: fix min reserved size calculation in merge_reloc_root
  btrfs: ref-verify: fix memory leak in btrfs_ref_tree_mod
  ext4: unlock xattr_sem properly in ext4_inline_data_truncate()
  ext4: correctly report "not supported" for {usr,grp}jquota when !CONFIG_QUOTA
  erofs: derive atime instead of leaving it empty
  perf: Fix get_recursion_context()
  vrf: Fix fast path output packet handling with async Netfilter rules
  cosa: Add missing kfree in error path of cosa_write
  of/address: Fix of_node memory leak in of_dma_is_coherent
  xfs: fix a missing unlock on error in xfs_fs_map_blocks
  lan743x: fix "BUG: invalid wait context" when setting rx mode
  xfs: fix brainos in the refcount scrubber's rmap fragment processor
  xfs: fix rmap key and record comparison functions
  xfs: set the unwritten bit in rmap lookup flags in xchk_bmap_get_rmapextents
  xfs: fix flags argument to rmap lookup when converting shared file rmaps
  igc: Fix returning wrong statistics
  nbd: fix a block_device refcount leak in nbd_release
  bpf: Zero-fill re-used per-cpu map element
  SUNRPC: Fix general protection fault in trace_rpc_xdr_overflow()
  net/mlx5: Fix deletion of duplicate rules
  pinctrl: aspeed: Fix GPI only function problem.
  bpf: Don't rely on GCC __attribute__((optimize)) to disable GCSE
  ARM: 9019/1: kprobes: Avoid fortify_panic() when copying optprobe template
  pinctrl: intel: Set default bias in case no particular value given
  mfd: sprd: Add wakeup capability for PMIC IRQ
  tick/common: Touch watchdog in tick_unfreeze() on all CPUs
  spi: bcm2835: remove use of uninitialized gpio flags variable
  tpm_tis: Disable interrupts on ThinkPad T490s
  i2c: sh_mobile: implement atomic transfers
  riscv: Set text_offset correctly for M-Mode
  selftests: proc: fix warning: _GNU_SOURCE redefined
  amd/amdgpu: Disable VCN DPG mode for Picasso
  i2c: mediatek: move dma reset before i2c reset
  vfio/pci: Bypass IGD init in case of -ENODEV
  vfio: platform: fix reference leak in vfio_platform_open
  s390/smp: move rcu_cpu_starting() earlier
  iommu/amd: Increase interrupt remapping table limit to 512 entries
  nvme-tcp: avoid repeated request completion
  nvme-rdma: avoid repeated request completion
  nvme-tcp: avoid race between time out and tear down
  nvme-rdma: avoid race between time out and tear down
  nvme: introduce nvme_sync_io_queues
  scsi: mpt3sas: Fix timeouts observed while reenabling IRQ
  scsi: scsi_dh_alua: Avoid crash during alua_bus_detach()
  tracing: Fix the checking of stackidx in __ftrace_trace_stack
  cfg80211: regulatory: Fix inconsistent format argument
  cfg80211: initialize wdev data earlier
  mac80211: fix use of skb payload instead of header
  drm/amd/pm: do not use ixFEATURE_STATUS for checking smc running
  drm/amd/pm: perform SMC reset on suspend/hibernation
  drm/amdgpu: perform srbm soft reset always on SDMA resume
  scsi: hpsa: Fix memory leak in hpsa_init_one()
  gfs2: check for live vs. read-only file system in gfs2_fitrim
  gfs2: Add missing truncate_inode_pages_final for sd_aspace
  gfs2: Free rd_bits later in gfs2_clear_rgrpd to fix use-after-free
  ALSA: hda: Reinstate runtime_allow() for all hda controllers
  ALSA: hda: Separate runtime and system suspend
  selftests: pidfd: fix compilation errors due to wait.h
  selftests/ftrace: check for do_sys_openat2 in user-memory test
  usb: gadget: goku_udc: fix potential crashes in probe
  opp: Reduce the size of critical section in _opp_table_kref_release()
  usb: dwc3: pci: add support for the Intel Alder Lake-S
  ASoC: cs42l51: manage mclk shutdown delay
  ASoC: qcom: sdm845: set driver name correctly
  ath9k_htc: Use appropriate rs_datalen type
  KVM: x86: don't expose MSR_IA32_UMWAIT_CONTROL unconditionally
  KVM: arm64: ARM_SMCCC_ARCH_WORKAROUND_1 doesn't return SMCCC_RET_NOT_REQUIRED
  random32: make prandom_u32() output unpredictable
  tpm: efi: Don't create binary_bios_measurements file for an empty log
  xfs: fix scrub flagging rtinherit even if there is no rt device
  xfs: flush new eof page on truncate to avoid post-eof corruption
  can: flexcan: flexcan_remove(): disable wakeup completely
  can: flexcan: remove FLEXCAN_QUIRK_DISABLE_MECR quirk for LS1021A
  can: peak_canfd: pucan_handle_can_rx(): fix echo management when loopback is on
  can: peak_usb: peak_usb_get_ts_time(): fix timestamp wrapping
  can: peak_usb: add range checking in decode operations
  can: xilinx_can: handle failure cases of pm_runtime_get_sync
  can: ti_hecc: ti_hecc_probe(): add missed clk_disable_unprepare() in error path
  can: j1939: j1939_sk_bind(): return failure if netdev is down
  can: j1939: swap addr and pgn in the send example
  can: can_create_echo_skb(): fix echo skb generation: always use skb_clone()
  can: dev: __can_get_echo_skb(): fix real payload length return value for RTR frames
  can: dev: can_get_echo_skb(): prevent call to kfree_skb() in hard IRQ context
  can: rx-offload: don't call kfree_skb() from IRQ context
  afs: Fix warning due to unadvanced marshalling pointer
  iommu/vt-d: Fix a bug for PDP check in prq_event_thread
  ALSA: hda: prevent undefined shift in snd_hdac_ext_bus_get_link()
  perf tools: Add missing swap for ino_generation
  perf trace: Fix segfault when trying to trace events by cgroup
  powerpc/eeh_cache: Fix a possible debugfs deadlock
  netfilter: ipset: Update byte and packet counters regardless of whether they match
  netfilter: nf_tables: missing validation from the abort path
  netfilter: use actual socket sk rather than skb sk when routing harder
  xfs: set xefi_discard when creating a deferred agfl free log intent item
  ASoC: codecs: wcd9335: Set digital gain range correctly
  net: xfrm: fix a race condition during allocing spi
  hv_balloon: disable warning when floor reached
  genirq: Let GENERIC_IRQ_IPI select IRQ_DOMAIN_HIERARCHY
  ASoC: Intel: kbl_rt5663_max98927: Fix kabylake_ssp_fixup function
  btrfs: reschedule when cloning lots of extents
  btrfs: sysfs: init devices outside of the chunk_mutex
  btrfs: tracepoints: output proper root owner for trace_find_free_extent()
  usb: dwc3: gadget: Reclaim extra TRBs after request completion
  usb: dwc3: gadget: Continue to process pending requests
  PCI: qcom: Make sure PCIe is reset before init for rev 2.1.0
  KVM: arm64: Force PTE mapping on fault resulting in a device mapping
  nbd: don't update block size after device is started
  time: Prevent undefined behaviour in timespec64_to_ns()
  drm/i915/gem: Flush coherency domains on first set-domain-ioctl
  Linux 5.4.77
  powercap: restrict energy meter to root access
  Linux 5.4.76
  arm64: dts: marvell: espressobin: Add ethernet switch aliases
  perf/core: Fix a memory leak in perf_event_parse_addr_filter()
  xfs: flush for older, xfs specific ioctls
  PM: runtime: Resume the device earlier in __device_release_driver()
  PM: runtime: Drop pm_runtime_clean_up_links()
  PM: runtime: Drop runtime PM references to supplier on link removal
  ARC: stack unwinding: avoid indefinite looping
  drm/panfrost: Fix a deadlock between the shrinker and madvise path
  usb: mtu3: fix panic in mtu3_gadget_stop()
  USB: Add NO_LPM quirk for Kingston flash drive
  usb: dwc3: ep0: Fix delay status handling
  tty: serial: fsl_lpuart: LS1021A has a FIFO size of 16 words, like LS1028A
  tty: serial: fsl_lpuart: add LS1028A support
  USB: serial: option: add Telit FN980 composition 0x1055
  USB: serial: option: add LE910Cx compositions 0x1203, 0x1230, 0x1231
  USB: serial: option: add Quectel EC200T module support
  USB: serial: cyberjack: fix write-URB completion race
  serial: txx9: add missing platform_driver_unregister() on error in serial_txx9_init
  serial: 8250_mtk: Fix uart_get_baud_rate warning
  s390/pkey: fix paes selftest failure with paes and pkey static build
  fork: fix copy_process(CLONE_PARENT) race with the exiting ->real_parent
  vt: Disable KD_FONT_OP_COPY
  Revert "coresight: Make sysfs functional on topologies with per core sink"
  arm64/smp: Move rcu_cpu_starting() earlier
  drm/nouveau/gem: fix "refcount_t: underflow; use-after-free"
  drm/nouveau/nouveau: fix the start/end range for migration
  usb: cdns3: gadget: suspicious implicit sign extension
  ACPI: NFIT: Fix comparison to '-ENXIO'
  drm/vc4: drv: Add error handling for bind
  nvmet: fix a NULL pointer dereference when tracing the flush command
  nvme-rdma: handle unexpected nvme completion data length
  vsock: use ns_capable_noaudit() on socket create
  scsi: ibmvscsi: Fix potential race after loss of transport
  drm/amdgpu: add DID for navi10 blockchain SKU
  scsi: core: Don't start concurrent async scan on same host
  blk-cgroup: Pre-allocate tree node on blkg_conf_prep
  blk-cgroup: Fix memleak on error path
  drm/sun4i: frontend: Fix the scaler phase on A33
  drm/sun4i: frontend: Reuse the ch0 phase for RGB formats
  drm/sun4i: frontend: Rework a bit the phase data
  of: Fix reserved-memory overlap detection
  x86/kexec: Use up-to-date screen_info copy to fill boot params
  arm64: dts: meson: add missing g12 rng clock
  ARM: dts: sun4i-a10: fix cpu_alert temperature
  futex: Handle transient "ownerless" rtmutex state correctly
  tracing: Fix out of bounds write in get_trace_buf
  spi: bcm2835: fix gpio cs level inversion
  regulator: defer probe when trying to get voltage from unresolved supply
  ftrace: Handle tracing when switching between context
  ftrace: Fix recursion check for NMI test
  mtd: spi-nor: Don't copy self-pointing struct around
  ring-buffer: Fix recursion protection transitions between interrupt context
  gfs2: Wake up when sd_glock_disposal becomes zero
  mm: always have io_remap_pfn_range() set pgprot_decrypted()
  kthread_worker: prevent queuing delayed work from timer_fn when it is being canceled
  lib/crc32test: remove extra local_irq_disable/enable
  mm: mempolicy: fix potential pte_unmap_unlock pte error
  ALSA: usb-audio: Add implicit feedback quirk for MODX
  ALSA: usb-audio: Add implicit feedback quirk for Qu-16
  ALSA: usb-audio: add usb vendor id as DSD-capable for Khadas devices
  ALSA: usb-audio: Add implicit feedback quirk for Zoom UAC-2
  ALSA: hda/realtek - Enable headphone for ASUS TM420
  ALSA: hda/realtek - Fixed HP headset Mic can't be detected
  Fonts: Replace discarded const qualifier
  sfp: Fix error handling in sfp_probe()
  sctp: Fix COMM_LOST/CANT_STR_ASSOC err reporting on big-endian platforms
  powerpc/vnic: Extend "failover pending" window
  net: usb: qmi_wwan: add Telit LE910Cx 0x1230 composition
  ip_tunnel: fix over-mtu packet send fail without TUNNEL_DONT_FRAGMENT flags
  ionic: check port ptr before use
  gianfar: Account for Tx PTP timestamp in the skb headroom
  gianfar: Replace skb_realloc_headroom with skb_cow_head for PTP
  chelsio/chtls: fix always leaking ctrl_skb
  chelsio/chtls: fix memory leaks caused by a race
  cadence: force nonlinear buffers to be cloned
  ptrace: fix task_join_group_stop() for the case when current is traced
  tipc: fix use-after-free in tipc_bcast_get_mode
  arm64: Change .weak to SYM_FUNC_START_WEAK_PI for arch/arm64/lib/mem*.S
  arm64: lib: Use modern annotations for assembly functions
  arm64: asm: Add new-style position independent function annotations
  linkage: Introduce new macros for assembler symbols
  ASoC: Intel: Skylake: Add alternative topology binary name
  drm/i915: Drop runtime-pm assert from vgpu io accessors
  drm/i915/gt: Delay execlist processing for tgl
  drm/i915: Break up error capture compression loops with cond_resched()
  Linux 5.4.75
  staging: octeon: Drop on uncorrectable alignment or FCS error
  staging: octeon: repair "fixed-link" support
  staging: comedi: cb_pcidas: Allow 2-channel commands for AO subdevice
  staging: fieldbus: anybuss: jump to correct label in an error path
  KVM: arm64: Fix AArch32 handling of DBGD{CCINT,SCRext} and DBGVCR
  device property: Don't clear secondary pointer for shared primary firmware node
  device property: Keep secondary firmware node secondary by type
  ARM: s3c24xx: fix missing system reset
  ARM: samsung: fix PM debug build with DEBUG_LL but !MMU
  arm: dts: mt7623: add missing pause for switchport
  hil/parisc: Disable HIL driver when it gets stuck
  cachefiles: Handle readpage error correctly
  arm64: berlin: Select DW_APB_TIMER_OF
  tty: make FONTX ioctl use the tty pointer they were actually passed
  drm/amd/pm: increase mclk switch threshold to 200 us
  mmc: sdhci: Use Auto CMD Auto Select only when v4_mode is true
  mmc: sdhci-of-esdhc: set timeout to max before tuning
  drm/ttm: fix eviction valuable range check.
  ext4: fix invalid inode checksum
  ext4: fix error handling code in add_new_gdb
  ext4: fix leaking sysfs kobject after failed mount
  vringh: fix __vringh_iov() when riov and wiov are different
  ring-buffer: Return 0 on success from ring_buffer_resize()
  9P: Cast to loff_t before multiplying
  libceph: clear con->out_msg on Policy::stateful_server faults
  ceph: promote to unsigned long long before shifting
  drm/amd/display: Fix kernel panic by dal_gpio_open() error
  drm/amd/display: Don't invoke kgdb_breakpoint() unconditionally
  drm/amdgpu: increase the reserved VM size to 2MB
  drm/amd/display: Avoid MST manager resource leak.
  drm/amdkfd: Use same SQ prefetch setting as amdgpu
  drm/amdgpu: correct the gpu reset handling for job != NULL case
  drm/amd/display: Increase timeout for DP Disable
  drm/amdgpu: don't map BO in reserved region
  i2c: imx: Fix external abort on interrupt in exit paths
  rtc: rx8010: don't modify the global rtc ops
  ia64: fix build error with !COREDUMP
  ubi: check kthread_should_stop() after the setting of task state
  ARC: perf: redo the pct irq missing in device-tree handling
  perf python scripting: Fix printable strings in python3 scripts
  ubifs: mount_ubifs: Release authentication resource in error handling path
  ubifs: Don't parse authentication mount options in remount process
  ubifs: Fix a memleak after dumping authentication mount options
  ubifs: journal: Make sure to not dirty twice for auth nodes
  ubifs: xattr: Fix some potential memory leaks while iterating entries
  ubifs: dent: Fix some potential memory leaks while iterating entries
  NFSD: Add missing NFSv2 .pc_func methods
  NFSv4.2: support EXCHGID4_FLAG_SUPP_FENCE_OPS 4.2 EXCHANGE_ID flag
  NFSv4: Wait for stateid updates after CLOSE/OPEN_DOWNGRADE
  powerpc: Fix undetected data corruption with P9N DD2.1 VSX CI load emulation
  powerpc/powermac: Fix low_sleep_handler with KUAP and KUEP
  powerpc/powernv/elog: Fix race while processing OPAL error log event.
  powerpc/memhotplug: Make lmb size 64bit
  powerpc: Warn about use of smt_snooze_delay
  powerpc/rtas: Restrict RTAS requests from userspace
  s390/stp: add locking to sysfs functions
  MIPS: DEC: Restore bootmem reservation for firmware working memory area
  powerpc/drmem: Make lmb_size 64 bit
  iio:gyro:itg3200: Fix timestamp alignment and prevent data leak.
  iio:adc:ti-adc12138 Fix alignment issue with timestamp
  iio:adc:ti-adc0832 Fix alignment issue with timestamp
  iio: adc: gyroadc: fix leak of device node iterator
  iio:light:si1145: Fix timestamp alignment and prevent data leak.
  dmaengine: dma-jz4780: Fix race in jz4780_dma_tx_status
  udf: Fix memory leak when mounting
  HID: wacom: Avoid entering wacom_wac_pen_report for pad / battery
  vt: keyboard, extend func_buf_lock to readers
  vt: keyboard, simplify vt_kdgkbsent
  drm/i915: Force VT'd workarounds when running as a guest OS
  usb: host: fsl-mph-dr-of: check return of dma_set_mask()
  usb: typec: tcpm: reset hard_reset_count for any disconnect
  usb: cdc-acm: fix cooldown mechanism
  usb: dwc3: gadget: END_TRANSFER before CLEAR_STALL command
  usb: dwc3: gadget: Resume pending requests after CLEAR_STALL
  usb: dwc3: core: don't trigger runtime pm when remove driver
  usb: dwc3: core: add phy cleanup for probe error handling
  usb: dwc3: gadget: Check MPS of the request length
  usb: dwc3: ep0: Fix ZLP for OUT ep0 requests
  usb: dwc3: pci: Allow Elkhart Lake to utilize DSM method for PM functionality
  usb: xhci: Workaround for S3 issue on AMD SNPS 3.0 xHC
  btrfs: fix readahead hang and use-after-free after removing a device
  btrfs: fix use-after-free on readahead extent after failure to create it
  btrfs: tree-checker: validate number of chunk stripes and parity
  btrfs: cleanup cow block on error
  btrfs: tree-checker: fix false alert caused by legacy btrfs root item
  btrfs: use kvzalloc() to allocate clone_roots in btrfs_ioctl_send()
  btrfs: send, recompute reference path after orphanization of a directory
  btrfs: send, orphanize first all conflicting inodes when processing references
  btrfs: reschedule if necessary when logging directory items
  btrfs: improve device scanning messages
  btrfs: qgroup: fix wrong qgroup metadata reserve for delayed inode
  PM: runtime: Remove link state checks in rpm_get/put_supplier()
  scsi: qla2xxx: Fix crash on session cleanup with unload
  scsi: mptfusion: Fix null pointer dereferences in mptscsih_remove()
  w1: mxc_w1: Fix timeout resolution problem leading to bus error
  acpi-cpufreq: Honor _PSD table setting on new AMD CPUs
  ACPI: EC: PM: Drop ec_no_wakeup check from acpi_ec_dispatch_gpe()
  ACPI: EC: PM: Flush EC work unconditionally after wakeup
  PCI/ACPI: Whitelist hotplug ports for D3 if power managed by ACPI
  ACPI: debug: don't allow debugging when ACPI is disabled
  ACPI: video: use ACPI backlight for HP 635 Notebook
  ACPI / extlog: Check for RDMSR failure
  ACPI: button: fix handling lid state changes when input device closed
  NFS: fix nfs_path in case of a rename retry
  fs: Don't invalidate page buffers in block_write_full_page()
  media: uvcvideo: Fix uvc_ctrl_fixup_xu_info() not having any effect
  leds: bcm6328, bcm6358: use devres LED registering function
  extcon: ptn5150: Fix usage of atomic GPIO with sleeping GPIO chips
  spi: sprd: Release DMA channel also on probe deferral
  perf/x86/amd/ibs: Fix raw sample data accumulation
  perf/x86/amd/ibs: Don't include randomized bits in get_ibs_op_count()
  perf/x86/intel: Fix Ice Lake event constraint table
  selftests/x86/fsgsbase: Test PTRACE_PEEKUSER for GSBASE with invalid LDT GS
  seccomp: Make duplicate listener detection non-racy
  mmc: sdhci-acpi: AMDI0040: Set SDHCI_QUIRK2_PRESET_VALUE_BROKEN
  mmc: sdhci: Add LTR support for some Intel BYT based controllers
  md/raid5: fix oops during stripe resizing
  nvme-rdma: fix crash when connect rejected
  sgl_alloc_order: fix memory leak
  nbd: make sure the config put is called before notifying the waiter
  ARM: dts: s5pv210: remove dedicated 'audio-subsystem' node
  ARM: dts: s5pv210: move PMU node out of clock controller
  ARM: dts: s5pv210: move fixed clocks under root node
  ARM: dts: s5pv210: remove DMA controller bus node name to fix dtschema warnings
  memory: emif: Remove bogus debugfs error handling
  ARM: dts: omap4: Fix sgx clock rate for 4430
  arm64: dts: renesas: ulcb: add full-pwr-cycle-in-suspend into eMMC nodes
  cifs: handle -EINTR in cifs_setattr
  gfs2: add validation checks for size of superblock
  gfs2: use-after-free in sysfs deregistration
  KVM: PPC: Book3S HV: Do not allocate HPT for a nested guest
  ext4: Detect already used quota file early
  drivers: watchdog: rdc321x_wdt: Fix race condition bugs
  net: 9p: initialize sun_server.sun_path to have addr's value only when addr is valid
  clk: ti: clockdomain: fix static checker warning
  rpmsg: glink: Use complete_all for open states
  bnxt_en: Log unknown link speed appropriately.
  md/bitmap: md_bitmap_get_counter returns wrong blocks
  btrfs: fix replace of seed device
  ARC: [dts] fix the errors detected by dtbs_check
  drm/amd/display: HDMI remote sink need mode validation for Linux
  power: supply: test_power: add missing newlines when printing parameters by sysfs
  ACPI: HMAT: Fix handling of changes from ACPI 6.2 to ACPI 6.3
  bus/fsl_mc: Do not rely on caller to provide non NULL mc_io
  drivers/net/wan/hdlc_fr: Correctly handle special skb->protocol values
  brcmfmac: Fix warning message after dongle setup failed
  ACPI: Add out of bounds and numa_off protections to pxm_to_node()
  xfs: don't free rt blocks when we're doing a REMAP bunmapi call
  can: flexcan: disable clocks during stop mode
  arm64/mm: return cpu_all_mask when node is NUMA_NO_NODE
  SUNRPC: Mitigate cond_resched() in xprt_transmit()
  usb: xhci: omit duplicate actions when suspending a runtime suspended host.
  coresight: Make sysfs functional on topologies with per core sink
  uio: free uio id after uio file node is freed
  USB: adutux: fix debugging
  cpufreq: sti-cpufreq: add stih418 support
  riscv: Define AT_VECTOR_SIZE_ARCH for ARCH_DLINFO
  samples/bpf: Fix possible deadlock in xdpsock
  selftests/bpf: Define string const as global for test_sysctl_prog.c
  media: uvcvideo: Fix dereference of out-of-bound list iterator
  bpf: Permit map_ptr arithmetic with opcode add and offset 0
  kgdb: Make "kgdbcon" work properly with "kgdb_earlycon"
  ia64: kprobes: Use generic kretprobe trampoline handler
  printk: reduce LOG_BUF_SHIFT range for H8300
  arm64: topology: Stop using MPIDR for topology information
  drm/bridge/synopsys: dsi: add support for non-continuous HS clock
  mmc: via-sdmmc: Fix data race bug
  media: imx274: fix frame interval handling
  media: tw5864: check status of tw5864_frameinterval_get
  usb: typec: tcpm: During PR_SWAP, source caps should be sent only after tSwapSourceStart
  media: platform: Improve queue set up flow for bug fixing
  media: videodev2.h: RGB BT2020 and HSV are always full range
  selftests/x86/fsgsbase: Reap a forgotten child
  drm/bridge/megachips: Add checking if ge_b850v3_lvds_init() is working correctly
  ath10k: fix VHT NSS calculation when STBC is enabled
  ath10k: start recovery process when payload length exceeds max htc length for sdio
  video: fbdev: pvr2fb: initialize variables
  xfs: fix realtime bitmap/summary file truncation when growing rt volume
  power: supply: bq27xxx: report "not charging" on all types
  NFS4: Fix oops when copy_file_range is attempted with NFS4.0 source
  ARM: 8997/2: hw_breakpoint: Handle inexact watchpoint addresses
  f2fs: handle errors of f2fs_get_meta_page_nofail
  um: change sigio_spinlock to a mutex
  s390/startup: avoid save_area_sync overflow
  f2fs: fix to check segment boundary during SIT page readahead
  f2fs: fix uninit-value in f2fs_lookup
  f2fs: add trace exit in exception path
  sparc64: remove mm_cpumask clearing to fix kthread_use_mm race
  powerpc: select ARCH_WANT_IRQS_OFF_ACTIVATE_MM
  mm: fix exec activate_mm vs TLB shootdown and lazy tlb switching race
  powerpc/powernv/smp: Fix spurious DBG() warning
  futex: Fix incorrect should_fail_futex() handling
  ata: sata_nv: Fix retrieving of active qcs
  RDMA/qedr: Fix memory leak in iWARP CM
  mlxsw: core: Fix use-after-free in mlxsw_emad_trans_finish()
  x86/unwind/orc: Fix inactive tasks with stack pointer in %sp on GCC 10 compiled kernels
  firmware: arm_scmi: Add missing Rx size re-initialisation
  firmware: arm_scmi: Fix ARCH_COLD_RESET
  xen/events: block rogue events for some time
  xen/events: defer eoi in case of excessive number of events
  xen/events: use a common cpu hotplug hook for event channels
  xen/events: switch user event channels to lateeoi model
  xen/pciback: use lateeoi irq binding
  xen/pvcallsback: use lateeoi irq binding
  xen/scsiback: use lateeoi irq binding
  xen/netback: use lateeoi irq binding
  xen/blkback: use lateeoi irq binding
  xen/events: add a new "late EOI" evtchn framework
  xen/events: fix race in evtchn_fifo_unmask()
  xen/events: add a proper barrier to 2-level uevent unmasking
  xen/events: avoid removing an event channel while handling it
  ANDROID: GKI: fix up include/linux/pm.h to handle some abi changes
  Linux 5.4.74
  phy: marvell: comphy: Convert internal SMCC firmware return codes to errno
  misc: rtsx: do not set OC_POWER_DOWN reg in rtsx_pci_init_ocp()
  openrisc: Fix issue with get_user for 64-bit values
  crypto: x86/crc32c - fix building with clang ias
  xen/gntdev.c: Mark pages as dirty
  ata: sata_rcar: Fix DMA boundary mask
  PM: runtime: Fix timer_expires data type on 32-bit arches
  serial: pl011: Fix lockdep splat when handling magic-sysrq interrupt
  serial: qcom_geni_serial: To correct QUP Version detection logic
  mtd: lpddr: Fix bad logic in print_drs_error
  RDMA/addr: Fix race with netevent_callback()/rdma_addr_cancel()
  cxl: Rework error message for incompatible slots
  p54: avoid accessing the data mapped to streaming DMA
  evm: Check size of security.evm before using it
  bpf: Fix comment for helper bpf_current_task_under_cgroup()
  fuse: fix page dereference after free
  ata: ahci: mvebu: Make SATA PHY optional for Armada 3720
  x86/xen: disable Firmware First mode for correctable memory errors
  arch/x86/amd/ibs: Fix re-arming IBS Fetch
  erofs: avoid duplicated permission check for "trusted." xattrs
  bnxt_en: Invoke cancel_delayed_work_sync() for PFs also.
  bnxt_en: Fix regression in workqueue cleanup logic in bnxt_remove_one().
  bnxt_en: Re-write PCI BARs after PCI fatal error.
  net: hns3: Clear the CMDQ registers before unmapping BAR region
  tipc: fix memory leak caused by tipc_buf_append()
  tcp: Prevent low rmem stalls with SO_RCVLOWAT.
  ravb: Fix bit fields checking in ravb_hwtstamp_get()
  r8169: fix issue with forced threading in combination with shared interrupts
  net/sched: act_mpls: Add softdep on mpls_gso.ko
  netem: fix zero division in tabledist
  mlxsw: core: Fix memory leak on module removal
  ibmvnic: fix ibmvnic_set_mac
  gtp: fix an use-before-init in gtp_newlink()
  cxgb4: set up filter action after rewrites
  chelsio/chtls: fix tls record info to user
  chelsio/chtls: fix memory leaks in CPL handlers
  chelsio/chtls: fix deadlock issue
  bnxt_en: Send HWRM_FUNC_RESET fw command unconditionally.
  bnxt_en: Check abort error state in bnxt_open_nic().
  efivarfs: Replace invalid slashes with exclamation marks in dentries.
  x86/PCI: Fix intel_mid_pci.c build error when ACPI is not enabled
  arm64: link with -z norelro regardless of CONFIG_RELOCATABLE
  arm64: Run ARCH_WORKAROUND_2 enabling code on all CPUs
  arm64: Run ARCH_WORKAROUND_1 enabling code on all CPUs
  scripts/setlocalversion: make git describe output more reliable
  objtool: Support Clang non-section symbols in ORC generation
  socket: don't clear SOCK_TSTAMP_NEW when SO_TIMESTAMPNS is disabled
  netfilter: nftables_offload: KASAN slab-out-of-bounds Read in nft_flow_rule_create
  Revert "PCI/IOV: Mark VFs as not implementing PCI_COMMAND_MEMORY"
  Revert "vfio/pci: Decouple PCI_COMMAND_MEMORY bit checks from is_virtfn"
  Linux 5.4.73
  usb: gadget: f_ncm: allow using NCM in SuperSpeed Plus gadgets.
  eeprom: at25: set minimum read/write access stride to 1
  usb: cdns3: gadget: free interrupt after gadget has deleted
  USB: cdc-wdm: Make wdm_flush() interruptible and add wdm_fsync().
  usb: cdc-acm: add quirk to blacklist ETAS ES58X devices
  tty: serial: fsl_lpuart: fix lpuart32_poll_get_char
  tty: serial: lpuart: fix lpuart32_write usage
  s390/qeth: don't let HW override the configured port role
  net: korina: cast KSEG0 address to pointer in kfree
  ath10k: check idx validity in __ath10k_htt_rx_ring_fill_n()
  dmaengine: dw: Activate FIFO-mode for memory peripherals only
  dmaengine: dw: Add DMA-channels mask cell support
  scsi: ufs: ufs-qcom: Fix race conditions caused by ufs_qcom_testbus_config()
  usb: core: Solve race condition in anchor cleanup functions
  brcm80211: fix possible memleak in brcmf_proto_msgbuf_attach
  scsi: smartpqi: Avoid crashing kernel for controller issues
  ALSA: hda/ca0132 - Add new quirk ID for SoundBlaster AE-7.
  ALSA: hda/ca0132 - Add AE-7 microphone selection commands.
  mwifiex: don't call del_timer_sync() on uninitialized timer
  reiserfs: Fix memory leak in reiserfs_parse_options()
  ipvs: Fix uninit-value in do_ip_vs_set_ctl()
  Bluetooth: btusb: Fix memleak in btusb_mtk_submit_wmt_recv_urb
  tty: ipwireless: fix error handling
  fbmem: add margin check to fb_check_caps()
  scsi: qedi: Fix list_del corruption while removing active I/O
  scsi: qedi: Protect active command list to avoid list corruption
  scsi: qedf: Return SUCCESS if stale rport is encountered
  HID: ite: Add USB id match for Acer One S1003 keyboard dock
  Fix use after free in get_capset_info callback.
  rtl8xxxu: prevent potential memory leak
  brcmsmac: fix memory leak in wlc_phy_attach_lcnphy
  selftests/bpf: Fix test_sysctl_loop{1, 2} failure due to clang change
  scsi: qla2xxx: Warn if done() or free() are called on an already freed srb
  scsi: ibmvfc: Fix error return in ibmvfc_probe()
  iomap: fix WARN_ON_ONCE() from unprivileged users
  drm/msm/a6xx: fix a potential overflow issue
  Bluetooth: Only mark socket zapped after unlocking
  usb: ohci: Default to per-port over-current protection
  xfs: make sure the rt allocator doesn't run off the end
  opp: Prevent memory leak in dev_pm_opp_attach_genpd()
  reiserfs: only call unlock_new_inode() if I_NEW
  misc: rtsx: Fix memory leak in rtsx_pci_probe
  bpf: Limit caller's stack depth 256 for subprogs with tailcalls
  drm/panfrost: add amlogic reset quirk callback
  ath9k: hif_usb: fix race condition between usb_get_urb() and usb_kill_anchored_urbs()
  can: flexcan: flexcan_chip_stop(): add error handling and propagate error value
  usb: dwc3: simple: add support for Hikey 970
  USB: cdc-acm: handle broken union descriptors
  rtw88: increase the size of rx buffer size
  udf: Avoid accessing uninitialized data on failed inode read
  udf: Limit sparing table size
  usb: gadget: function: printer: fix use-after-free in __lock_acquire
  usb: dwc3: Add splitdisable quirk for Hisilicon Kirin Soc
  misc: vop: add round_up(x,4) for vring_size to avoid kernel panic
  mic: vop: copy data to kernel space then write to io memory
  scsi: target: core: Add CONTROL field for trace events
  scsi: mvumi: Fix error return in mvumi_io_attach()
  PM: hibernate: remove the bogus call to get_gendisk() in software_resume()
  mac80211: handle lack of sband->bitrates in rates
  ip_gre: set dev->hard_header_len and dev->needed_headroom properly
  ntfs: add check for mft record size in superblock
  media: venus: core: Fix runtime PM imbalance in venus_probe
  fs: dlm: fix configfs memory leak
  media: venus: fixes for list corruption
  media: saa7134: avoid a shift overflow
  mmc: sdio: Check for CISTPL_VERS_1 buffer size
  media: uvcvideo: Ensure all probed info is returned to v4l2
  x86/mce: Make mce_rdmsrl() panic on an inaccessible MSR
  media: media/pci: prevent memory leak in bttv_probe
  media: bdisp: Fix runtime PM imbalance on error
  media: platform: sti: hva: Fix runtime PM imbalance on error
  media: platform: s3c-camif: Fix runtime PM imbalance on error
  media: vsp1: Fix runtime PM imbalance on error
  media: exynos4-is: Fix a reference count leak
  media: exynos4-is: Fix a reference count leak due to pm_runtime_get_sync
  media: exynos4-is: Fix several reference count leaks due to pm_runtime_get_sync
  media: sti: Fix reference count leaks
  media: st-delta: Fix reference count leak in delta_run_work
  media: ati_remote: sanity check for both endpoints
  media: firewire: fix memory leak
  x86/mce: Add Skylake quirk for patrol scrub reported errors
  x86/asm: Replace __force_order with a memory clobber
  crypto: ccp - fix error handling
  block: ratelimit handle_bad_sector() message
  md/bitmap: fix memory leak of temporary bitmap
  i2c: core: Restore acpi_walk_dep_device_list() getting called after registering the ACPI i2c devs
  perf: correct SNOOPX field offset
  sched/features: Fix !CONFIG_JUMP_LABEL case
  NTB: hw: amd: fix an issue about leak system resources
  nvmet: fix uninitialized work for zero kato
  powerpc/pseries: Avoid using addr_to_pfn in real mode
  powerpc/powernv/dump: Fix race while processing OPAL dump
  lightnvm: fix out-of-bounds write to array devices->info[]
  ARM: dts: meson8: remove two invalid interrupt lines from the GPU node
  arm64: dts: zynqmp: Remove additional compatible string for i2c IPs
  ARM: OMAP2+: Restore MPU power domain if cpu_cluster_pm_enter() fails
  soc: fsl: qbman: Fix return value on success
  ARM: dts: owl-s500: Fix incorrect PPI interrupt specifiers
  arm64: dts: actions: limit address range for pinctrl node
  arm64: dts: renesas: r8a774c0: Fix MSIOF1 DMA channels
  arm64: dts: renesas: r8a77990: Fix MSIOF1 DMA channels
  arm64: dts: qcom: msm8916: Fix MDP/DSI interrupts
  arm64: dts: qcom: pm8916: Remove invalid reg size from wcd_codec
  arm64: dts: qcom: msm8916: Remove one more thermal trip point unit name
  arm64: dts: imx8mq: Add missing interrupts to GPC
  memory: fsl-corenet-cf: Fix handling of platform_get_irq() error
  memory: omap-gpmc: Fix build error without CONFIG_OF
  memory: omap-gpmc: Fix a couple off by ones
  arm64: dts: allwinner: h5: remove Mali GPU PMU module
  ARM: dts: sun8i: r40: bananapi-m2-ultra: Fix dcdc1 regulator
  ARM: s3c24xx: fix mmc gpio lookup tables
  ARM: at91: pm: of_node_put() after its usage
  ARM: dts: imx6sl: fix rng node
  arm64: dts: meson: vim3: correct led polarity
  netfilter: nf_fwd_netdev: clear timestamp in forwarding path
  netfilter: ebtables: Fixes dropping of small packets in bridge nat
  netfilter: conntrack: connection timeout after re-register
  scsi: bfa: Fix error return in bfad_pci_init()
  KVM: x86: emulating RDPID failure shall return #UD rather than #GP
  Input: sun4i-ps2 - fix handling of platform_get_irq() error
  Input: twl4030_keypad - fix handling of platform_get_irq() error
  Input: omap4-keypad - fix handling of platform_get_irq() error
  Input: ep93xx_keypad - fix handling of platform_get_irq() error
  Input: stmfts - fix a & vs && typo
  Input: imx6ul_tsc - clean up some errors in imx6ul_tsc_resume()
  SUNRPC: fix copying of multiple pages in gss_read_proxy_verf()
  clk: imx8mq: Fix usdhc parents order
  vfio iommu type1: Fix memory leak in vfio_iommu_type1_pin_pages
  vfio/pci: Clear token on bypass registration failure
  ext4: limit entries returned when counting fsmap records
  svcrdma: fix bounce buffers for unaligned offsets and multiple pages
  watchdog: sp5100: Fix definition of EFCH_PM_DECODEEN3
  watchdog: Use put_device on error
  watchdog: Fix memleak in watchdog_cdev_register
  clk: bcm2835: add missing release if devm_clk_hw_register fails
  clk: at91: clk-main: update key before writing AT91_CKGR_MOR
  module: statically initialize init section freeing data
  clk: mediatek: add UART0 clock support
  clk: rockchip: Initialize hw to error to avoid undefined behavior
  pwm: img: Fix null pointer access in probe
  clk: keystone: sci-clk: fix parsing assigned-clock data during probe
  clk: qcom: gcc-sdm660: Fix wrong parent_map
  vfio/pci: Decouple PCI_COMMAND_MEMORY bit checks from is_virtfn
  PCI/IOV: Mark VFs as not implementing PCI_COMMAND_MEMORY
  rpmsg: smd: Fix a kobj leak in qcom_smd_parse_edge()
  PCI: iproc: Set affinity mask on MSI interrupts
  PCI: aardvark: Check for errors from pci_bridge_emul_init() call
  clk: meson: g12a: mark fclk_div2 as critical
  i2c: rcar: Auto select RESET_CONTROLLER
  mailbox: avoid timer start from callback
  rapidio: fix the missed put_device() for rio_mport_add_riodev
  rapidio: fix error handling path
  ramfs: fix nommu mmap with gaps in the page cache
  lib/crc32.c: fix trivial typo in preprocessor condition
  mm/page_owner: change split_page_owner to take a count
  RDMA/rxe: Handle skb_clone() failure in rxe_recv.c
  f2fs: wait for sysfs kobject removal before freeing f2fs_sb_info
  selftests/powerpc: Fix eeh-basic.sh exit codes
  mailbox: mediatek: Fix handling of platform_get_irq() error
  RDMA/rxe: Fix skb lifetime in rxe_rcv_mcast_pkt()
  IB/rdmavt: Fix sizeof mismatch
  cpufreq: powernv: Fix frame-size-overflow in powernv_cpufreq_reboot_notifier
  i3c: master: Fix error return in cdns_i3c_master_probe()
  powerpc/perf/hv-gpci: Fix starting index value
  powerpc/perf: Exclude pmc5/6 from the irrelevant PMU group constraints
  RDMA/ipoib: Set rtnl_link_ops for ipoib interfaces
  overflow: Include header file with SIZE_MAX declaration
  kdb: Fix pager search for multi-line strings
  mtd: spinand: gigadevice: Add QE Bit
  mtd: spinand: gigadevice: Only one dummy byte in QUADIO
  mtd: rawnand: vf610: disable clk on error handling path in probe
  RDMA/hns: Fix missing sq_sig_type when querying QP
  RDMA/hns: Fix the wrong value of rnr_retry when querying qp
  perf stat: Skip duration_time in setup_system_wide
  i40iw: Add support to make destroy QP synchronous
  RDMA/mlx5: Disable IB_DEVICE_MEM_MGT_EXTENSIONS if IB_WR_REG_MR can't work
  RDMA/hns: Set the unsupported wr opcode
  perf intel-pt: Fix "context_switch event has no tid" error
  RDMA/cma: Consolidate the destruction of a cma_multicast in one place
  RDMA/cma: Remove dead code for kernel rdmacm multicast
  powerpc/64s/radix: Fix mm_cpumask trimming race vs kthread_use_mm
  powerpc/tau: Disable TAU between measurements
  powerpc/tau: Check processor type before enabling TAU interrupt
  powerpc/tau: Remove duplicated set_thresholds() call
  powerpc/tau: Convert from timer to workqueue
  powerpc/tau: Use appropriate temperature sample interval
  powerpc/book3s64/hash/4k: Support large linear mapping range with 4K
  RDMA/qedr: Fix inline size returned for iWARP
  RDMA/qedr: Fix return code if accept is called on a destroyed qp
  RDMA/qedr: Fix use of uninitialized field
  RDMA/qedr: Fix qp structure memory leak
  RDMA/umem: Prevent small pages from being returned by ib_umem_find_best_pgsz()
  RDMA/umem: Fix ib_umem_find_best_pgsz() for mappings that cross a page boundary
  xfs: fix high key handling in the rt allocator's query_range function
  xfs: fix deadlock and streamline xfs_getfsmap performance
  xfs: limit entries returned when counting fsmap records
  ida: Free allocated bitmap in error path
  arc: plat-hsdk: fix kconfig dependency warning when !RESET_CONTROLLER
  ARM: 9007/1: l2c: fix prefetch bits init in L2X0_AUX_CTRL using DT values
  mtd: mtdoops: Don't write panic data twice
  RDMA/mlx5: Fix potential race between destroy and CQE poll
  pseries/drmem: don't cache node id in drmem_lmb struct
  powerpc/pseries: explicitly reschedule during drmem_lmb list traversal
  RDMA/umem: Fix signature of stub ib_umem_find_best_pgsz()
  RDMA/hns: Add a check for current state before modifying QP
  mtd: lpddr: fix excessive stack usage with clang
  RDMA/ucma: Add missing locking around rdma_leave_multicast()
  RDMA/ucma: Fix locking for ctx->events_reported
  powerpc/icp-hv: Fix missing of_node_put() in success path
  powerpc/pseries: Fix missing of_node_put() in rng_init()
  IB/mlx4: Adjust delayed work when a dup is observed
  IB/mlx4: Fix starvation in paravirt mux/demux
  i3c: master add i3c_master_attach_boardinfo to preserve boardinfo
  selftests/ftrace: Change synthetic event name for inter-event-combined test
  fs: fix NULL dereference due to data race in prepend_path()
  mm, oom_adj: don't loop through tasks in __set_oom_adj when not necessary
  mm/memcg: fix device private memcg accounting
  mm/swapfile.c: fix potential memory leak in sys_swapon
  netfilter: nf_log: missing vlan offload tag and proto
  net: korina: fix kfree of rx/tx descriptor array
  ipvs: clear skb->tstamp in forwarding path
  mwifiex: fix double free
  platform/x86: mlx-platform: Remove PSU EEPROM configuration
  ipmi_si: Fix wrong return value in try_smi_init()
  scsi: be2iscsi: Fix a theoretical leak in beiscsi_create_eqs()
  scsi: target: tcmu: Fix warning: 'page' may be used uninitialized
  usb: dwc2: Fix INTR OUT transfers in DDMA mode.
  nl80211: fix non-split wiphy information
  usb: gadget: u_ether: enable qmult on SuperSpeed Plus as well
  usb: gadget: f_ncm: fix ncm_bitrate for SuperSpeed and above.
  iwlwifi: mvm: split a print to avoid a WARNING in ROC
  mfd: sm501: Fix leaks in probe()
  net: enic: Cure the enic api locking trainwreck
  iio: adc: stm32-adc: fix runtime autosuspend delay when slow polling
  qtnfmac: fix resource leaks on unsupported iftype error return path
  ibmvnic: set up 200GBPS speed
  coresight: etm: perf: Fix warning caused by etm_setup_aux failure
  nl80211: fix OBSS PD min and max offset validation
  nvmem: core: fix possibly memleak when use nvmem_cell_info_to_nvmem_cell()
  HID: hid-input: fix stylus battery reporting
  ASoC: fsl_sai: Instantiate snd_soc_dai_driver
  slimbus: qcom-ngd-ctrl: disable ngd in qmi server down callback
  slimbus: core: do not enter to clock pause mode in core
  slimbus: core: check get_addr before removing laddr ida
  quota: clear padding in v2r1_mem2diskdqb()
  usb: dwc2: Fix parameter type in function pointer prototype
  ALSA: seq: oss: Avoid mutex lock for a long-time ioctl
  misc: mic: scif: Fix error handling path
  dmaengine: dmatest: Check list for emptiness before access its last entry
  ath6kl: wmi: prevent a shift wrapping bug in ath6kl_wmi_delete_pstream_cmd()
  spi: omap2-mcspi: Improve performance waiting for CHSTAT
  net: dsa: rtl8366rb: Support all 4096 VLANs
  ASoC: tlv320aic32x4: Fix bdiv clock rate derivation
  net: wilc1000: clean up resource in error path of init mon interface
  net: dsa: rtl8366: Skip PVID setting if not requested
  net: dsa: rtl8366: Refactor VLAN/PVID init
  net: dsa: rtl8366: Check validity of passed VLANs
  xhci: don't create endpoint debugfs entry before ring buffer is set.
  coresight: etm4x: Handle unreachable sink in perf mode
  drm: mxsfb: check framebuffer pitch
  cpufreq: armada-37xx: Add missing MODULE_DEVICE_TABLE
  net: stmmac: use netif_tx_start|stop_all_queues() function
  scsi: mpt3sas: Fix sync irqs
  net/mlx5: Don't call timecounter cyc2time directly from 1PPS flow
  pinctrl: mcp23s08: Fix mcp23x17 precious range
  pinctrl: mcp23s08: Fix mcp23x17_regmap initialiser
  iomap: Clear page error before beginning a write
  drm/panfrost: Ensure GPU quirks are always initialised
  drm/msm: Avoid div-by-zero in dpu_crtc_atomic_check()
  HID: roccat: add bounds checking in kone_sysfs_write_settings()
  ASoC: fsl: imx-es8328: add missing put_device() call in imx_es8328_probe()
  video: fbdev: radeon: Fix memleak in radeonfb_pci_register
  video: fbdev: sis: fix null ptr dereference
  video: fbdev: vga16fb: fix setting of pixclock because a pass-by-value error
  drivers/virt/fsl_hypervisor: Fix error handling path
  pwm: lpss: Add range limit check for the base_unit register value
  pwm: lpss: Fix off by one error in base_unit math in pwm_lpss_prepare()
  pty: do tty_flip_buffer_push without port->lock in pty_write
  tty: hvcs: Don't NULL tty->driver_data until hvcs_cleanup()
  tty: serial: earlycon dependency
  binder: Remove bogus warning on failed same-process transaction
  drm/crc-debugfs: Fix memleak in crc_control_write
  drm: panel: Fix bpc for OrtusTech COM43H4M85ULC panel
  mm/error_inject: Fix allow_error_inject function signatures.
  VMCI: check return value of get_user_pages_fast() for errors
  staging: emxx_udc: Fix passing of NULL to dma_alloc_coherent()
  backlight: sky81452-backlight: Fix refcount imbalance on error
  scsi: csiostor: Fix wrong return value in csio_hw_prep_fw()
  scsi: qla2xxx: Fix wrong return value in qla_nvme_register_hba()
  scsi: qla2xxx: Fix wrong return value in qlt_chk_unresolv_exchg()
  scsi: qla4xxx: Fix an error handling path in 'qla4xxx_get_host_stats()'
  drm/gma500: fix error check
  staging: rtl8192u: Do not use GFP_KERNEL in atomic context
  mwifiex: Do not use GFP_KERNEL in atomic context
  brcmfmac: check ndev pointer
  ASoC: qcom: lpass-cpu: fix concurrency issue
  ASoC: qcom: lpass-platform: fix memory leak
  wcn36xx: Fix reported 802.11n rx_highest rate wcn3660/wcn3680
  ath10k: Fix the size used in a 'dma_free_coherent()' call in an error handling path
  ath9k: Fix potential out of bounds in ath9k_htc_txcompletion_cb()
  ath6kl: prevent potential array overflow in ath6kl_add_new_sta()
  drm: panel: Fix bus format for OrtusTech COM43H4M85ULC panel
  drm/amd/display: Fix wrong return value in dm_update_plane_state()
  Bluetooth: hci_uart: Cancel init work before unregistering
  drm/vkms: fix xrgb on compute crc
  ath10k: provide survey info as accumulated data
  blk-mq: move cancel of hctx->run_work to the front of blk_exit_queue
  spi: spi-s3c64xx: Check return values
  spi: spi-s3c64xx: swap s3c64xx_spi_set_cs() and s3c64xx_enable_datapath()
  pinctrl: bcm: fix kconfig dependency warning when !GPIOLIB
  regulator: resolve supply after creating regulator
  media: ti-vpe: Fix a missing check and reference count leak
  media: stm32-dcmi: Fix a reference count leak
  media: s5p-mfc: Fix a reference count leak
  media: camss: Fix a reference count leak.
  media: platform: fcp: Fix a reference count leak.
  media: rockchip/rga: Fix a reference count leak.
  media: rcar-vin: Fix a reference count leak.
  media: tc358743: cleanup tc358743_cec_isr
  media: tc358743: initialize variable
  media: mx2_emmaprp: Fix memleak in emmaprp_probe
  crypto: mediatek - fix leaks in mtk_desc_ring_alloc
  hwmon: (pmbus/max34440) Fix status register reads for MAX344{51,60,61}
  crypto: omap-sham - fix digcnt register handling with export/import
  media: rcar-csi2: Allocate v4l2_async_subdev dynamically
  media: rcar_drif: Allocate v4l2_async_subdev dynamically
  media: rcar_drif: Fix fwnode reference leak when parsing DT
  media: i2c: ov5640: Enable data pins on poweron for DVP mode
  media: i2c: ov5640: Separate out mipi configuration from s_power
  media: i2c: ov5640: Remain in power down for DVP mode unless streaming
  media: omap3isp: Fix memleak in isp_probe
  media: staging/intel-ipu3: css: Correctly reset some memory
  media: uvcvideo: Silence shift-out-of-bounds warning
  media: uvcvideo: Set media controller entity functions
  media: m5mols: Check function pointer in m5mols_sensor_power
  media: ov5640: Correct Bit Div register in clock tree diagram
  media: Revert "media: exynos4-is: Add missed check for pinctrl_lookup_state()"
  media: tuner-simple: fix regression in simple_set_radio_freq
  crypto: picoxcell - Fix potential race condition bug
  crypto: ixp4xx - Fix the size used in a 'dma_free_coherent()' call
  crypto: mediatek - Fix wrong return value in mtk_desc_ring_alloc()
  crypto: algif_skcipher - EBUSY on aio should be an error
  x86/events/amd/iommu: Fix sizeof mismatch
  x86/nmi: Fix nmi_handle() duration miscalculation
  perf/x86/intel/uncore: Reduce the number of CBOX counters
  perf/x86/intel/uncore: Update Ice Lake uncore units
  sched/fair: Fix wrong cpu selecting from isolated domain
  drivers/perf: thunderx2_pmu: Fix memory resource error handling
  drivers/perf: xgene_pmu: Fix uninitialized resource struct
  x86/fpu: Allow multiple bits in clearcpuid= parameter
  perf/x86/intel/ds: Fix x86_pmu_stop warning for large PEBS
  EDAC/ti: Fix handling of platform_get_irq() error
  EDAC/aspeed: Fix handling of platform_get_irq() error
  EDAC/i5100: Fix error handling order in i5100_init_one()
  crypto: caam/qi - add fallback for XTS with more than 8B IV
  crypto: algif_aead - Do not set MAY_BACKLOG on the async path
  ima: Don't ignore errors from crypto_shash_update()
  KVM: SVM: Initialize prev_ga_tag before use
  KVM: x86/mmu: Commit zap of remaining invalid pages when recovering lpages
  KVM: nVMX: Reload vmcs01 if getting vmcs12's pages fails
  KVM: nVMX: Reset the segment cache when stuffing guest segs
  SMB3: Resolve data corruption of TCP server info fields
  cifs: Return the error from crypt_message when enc/dec key not found.
  cifs: remove bogus debug code
  ALSA: hda/realtek: Enable audio jacks of ASUS D700SA with ALC887
  ALSA: hda/realtek - Add mute Led support for HP Elitebook 845 G7
  ALSA: hda/realtek - set mic to auto detect on a HP AIO machine
  ALSA: hda/realtek - The front Mic on a HP machine doesn't work
  icmp: randomize the global rate limiter
  tcp: fix to update snd_wl1 in bulk receiver fast path
  selftests: rtnetlink: load fou module for kci_test_encap_fou() test
  selftests: forwarding: Add missing 'rp_filter' configuration
  r8169: fix operation under forced interrupt threading
  nfc: Ensure presence of NFC_ATTR_FIRMWARE_NAME attribute in nfc_genl_fw_download()
  nexthop: Fix performance regression in nexthop deletion
  net/sched: act_tunnel_key: fix OOB write in case of IPv6 ERSPAN tunnels
  net: Properly typecast int values to set sk_max_pacing_rate
  net: hdlc_raw_eth: Clear the IFF_TX_SKB_SHARING flag after calling ether_setup
  net: hdlc: In hdlc_rcv, check to make sure dev is an HDLC device
  net: ftgmac100: Fix Aspeed ast2600 TX hang issue
  ibmvnic: save changed mac address to adapter->mac_addr
  chelsio/chtls: correct function return and return type
  chelsio/chtls: correct netdevice for vlan interface
  chelsio/chtls: fix socket lock
  nvme-pci: disable the write zeros command for Intel 600P/P3100
  ALSA: hda/hdmi: fix incorrect locking in hdmi_pcm_close
  ALSA: hda: fix jack detection with Realtek codecs when in D3
  ALSA: bebob: potential info leak in hwdep_read()
  binder: fix UAF when releasing todo list
  cxgb4: handle 4-tuple PEDIT to NAT mode translation
  r8169: fix data corruption issue on RTL8402
  net_sched: remove a redundant goto chain check
  net/ipv4: always honour route mtu during forwarding
  net: j1939: j1939_session_fresh_new(): fix missing initialization of skbcnt
  can: j1939: j1939_tp_tx_dat_new(): fix missing initialization of skbcnt
  can: m_can_platform: don't call m_can_class_suspend in runtime suspend
  socket: fix option SO_TIMESTAMPING_NEW
  tipc: fix the skb_unshare() in tipc_buf_append()
  net: usb: qmi_wwan: add Cellient MPL200 card
  net/tls: sendfile fails with ktls offload
  net/smc: fix valid DMBE buffer sizes
  net: fix pos increment in ipv6_route_seq_next
  net: fec: Fix PHY init after phy_reset_after_clk_enable()
  net: fec: Fix phy_device lookup for phy_reset_after_clk_enable()
  mlx4: handle non-napi callers to napi_poll
  ipv4: Restore flowi4_oif update before call to xfrm_lookup_route
  ibmveth: Identify ingress large send packets.
  ibmveth: Switch order of ibmveth_helper calls.
  Linux 5.4.72
  crypto: qat - check cipher length for aead AES-CBC-HMAC-SHA
  crypto: bcm - Verify GCM/CCM key length in setkey
  xen/events: don't use chip_data for legacy IRQs
  reiserfs: Fix oops during mount
  reiserfs: Initialize inode keys properly
  USB: serial: ftdi_sio: add support for FreeCalypso JTAG+UART adapters
  USB: serial: pl2303: add device-id for HP GC device
  staging: comedi: check validity of wMaxPacketSize of usb endpoints found
  USB: serial: option: Add Telit FT980-KS composition
  USB: serial: option: add Cellient MPL200 card
  media: usbtv: Fix refcounting mixup
  Bluetooth: Disconnect if E0 is used for Level 4
  Bluetooth: Fix update of connection state in `hci_encrypt_cfm`
  Bluetooth: Consolidate encryption handling in hci_encrypt_cfm
  Bluetooth: MGMT: Fix not checking if BT_HS is enabled
  Bluetooth: L2CAP: Fix calling sk_filter on non-socket based channel
  Bluetooth: A2MP: Fix not initializing all members
  ACPI: Always build evged in
  ARM: 8939/1: kbuild: use correct nm executable
  btrfs: take overcommit into account in inc_block_group_ro
  btrfs: don't pass system_chunk into can_overcommit
  perf cs-etm: Move definition of 'traceid_list' global variable from header file
  Linux 5.4.71
  net_sched: commit action insertions together
  net_sched: defer tcf_idr_insert() in tcf_action_init_1()
  net: usb: rtl8150: set random MAC address when set_ethernet_addr() fails
  Input: ati_remote2 - add missing newlines when printing module parameters
  net/mlx5e: Fix driver's declaration to support GRE offload
  net/tls: race causes kernel panic
  net/core: check length before updating Ethertype in skb_mpls_{push,pop}
  tcp: fix receive window update in tcp_add_backlog()
  mm: khugepaged: recalculate min_free_kbytes after memory hotplug as expected by khugepaged
  mmc: core: don't set limits.discard_granularity as 0
  perf: Fix task_function_call() error handling
  rxrpc: Fix server keyring leak
  rxrpc: The server keyring isn't network-namespaced
  rxrpc: Fix some missing _bh annotations on locking conn->state_lock
  rxrpc: Downgrade the BUG() for unsupported token type in rxrpc_read()
  rxrpc: Fix rxkad token xdr encoding
  net/mlx5e: Fix VLAN create flow
  net/mlx5e: Fix VLAN cleanup flow
  net/mlx5e: Add resiliency in Striding RQ mode for packets larger than MTU
  net/mlx5: Fix request_irqs error flow
  net/mlx5: Avoid possible free of command entry while timeout comp handler
  virtio-net: don't disable guest csum when disable LRO
  net: usb: ax88179_178a: fix missing stop entry in driver_info
  r8169: fix RTL8168f/RTL8411 EPHY config
  mlxsw: spectrum_acl: Fix mlxsw_sp_acl_tcam_group_add()'s error path
  mdio: fix mdio-thunder.c dependency & build error
  bonding: set dev->needed_headroom in bond_setup_by_slave()
  net: ethernet: cavium: octeon_mgmt: use phy_start and phy_stop
  iavf: Fix incorrect adapter get in iavf_resume
  iavf: use generic power management
  xfrm: Use correct address family in xfrm_state_find
  platform/x86: fix kconfig dependency warning for FUJITSU_LAPTOP
  net: stmmac: removed enabling eee in EEE set callback
  xfrm: clone whole lifetime_cur structure in xfrm_do_migrate
  xfrm: clone XFRMA_SEC_CTX in xfrm_do_migrate
  xfrm: clone XFRMA_REPLAY_ESN_VAL in xfrm_do_migrate
  xfrm: clone XFRMA_SET_MARK in xfrm_do_migrate
  iommu/vt-d: Fix lockdep splat in iommu_flush_dev_iotlb()
  drm/amdgpu: prevent double kfree ttm->sg
  openvswitch: handle DNAT tuple collision
  net: team: fix memory leak in __team_options_register
  team: set dev->needed_headroom in team_setup_by_port()
  sctp: fix sctp_auth_init_hmacs() error path
  i2c: owl: Clear NACK and BUS error bits
  i2c: meson: fixup rate calculation with filter delay
  i2c: meson: fix clock setting overwrite
  cifs: Fix incomplete memory allocation on setxattr path
  xfrmi: drop ignore_df check before updating pmtu
  nvme-tcp: check page by sendpage_ok() before calling kernel_sendpage()
  tcp: use sendpage_ok() to detect misused .sendpage
  net: introduce helper sendpage_ok() in include/linux/net.h
  mm/khugepaged: fix filemap page_to_pgoff(page) != offset
  macsec: avoid use-after-free in macsec_handle_frame()
  nvme-core: put ctrl ref when module ref get fail
  btrfs: allow btrfs_truncate_block() to fallback to nocow for data space reservation
  btrfs: fix RWF_NOWAIT write not failing when we need to cow
  btrfs: Ensure we trim ranges across block group boundary
  btrfs: volumes: Use more straightforward way to calculate map length
  Btrfs: send, fix emission of invalid clone operations within the same file
  Btrfs: send, allow clone operations within the same file
  arm64: dts: stratix10: add status to qspi dts node
  i2c: i801: Exclude device from suspend direct complete optimization
  perf top: Fix stdio interface input handling with glibc 2.28+
  perf test session topology: Fix data path
  driver core: Fix probe_count imbalance in really_probe()
  platform/x86: thinkpad_acpi: re-initialize ACPI buffer size when reuse
  platform/x86: intel-vbtn: Switch to an allow-list for SW_TABLET_MODE reporting
  bpf: Prevent .BTF section elimination
  bpf: Fix sysfs export of empty BTF section
  platform/x86: thinkpad_acpi: initialize tp_nvram_state variable
  platform/x86: intel-vbtn: Fix SW_TABLET_MODE always reporting 1 on the HP Pavilion 11 x360
  Platform: OLPC: Fix memleak in olpc_ec_probe
  usermodehelper: reset umask to default before executing user process
  vhost: Use vhost_get_used_size() in vhost_vring_set_addr()
  vhost: Don't call access_ok() when using IOTLB
  drm/nouveau/mem: guard against NULL pointer access in mem_del
  net: wireless: nl80211: fix out-of-bounds access in nl80211_del_key()
  io_uring: Fix double list add in io_queue_async_work()
  io_uring: Fix remove irrelevant req from the task_list
  io_uring: Fix missing smp_mb() in io_cancel_async_work()
  io_uring: Fix resource leaking when kill the process
  Revert "ravb: Fixed to be able to unload modules"
  fbcon: Fix global-out-of-bounds read in fbcon_get_font()
  Fonts: Support FONT_EXTRA_WORDS macros for built-in fonts
  fbdev, newport_con: Move FONT_EXTRA_WORDS macros into linux/font.h
  Linux 5.4.70
  netfilter: ctnetlink: add a range check for l3/l4 protonum
  ep_create_wakeup_source(): dentry name can change under you...
  epoll: EPOLL_CTL_ADD: close the race in decision to take fast path
  epoll: replace ->visited/visited_list with generation count
  epoll: do not insert into poll queues until all sanity checks are done
  nvme: consolidate chunk_sectors settings
  nvme: Introduce nvme_lba_to_sect()
  nvme: Cleanup and rename nvme_block_nr()
  mm: don't rely on system state to detect hot-plug operations
  mm: replace memmap_context by meminit_context
  block/diskstats: more accurate approximation of io_ticks for slow disks
  random32: Restore __latent_entropy attribute on net_rand_state
  scripts/dtc: only append to HOST_EXTRACFLAGS instead of overwriting
  Input: trackpoint - enable Synaptics trackpoints
  i2c: cpm: Fix i2c_ram structure
  gpio: aspeed: fix ast2600 bank properties
  gpio/aspeed-sgpio: don't enable all interrupts by default
  gpio/aspeed-sgpio: enable access to all 80 input & output sgpios
  iommu/exynos: add missing put_device() call in exynos_iommu_of_xlate()
  clk: samsung: exynos4: mark 'chipid' clock as CLK_IGNORE_UNUSED
  clk: tegra: Always program PLL_E when enabled
  nfs: Fix security label length not being reset
  pinctrl: mvebu: Fix i2c sda definition for 98DX3236
  phy: ti: am654: Fix a leak in serdes_am654_probe()
  gpio: sprd: Clear interrupt when setting the type as edge
  nvme-fc: fail new connections to a deleted host or remote port
  nvme-pci: fix NULL req in completion handler
  spi: fsl-espi: Only process interrupts for expected events
  tools/io_uring: fix compile breakage
  tracing: Make the space reserved for the pid wider
  mac80211: do not allow bigger VHT MPDUs than the hardware supports
  mac80211: Fix radiotap header channel flag for 6GHz band
  drivers/net/wan/hdlc: Set skb->protocol before transmitting
  drivers/net/wan/lapbether: Make skb->protocol consistent with the header
  fuse: fix the ->direct_IO() treatment of iov_iter
  nvme-core: get/put ctrl and transport module in nvme_dev_open/release()
  rndis_host: increase sleep time in the query-response loop
  net: dec: de2104x: Increase receive ring size for Tulip
  drm/sun4i: mixer: Extend regmap max_register
  drivers/net/wan/hdlc_fr: Add needed_headroom for PVC devices
  libbpf: Remove arch-specific include path in Makefile
  clocksource/drivers/timer-gx6605s: Fixup counter reload
  drm/amdgpu: restore proper ref count in amdgpu_display_crtc_set_config
  memstick: Skip allocating card when removing host
  ftrace: Move RCU is watching check after recursion check
  iio: adc: qcom-spmi-adc5: fix driver name
  Input: i8042 - add nopnp quirk for Acer Aspire 5 A515
  xfs: trim IO to found COW extent limit
  net: virtio_vsock: Enhance connection semantics
  vsock/virtio: add transport parameter to the virtio_transport_reset_no_sock()
  clk: socfpga: stratix10: fix the divider for the emac_ptp_free_clk
  gpio: tc35894: fix up tc35894 interrupt configuration
  gpio: mockup: fix resource leak in error path
  gpio: siox: explicitly support only threaded irqs
  USB: gadget: f_ncm: Fix NDP16 datagram validation
  mmc: sdhci: Workaround broken command queuing on Intel GLK based IRBIS models
  btrfs: fix filesystem corruption after a device replace
  Revert "opp: Replace list_kref with a local counter"
  Revert "opp: Increase parsed_static_opps in _of_add_opp_table_v1()"
  Revert "mmc: core: Fix size overflow for mmc partitions"
  Revert "exec: Add exec_update_mutex to replace cred_guard_mutex"
  Revert "exec: Fix a deadlock in strace"
  Revert "selftests/ptrace: add test cases for dead-locks"
  Revert "kernel/kcmp.c: Use new infrastructure to fix deadlocks in execve"
  Revert "proc: Use new infrastructure to fix deadlocks in execve"
  Revert "proc: io_accounting: Use new infrastructure to fix deadlocks in execve"
  Revert "perf: Use new infrastructure to fix deadlocks in execve"
  Linux 5.4.69
  ata: sata_mv, avoid triggerable BUG_ON
  ata: make qc_prep return ata_completion_errors
  ata: define AC_ERR_OK
  kprobes: Fix compiler warning for !CONFIG_KPROBES_ON_FTRACE
  dm: fix bio splitting and its bio completion order for regular IO
  KVM: arm64: Assume write fault on S1PTW permission fault on instruction fetch
  s390/zcrypt: Fix ZCRYPT_PERDEV_REQCNT ioctl
  mm/gup: fix gup_fast with dynamic page table folding
  mm, THP, swap: fix allocating cluster for swapfile by mistake
  dmabuf: fix NULL pointer dereference in dma_buf_release()
  btrfs: fix overflow when copying corrupt csums for a message
  kprobes: tracing/kprobes: Fix to kill kprobes on initmem after boot
  kprobes: Fix to check probe enabled before disarm_kprobe_ftrace()
  s390/dasd: Fix zero write for FBA devices
  tracing: fix double free
  lib/string.c: implement stpcpy
  ALSA: hda/realtek: Enable front panel headset LED on Lenovo ThinkStation P520
  ALSA: hda/realtek - Couldn't detect Mic if booting with headset plugged
  ALSA: usb-audio: Add delay quirk for H570e USB headsets
  scsi: lpfc: Fix initial FLOGI failure due to BBSCN not supported
  x86/ioapic: Unbreak check_timer()
  arch/x86/lib/usercopy_64.c: fix __copy_user_flushcache() cache writeback
  mm: validate pmd after splitting
  KVM: SVM: Add a dedicated INVD intercept routine
  KVM: x86: Reset MMU context if guest toggles CR4.SMAP or CR4.PKE
  regulator: axp20x: fix LDO2/4 description
  MIPS: Add the missing 'CPU_1074K' into __get_cpu_type()
  regmap: fix page selection for noinc writes
  regmap: fix page selection for noinc reads
  ALSA: asihpi: fix iounmap in error handler
  lib80211: fix unmet direct dependencies config warning when !CRYPTO
  bpf: Fix a rcu warning for bpffs map pretty-print
  batman-adv: mcast: fix duplicate mcast packets from BLA backbone to mesh
  batman-adv: mcast: fix duplicate mcast packets in BLA backbone from mesh
  batman-adv: mcast: fix duplicate mcast packets in BLA backbone from LAN
  nvme-tcp: fix kconfig dependency warning when !CRYPTO
  batman-adv: Add missing include for in_interrupt()
  drm/sun4i: sun8i-csc: Secondary CSC register correction
  net: qed: RDMA personality shouldn't fail VF load
  net: qede: Disable aRFS for NPAR and 100G
  net: qed: Disable aRFS for NPAR and 100G
  drm/vc4/vc4_hdmi: fill ASoC card owner
  bpf: Fix clobbering of r2 in bpf_gen_ld_abs
  mac802154: tx: fix use-after-free
  netfilter: conntrack: nf_conncount_init is failing with IPv6 disabled
  batman-adv: mcast/TT: fix wrongly dropped or rerouted packets
  atm: eni: fix the missed pci_disable_device() for eni_init_one()
  batman-adv: bla: fix type misuse for backbone_gw hash indexing
  mwifiex: Increase AES key storage size to 256 bits
  clocksource/drivers/h8300_timer8: Fix wrong return value in h8300_8timer_init()
  ieee802154/adf7242: check status of adf7242_read_reg
  ieee802154: fix one possible memleak in ca8210_dev_com_init
  objtool: Fix noreturn detection for ignored functions
  i2c: core: Call i2c_acpi_install_space_handler() before i2c_acpi_register_devices()
  drm/amdgpu/dc: Require primary plane to be enabled whenever the CRTC is
  drm/amd/display: update nv1x stutter latencies
  drm/amdkfd: fix a memory leak issue
  EDAC/ghes: Check whether the driver is on the safe list correctly
  lockdep: fix order in trace_hardirqs_off_caller()
  s390/init: add missing __init annotations
  i2c: aspeed: Mask IRQ status to relevant bits
  RISC-V: Take text_mutex in ftrace_init_nop()
  ASoC: Intel: bytcr_rt5640: Add quirk for MPMAN Converter9 2-in-1
  ASoC: wm8994: Ensure the device is resumed in wm89xx_mic_detect functions
  ASoC: wm8994: Skip setting of the WM8994_MICBIAS register for WM1811
  ASoC: pcm3168a: ignore 0 Hz settings
  device_cgroup: Fix RCU list debugging warning
  nvme: explicitly update mpath disk capacity on revalidation
  net: openvswitch: use div_u64() for 64-by-32 divisions
  ALSA: hda: Workaround for spurious wakeups on some Intel platforms
  ALSA: hda: Always use jackpoll helper for jack update after resume
  perf parse-events: Use strcmp() to compare the PMU name
  opp: Increase parsed_static_opps in _of_add_opp_table_v1()
  mt76: fix LED link time failure
  ubi: fastmap: Free unused fastmap anchor peb during detach
  scsi: qla2xxx: Retry PLOGI on FC-NVMe PRLI failure
  perf tests: Fix test 68 zstd compression for s390
  btrfs: qgroup: fix data leak caused by race between writeback and truncate
  vfio/pci: fix racy on error and request eventfd ctx
  selftests/x86/syscall_nt: Clear weird flags after each test
  scsi: libfc: Skip additional kref updating work event
  scsi: libfc: Handling of extra kref
  mac80211: skip mpath lookup also for control port tx
  nvme: fix possible deadlock when I/O is blocked
  cifs: Fix double add page to memcg when cifs_readpages
  vfio/pci: Clear error and request eventfd ctx after releasing
  NFS: nfs_xdr_status should record the procedure name
  x86/speculation/mds: Mark mds_user_clear_cpu_buffers() __always_inline
  mtd: parser: cmdline: Support MTD names containing one or more colons
  rapidio: avoid data race between file operation callbacks and mport_cdev_add().
  mm: memcontrol: fix stat-corrupting race in charge moving
  mm/swap_state: fix a data race in swapin_nr_pages
  ceph: fix potential race in ceph_check_caps
  PCI: tegra: Fix runtime PM imbalance on error
  mtd: rawnand: omap_elm: Fix runtime PM imbalance on error
  mtd: rawnand: gpmi: Fix runtime PM imbalance on error
  wlcore: fix runtime pm imbalance in wlcore_regdomain_config
  wlcore: fix runtime pm imbalance in wl1271_tx_work
  ASoC: img-i2s-out: Fix runtime PM imbalance on error
  PCI: tegra194: Fix runtime PM imbalance on error
  perf kcore_copy: Fix module map when there are no modules loaded
  perf metricgroup: Free metric_events on error
  perf util: Fix memory leak of prefix_if_not_in
  perf stat: Fix duration_time value for higher intervals
  perf trace: Fix the selection for architectures to generate the errno name tables
  perf evsel: Fix 2 memory leaks
  KVM: PPC: Book3S HV: Close race with page faults around memslot flushes
  vfio/pci: fix memory leaks of eventfd ctx
  gpio: rcar: Fix runtime PM imbalance on error
  btrfs: fix double __endio_write_update_ordered in direct I/O
  btrfs: don't force read-only after error in drop snapshot
  usb: dwc3: Increase timeout for CmdAct cleared by device controller
  printk: handle blank console arguments passed in.
  drm/nouveau/dispnv50: fix runtime pm imbalance on error
  drm/nouveau: fix runtime pm imbalance on error
  drm/nouveau/debugfs: fix runtime pm imbalance on error
  e1000: Do not perform reset in reset_task if we are already down
  drm/amdkfd: fix restore worker race condition
  arm64/cpufeature: Drop TraceFilt feature exposure from ID_DFR0 register
  scsi: cxlflash: Fix error return code in cxlflash_probe()
  arm64: acpi: Make apei_claim_sea() synchronise with APEI's irq work
  coresight: etm4x: Fix use-after-free of per-cpu etm drvdata
  USB: EHCI: ehci-mv: fix less than zero comparison of an unsigned int
  fuse: update attr_version counter on fuse_notify_inval_inode()
  fuse: don't check refcount after stealing page
  svcrdma: Fix backchannel return code
  powerpc/traps: Make unrecoverable NMIs die instead of panic
  ipmi:bt-bmc: Fix error handling and status check
  drm/exynos: dsi: Remove bridge node reference in error handling path in probe function
  ALSA: hda: Fix potential race in unsol event handler
  tty: serial: samsung: Correct clock selection logic
  tipc: fix memory leak in service subscripting
  KVM: x86: handle wrap around 32-bit address space
  USB: EHCI: ehci-mv: fix error handling in mv_ehci_probe()
  Bluetooth: Handle Inquiry Cancel error after Inquiry Complete
  phy: samsung: s5pv210-usb2: Add delay after reset
  power: supply: max17040: Correct voltage reading
  i2c: tegra: Restore pinmux on system resume
  mm/slub: fix incorrect interpretation of s->offset
  perf mem2node: Avoid double free related to realloc
  media: venus: vdec: Init registered list unconditionally
  atm: fix a memory leak of vcc->user_back
  devlink: Fix reporter's recovery condition
  dt-bindings: sound: wm8994: Correct required supplies based on actual implementation
  dpaa2-eth: fix error return code in setup_dpni()
  sched/fair: Eliminate bandwidth race between throttling and distribution
  arm64: cpufeature: Relax checks for AArch32 support at EL[0-2]
  sparc64: vcc: Fix error return code in vcc_probe()
  staging:r8188eu: avoid skb_clone for amsdu to msdu conversion
  scsi: aacraid: Fix error handling paths in aac_probe_one()
  net: openvswitch: use u64 for meter bucket
  KVM: arm64: vgic-its: Fix memory leak on the error path of vgic_add_lpi()
  KVM: arm64: vgic-v3: Retire all pending LPIs on vcpu destroy
  drivers: char: tlclk.c: Avoid data race between init and interrupt handler
  bdev: Reduce time holding bd_mutex in sync in blkdev_close()
  perf stat: Force error in fallback on :k events
  KVM: Remove CREATE_IRQCHIP/SET_PIT2 race
  btrfs: fix setting last_trans for reloc roots
  serial: uartps: Wait for tx_empty in console setup
  scsi: qedi: Fix termination timeouts in session logout
  ALSA: hda: Skip controller resume if not needed
  mm/mmap.c: initialize align_offset explicitly for vm_unmapped_area
  drm/amdgpu/sriov add amdgpu_amdkfd_pre_reset in gpu reset
  workqueue: Remove the warning in wq_worker_sleeping()
  nvmet-rdma: fix double free of rdma queue
  SUNRPC: Don't start a timer on an already queued rpc task
  mm/vmscan.c: fix data races using kswapd_classzone_idx
  mm/swapfile: fix data races in try_to_unuse()
  mm/filemap.c: clear page error before actual read
  mm/kmemleak.c: use address-of operator on section symbols
  powerpc/perf: Implement a global lock to avoid races between trace, core and thread imc events.
  drm/amdgpu/vcn2.0: stall DPG when WPTR/RPTR reset
  NFS: Fix races nfs_page_group_destroy() vs nfs_destroy_unlinked_subrequests()
  PCI: pciehp: Fix MSI interrupt race
  ALSA: usb-audio: Fix case when USB MIDI interface has more than one extra endpoint descriptor
  ubifs: Fix out-of-bounds memory access caused by abnormal value of node_len
  ubifs: ubifs_add_orphan: Fix a memory leak bug
  ubifs: ubifs_jnl_write_inode: Fix a memory leak bug
  PCI: Use ioremap(), not phys_to_virt() for platform ROM
  netfilter: nf_tables: silence a RCU-list warning in nft_table_lookup()
  svcrdma: Fix leak of transport addresses
  SUNRPC: Fix a potential buffer overflow in 'svc_print_xprts()'
  scsi: hpsa: correct race condition in offload enabled
  IB/iser: Always check sig MR before putting it to the free pool
  RDMA/rxe: Set sys_image_guid to be aligned with HW IB devices
  xfs: prohibit fs freezing when using empty transactions
  brcmfmac: Fix double freeing in the fmac usb data path
  nvme: Fix controller creation races with teardown flow
  nvme: Fix ctrl use-after-free during sysfs deletion
  nvme-multipath: do not reset on unknown status
  perf: Use new infrastructure to fix deadlocks in execve
  proc: io_accounting: Use new infrastructure to fix deadlocks in execve
  proc: Use new infrastructure to fix deadlocks in execve
  kernel/kcmp.c: Use new infrastructure to fix deadlocks in execve
  selftests/ptrace: add test cases for dead-locks
  exec: Fix a deadlock in strace
  exec: Add exec_update_mutex to replace cred_guard_mutex
  tools: gpio-hammer: Avoid potential overflow in main
  cpufreq: powernv: Fix frame-size-overflow in powernv_cpufreq_work_fn
  net: axienet: Propagate failure of DMA descriptor setup
  net: axienet: Convert DMA error handler to a work queue
  perf cpumap: Fix snprintf overflow check
  serial: 8250: 8250_omap: Terminate DMA before pushing data on RX timeout
  serial: 8250_omap: Fix sleeping function called from invalid context during probe
  serial: 8250_port: Don't service RX FIFO if throttled
  r8169: improve RTL8168b FIFO overflow workaround
  btrfs: free the reloc_control in a consistent way
  btrfs: do not init a reloc root if we aren't relocating
  perf parse-events: Fix 3 use after frees found with clang ASAN
  KVM: LAPIC: Mark hrtimer for period or oneshot mode to expire in hard interrupt context
  thermal: rcar_thermal: Handle probe error gracefully
  tracing: Use address-of operator on section symbols
  drm/msm/a5xx: Always set an OPP supported hardware value
  drm/msm: fix leaks if initialization fails
  KVM: PPC: Book3S HV: Treat TM-related invalid form instructions on P9 like the valid ones
  intel_th: Disallow multi mode on devices where it's broken
  RDMA/cm: Remove a race freeing timewait_info
  nfsd: Don't add locks to closed or closing open stateids
  rtc: ds1374: fix possible race condition
  rtc: sa1100: fix possible race condition
  tpm: ibmvtpm: Wait for buffer to be set before proceeding
  ext4: mark block bitmap corrupted when found instead of BUGON
  xfs: mark dir corrupt when lookup-by-hash fails
  xfs: don't ever return a stale pointer from __xfs_dir3_free_read
  tty: sifive: Finish transmission before changing the clock
  media: tda10071: fix unsigned sign extension overflow
  Bluetooth: L2CAP: handle l2cap config request during open state
  scsi: aacraid: Disabling TM path and only processing IOP reset
  ath10k: use kzalloc to read for ath10k_sdio_hif_diag_read
  perf cs-etm: Correct synthesizing instruction samples
  perf cs-etm: Swap packets for instruction samples
  s390/irq: replace setup_irq() by request_irq()
  cpu-topology: Fix the potential data corruption
  clk: imx: Fix division by zero warning on pfdv2
  drm/amd/display: Stop if retimer is not available
  ARM: OMAP2+: Handle errors for cpu_pm
  drm/amdgpu: increase atombios cmd timeout
  mm: avoid data corruption on CoW fault into PFN-mapped VMA
  perf jevents: Fix leak of mapfile memory
  ext4: fix a data race at inode->i_disksize
  drm/amd/display: fix image corruption with ODM 2:1 DSC 2 slice
  powerpc/book3s64: Fix error handling in mm_iommu_do_alloc()
  timekeeping: Prevent 32bit truncation in scale64_check_overflow()
  Bluetooth: guard against controllers sending zero'd events
  media: go7007: Fix URB type for interrupt handling
  ASoC: SOF: ipc: check ipc return value before data copy
  bus: hisi_lpc: Fixup IO ports addresses to avoid use-after-free in host removal
  random: fix data races at timer_rand_state
  firmware: arm_sdei: Use cpus_read_lock() to avoid races with cpuhp
  iavf: use tc_cls_can_offload_and_chain0() instead of chain check
  drm/omap: dss: Cleanup DSS ports on initialisation failure
  drm/amd/display: dal_ddc_i2c_payloads_create can fail causing panic
  soundwire: bus: disable pm_runtime in sdw_slave_delete
  dmaengine: tegra-apb: Prevent race conditions on channel's freeing
  dmaengine: stm32-dma: use vchan_terminate_vdesc() in .terminate_all
  bpf: Remove recursion prevention from rcu free callback
  x86/pkeys: Add check for pkey "overflow"
  media: staging/imx: Missing assignment in imx_media_capture_device_register()
  dmaengine: stm32-mdma: use vchan_terminate_vdesc() in .terminate_all
  KVM: nVMX: Hold KVM's srcu lock when syncing vmcs12->shadow
  KVM: x86: fix incorrect comparison in trace event
  RDMA/rxe: Fix configuration of atomic queue pair attributes
  perf test: Fix test trace+probe_vfs_getname.sh on s390
  ALSA: usb-audio: Don't create a mixer element with bogus volume range
  mt76: fix handling full tx queues in mt76_dma_tx_queue_skb_raw
  mt76: clear skb pointers from rx aggregation reorder buffer during cleanup
  crypto: chelsio - This fixes the kernel panic which occurs during a libkcapi test
  clk: stratix10: use do_div() for 64-bit calculation
  locking/lockdep: Decrement IRQ context counters when removing lock chain
  drm/omap: fix possible object reference leak
  scsi: lpfc: Fix coverity errors in fmdi attribute handling
  scsi: lpfc: Fix release of hwq to clear the eq relationship
  scsi: lpfc: Fix RQ buffer leakage when no IOCBs available
  selinux: sel_avc_get_stat_idx should increase position index
  audit: CONFIG_CHANGE don't log internal bookkeeping as an event
  drm/amd/display: fix workaround for incorrect double buffer register for DLG ADL and TTU
  nfsd: Fix a perf warning
  skbuff: fix a data race in skb_queue_len()
  ALSA: hda: Clear RIRB status before reading WP
  KVM: fix overflow of zero page refcount with ksm running
  Bluetooth: prefetch channel before killing sock
  mm: pagewalk: fix termination condition in walk_pte_range()
  mm/swapfile.c: swap_next should increase position index
  Bluetooth: Fix refcount use-after-free issue
  tools/power/x86/intel_pstate_tracer: changes for python 3 compatibility
  selftests/ftrace: fix glob selftest
  ceph: ensure we have a new cap before continuing in fill_inode
  ar5523: Add USB ID of SMCWUSBT-G2 wireless adapter
  ARM: 8948/1: Prevent OOB access in stacktrace
  tracing: Set kernel_stack's caller size properly
  Bluetooth: btrtl: Use kvmalloc for FW allocations
  powerpc/eeh: Only dump stack once if an MMIO loop is detected
  nfsd: Fix a soft lockup race in nfsd_file_mark_find_or_create()
  s390/cpum_sf: Use kzalloc and minor changes
  dmaengine: zynqmp_dma: fix burst length configuration
  btrfs: tree-checker: Check leaf chunk item size
  i2c: tegra: Prevent interrupt triggering after transfer timeout
  drm/amd/display: Initialize DSC PPS variables to 0
  scsi: ufs: Fix a race condition in the tracing code
  scsi: ufs: Make ufshcd_add_command_trace() easier to read
  ACPI: EC: Reference count query handlers under lock
  sctp: move trace_sctp_probe_path into sctp_outq_sack
  scsi: lpfc: Fix incomplete NVME discovery when target
  scsi: qla2xxx: Fix stuck session in GNL
  opp: Replace list_kref with a local counter
  media: ti-vpe: cal: Restrict DMA to avoid memory corruption
  drm/scheduler: Avoid accessing freed bad job.
  seqlock: Require WRITE_ONCE surrounding raw_seqcount_barrier
  drm/mcde: Handle pending vblank while disabling display
  ipv6_route_seq_next should increase position index
  rt_cpu_seq_next should increase position index
  neigh_stat_seq_next() should increase position index
  vcc_seq_next should increase position index
  tipc: fix link overflow issue at socket shutdown
  ALSA: hda: enable regmap internal locking
  xfs: fix log reservation overflows when allocating large rt extents
  module: Remove accidental change of module_enable_x()
  KVM: arm/arm64: vgic: Fix potential double free dist->spis in __kvm_vgic_destroy()
  kernel/sys.c: avoid copying possible padding bytes in copy_to_user
  kernel/notifier.c: intercept duplicate registrations to avoid infinite loops
  selftests/bpf: De-flake test_tcpbpf
  arm64: insn: consistently handle exit text
  drm/amdgpu: fix calltrace during kmd unload(v3)
  xfs: fix realtime file data space leak
  s390: avoid misusing CALL_ON_STACK for task stack setup
  xtensa: fix system_call interaction with ptrace
  ASoC: max98090: remove msleep in PLL unlocked workaround
  f2fs: stop GC when the victim becomes fully valid
  CIFS: Properly process SMB3 lease breaks
  CIFS: Use common error handling code in smb2_ioctl_query_info()
  SUNRPC: Capture completion of all RPC tasks
  debugfs: Fix !DEBUG_FS debugfs_create_automount
  mt76: add missing locking around ampdu action
  mt76: do not use devm API for led classdev
  scsi: pm80xx: Cleanup command when a reset times out
  gfs2: clean up iopen glock mess in gfs2_create_inode
  mmc: core: Fix size overflow for mmc partitions
  ubi: Fix producing anchor PEBs
  RDMA/iw_cxgb4: Fix an error handling path in 'c4iw_connect()'
  xfs: fix attr leaf header freemap.size underflow
  fix dget_parent() fastpath race
  PCI: Avoid double hpmemsize MMIO window assignment
  RDMA/i40iw: Fix potential use after free
  RDMA/qedr: Fix potential use after free
  x86/kdump: Always reserve the low 1M when the crashkernel option is specified
  dmaengine: mediatek: hsdma_probe: fixed a memory leak when devm_request_irq fails
  bcache: fix a lost wake-up problem caused by mca_cannibalize_lock
  tracing: Adding NULL checks for trace_array descriptor pointer
  tracing: Verify if trace array exists before destroying it.
  tpm_crb: fix fTPM on AMD Zen+ CPUs
  drm/amdgpu/powerplay/smu7: fix AVFS handling with custom powerplay table
  mfd: mfd-core: Protect against NULL call-back function pointer
  mtd: cfi_cmdset_0002: don't free cfi->cfiq in error path of cfi_amdstd_setup()
  ice: Fix to change Rx/Tx ring descriptor size via ethtool with DCBx
  drm/amdgpu/powerplay: fix AVFS handling with custom powerplay table
  clk/ti/adpll: allocate room for terminating null
  f2fs: avoid kernel panic on corruption test
  iomap: Fix overflow in iomap_page_mkwrite
  dax: Fix alloc_dax_region() compile warning
  net: silence data-races on sk_backlog.tail
  powerpc/64s: Always disable branch profiling for prom_init.o
  scsi: lpfc: Fix kernel crash at lpfc_nvme_info_show during remote port bounce
  scsi: fnic: fix use after free
  PM / devfreq: tegra30: Fix integer overflow on CPU's freq max out
  dm table: do not allow request-based DM to stack on partitions
  leds: mlxreg: Fix possible buffer overflow
  xfs: properly serialise fallocate against AIO+DIO
  drm/amd/display: Free gamma after calculating legacy transfer function
  media: smiapp: Fix error handling at NVM reading
  soundwire: intel/cadence: fix startup sequence
  ASoC: kirkwood: fix IRQ error handling
  gma/gma500: fix a memory disclosure bug due to uninitialized bytes
  xfs: fix inode fork extent count overflow
  m68k: q40: Fix info-leak in rtc_ioctl
  scsi: aacraid: fix illegal IO beyond last LBA
  mm: fix double page fault on arm64 if PTE_AF is cleared
  PCI/IOV: Serialize sysfs sriov_numvfs reads vs writes
  ath10k: fix memory leak for tpc_stats_final
  ath10k: fix array out-of-bounds access
  scsi: qla2xxx: Add error handling for PLOGI ELS passthrough
  dma-fence: Serialise signal enabling (dma_fence_enable_sw_signaling)
  drm/amdkfd: Fix race in gfx10 context restore handler
  drm/amd/display: Do not double-buffer DTO adjustments
  media: mc-device.c: fix memleak in media_device_register_entity
  selinux: allow labeling before policy is loaded
  scsi: mpt3sas: Free diag buffer without any status check
  scsi: lpfc: Fix pt2pt discovery on SLI3 HBAs
  kernel/sysctl-test: Add null pointer test for sysctl.c:proc_dointvec()
  Linux 5.4.68
  iommu/amd: Use cmpxchg_double() when updating 128-bit IRTE
  mm: memcg: fix memcg reclaim soft lockup
  net: add __must_check to skb_put_padto()
  net: qrtr: check skb_put_padto() return value
  net: phy: Do not warn in phy_stop() on PHY_DOWN
  net: phy: Avoid NPD upon phy_detach() when driver is unbound
  net: lantiq: Disable IRQs only if NAPI gets scheduled
  net: lantiq: Use napi_complete_done()
  net: lantiq: use netif_tx_napi_add() for TX NAPI
  net: lantiq: Wake TX queue again
  bnxt_en: Protect bnxt_set_eee() and bnxt_set_pauseparam() with mutex.
  bnxt_en: return proper error codes in bnxt_show_temp
  net/mlx5e: TLS, Do not expose FPGA TLS counter if not supported
  net/mlx5e: Enable adding peer miss rules only if merged eswitch is supported
  tipc: use skb_unshare() instead in tipc_buf_append()
  tipc: fix shutdown() of connection oriented socket
  tipc: Fix memory leak in tipc_group_create_member()
  taprio: Fix allowing too small intervals
  nfp: use correct define to return NONE fec
  net: sctp: Fix IPv6 ancestor_size calc in sctp_copy_descendant
  net: sch_generic: avoid concurrent reset and enqueue op for lockless qdisc
  net/mlx5: Fix FTE cleanup
  net: ipv6: fix kconfig dependency warning for IPV6_SEG6_HMAC
  net: Fix bridge enslavement failure
  net: dsa: rtl8366: Properly clear member config
  net: DCB: Validate DCB_ATTR_DCB_BUFFER argument
  net: bridge: br_vlan_get_pvid_rcu() should dereference the VLAN group under RCU
  ipv6: avoid lockdep issue in fib6_del()
  ipv4: Update exception handling for multipath routes via same device
  ipv4: Initialize flowi4_multipath_hash in data path
  ip: fix tos reflection in ack and reset packets
  hdlc_ppp: add range checks in ppp_cp_parse_cr()
  geneve: add transport ports in route lookup for geneve
  cxgb4: Fix offset when clearing filter byte counters
  cxgb4: fix memory leak during module unload
  bnxt_en: Fix NULL ptr dereference crash in bnxt_fw_reset_task()
  bnxt_en: Avoid sending firmware messages when AER error is detected.
  act_ife: load meta modules before tcf_idr_check_alloc()
  mm/thp: fix __split_huge_pmd_locked() for migration PMD
  kprobes: fix kill kprobe which has been marked as gone
  ibmvnic: add missing parenthesis in do_reset()
  ibmvnic fix NULL tx_pools and rx_tools issue at do_reset
  af_key: pfkey_dump needs parameter validation
  Revert "ehci-hcd: Move include to keep CRC stable"
  Linux 5.4.67
  dax: Fix compilation for CONFIG_DAX && !CONFIG_FS_DAX
  dm: Call proper helper to determine dax support
  mm/memory_hotplug: drain per-cpu pages again during memory offline
  dm/dax: Fix table reference counts
  selftests/vm: fix display of page size in map_hugetlb
  powerpc/dma: Fix dma_map_ops::get_required_mask
  ehci-hcd: Move include to keep CRC stable
  s390/zcrypt: fix kmalloc 256k failure
  x86/boot/compressed: Disable relocation relaxation
  serial: 8250_pci: Add Realtek 816a and 816b
  Input: i8042 - add Entroware Proteus EL07R4 to nomux and reset lists
  Input: trackpoint - add new trackpoint variant IDs
  percpu: fix first chunk size calculation for populated bitmap
  ALSA: hda/realtek - The Mic on a RedmiBook doesn't work
  ALSA: hda: fixup headset for ASUS GX502 laptop
  Revert "ALSA: hda - Fix silent audio output and corrupted input on MSI X570-A PRO"
  i2c: i801: Fix resume bug
  usb: typec: ucsi: Prevent mode overrun
  usblp: fix race between disconnect() and read()
  USB: UAS: fix disconnect by unplugging a hub
  USB: quirks: Add USB_QUIRK_IGNORE_REMOTE_WAKEUP quirk for BYD zhaoxin notebook
  drm/i915: Filter wake_flags passed to default_wake_function
  riscv: Add sfence.vma after early page table changes
  i2c: mxs: use MXS_DMA_CTRL_WAIT4END instead of DMA_CTRL_ACK
  iommu/amd: Fix potential @entry null deref
  arm64: bpf: Fix branch offset in JIT
  drm/mediatek: Add missing put_device() call in mtk_hdmi_dt_parse_pdata()
  drm/mediatek: Add exception handling in mtk_drm_probe() if component init fail
  MIPS: SNI: Fix spurious interrupts
  fbcon: Fix user font detection test at fbcon_resize().
  perf test: Free formats for perf pmu parse test
  perf parse-event: Fix memory leak in evsel->unit
  perf evlist: Fix cpu/thread map leak
  MIPS: SNI: Fix MIPS_L1_CACHE_SHIFT
  perf test: Fix the "signal" test inline assembly
  Drivers: hv: vmbus: Add timeout to vmbus_wait_for_unload
  arm64: Allow CPUs unaffected by ARM erratum 1418040 to come in late
  scsi: libsas: Fix error path in sas_notify_lldd_dev_found()
  Drivers: hv: vmbus: hibernation: do not hang forever in vmbus_bus_resume()
  ASoC: meson: axg-toddr: fix channel order on g12 platforms
  powerpc/book3s64/radix: Fix boot failure with large amount of guest memory
  ASoC: qcom: common: Fix refcount imbalance on error
  ASoC: qcom: Set card->owner to avoid warnings
  clk: rockchip: Fix initialization of mux_pll_src_4plls_p
  clk: davinci: Use the correct size when allocating memory
  KVM: MIPS: Change the definition of kvm type
  spi: Fix memory leak on split transfers
  i2c: algo: pca: Reapply i2c bus settings after reset
  f2fs: Return EOF on unaligned end of file DIO read
  f2fs: fix indefinite loop scanning for free nid
  block: only call sched requeue_request() for scheduled requests
  nvme-tcp: cancel async events before freeing event struct
  nvme-rdma: cancel async events before freeing event struct
  nvme-fc: cancel async events before freeing event struct
  openrisc: Fix cache API compile issue when not inlining
  cifs: fix DFS mount with cifsacl/modefromsid
  rapidio: Replace 'select' DMAENGINES 'with depends on'
  SUNRPC: stop printk reading past end of string
  NFS: Zero-stateid SETATTR should first return delegation
  spi: spi-loopback-test: Fix out-of-bounds read
  regulator: pwm: Fix machine constraints application
  scsi: lpfc: Fix FLOGI/PLOGI receive race condition in pt2pt discovery
  scsi: libfc: Fix for double free()
  scsi: pm8001: Fix memleak in pm8001_exec_internal_task_abort
  NFSv4.1 handle ERR_DELAY error reclaiming locking state on delegation recall
  firmware_loader: fix memory leak for paged buffer
  hv_netvsc: Remove "unlikely" from netvsc_select_queue
  net: handle the return value of pskb_carve_frag_list() correctly
  dsa: Allow forwarding of redirected IGMP traffic
  e1000e: Add support for Comet Lake
  RDMA/bnxt_re: Restrict the max_gids to 256
  gfs2: initialize transaction tr_ailX_lists earlier
  Revert "netfilter: conntrack: allow sctp heartbeat after connection re-use"
  Linux 5.4.66
  gcov: add support for GCC 10.1
  drm/msm: Disable the RPTR shadow
  drm/msm/gpu: make ringbuffer readonly
  usb: typec: ucsi: acpi: Check the _DEP dependencies
  usb: Fix out of sync data toggle if a configured device is reconfigured
  USB: serial: option: add support for SIM7070/SIM7080/SIM7090 modules
  USB: serial: option: support dynamic Quectel USB compositions
  USB: serial: ftdi_sio: add IDs for Xsens Mti USB converter
  usb: core: fix slab-out-of-bounds Read in read_descriptors
  phy: qcom-qmp: Use correct values for ipq8074 PCIe Gen2 PHY init
  staging: greybus: audio: fix uninitialized value issue
  video: fbdev: fix OOB read in vga_8planes_imageblit()
  ARM: dts: vfxxx: Add syscon compatible with OCOTP
  debugfs: Fix module state check condition
  KVM: fix memory leak in kvm_io_bus_unregister_dev()
  KVM: arm64: Do not try to map PUDs when they are folded into PMD
  KVM: VMX: Don't freeze guest when event delivery causes an APIC-access exit
  vgacon: remove software scrollback support
  fbcon: remove now unused 'softback_lines' cursor() argument
  fbcon: remove soft scrollback code
  RDMA/mlx4: Read pkey table length instead of hardcoded value
  RDMA/rxe: Fix the parent sysfs read when the interface has 15 chars
  rbd: require global CAP_SYS_ADMIN for mapping and unmapping
  mmc: sdhci-of-esdhc: Don't walk device-tree on every interrupt
  mmc: sdio: Use mmc_pre_req() / mmc_post_req()
  drm/msm: Disable preemption on all 5xx targets
  drm/tve200: Stabilize enable/disable
  drm/i915/gvt: do not check len & max_len for lri
  scsi: target: iscsi: Fix hang in iscsit_access_np() when getting tpg->np_login_sem
  scsi: target: iscsi: Fix data digest calculation
  regulator: core: Fix slab-out-of-bounds in regulator_unlock_recursive()
  regulator: plug of_node leak in regulator_register()'s error path
  regulator: push allocation in set_consumer_device_supply() out of lock
  regulator: push allocations in create_regulator() outside of lock
  regulator: push allocation in regulator_init_coupling() outside of lock
  kobject: Restore old behaviour of kobject_del(NULL)
  btrfs: fix wrong address when faulting in pages in the search ioctl
  btrfs: fix lockdep splat in add_missing_dev
  btrfs: require only sector size alignment for parent eb bytenr
  staging: wlan-ng: fix out of bounds read in prism2sta_probe_usb()
  iio:accel:mma8452: Fix timestamp alignment and prevent data leak.
  iio:accel:mma7455: Fix timestamp alignment and prevent data leak.
  iio: accel: kxsd9: Fix alignment of local buffer.
  iio:chemical:ccs811: Fix timestamp alignment and prevent data leak.
  iio:light:max44000 Fix timestamp alignment and prevent data leak.
  iio:magnetometer:ak8975 Fix alignment and data leak issues.
  iio:adc:ti-adc081c Fix alignment and data leak issues
  iio:adc:max1118 Fix alignment of timestamp and data leak issues
  iio:adc:ina2xx Fix timestamp alignment issue.
  iio:adc:ti-adc084s021 Fix alignment and data leak issues.
  iio:accel:bmc150-accel: Fix timestamp alignment and prevent data leak.
  iio:proximity:mb1232: Fix timestamp alignment and prevent data leak.
  iio:light:ltr501 Fix timestamp alignment issue.
  iio: cros_ec: Set Gyroscope default frequency to 25Hz
  iio: adc: ti-ads1015: fix conversion when CONFIG_PM is not set
  gcov: Disable gcov build with GCC 10
  iommu/amd: Do not use IOMMUv2 functionality when SME is active
  drm/amdgpu: Fix bug in reporting voltage for CIK
  ALSA: hda: fix a runtime pm issue in SOF when integrated GPU is disabled
  ALSA: hda: hdmi - add Rocketlake support
  arm64/module: set trampoline section flags regardless of CONFIG_DYNAMIC_FTRACE
  cpufreq: intel_pstate: Fix intel_pstate_get_hwp_max() for turbo disabled
  cpufreq: intel_pstate: Refuse to turn off with HWP enabled
  ARC: [plat-hsdk]: Switch ethernet phy-mode to rgmii-id
  HID: elan: Fix memleak in elan_input_configured
  drivers/net/wan/hdlc_cisco: Add hard_header_len
  HID: microsoft: Add rumble support for the 8bitdo SN30 Pro+ controller
  HID: quirks: Set INCREMENT_USAGE_ON_DUPLICATE for all Saitek X52 devices
  nvme-pci: cancel nvme device request before disabling
  nvme-rdma: fix reset hang if controller died in the middle of a reset
  nvme-rdma: fix timeout handler
  nvme-rdma: serialize controller teardown sequences
  nvme-tcp: fix reset hang if controller died in the middle of a reset
  nvme-tcp: fix timeout handler
  nvme-tcp: serialize controller teardown sequences
  nvme: have nvme_wait_freeze_timeout return if it timed out
  nvme-fabrics: don't check state NVME_CTRL_NEW for request acceptance
  nvmet-tcp: Fix NULL dereference when a connect data comes in h2cdata pdu
  irqchip/eznps: Fix build error for !ARC700 builds
  xfs: initialize the shortform attr header padding entry
  cfg80211: Adjust 6 GHz frequency to channel conversion
  drivers/net/wan/lapbether: Set network_header before transmitting
  xfs: fix off-by-one in inode alloc block reservation calculation
  net: hns3: Fix for geneve tx checksum bug
  drivers/dma/dma-jz4780: Fix race condition between probe and irq handler
  ALSA: hda/tegra: Program WAKEEN register for Tegra
  ALSA: hda: Fix 2 channel swapping for Tegra
  firestream: Fix memleak in fs_open
  NFC: st95hf: Fix memleak in st95hf_in_send_cmd
  drivers/net/wan/lapbether: Added needed_tailroom
  netfilter: conntrack: allow sctp heartbeat after connection re-use
  dmaengine: acpi: Put the CSRT table after using it
  ARC: HSDK: wireup perf irq
  arm64: dts: ns2: Fixed QSPI compatible string
  ARM: dts: BCM5301X: Fixed QSPI compatible string
  ARM: dts: NSP: Fixed QSPI compatible string
  ARM: dts: bcm: HR2: Fixed QSPI compatible string
  IB/isert: Fix unaligned immediate-data handling
  block: Set same_page to false in __bio_try_merge_page if ret is false
  spi: stm32: fix pm_runtime_get_sync() error checking
  nvme-fabrics: allow to queue requests for live queues
  spi: stm32: Rate-limit the 'Communication suspended' message
  mmc: sdhci-msm: Add retries when all tuning phases are found valid
  mmc: sdhci-acpi: Clear amd_sdhci_host on reset
  drm/sun4i: backend: Disable alpha on the lowest plane on the A20
  drm/sun4i: backend: Support alpha property on lowest plane
  soundwire: fix double free of dangling pointer
  scsi: mpt3sas: Don't call disable_irq from IRQ poll handler
  scsi: megaraid_sas: Don't call disable_irq from process IRQ poll
  RDMA/core: Fix reported speed and width
  scsi: libsas: Set data_dir as DMA_NONE if libata marks qc as NODATA
  iio: adc: mcp3422: fix locking scope
  iio: adc: mcp3422: fix locking on error path
  drm/sun4i: Fix dsi dcs long write function
  arm64: dts: imx8mq: Fix TMU interrupt property
  drm/sun4i: add missing put_device() call in sun8i_r40_tcon_tv_set_mux()
  RDMA/bnxt_re: Do not report transparent vlan from QP1
  RDMA/rxe: Fix panic when calling kmem_cache_create()
  RDMA/rxe: Drop pointless checks in rxe_init_ports
  RDMA/rxe: Fix memleak in rxe_mem_init_user
  ARM: dts: imx7ulp: Correct gpio ranges
  ARM: dts: ls1021a: fix QuadSPI-memory reg range
  selftests/timers: Turn off timeout setting
  ARM: dts: socfpga: fix register entry for timer3 on Arria10
  regulator: remove superfluous lock in regulator_resolve_coupling()
  regulator: push allocation in regulator_ena_gpio_request() out of lock
  ARM: dts: logicpd-som-lv-baseboard: Fix missing video
  ARM: dts: logicpd-som-lv-baseboard: Fix broken audio
  ARM: dts: logicpd-torpedo-baseboard: Fix broken audio
  Linux 5.4.65
  net: disable netpoll on fresh napis
  tipc: fix shutdown() of connectionless socket
  taprio: Fix using wrong queues in gate mask
  sctp: not disable bh in the whole sctp_get_port_local()
  net: usb: dm9601: Add USB ID of Keenetic Plus DSL
  netlabel: fix problems with mapping removal
  ipv6: Fix sysctl max for fib_multipath_hash_policy
  ipv4: Silence suspicious RCU usage warning
  Linux 5.4.64
  net: usb: Fix uninit-was-stored issue in asix_read_phy_addr()
  cfg80211: regulatory: reject invalid hints
  mm/khugepaged.c: fix khugepaged's request size in collapse_file
  mm/hugetlb: fix a race between hugetlb sysctl handlers
  checkpatch: fix the usage of capture group ( ... )
  sdhci: tegra: Add missing TMCLK for data timeout
  perf record: Correct the help info of option "--no-bpf-event"
  vfio/pci: Fix SR-IOV VF handling with MMIO blocking
  mm: madvise: fix vma use-after-free
  mm: slub: fix conversion of freelist_corrupted()
  dm thin metadata: Fix use-after-free in dm_bm_set_read_only
  dm thin metadata: Avoid returning cmd->bm wild pointer on error
  dm cache metadata: Avoid returning cmd->bm wild pointer on error
  dm crypt: Initialize crypto wait structures
  dm integrity: fix error reporting in bitmap mode after creation
  dm mpath: fix racy management of PG initialization
  dm writecache: handle DAX to partitions on persistent memory correctly
  drm/amd/pm: avoid false alarm due to confusing softwareshutdowntemp setting
  dmaengine: dw-edma: Fix scatter-gather address calculation
  blk-iocost: ioc_pd_free() shouldn't assume irq disabled
  libata: implement ATA_HORKAGE_MAX_TRIM_128M and apply to Sandisks
  block: ensure bdi->io_pages is always initialized
  block: allow for_each_bvec to support zero len bvec
  affs: fix basic permission bits to actually work
  media: rc: uevent sysfs file races with rc_unregister_device()
  media: rc: do not access device via sysfs after rc_unregister_device()
  mmc: sdhci-pci: Fix SDHCI_RESET_ALL for CQHCI for Intel GLK-based controllers
  mmc: cqhci: Add cqhci_deactivate()
  mmc: dt-bindings: Add resets/reset-names for Mediatek MMC bindings
  mmc: mediatek: add optional module reset property
  arm64: dts: mt7622: add reset node for mmc device
  ALSA: hda/realtek - Improved routing for Thinkpad X1 7th/8th Gen
  ALSA: hda/realtek: Add quirk for Samsung Galaxy Book Ion NT950XCJ-X716A
  ALSA: firewire-tascam: exclude Tascam FE-8 from detection
  ALSA: hda - Fix silent audio output and corrupted input on MSI X570-A PRO
  ALSA: firewire-digi00x: exclude Avid Adrenaline from detection
  ALSA: hda/hdmi: always check pin power status in i915 pin fixup
  ALSA: pcm: oss: Remove superfluous WARN_ON() for mulaw sanity check
  ALSA: usb-audio: Add implicit feedback quirk for UR22C
  ALSA: ca0106: fix error code handling
  Revert "ALSA: hda: Add support for Loongson 7A1000 controller"
  Revert "net: dsa: microchip: set the correct number of ports"
  btrfs: fix potential deadlock in the search ioctl
  net: core: use listified Rx for GRO_NORMAL in napi_gro_receive()
  btrfs: tree-checker: fix the error message for transid error
  btrfs: set the lockdep class for log tree extent buffers
  btrfs: set the correct lockdep class for new nodes
  btrfs: allocate scrub workqueues outside of locks
  btrfs: drop path before adding new uuid tree entry
  ARC: perf: don't bail setup if pct irq missing in device-tree
  xfs: don't update mtime on COW faults
  ext2: don't update mtime on COW faults
  tracing/kprobes, x86/ptrace: Fix regs argument order for i386
  iommu/vt-d: Handle 36bit addressing for x86-32
  vfio-pci: Invalidate mmaps and block MMIO access on disabled memory
  vfio-pci: Fault mmaps to enable vma tracking
  vfio/type1: Support faulting PFNMAP vmas
  include/linux/log2.h: add missing () around n in roundup_pow_of_two()
  net/packet: fix overflow in tpacket_rcv
  iommu/amd: Restore IRTE.RemapEn bit after programming IRTE
  thermal: qcom-spmi-temp-alarm: Don't suppress negative temp
  thermal: ti-soc-thermal: Fix bogus thermal shutdowns for omap4430
  iommu/vt-d: Serialize IOMMU GCMD register modifications
  x86, fakenuma: Fix invalid starting node ID
  tg3: Fix soft lockup when tg3_reset_task() fails.
  perf jevents: Fix suspicious code in fixregex()
  xfs: fix xfs_bmap_validate_extent_raw when checking attr fork of rt files
  MIPS: add missing MSACSR and upper MSA initialization
  net: gemini: Fix another missing clk_disable_unprepare() in probe
  fix regression in "epoll: Keep a reference on files added to the check list"
  net: ethernet: mlx4: Fix memory allocation in mlx4_buddy_init()
  perf tools: Correct SNOOPX field offset
  cxgb4: fix thermal zone device registration
  nvme: fix controller instance leak
  nvmet-fc: Fix a missed _irqsave version of spin_lock in 'nvmet_fc_fod_op_done()'
  netfilter: nfnetlink: nfnetlink_unicast() reports EAGAIN instead of ENOBUFS
  net: dsa: mt7530: fix advertising unsupported 1000baseT_Half
  selftests/bpf: Fix massive output from test_maps
  media: cedrus: Add missing v4l2_ctrl_request_hdl_put()
  media: vicodec: add missing v4l2_ctrl_request_hdl_put()
  bnxt: don't enable NAPI until rings are ready
  xfs: fix boundary test in xfs_attr_shortform_verify
  bnxt_en: fix HWRM error when querying VF temperature
  bnxt_en: Fix possible crash in bnxt_fw_reset_task().
  bnxt_en: Fix PCI AER error recovery flow
  bnxt_en: Check for zero dir entries in NVRAM.
  bnxt_en: Don't query FW when netif_running() is false.
  net: ethernet: ti: cpsw: fix clean up of vlan mc entries for host port
  gtp: add GTPA_LINK info to msg sent to userspace
  dmaengine: pl330: Fix burst length if burst size is smaller than bus width
  net: arc_emac: Fix memleak in arc_mdio_probe
  ravb: Fixed to be able to unload modules
  net: systemport: Fix memleak in bcm_sysport_probe
  net: hns: Fix memleak in hns_nic_dev_probe
  netfilter: nf_tables: fix destination register zeroing
  netfilter: nf_tables: incorrect enum nft_list_attributes definition
  netfilter: nf_tables: add NFTA_SET_USERDATA if not null
  mmc: sdhci-acpi: Fix HS400 tuning for AMDI0040
  MIPS: BMIPS: Also call bmips_cpu_setup() for secondary cores
  MIPS: mm: BMIPS5000 has inclusive physical caches
  rxrpc: Make rxrpc_kernel_get_srtt() indicate validity
  rxrpc: Keep the ACK serial in a var in rxrpc_input_ack()
  dmaengine: at_hdmac: check return value of of_find_device_by_node() in at_dma_xlate()
  batman-adv: bla: use netif_rx_ni when not in interrupt context
  batman-adv: Fix own OGM check in aggregated OGMs
  batman-adv: Avoid uninitialized chaddr when handling DHCP
  dmaengine: of-dma: Fix of_dma_router_xlate's of_dma_xlate handling
  fsldma: fix very broken 32-bit ppc ioread64 functionality
  xen/xenbus: Fix granting of vmalloc'd memory
  drm/amd/display: Fix memleak in amdgpu_dm_mode_config_init
  drm/amd/display: Retry AUX write when fail occurs
  drivers: gpu: amd: Initialize amdgpu_dm_backlight_caps object to 0 in amdgpu_dm_update_backlight_caps
  drm/amd/display: Reject overlay plane configurations in multi-display scenarios
  s390: don't trace preemption in percpu macros
  nbd: restore default timeout when setting it to zero
  cpuidle: Fixup IRQ state
  drm/omap: fix incorrect lock state
  ceph: don't allow setlease on cephfs
  drm/msm/a6xx: fix gmu start on newer firmware
  habanalabs: check correct vmalloc return code
  habanalabs: validate FW file size
  drm/msm: enable vblank during atomic commits
  nvmet: Disable keep-alive timer when kato is cleared to 0h
  hwmon: (applesmc) check status earlier.
  drm/msm: add shutdown support for display platform_driver
  tty: serial: qcom_geni_serial: Drop __init from qcom_geni_console_setup
  drm/msm/dpu: Fix scale params in plane validation
  HID: quirks: Always poll three more Lenovo PixArt mice
  Linux 5.4.63
  scsi: target: tcmu: Optimize use of flush_dcache_page
  scsi: target: tcmu: Fix size in calls to tcmu_flush_dcache_range
  sdhci: tegra: Remove SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK for Tegra186
  sdhci: tegra: Remove SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK for Tegra210
  arm64: tegra: Add missing timeout clock to Tegra210 SDMMC
  arm64: tegra: Add missing timeout clock to Tegra186 SDMMC nodes
  arm64: tegra: Add missing timeout clock to Tegra194 SDMMC nodes
  dt-bindings: mmc: tegra: Add tmclk for Tegra210 and later
  KVM: arm64: Set HCR_EL2.PTW to prevent AT taking synchronous exception
  KVM: arm64: Survive synchronous exceptions caused by AT instructions
  KVM: arm64: Add kvm_extable for vaxorcism code
  drm/etnaviv: fix TS cache flushing on GPUs with BLT engine
  drm/sched: Fix passing zero to 'PTR_ERR' warning v2
  perf record/stat: Explicitly call out event modifiers in the documentation
  HID: core: Sanitize event code and type when mapping input
  HID: core: Correctly handle ReportSize being zero
  Linux 5.4.62
  io_uring: Fix NULL pointer dereference in io_sq_wq_submit_work()
  ALSA: usb-audio: Update documentation comment for MS2109 quirk
  HID: hiddev: Fix slab-out-of-bounds write in hiddev_ioctl_usage()
  kbuild: fix broken builds because of GZIP,BZIP2,LZOP variables
  kbuild: add variables for compression tools
  kheaders: explain why include/config/autoconf.h is excluded from md5sum
  kheaders: remove the last bashism to allow sh to run it
  kheaders: optimize header copy for in-tree builds
  kheaders: optimize md5sum calculation for in-tree builds
  kheaders: remove unneeded 'cat' command piped to 'head' / 'tail'
  fbmem: pull fbcon_update_vcs() out of fb_set_var()
  usb: dwc3: gadget: Handle ZLP for sg requests
  usb: dwc3: gadget: Fix handling ZLP
  usb: dwc3: gadget: Don't setup more than requested
  drm/i915: Fix cmd parser desc matching with masks
  usb: storage: Add unusual_uas entry for Sony PSZ drives
  USB: cdc-acm: rework notification_buffer resizing
  USB: gadget: u_f: Unbreak offset calculation in VLAs
  USB: gadget: f_ncm: add bounds checks to ncm_unwrap_ntb()
  USB: gadget: u_f: add overflow checks to VLA macros
  usb: host: ohci-exynos: Fix error handling in exynos_ohci_probe()
  USB: Ignore UAS for JMicron JMS567 ATA/ATAPI Bridge
  USB: quirks: Ignore duplicate endpoint on Sound Devices MixPre-D
  USB: quirks: Add no-lpm quirk for another Raydium touchscreen
  usb: uas: Add quirk for PNY Pro Elite
  USB: yurex: Fix bad gfp argument
  drm/amd/pm: correct the thermal alert temperature limit settings
  drm/amd/pm: correct Vega20 swctf limit setting
  drm/amd/pm: correct Vega12 swctf limit setting
  drm/amd/pm: correct Vega10 swctf limit setting
  drm/amd/powerplay: Fix hardmins not being sent to SMU for RV
  drm/amdgpu/gfx10: refine mgcg setting
  drm/amdgpu: Fix buffer overflow in INFO ioctl
  x86/hotplug: Silence APIC only after all interrupts are migrated
  irqchip/stm32-exti: Avoid losing interrupts due to clearing pending bits by mistake
  genirq/matrix: Deal with the sillyness of for_each_cpu() on UP
  crypto: af_alg - Work around empty control messages without MSG_MORE
  device property: Fix the secondary firmware node handling in set_primary_fwnode()
  powerpc/perf: Fix crashes with generic_compat_pmu & BHRB
  PM: sleep: core: Fix the handling of pending runtime resume requests
  arm64: vdso32: make vdso32 install conditional
  xhci: Always restore EP_SOFT_CLEAR_TOGGLE even if ep reset failed
  xhci: Do warm-reset when both CAS and XDEV_RESUME are set
  usb: host: xhci: fix ep context print mismatch in debugfs
  XEN uses irqdesc::irq_data_common::handler_data to store a per interrupt XEN data pointer which contains XEN specific information.
  writeback: Fix sync livelock due to b_dirty_time processing
  writeback: Avoid skipping inode writeback
  writeback: Protect inode->i_io_list with inode->i_lock
  serial: 8250: change lock order in serial8250_do_startup()
  serial: 8250_exar: Fix number of ports for Commtech PCIe cards
  serial: stm32: avoid kernel warning on absence of optional IRQ
  serial: pl011: Don't leak amba_ports entry on driver register error
  serial: pl011: Fix oops on -EPROBE_DEFER
  serial: samsung: Removes the IRQ not found warning
  vt_ioctl: change VT_RESIZEX ioctl to check for error return from vc_resize()
  vt: defer kfree() of vc_screenbuf in vc_do_resize()
  USB: lvtest: return proper error code in probe
  fbcon: prevent user font height or width change from causing potential out-of-bounds access
  btrfs: detect nocow for swap after snapshot delete
  btrfs: fix space cache memory leak after transaction abort
  btrfs: check the right error variable in btrfs_del_dir_entries_in_log
  btrfs: reset compression level for lzo on remount
  blk-mq: order adding requests to hctx->dispatch and checking SCHED_RESTART
  HID: i2c-hid: Always sleep 60ms after I2C_HID_PWR_ON commands
  block: loop: set discard granularity and alignment for block device backed loop
  block: fix get_max_io_size()
  arm64: Allow booting of late CPUs affected by erratum 1418040
  arm64: Move handling of erratum 1418040 into C code
  powerpc/perf: Fix soft lockups due to missed interrupt accounting
  net: gianfar: Add of_node_put() before goto statement
  macvlan: validate setting of multiple remote source MAC addresses
  Revert "scsi: qla2xxx: Fix crash on qla2x00_mailbox_command"
  scsi: qla2xxx: Fix null pointer access during disconnect from subsystem
  scsi: qla2xxx: Check if FW supports MQ before enabling
  scsi: qla2xxx: Fix login timeout
  scsi: ufs: Clean up completed request without interrupt notification
  scsi: ufs: Improve interrupt handling for shared interrupts
  scsi: ufs: Fix possible infinite loop in ufshcd_hold
  scsi: fcoe: Fix I/O path allocation
  selftests: disable rp_filter for icmp_redirect.sh
  ASoC: wm8994: Avoid attempts to read unreadable registers
  s390/cio: add cond_resched() in the slow_eval_known_fn() loop
  ALSA: hda/realtek: Add model alc298-samsung-headphone
  can: j1939: transport: j1939_xtp_rx_dat_one(): compare own packets to detect corruptions
  netfilter: avoid ipv6 -> nf_defrag_ipv6 module dependency
  drm/amd/display: Switch to immediate mode for updating infopackets
  drm/amd/powerplay: correct UVD/VCE PG state on custom pptable uploading
  drm/amd/powerplay: correct Vega20 cached smu feature state
  spi: stm32: always perform registers configuration prior to transfer
  spi: stm32: fix stm32_spi_prepare_mbr in case of odd clk_rate
  spi: stm32: fix fifo threshold level in case of short transfer
  spi: stm32h7: fix race condition at end of transfer
  fs: prevent BUG_ON in submit_bh_wbc()
  ext4: correctly restore system zone info when remount fails
  ext4: handle error of ext4_setup_system_zone() on remount
  ext4: handle option set by mount flags correctly
  jbd2: abort journal if free a async write error metadata buffer
  ext4: handle read only external journal device
  ext4: don't BUG on inconsistent journal feature
  jbd2: make sure jh have b_transaction set in refile/unfile_buffer
  spi: stm32: clear only asserted irq flags on interrupt
  usb: gadget: f_tcm: Fix some resource leaks in some error paths
  i2c: rcar: in slave mode, clear NACK earlier
  i2c: core: Don't fail PRP0001 enumeration when no ID table exist
  null_blk: fix passing of REQ_FUA flag in null_handle_rq
  nvme: multipath: round-robin: fix single non-optimized path case
  nvme-fc: Fix wrong return value in __nvme_fc_init_request()
  blkcg: fix memleak for iolatency
  blk-mq: insert request not through ->queue_rq into sw/scheduler queue
  hwmon: (nct7904) Correct divide by 0
  bfq: fix blkio cgroup leakage v4
  block: Fix page_is_mergeable() for compound pages
  drm/msm/adreno: fix updating ring fence
  block: virtio_blk: fix handling single range discard request
  block: respect queue limit of max discard segment
  media: gpio-ir-tx: improve precision of transmitted signal due to scheduling
  ALSA: usb-audio: Add capture support for Saffire 6 (USB 1.1)
  cpufreq: intel_pstate: Fix EPP setting via sysfs in active mode
  PCI: qcom: Add missing reset for ipq806x
  PCI: qcom: Change duplicate PCI reset to phy reset
  PCI: qcom: Add missing ipq806x clocks in PCIe driver
  EDAC/{i7core,sb,pnd2,skx}: Fix error event severity
  EDAC: skx_common: get rid of unused type var
  EDAC: sb_edac: get rid of unused vars
  mm/vunmap: add cond_resched() in vunmap_pmd_range
  drm/amd/display: Fix dmesg warning from setting abm level
  drm/amd/display: Add additional config guards for DCN
  drm/amd/display: Trigger modesets on MST DSC connectors
  drm/ingenic: Fix incorrect assumption about plane->index
  gpu/drm: ingenic: Use the plane's src_[x,y] to configure DMA length
  cma: don't quit at first error when activating reserved areas
  mm/cma.c: switch to bitmap_zalloc() for cma bitmap allocation
  mm: fix kthread_use_mm() vs TLB invalidate
  mm/shuffle: don't move pages between zones and don't read garbage memmaps
  btrfs: only commit delayed items at fsync if we are logging a directory
  btrfs: only commit the delayed inode when doing a full fsync
  btrfs: factor out inode items copy loop from btrfs_log_inode()
  s390/numa: set node distance to LOCAL_DISTANCE
  drm/xen-front: Fix misused IS_ERR_OR_NULL checks
  drm/xen: fix passing zero to 'PTR_ERR' warning
  PM / devfreq: rk3399_dmc: Fix kernel oops when rockchip,pmu is absent
  PM / devfreq: rk3399_dmc: Disable devfreq-event device when fails
  PM / devfreq: rk3399_dmc: Add missing of_node_put()
  usb: cdns3: gadget: always zeroed TRB buffer when enable endpoint
  sched/uclamp: Fix a deadlock when enabling uclamp static key
  sched/uclamp: Protect uclamp fast path code with static key
  Revert "ath10k: fix DMA related firmware crashes on multiple devices"
  arm64: Fix __cpu_logical_map undefined issue
  efi: provide empty efi_enter_virtual_mode implementation
  brcmfmac: Set timeout value when configuring power save
  USB: sisusbvga: Fix a potential UB casued by left shifting a negative value
  powerpc/spufs: add CONFIG_COREDUMP dependency
  KVM: arm64: Fix symbol dependency in __hyp_call_panic_nvhe
  media: davinci: vpif_capture: fix potential double free
  hugetlbfs: prevent filesystem stacking of hugetlbfs
  EDAC/ie31200: Fallback if host bridge device is already initialized
  scsi: fcoe: Memory leak fix in fcoe_sysfs_fcf_del()
  ceph: do not access the kiocb after aio requests
  ceph: fix potential mdsc use-after-free crash
  scsi: iscsi: Do not put host in iscsi_set_flashnode_param()
  btrfs: make btrfs_qgroup_check_reserved_leak take btrfs_inode
  btrfs: file: reserve qgroup space after the hole punch range is locked
  locking/lockdep: Fix overflow in presentation of average lock-time
  drm/nouveau: Fix reference count leak in nouveau_connector_detect
  drm/nouveau: fix reference count leak in nv50_disp_atomic_commit
  drm/nouveau/drm/noveau: fix reference count leak in nouveau_fbcon_open
  f2fs: fix use-after-free issue
  HID: quirks: add NOGET quirk for Logitech GROUP
  cec-api: prevent leaking memory through hole in structure
  ALSA: hda: Add support for Loongson 7A1000 controller
  mips/vdso: Fix resource leaks in genvdso.c
  rtlwifi: rtl8192cu: Prevent leaking urb
  ARM: dts: ls1021a: output PPS signal on FIPER2
  PCI: Fix pci_create_slot() reference count leak
  omapfb: fix multiple reference count leaks due to pm_runtime_get_sync
  f2fs: fix error path in do_recover_data()
  selftests/powerpc: Purge extra count_pmc() calls of ebb selftests
  scsi: target: Fix xcopy sess release leak
  xfs: Don't allow logging of XFS_ISTALE inodes
  scsi: lpfc: Fix shost refcount mismatch when deleting vport
  drm/amdgpu/display: fix ref count leak when pm_runtime_get_sync fails
  drm/amdgpu: fix ref count leak in amdgpu_display_crtc_set_config
  drm/amd/display: fix ref count leak in amdgpu_drm_ioctl
  drm/amdgpu: fix ref count leak in amdgpu_driver_open_kms
  drm/radeon: fix multiple reference count leak
  drm/amdkfd: Fix reference count leaks.
  iommu/iova: Don't BUG on invalid PFNs
  mfd: intel-lpss: Add Intel Tiger Lake PCH-H PCI IDs
  scsi: target: tcmu: Fix crash on ARM during cmd completion
  blktrace: ensure our debugfs dir exists
  media: pci: ttpci: av7110: fix possible buffer overflow caused by bad DMA value in debiirq()
  powerpc/xive: Ignore kmemleak false positives
  arm64: dts: qcom: msm8916: Pull down PDM GPIOs during sleep
  mfd: intel-lpss: Add Intel Emmitsburg PCH PCI IDs
  ASoC: tegra: Fix reference count leaks.
  ASoC: img-parallel-out: Fix a reference count leak
  ASoC: img: Fix a reference count leak in img_i2s_in_set_fmt
  ALSA: hda/hdmi: Use force connectivity quirk on another HP desktop
  ALSA: hda/realtek: Fix pin default on Intel NUC 8 Rugged
  ALSA: pci: delete repeated words in comments
  ALSA: hda/hdmi: Add quirk to force connectivity
  ipvlan: fix device features
  net/sched: act_ct: Fix skb double-free in tcf_ct_handle_fragments() error flow
  net: ena: Make missed_tx stat incremental
  tipc: fix uninit skb->data in tipc_nl_compat_dumpit()
  net/smc: Prevent kernel-infoleak in __smc_diag_dump()
  net: sctp: Fix negotiation of the number of data streams.
  net: qrtr: fix usage of idr in port assignment to socket
  net: nexthop: don't allow empty NHA_GROUP
  net: Fix potential wrong skb->protocol in skb_vlan_untag()
  gre6: Fix reception with IP6_TNL_F_RCV_DSCP_COPY
  binfmt_flat: revert "binfmt_flat: don't offset the data start"
  powerpc/64s: Don't init FSCR_DSCR in __init_FSCR()

 Conflicts:
	Documentation/devicetree/bindings
	Documentation/devicetree/bindings/gpio/sgpio-aspeed.txt
	Documentation/devicetree/bindings/mmc/mtk-sd.txt
	Documentation/devicetree/bindings/mmc/nvidia,tegra20-sdhci.txt
	Documentation/devicetree/bindings/net/can/tcan4x5x.txt
	Documentation/devicetree/bindings/net/nfc/nxp-nci.txt
	Documentation/devicetree/bindings/net/nfc/pn544.txt
	Documentation/devicetree/bindings/sound/wm8994.txt
	block/bio.c
	block/blk-mq-sysfs.c
	block/blk-sysfs.c
	drivers/hid/hid-quirks.c
	drivers/mailbox/mailbox.c
	drivers/mmc/host/cqhci.c
	drivers/scsi/ufs/ufshcd.c
	drivers/soc/qcom/smp2p.c
	drivers/usb/dwc3/core.c
	drivers/usb/dwc3/core.h
	drivers/usb/dwc3/ep0.c
	drivers/usb/dwc3/gadget.c
	drivers/usb/dwc3/gadget.h
	drivers/usb/gadget/function/f_fs.c
	drivers/usb/host/xhci.c
	kernel/sched/cpufreq_schedutil.c
	mm/cma.c
	mm/madvise.c
	mm/memory.c
	mm/page_alloc.c
	net/qrtr/qrtr.c

Change-Id: I5d64dc5428045e92d5d1ce73dd55f78b36e83371
Signed-off-by: Srinivasarao P <spathi@codeaurora.org>
2021-04-22 09:44:51 +05:30

4576 lines
130 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* linux/mm/vmscan.c
*
* Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
*
* Swap reorganised 29.12.95, Stephen Tweedie.
* kswapd added: 7.1.96 sct
* Removed kswapd_ctl limits, and swap out as many pages as needed
* to bring the system back to freepages.high: 2.4.97, Rik van Riel.
* Zone aware kswapd started 02/00, Kanoj Sarcar (kanoj@sgi.com).
* Multiqueue VM started 5.8.00, Rik van Riel.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/mm.h>
#include <linux/sched/mm.h>
#include <linux/module.h>
#include <linux/gfp.h>
#include <linux/kernel_stat.h>
#include <linux/swap.h>
#include <linux/pagemap.h>
#include <linux/init.h>
#include <linux/highmem.h>
#include <linux/vmpressure.h>
#include <linux/vmstat.h>
#include <linux/file.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h> /* for try_to_release_page(),
buffer_heads_over_limit */
#include <linux/mm_inline.h>
#include <linux/backing-dev.h>
#include <linux/rmap.h>
#include <linux/topology.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/compaction.h>
#include <linux/notifier.h>
#include <linux/rwsem.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/memcontrol.h>
#include <linux/delayacct.h>
#include <linux/sysctl.h>
#include <linux/oom.h>
#include <linux/pagevec.h>
#include <linux/prefetch.h>
#include <linux/printk.h>
#include <linux/dax.h>
#include <linux/psi.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
#include <linux/swapops.h>
#include <linux/balloon_compaction.h>
#include "internal.h"
#define CREATE_TRACE_POINTS
#include <trace/events/vmscan.h>
/*
* Parameters and running state for one reclaim invocation. The fields
* below describe what may be reclaimed and accumulate the progress made
* so far.
*/
struct scan_control {
/* How many pages shrink_list() should reclaim */
unsigned long nr_to_reclaim;
/*
* Nodemask of nodes allowed by the caller. If NULL, all nodes
* are scanned.
*/
nodemask_t *nodemask;
/*
* The memory cgroup that hit its limit and as a result is the
* primary target of this reclaim invocation. NULL means global
* reclaim (see global_reclaim()).
*/
struct mem_cgroup *target_mem_cgroup;
/* Writepage batching in laptop mode; RECLAIM_WRITE */
unsigned int may_writepage:1;
/* Can mapped pages be reclaimed? */
unsigned int may_unmap:1;
/* Can pages be swapped as part of reclaim? */
unsigned int may_swap:1;
/*
* Cgroups are not reclaimed below their configured memory.low,
* unless we threaten to OOM. If any cgroups are skipped due to
* memory.low and nothing was reclaimed, go back for memory.low.
*/
unsigned int memcg_low_reclaim:1;
unsigned int memcg_low_skipped:1;
/*
* NOTE(review): presumably set when reclaiming on behalf of
* hibernation; the users of this bit are outside this excerpt.
*/
unsigned int hibernation_mode:1;
/* One of the zones is ready for compaction */
unsigned int compaction_ready:1;
/* Allocation order */
s8 order;
/* Scan (total_size >> priority) pages at once */
s8 priority;
/* The highest zone to isolate pages for reclaim from */
s8 reclaim_idx;
/* This context's GFP mask */
gfp_t gfp_mask;
/* Incremented by the number of inactive pages that were scanned */
unsigned long nr_scanned;
/* Number of pages freed so far during a call to shrink_zones() */
unsigned long nr_reclaimed;
/*
* Page counters grouped by state. NOTE(review): the code that fills
* and consumes them is outside this excerpt - confirm the exact
* meaning of each counter there.
*/
struct {
unsigned int dirty;
unsigned int unqueued_dirty;
unsigned int congested;
unsigned int writeback;
unsigned int immediate;
unsigned int file_taken;
unsigned int taken;
} nr;
/* Records the slab reclaimed so far */
struct reclaim_state reclaim_state;
/*
* Reclaim pages from a vma. If a page is shared with other tasks
* it is zapped from the vma without being reclaimed, so it stays
* in memory until the last task zaps it.
*/
struct vm_area_struct *target_vma;
};
/*
* Number of active kswapd threads.
*
* Both counters start at DEF_KSWAPD_THREADS_PER_NODE (one thread per node).
* NOTE(review): presumably kswapd_threads is the requested count and
* kswapd_threads_current the count actually running; the code that updates
* them is outside this excerpt - confirm.
*/
#define DEF_KSWAPD_THREADS_PER_NODE 1
int kswapd_threads = DEF_KSWAPD_THREADS_PER_NODE;
int kswapd_threads_current = DEF_KSWAPD_THREADS_PER_NODE;
#ifdef ARCH_HAS_PREFETCH
/*
 * prefetch_prev_lru_page - prefetch a field of the page that precedes
 * @_page on its LRU list
 * @_page:  page currently being processed (evaluated more than once, so it
 *          must not have side effects)
 * @_base:  list head of the LRU being walked; nothing is prefetched when
 *          @_page is the entry right after it
 * @_field: member of struct page to prefetch
 *
 * Every expression argument is parenthesized in the expansion so that
 * callers may pass arbitrary expressions. When the architecture does not
 * define ARCH_HAS_PREFETCH the macro expands to an empty statement and
 * evaluates none of its arguments.
 */
#define prefetch_prev_lru_page(_page, _base, _field) \
	do { \
		if ((_page)->lru.prev != (_base)) { \
			struct page *prev; \
 \
			prev = lru_to_page(&(_page)->lru); \
			prefetch(&prev->_field); \
		} \
	} while (0)
#else
#define prefetch_prev_lru_page(_page, _base, _field) do { } while (0)
#endif
#ifdef ARCH_HAS_PREFETCHW
/*
 * prefetchw_prev_lru_page - prefetch-for-write a field of the page that
 * precedes @_page on its LRU list
 * @_page:  page currently being processed (evaluated more than once, so it
 *          must not have side effects)
 * @_base:  list head of the LRU being walked; nothing is prefetched when
 *          @_page is the entry right after it
 * @_field: member of struct page to prefetch
 *
 * Every expression argument is parenthesized in the expansion so that
 * callers may pass arbitrary expressions. When the architecture does not
 * define ARCH_HAS_PREFETCHW the macro expands to an empty statement and
 * evaluates none of its arguments.
 */
#define prefetchw_prev_lru_page(_page, _base, _field) \
	do { \
		if ((_page)->lru.prev != (_base)) { \
			struct page *prev; \
 \
			prev = lru_to_page(&(_page)->lru); \
			prefetchw(&prev->_field); \
		} \
	} while (0)
#else
#define prefetchw_prev_lru_page(_page, _base, _field) do { } while (0)
#endif
/*
* Swappiness setting, from 0 .. 100. Higher means more swappy.
* Defaults to 60.
*/
int vm_swappiness = 60;
/*
* The total number of pages which are beyond the high watermark within all
* zones.
*/
unsigned long vm_total_pages;
/*
 * Point @task->reclaim_state at @rs, or clear it when @rs is NULL.
 *
 * Warns once when a non-NULL state would overwrite one that is already
 * installed, and once when a clear is requested although nothing is
 * installed. The assignment is performed in either case.
 */
static void set_task_reclaim_state(struct task_struct *task,
				   struct reclaim_state *rs)
{
	if (rs) {
		/* Installing over an existing state is an overwrite */
		WARN_ON_ONCE(task->reclaim_state);
	} else {
		/* Nulling a member that is already NULL */
		WARN_ON_ONCE(!task->reclaim_state);
	}

	task->reclaim_state = rs;
}
/*
* All registered shrinkers, linked through shrinker->list. The list is
* modified with shrinker_rwsem held for write (register_shrinker_prepared(),
* unregister_shrinker()) and walked by shrink_slab() after a successful
* down_read_trylock().
*/
static LIST_HEAD(shrinker_list);
static DECLARE_RWSEM(shrinker_rwsem);
#ifdef CONFIG_MEMCG
/*
* We allow subsystems to populate their shrinker-related
* LRU lists before register_shrinker_prepared() is called
* for the shrinker, since we don't want to impose
* restrictions on their internal registration order.
* In this case shrink_slab_memcg() may find corresponding
* bit is set in the shrinkers map.
*
* This value is used by the function to detect registering
* shrinkers and to skip do_shrink_slab() calls for them.
*/
#define SHRINKER_REGISTERING ((struct shrinker *)~0UL)
/*
* Maps the id of a memcg-aware shrinker to its struct shrinker, or to
* SHRINKER_REGISTERING while the shrinker is only preallocated.
*/
static DEFINE_IDR(shrinker_idr);
/*
* One past the highest shrinker id the memcg shrinker maps have been
* expanded for; used as the bit count when walking those maps.
*/
static int shrinker_nr_max;
/*
 * Reserve an id in shrinker_idr for a memcg-aware @shrinker.
 *
 * The id is stored with the SHRINKER_REGISTERING placeholder. When it is
 * not below shrinker_nr_max, the memcg shrinker maps are expanded to cover
 * it and shrinker_nr_max is raised.
 *
 * Returns 0 with shrinker->id set, or -ENOMEM if the id could not be
 * allocated or the maps could not be expanded.
 */
static int prealloc_memcg_shrinker(struct shrinker *shrinker)
{
	int err = -ENOMEM;
	int new_id;

	down_write(&shrinker_rwsem);

	/* This may call shrinker, so it must use down_read_trylock() */
	new_id = idr_alloc(&shrinker_idr, SHRINKER_REGISTERING, 0, 0, GFP_KERNEL);
	if (new_id < 0)
		goto out_unlock;

	if (new_id >= shrinker_nr_max) {
		if (memcg_expand_shrinker_maps(new_id)) {
			/* Expansion failed: hand the id back */
			idr_remove(&shrinker_idr, new_id);
			goto out_unlock;
		}
		shrinker_nr_max = new_id + 1;
	}

	shrinker->id = new_id;
	err = 0;

out_unlock:
	up_write(&shrinker_rwsem);
	return err;
}
/*
 * Drop @shrinker's entry from shrinker_idr under shrinker_rwsem.
 *
 * BUGs on a negative id; prealloc_memcg_shrinker() only ever stores
 * non-negative ids.
 */
static void unregister_memcg_shrinker(struct shrinker *shrinker)
{
	int slot = shrinker->id;

	BUG_ON(slot < 0);

	down_write(&shrinker_rwsem);
	idr_remove(&shrinker_idr, slot);
	up_write(&shrinker_rwsem);
}
static bool global_reclaim(struct scan_control *sc)
{
return !sc->target_mem_cgroup;
}
/**
 * sane_reclaim - can this reclaim pass rely on regular dirty throttling?
 * @sc: scan_control in question
 *
 * With the legacy memcg, the dirty throttling done by
 * balance_dirty_pages() is completely broken, and direct stalling in
 * shrink_page_list() is used for throttling instead. That fallback lacks
 * fairness, adaptive pausing, bandwidth proportional allocation and
 * configurability.
 *
 * Returns true when the vmscan in progress may assume the normal dirty
 * throttling mechanism is operational: always for reclaim without a target
 * memcg, and for memcg reclaim only when CONFIG_CGROUP_WRITEBACK is set and
 * the memory controller is on the default hierarchy.
 */
static bool sane_reclaim(struct scan_control *sc)
{
	/* No target memcg: regular throttling applies */
	if (!sc->target_mem_cgroup)
		return true;

#ifdef CONFIG_CGROUP_WRITEBACK
	if (cgroup_subsys_on_dfl(memory_cgrp_subsys))
		return true;
#endif

	return false;
}
/*
 * Record @congested in @memcg's per-node info for the node of @pgdat.
 * Does nothing when @memcg is NULL.
 */
static void set_memcg_congestion(pg_data_t *pgdat,
				 struct mem_cgroup *memcg,
				 bool congested)
{
	if (memcg) {
		struct mem_cgroup_per_node *mn;

		mn = mem_cgroup_nodeinfo(memcg, pgdat->node_id);
		WRITE_ONCE(mn->congested, congested);
	}
}
/*
 * Read back the congested flag stored in @memcg's per-node info for the
 * node of @pgdat. @memcg is dereferenced without a NULL check.
 */
static bool memcg_congested(pg_data_t *pgdat,
			    struct mem_cgroup *memcg)
{
	struct mem_cgroup_per_node *mn = mem_cgroup_nodeinfo(memcg,
							      pgdat->node_id);

	return READ_ONCE(mn->congested);
}
#else
/* !CONFIG_MEMCG: there is no id to reserve, so always report success. */
static int prealloc_memcg_shrinker(struct shrinker *shrinker)
{
	return 0;
}
/* !CONFIG_MEMCG: nothing was reserved, so there is nothing to release. */
static void unregister_memcg_shrinker(struct shrinker *shrinker)
{
}
/* !CONFIG_MEMCG: every reclaim pass is reported as global. */
static bool global_reclaim(struct scan_control *sc)
{
	return true;
}
/* !CONFIG_MEMCG: dirty throttling is always reported as operational. */
static bool sane_reclaim(struct scan_control *sc)
{
	return true;
}
/* !CONFIG_MEMCG: no per-memcg congestion state exists; do nothing. */
static inline void set_memcg_congestion(struct pglist_data *pgdat,
					struct mem_cgroup *memcg,
					bool congested)
{
}
/* !CONFIG_MEMCG: a memcg is never reported as congested. */
static inline bool memcg_congested(struct pglist_data *pgdat,
				   struct mem_cgroup *memcg)
{
	return false;
}
#endif
/*
 * Count the LRU pages of @zone that reclaim could work on: the file pages
 * always, the anonymous pages only while get_nr_swap_pages() is positive.
 *
 * Isolated pages are not included, because they are not accounted for to
 * save counters. The result only decides whether reclaim or compaction
 * continues, so isolated pages are not expected to be a dominating factor.
 */
unsigned long zone_reclaimable_pages(struct zone *zone)
{
	unsigned long file_pages;
	unsigned long anon_pages = 0;

	file_pages = zone_page_state_snapshot(zone, NR_ZONE_INACTIVE_FILE) +
		     zone_page_state_snapshot(zone, NR_ZONE_ACTIVE_FILE);

	if (get_nr_swap_pages() > 0) {
		anon_pages = zone_page_state_snapshot(zone, NR_ZONE_INACTIVE_ANON) +
			     zone_page_state_snapshot(zone, NR_ZONE_ACTIVE_ANON);
	}

	return file_pages + anon_pages;
}
/**
 * lruvec_lru_size - count the pages on one LRU list of a lruvec
 * @lruvec: lru vector
 * @lru: lru to use
 * @zone_idx: highest zone to include (use MAX_NR_ZONES for the whole LRU list)
 *
 * Starts from the size of the whole list and subtracts the share of every
 * managed zone above @zone_idx. Each subtraction is clamped so the result
 * cannot drop below zero.
 */
unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone_idx)
{
	unsigned long total = 0;
	int zid;

	if (mem_cgroup_disabled()) {
		total = node_page_state(lruvec_pgdat(lruvec), NR_LRU_BASE + lru);
	} else {
		for (zid = 0; zid < MAX_NR_ZONES; zid++)
			total += mem_cgroup_get_zone_lru_size(lruvec, lru, zid);
	}

	for (zid = zone_idx + 1; zid < MAX_NR_ZONES; zid++) {
		struct zone *zone = &lruvec_pgdat(lruvec)->node_zones[zid];
		unsigned long excluded;

		if (!managed_zone(zone))
			continue;

		if (mem_cgroup_disabled())
			excluded = zone_page_state(zone, NR_ZONE_LRU_BASE + lru);
		else
			excluded = mem_cgroup_get_zone_lru_size(lruvec, lru, zid);

		total -= min(excluded, total);
	}

	return total;
}
/*
 * First half of adding a shrinker callback to be called from the vm:
 * allocate everything @shrinker needs, without making it visible yet.
 *
 * Allocates the zeroed nr_deferred array (one counter, or nr_node_ids
 * counters for SHRINKER_NUMA_AWARE shrinkers) and, for SHRINKER_MEMCG_AWARE
 * shrinkers, reserves a memcg shrinker id.
 *
 * Returns 0 on success or -ENOMEM on failure, in which case nr_deferred is
 * left NULL.
 */
int prealloc_shrinker(struct shrinker *shrinker)
{
	unsigned int bytes = sizeof(*shrinker->nr_deferred);

	if (shrinker->flags & SHRINKER_NUMA_AWARE)
		bytes *= nr_node_ids;

	shrinker->nr_deferred = kzalloc(bytes, GFP_KERNEL);
	if (!shrinker->nr_deferred)
		return -ENOMEM;

	if ((shrinker->flags & SHRINKER_MEMCG_AWARE) &&
	    prealloc_memcg_shrinker(shrinker)) {
		/* Undo the allocation so the shrinker looks untouched */
		kfree(shrinker->nr_deferred);
		shrinker->nr_deferred = NULL;
		return -ENOMEM;
	}

	return 0;
}
/*
 * Undo prealloc_shrinker(): release the memcg id of a SHRINKER_MEMCG_AWARE
 * shrinker and free its nr_deferred array. A shrinker whose nr_deferred is
 * NULL is left alone.
 */
void free_prealloced_shrinker(struct shrinker *shrinker)
{
	if (shrinker->nr_deferred) {
		if (shrinker->flags & SHRINKER_MEMCG_AWARE)
			unregister_memcg_shrinker(shrinker);

		kfree(shrinker->nr_deferred);
		shrinker->nr_deferred = NULL;
	}
}
/*
 * Second half of registration: make a preallocated @shrinker visible.
 *
 * Under shrinker_rwsem held for write, the shrinker is appended to
 * shrinker_list and, with CONFIG_MEMCG, a SHRINKER_MEMCG_AWARE shrinker
 * replaces whatever is stored under its id in shrinker_idr.
 */
void register_shrinker_prepared(struct shrinker *shrinker)
{
	down_write(&shrinker_rwsem);

	list_add_tail(&shrinker->list, &shrinker_list);
#ifdef CONFIG_MEMCG
	if (shrinker->flags & SHRINKER_MEMCG_AWARE) {
		idr_replace(&shrinker_idr, shrinker, shrinker->id);
	}
#endif

	up_write(&shrinker_rwsem);
}
/*
 * Preallocate and publish @shrinker in one step.
 *
 * Returns 0 on success, or the error from prealloc_shrinker(), in which
 * case the shrinker is not registered.
 */
int register_shrinker(struct shrinker *shrinker)
{
	int err;

	err = prealloc_shrinker(shrinker);
	if (!err)
		register_shrinker_prepared(shrinker);

	return err;
}
EXPORT_SYMBOL(register_shrinker);
/*
 * Remove a shrinker callback.
 *
 * Releases the memcg id of a SHRINKER_MEMCG_AWARE shrinker, unlinks the
 * shrinker from shrinker_list under shrinker_rwsem held for write, and
 * frees nr_deferred. A shrinker whose nr_deferred is NULL is left alone.
 */
void unregister_shrinker(struct shrinker *shrinker)
{
	/* nr_deferred is NULL: nothing to undo */
	if (!shrinker->nr_deferred)
		return;

	if (shrinker->flags & SHRINKER_MEMCG_AWARE) {
		unregister_memcg_shrinker(shrinker);
	}

	down_write(&shrinker_rwsem);
	list_del(&shrinker->list);
	up_write(&shrinker_rwsem);

	kfree(shrinker->nr_deferred);
	shrinker->nr_deferred = NULL;
}
EXPORT_SYMBOL(unregister_shrinker);
/* Batch size used when the shrinker does not provide its own ->batch */
#define SHRINK_BATCH 128
/*
* do_shrink_slab - run one shrinker for one shrink_control
* @shrinkctl: reclaim context; ->nr_to_scan and ->nr_scanned are written
* before every ->scan_objects() call
* @shrinker: shrinker to invoke
* @priority: reclaim priority; the fresh scan target is derived from
* (freeable >> priority)
*
* Asks the shrinker how many objects are freeable, adds the work deferred
* by earlier calls (nr_deferred[nid]) to a freshly computed delta, and
* calls ->scan_objects() in batches. The part of the target that was not
* scanned is added back to nr_deferred[nid].
*
* Returns the sum of the ->scan_objects() results, or the ->count_objects()
* result unchanged when that is 0 or SHRINK_EMPTY.
*/
static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
struct shrinker *shrinker, int priority)
{
unsigned long freed = 0;
unsigned long long delta;
long total_scan;
long freeable;
long nr;
long new_nr;
int nid = shrinkctl->nid;
long batch_size = shrinker->batch ? shrinker->batch
: SHRINK_BATCH;
long scanned = 0, next_deferred;
/*
* Remaining scan target at or below which the loop further down stops.
* It is one batch by default and zero for kswapd, so kswapd keeps
* scanning until the target is used up.
*/
long min_cache_size = batch_size;
if (current_is_kswapd())
min_cache_size = 0;
/* Shrinkers that are not NUMA aware use the single counter at index 0 */
if (!(shrinker->flags & SHRINKER_NUMA_AWARE))
nid = 0;
freeable = shrinker->count_objects(shrinker, shrinkctl);
if (freeable == 0 || freeable == SHRINK_EMPTY)
return freeable;
/*
* copy the current shrinker scan count into a local variable
* and zero it so that other concurrent shrinker invocations
* don't also do this scanning work.
*/
nr = atomic_long_xchg(&shrinker->nr_deferred[nid], 0);
total_scan = nr;
/* Fresh work for this call: 4 * (freeable >> priority) / seeks */
if (shrinker->seeks) {
delta = freeable >> priority;
delta *= 4;
do_div(delta, shrinker->seeks);
} else {
/*
* These objects don't require any IO to create. Trim
* them aggressively under memory pressure to keep
* them from causing refetches in the IO caches.
*/
delta = freeable / 2;
}
total_scan += delta;
/*
* A negative target is reported and replaced by freeable; the old
* deferred count is carried forward unchanged in that case.
*/
if (total_scan < 0) {
pr_err("shrink_slab: %pS negative objects to delete nr=%ld\n",
shrinker->scan_objects, total_scan);
total_scan = freeable;
next_deferred = nr;
} else
next_deferred = total_scan;
/*
* We need to avoid excessive windup on filesystem shrinkers
* due to large numbers of GFP_NOFS allocations causing the
* shrinkers to return -1 all the time. This results in a large
* nr being built up so when a shrink that can do some work
* comes along it empties the entire cache due to nr >>>
* freeable. This is bad for sustaining a working set in
* memory.
*
* Hence only allow the shrinker to scan the entire cache when
* a large delta change is calculated directly.
*/
if (delta < freeable / 4)
total_scan = min(total_scan, freeable / 2);
/*
* Avoid risking looping forever due to too large nr value:
* never try to free more than twice the estimate number of
* freeable entries.
*/
if (total_scan > freeable * 2)
total_scan = freeable * 2;
trace_mm_shrink_slab_start(shrinker, shrinkctl, nr,
freeable, delta, total_scan, priority);
/*
* Normally, we should not scan less than batch_size objects in one
* pass to avoid too frequent shrinker calls, but if the slab has less
* than batch_size objects in total and we are really tight on memory,
* we will try to reclaim all available objects, otherwise we can end
* up failing allocations although there are plenty of reclaimable
* objects spread over several slabs with usage less than the
* batch_size.
*
* We detect the "tight on memory" situations by looking at the total
* number of objects we want to scan (total_scan). If it is greater
* than the total number of objects on slab (freeable), we must be
* scanning at high prio and therefore should try to reclaim as much as
* possible.
*
* NOTE: the condition below compares against min_cache_size rather
* than batch_size. For kswapd min_cache_size is 0, so kswapd does scan
* partial batches; other callers stop once total_scan is no larger
* than batch_size (unless total_scan >= freeable).
*/
while (total_scan > min_cache_size ||
total_scan >= freeable) {
unsigned long ret;
unsigned long nr_to_scan = min(batch_size, total_scan);
shrinkctl->nr_to_scan = nr_to_scan;
shrinkctl->nr_scanned = nr_to_scan;
ret = shrinker->scan_objects(shrinker, shrinkctl);
/* SHRINK_STOP abandons the rest of the target for this call */
if (ret == SHRINK_STOP)
break;
freed += ret;
/* Account with ->nr_scanned as it stands after the callback */
count_vm_events(SLABS_SCANNED, shrinkctl->nr_scanned);
total_scan -= shrinkctl->nr_scanned;
scanned += shrinkctl->nr_scanned;
cond_resched();
}
/* Whatever part of the target was not scanned is deferred */
if (next_deferred >= scanned)
next_deferred -= scanned;
else
next_deferred = 0;
/*
* move the unused scan count back into the shrinker in a
* manner that handles concurrent updates. If we exhausted the
* scan, there is no need to do an update.
*/
if (next_deferred > 0)
new_nr = atomic_long_add_return(next_deferred,
&shrinker->nr_deferred[nid]);
else
new_nr = atomic_long_read(&shrinker->nr_deferred[nid]);
trace_mm_shrink_slab_end(shrinker, nid, freed, nr, new_nr, total_scan);
return freed;
}
#ifdef CONFIG_MEMCG
/*
* shrink_slab_memcg - run the shrinkers flagged for @memcg on node @nid
* @gfp_mask: allocation context, passed to the shrinkers
* @nid: node whose shrinker map is walked
* @memcg: memory cgroup to shrink
* @priority: reclaim priority, passed to do_shrink_slab()
*
* Walks the bits set in the memcg's per-node shrinker map, looks each id up
* in shrinker_idr and calls do_shrink_slab() for the shrinker found. A bit
* is cleared when no shrinker is stored under its id or when the shrinker
* reports SHRINK_EMPTY.
*
* Returns the accumulated do_shrink_slab() results; 0 when the memcg is
* not online, shrinker_rwsem cannot be taken for read, or there is no map.
* When the walk is cut short because shrinker_rwsem is contended, at least
* 1 is returned.
*/
static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
struct mem_cgroup *memcg, int priority)
{
struct memcg_shrinker_map *map;
unsigned long ret, freed = 0;
int i;
if (!mem_cgroup_online(memcg))
return 0;
/* Never block on the rwsem; give up for this pass instead */
if (!down_read_trylock(&shrinker_rwsem))
return 0;
map = rcu_dereference_protected(memcg->nodeinfo[nid]->shrinker_map,
true);
if (unlikely(!map))
goto unlock;
for_each_set_bit(i, map->map, shrinker_nr_max) {
struct shrink_control sc = {
.gfp_mask = gfp_mask,
.nid = nid,
.memcg = memcg,
};
struct shrinker *shrinker;
shrinker = idr_find(&shrinker_idr, i);
/*
* Nothing stored under the id: drop the stale bit. A shrinker
* that is still registering keeps its bit and is skipped.
*/
if (unlikely(!shrinker || shrinker == SHRINKER_REGISTERING)) {
if (!shrinker)
clear_bit(i, map->map);
continue;
}
/* Call non-slab shrinkers even though kmem is disabled */
if (!memcg_kmem_enabled() &&
!(shrinker->flags & SHRINKER_NONSLAB))
continue;
ret = do_shrink_slab(&sc, shrinker, priority);
if (ret == SHRINK_EMPTY) {
clear_bit(i, map->map);
/*
* After the shrinker reported that it had no objects to
* free, but before we cleared the corresponding bit in
* the memcg shrinker map, a new object might have been
* added. To make sure, we have the bit set in this
* case, we invoke the shrinker one more time and reset
* the bit if it reports that it is not empty anymore.
* The memory barrier here pairs with the barrier in
* memcg_set_shrinker_bit():
*
* list_lru_add()          shrink_slab_memcg()
*   list_add_tail()         clear_bit()
*   <MB>                    <MB>
*   set_bit()               do_shrink_slab()
*/
smp_mb__after_atomic();
ret = do_shrink_slab(&sc, shrinker, priority);
if (ret == SHRINK_EMPTY)
ret = 0;
else
memcg_set_shrinker_bit(memcg, nid, i);
}
freed += ret;
/*
* Stop early when someone is waiting for the rwsem; report at
* least 1 so the result is not zero.
*/
if (rwsem_is_contended(&shrinker_rwsem)) {
freed = freed ? : 1;
break;
}
}
unlock:
up_read(&shrinker_rwsem);
return freed;
}
#else /* CONFIG_MEMCG */
static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
struct mem_cgroup *memcg, int priority)
{
return 0;
}
#endif /* CONFIG_MEMCG */
/**
 * shrink_slab - shrink slab caches
 * @gfp_mask: allocation context
 * @nid: node whose slab caches to target
 * @memcg: memory cgroup whose slab caches to target
 * @priority: the reclaim priority
 *
 * Call the shrink functions to age shrinkable caches.
 *
 * @nid is passed along to shrinkers with SHRINKER_NUMA_AWARE set,
 * unaware shrinkers will receive a node id of 0 instead.
 *
 * @memcg specifies the memory cgroup to target. Unaware shrinkers
 * are called only if it is the root cgroup.
 *
 * @priority is sc->priority, we take the number of objects and >> by priority
 * in order to get the scan target.
 *
 * Returns the number of reclaimed slab objects.
 */
static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
				 struct mem_cgroup *memcg,
				 int priority)
{
	unsigned long total_freed = 0;
	struct shrinker *shrinker;

	/*
	 * The root memcg might be allocated even though memcg is disabled
	 * via the "cgroup_disable=memory" boot parameter. That could make
	 * mem_cgroup_is_root() return false, so only the memcg slab shrink
	 * would run and the global shrink would be skipped, which may
	 * result in premature OOM. Hence the mem_cgroup_disabled() test.
	 */
	if (!mem_cgroup_disabled() && !mem_cgroup_is_root(memcg))
		return shrink_slab_memcg(gfp_mask, nid, memcg, priority);

	if (down_read_trylock(&shrinker_rwsem)) {
		list_for_each_entry(shrinker, &shrinker_list, list) {
			struct shrink_control sc = {
				.gfp_mask = gfp_mask,
				.nid = nid,
				.memcg = memcg,
			};
			unsigned long nr;

			nr = do_shrink_slab(&sc, shrinker, priority);
			if (nr != SHRINK_EMPTY)
				total_freed += nr;

			/*
			 * Bail out if someone wants to register a new
			 * shrinker, to prevent the registration from being
			 * stalled for long periods by parallel ongoing
			 * shrinking.
			 */
			if (rwsem_is_contended(&shrinker_rwsem)) {
				if (!total_freed)
					total_freed = 1;
				break;
			}
		}
		up_read(&shrinker_rwsem);
	}

	cond_resched();
	return total_freed;
}
void drop_slab_node(int nid)
{
unsigned long freed;
do {
struct mem_cgroup *memcg = NULL;
freed = 0;
memcg = mem_cgroup_iter(NULL, NULL, NULL);
do {
freed += shrink_slab(GFP_KERNEL, nid, memcg, 0);
} while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)) != NULL);
} while (freed > 10);
}
void drop_slab(void)
{
int nid;
for_each_online_node(nid)
drop_slab_node(nid);
}
/*
 * A freeable page cache page is referenced only by the caller that isolated
 * the page, by the page cache, and optionally by buffer heads at
 * page->private. Returns non-zero when page_count() matches exactly that.
 */
static inline int is_page_cache_freeable(struct page *page)
{
	int cache_refs = 1;

	/* A huge page in the swap cache is expected to hold HPAGE_PMD_NR refs. */
	if (PageTransHuge(page) && PageSwapCache(page))
		cache_refs = HPAGE_PMD_NR;

	return page_count(page) - page_has_private(page) == 1 + cache_refs;
}
/*
 * Decide whether reclaim may start writeback against @inode. Returns 1 when
 * the current task has PF_SWAPWRITE set, when the inode is not write
 * congested, or when the inode's bdi is the one recorded in
 * current->backing_dev_info; 0 otherwise. @sc is not consulted.
 */
static int may_write_to_inode(struct inode *inode, struct scan_control *sc)
{
	return (current->flags & PF_SWAPWRITE) ||
	       !inode_write_congested(inode) ||
	       inode_to_bdi(inode) == current->backing_dev_info;
}
/*
* We detected a synchronous write error writing a page out. Probably
* -ENOSPC. We need to propagate that into the address_space for a subsequent
* fsync(), msync() or close().
*
* The tricky part is that after writepage we cannot touch the mapping: nothing
* prevents it from being freed up. But we have a ref on the page and once
* that page is locked, the mapping is pinned.
*
* We're allowed to run sleeping lock_page() here because we know the caller has
* __GFP_FS.
*/
static void handle_write_error(struct address_space *mapping,
struct page *page, int error)
{
lock_page(page);
if (page_mapping(page) == mapping)
mapping_set_error(mapping, error);
unlock_page(page);
}
/*
 * Possible outcomes of pageout(). Each value also tells the caller whether
 * the page is still locked when pageout() returns.
 */
typedef enum {
/* failed to write page out (or writing was not attempted), page is locked */
PAGE_KEEP,
/* move page to the active list, page is locked */
PAGE_ACTIVATE,
/* page has been sent to the disk successfully, page is unlocked */
PAGE_SUCCESS,
/* page is clean and locked */
PAGE_CLEAN,
} pageout_t;
/*
* pageout is called by shrink_page_list() for each dirty page.
* Calls ->writepage().
*/
static pageout_t pageout(struct page *page, struct address_space *mapping,
struct scan_control *sc)
{
/*
* If the page is dirty, only perform writeback if that write
* will be non-blocking. To prevent this allocation from being
* stalled by pagecache activity. But note that there may be
* stalls if we need to run get_block(). We could test
* PagePrivate for that.
*
* If this process is currently in __generic_file_write_iter() against
* this page's queue, we can perform writeback even if that
* will block.
*
* If the page is swapcache, write it back even if that would
* block, for some throttling. This happens by accident, because
* swap_backing_dev_info is bust: it doesn't reflect the
* congestion state of the swapdevs. Easy to fix, if needed.
*/
if (!is_page_cache_freeable(page))
return PAGE_KEEP;
if (!mapping) {
/*
* Some data journaling orphaned pages can have
* page->mapping == NULL while being dirty with clean buffers.
*/
if (page_has_private(page)) {
if (try_to_free_buffers(page)) {
ClearPageDirty(page);
pr_info("%s: orphaned page\n", __func__);
return PAGE_CLEAN;
}
}
return PAGE_KEEP;
}
if (mapping->a_ops->writepage == NULL)
return PAGE_ACTIVATE;
if (!may_write_to_inode(mapping->host, sc))
return PAGE_KEEP;
if (clear_page_dirty_for_io(page)) {
int res;
struct writeback_control wbc = {
.sync_mode = WB_SYNC_NONE,
.nr_to_write = SWAP_CLUSTER_MAX,
.range_start = 0,
.range_end = LLONG_MAX,
.for_reclaim = 1,
};
SetPageReclaim(page);
res = mapping->a_ops->writepage(page, &wbc);
if (res < 0)
handle_write_error(mapping, page, res);
if (res == AOP_WRITEPAGE_ACTIVATE) {
ClearPageReclaim(page);
return PAGE_ACTIVATE;
}
if (!PageWriteback(page)) {
/* synchronous write or broken a_ops? */
ClearPageReclaim(page);
}
trace_mm_vmscan_writepage(page);
inc_node_page_state(page, NR_VMSCAN_WRITE);
return PAGE_SUCCESS;
}
return PAGE_CLEAN;
}
/*
 * Same as remove_mapping, but if the page is removed from the mapping, it
 * gets returned with a refcount of 0.
 *
 * @mapping:   address space the locked @page currently belongs to
 *             (BUG if page_mapping(page) differs).
 * @page:      locked page to detach (BUG if it is not locked).
 * @reclaimed: when true, a workingset shadow entry may be stored in place
 *             of an evicted file page (see the conditions below).
 *
 * Returns 1 if the page was detached, 0 if it is busy (unexpected extra
 * references) or dirty and was therefore left in place.
 */
static int __remove_mapping(struct address_space *mapping, struct page *page,
bool reclaimed)
{
unsigned long flags;
int refcount;
BUG_ON(!PageLocked(page));
BUG_ON(mapping != page_mapping(page));
xa_lock_irqsave(&mapping->i_pages, flags);
/*
 * The non racy check for a busy page.
 *
 * Must be careful with the order of the tests. When someone has
 * a ref to the page, it may be possible that they dirty it then
 * drop the reference. So if PageDirty is tested before page_count
 * here, then the following race may occur:
 *
 * get_user_pages(&page);
 * [user mapping goes away]
 * write_to(page);
 *				!PageDirty(page)    [good]
 * SetPageDirty(page);
 * put_page(page);
 *				!page_count(page)   [good, discard it]
 *
 * [oops, our write_to data is lost]
 *
 * Reversing the order of the tests ensures such a situation cannot
 * escape unnoticed. The smp_rmb is needed to ensure the page->flags
 * load is not satisfied before that of page->_refcount.
 *
 * Note that if SetPageDirty is always performed via set_page_dirty,
 * and thus under the i_pages lock, then this ordering is not required.
 */
/*
 * Expected reference count: 1 + compound_nr(page). Presumably the caller's
 * reference plus one page-cache reference per base page -- confirm.
 */
refcount = 1 + compound_nr(page);
if (!page_ref_freeze(page, refcount))
goto cannot_free;
/* note: atomic_cmpxchg in page_ref_freeze provides the smp_rmb */
if (unlikely(PageDirty(page))) {
/* Dirtied meanwhile: restore the frozen count and give up. */
page_ref_unfreeze(page, refcount);
goto cannot_free;
}
if (PageSwapCache(page)) {
/* Swap cache page: page_private() holds the swap entry value. */
swp_entry_t swap = { .val = page_private(page) };
mem_cgroup_swapout(page, swap);
__delete_from_swap_cache(page, swap);
xa_unlock_irqrestore(&mapping->i_pages, flags);
put_swap_page(page, swap);
} else {
void (*freepage)(struct page *);
void *shadow = NULL;
/* Read the callback before the page leaves the page cache. */
freepage = mapping->a_ops->freepage;
/*
 * Remember a shadow entry for reclaimed file cache in
 * order to detect refaults, thus thrashing, later on.
 *
 * But don't store shadows in an address space that is
 * already exiting. This is not just an optimization,
 * inode reclaim needs to empty out the radix tree or
 * the nodes are lost. Don't plant shadows behind its
 * back.
 *
 * We also don't store shadows for DAX mappings because the
 * only page cache pages found in these are zero pages
 * covering holes, and because we don't want to mix DAX
 * exceptional entries and shadow exceptional entries in the
 * same address_space.
 */
if (reclaimed && page_is_file_cache(page) &&
!mapping_exiting(mapping) && !dax_mapping(mapping))
shadow = workingset_eviction(page);
__delete_from_page_cache(page, shadow);
xa_unlock_irqrestore(&mapping->i_pages, flags);
/* The optional ->freepage callback runs after the lock is dropped. */
if (freepage != NULL)
freepage(page);
}
return 1;
cannot_free:
xa_unlock_irqrestore(&mapping->i_pages, flags);
return 0;
}
/*
 * Attempt to detach a locked page from its ->mapping. If it is dirty or if
 * someone else has a ref on the page, abort and return 0. If it was
 * successfully detached, return 1. Assumes the caller has a single ref on
 * this page.
 */
int remove_mapping(struct address_space *mapping, struct page *page)
{
	if (!__remove_mapping(mapping, page, false))
		return 0;

	/*
	 * Unfreezing the refcount with 1 rather than 2 effectively
	 * drops the pagecache ref for us without requiring another
	 * atomic operation.
	 */
	page_ref_unfreeze(page, 1);
	return 1;
}
/**
 * putback_lru_page - put previously isolated page onto appropriate LRU list
 * @page: page to be put back to appropriate lru list
 *
 * Add previously isolated @page to appropriate LRU list, then drop the
 * reference that was taken when the page was isolated.
 * Page may still be unevictable for other reasons.
 *
 * lru_lock must not be held, interrupts must be enabled.
 */
void putback_lru_page(struct page *page)
{
/* Hand the page back to the LRU machinery first... */
lru_cache_add(page);
/* ...then release our own reference. */
put_page(page); /* drop ref from isolate */
}
/*
 * Verdict of page_check_references(), consumed by shrink_page_list().
 */
enum page_references {
/* no reason to keep the page: try to reclaim it */
PAGEREF_RECLAIM,
/* try to reclaim, but shrink_page_list() keeps the page if it is dirty */
PAGEREF_RECLAIM_CLEAN,
/* leave the page on the inactive list for another pass */
PAGEREF_KEEP,
/* move the page to the active list */
PAGEREF_ACTIVATE,
};
/*
 * Classify @page by how recently it was referenced, combining page table
 * references (page_referenced()) with the page's own referenced flag, which
 * is test-and-cleared here.
 */
static enum page_references page_check_references(struct page *page,
						  struct scan_control *sc)
{
	unsigned long vm_flags;
	int pte_refs, flag_ref;

	pte_refs = page_referenced(page, 1, sc->target_mem_cgroup, &vm_flags);
	flag_ref = TestClearPageReferenced(page);

	/*
	 * Mlock lost the isolation race with us. Let try_to_unmap()
	 * move the page to the unevictable list.
	 */
	if (vm_flags & VM_LOCKED)
		return PAGEREF_RECLAIM;

	if (!pte_refs) {
		/* Reclaim if clean, defer dirty pages to writeback */
		if (flag_ref && !PageSwapBacked(page))
			return PAGEREF_RECLAIM_CLEAN;
		return PAGEREF_RECLAIM;
	}

	if (PageSwapBacked(page))
		return PAGEREF_ACTIVATE;

	/*
	 * All mapped pages start out with page table references from the
	 * instantiating fault, so we need to look twice if a mapped file
	 * page is used more than once.
	 *
	 * Mark it and spare it for another trip around the inactive list.
	 * Another page table reference will lead to its activation.
	 *
	 * Note: the mark is set for activated pages as well so that
	 * recently deactivated but used pages are quickly recovered.
	 */
	SetPageReferenced(page);

	/*
	 * Activate on repeated use, and activate file-backed executable
	 * pages after first usage.
	 */
	if (flag_ref || pte_refs > 1 || (vm_flags & VM_EXEC))
		return PAGEREF_ACTIVATE;

	return PAGEREF_KEEP;
}
/*
 * Report through @dirty and @writeback whether @page should be treated as
 * dirty and/or under writeback for reclaim accounting.
 */
static void page_check_dirty_writeback(struct page *page,
				       bool *dirty, bool *writeback)
{
	/*
	 * Anonymous pages are not handled by flushers and must be written
	 * from reclaim context. Do not stall reclaim based on them.
	 */
	if (!page_is_file_cache(page) ||
	    (PageAnon(page) && !PageSwapBacked(page))) {
		*dirty = false;
		*writeback = false;
		return;
	}

	/* By default assume that the page flags are accurate */
	*dirty = PageDirty(page);
	*writeback = PageWriteback(page);

	/* Verify dirty/writeback state if the filesystem supports it */
	if (page_has_private(page)) {
		struct address_space *mapping = page_mapping(page);

		if (mapping && mapping->a_ops->is_dirty_writeback)
			mapping->a_ops->is_dirty_writeback(page, dirty,
							   writeback);
	}
}
/*
 * shrink_page_list() - try to reclaim every page on an isolated list
 * @page_list:         isolated pages to process; on return it holds the
 *                     pages that were not freed
 * @pgdat:             node all pages belong to, or NULL when the list may
 *                     contain pages from several nodes. In the NULL case the
 *                     per-node isolated counter is dropped here for each
 *                     freed page.
 * @sc:                scan control; sc->nr_scanned is updated
 * @ttu_flags:         flags passed on to try_to_unmap()
 * @stat:              zeroed on entry, then filled with reclaim statistics
 * @ignore_references: skip page_check_references() and treat every page as
 *                     PAGEREF_RECLAIM
 *
 * Returns the number of reclaimed (base) pages.
 */
static unsigned long shrink_page_list(struct list_head *page_list,
				      struct pglist_data *pgdat,
				      struct scan_control *sc,
				      enum ttu_flags ttu_flags,
				      struct reclaim_stat *stat,
				      bool ignore_references)
{
	LIST_HEAD(ret_pages);
	LIST_HEAD(free_pages);
	unsigned nr_reclaimed = 0;
	unsigned pgactivate = 0;

	memset(stat, 0, sizeof(*stat));
	cond_resched();

	while (!list_empty(page_list)) {
		struct address_space *mapping;
		struct page *page;
		int may_enter_fs;
		enum page_references references = PAGEREF_RECLAIM;
		bool dirty, writeback;
		unsigned int nr_pages;

		cond_resched();

		page = lru_to_page(page_list);
		list_del(&page->lru);

		if (!trylock_page(page))
			goto keep;

		VM_BUG_ON_PAGE(PageActive(page), page);
		if (pgdat)
			VM_BUG_ON_PAGE(page_pgdat(page) != pgdat, page);

		nr_pages = compound_nr(page);

		/* Account the number of base pages even though THP */
		sc->nr_scanned += nr_pages;

		if (unlikely(!page_evictable(page)))
			goto activate_locked;

		if (!sc->may_unmap && page_mapped(page))
			goto keep_locked;

		may_enter_fs = (sc->gfp_mask & __GFP_FS) ||
			(PageSwapCache(page) && (sc->gfp_mask & __GFP_IO));

		/*
		 * The number of dirty pages determines if a node is marked
		 * reclaim_congested which affects wait_iff_congested. kswapd
		 * will stall and start writing pages if the tail of the LRU
		 * is all dirty unqueued pages.
		 */
		page_check_dirty_writeback(page, &dirty, &writeback);
		if (dirty || writeback)
			stat->nr_dirty++;

		if (dirty && !writeback)
			stat->nr_unqueued_dirty++;

		/*
		 * Treat this page as congested if the underlying BDI is or if
		 * pages are cycling through the LRU so quickly that the
		 * pages marked for immediate reclaim are making it to the
		 * end of the LRU a second time.
		 */
		mapping = page_mapping(page);
		if (((dirty || writeback) && mapping &&
		     inode_write_congested(mapping->host)) ||
		    (writeback && PageReclaim(page)))
			stat->nr_congested++;

		/*
		 * If a page at the tail of the LRU is under writeback, there
		 * are three cases to consider.
		 *
		 * 1) If reclaim is encountering an excessive number of pages
		 *    under writeback and this page is both under writeback and
		 *    PageReclaim then it indicates that pages are being queued
		 *    for IO but are being recycled through the LRU before the
		 *    IO can complete. Waiting on the page itself risks an
		 *    indefinite stall if it is impossible to writeback the
		 *    page due to IO error or disconnected storage so instead
		 *    note that the LRU is being scanned too quickly and the
		 *    caller can stall after page list has been processed.
		 *
		 * 2) Global or new memcg reclaim encounters a page that is
		 *    not marked for immediate reclaim, or the caller does not
		 *    have __GFP_FS (or __GFP_IO if it's simply going to swap,
		 *    not to fs). In this case mark the page for immediate
		 *    reclaim and continue scanning.
		 *
		 *    Require may_enter_fs because we would wait on fs, which
		 *    may not have submitted IO yet. And the loop driver might
		 *    enter reclaim, and deadlock if it waits on a page for
		 *    which it is needed to do the write (loop masks off
		 *    __GFP_IO|__GFP_FS for this reason); but more thought
		 *    would probably show more reasons.
		 *
		 * 3) Legacy memcg encounters a page that is already marked
		 *    PageReclaim. memcg does not have any dirty pages
		 *    throttling so we could easily OOM just because too many
		 *    pages are in writeback and there is nothing else to
		 *    reclaim. Wait for the writeback to complete.
		 *
		 * In cases 1) and 2) we activate the pages to get them out of
		 * the way while we continue scanning for clean pages on the
		 * inactive list and refilling from the active list. The
		 * observation here is that waiting for disk writes is more
		 * expensive than potentially causing reloads down the line.
		 * Since they're marked for immediate reclaim, they won't put
		 * memory pressure on the cache working set any longer than it
		 * takes to write them to disk.
		 */
		if (PageWriteback(page)) {
			/* Case 1 above */
			if (current_is_kswapd() &&
			    PageReclaim(page) &&
			    (pgdat &&
			     test_bit(PGDAT_WRITEBACK, &pgdat->flags))) {
				stat->nr_immediate++;
				goto activate_locked;

			/* Case 2 above */
			} else if (sane_reclaim(sc) ||
				   !PageReclaim(page) || !may_enter_fs) {
				/*
				 * This is slightly racy - end_page_writeback()
				 * might have just cleared PageReclaim, then
				 * setting PageReclaim here end up interpreted
				 * as PageReadahead - but that does not matter
				 * enough to care. What we do want is for this
				 * page to have PageReclaim set next time memcg
				 * reclaim reaches the tests above, so it will
				 * then wait_on_page_writeback() to avoid OOM;
				 * and it's also appropriate in global reclaim.
				 */
				SetPageReclaim(page);
				stat->nr_writeback++;
				goto activate_locked;

			/* Case 3 above */
			} else {
				unlock_page(page);
				wait_on_page_writeback(page);
				/* then go back and try same page again */
				list_add_tail(&page->lru, page_list);
				continue;
			}
		}

		if (!ignore_references)
			references = page_check_references(page, sc);

		switch (references) {
		case PAGEREF_ACTIVATE:
			goto activate_locked;
		case PAGEREF_KEEP:
			stat->nr_ref_keep += nr_pages;
			goto keep_locked;
		case PAGEREF_RECLAIM:
		case PAGEREF_RECLAIM_CLEAN:
			; /* try to reclaim the page below */
		}

		/*
		 * Anonymous process memory has backing store?
		 * Try to allocate it some swap space here.
		 * Lazyfree page could be freed directly
		 */
		if (PageAnon(page) && PageSwapBacked(page)) {
			if (!PageSwapCache(page)) {
				if (!(sc->gfp_mask & __GFP_IO))
					goto keep_locked;
				if (PageTransHuge(page)) {
					/* cannot split THP, skip it */
					if (!can_split_huge_page(page, NULL))
						goto activate_locked;
					/*
					 * Split pages without a PMD map right
					 * away. Chances are some or all of the
					 * tail pages can be freed without IO.
					 */
					if (!compound_mapcount(page) &&
					    split_huge_page_to_list(page,
								    page_list))
						goto activate_locked;
				}
				if (!add_to_swap(page)) {
					if (!PageTransHuge(page))
						goto activate_locked_split;
					/* Fallback to swap normal pages */
					if (split_huge_page_to_list(page,
								    page_list))
						goto activate_locked;
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
					count_vm_event(THP_SWPOUT_FALLBACK);
#endif
					if (!add_to_swap(page))
						goto activate_locked_split;
				}

				may_enter_fs = 1;

				/* Adding to swap updated mapping */
				mapping = page_mapping(page);
			}
		} else if (unlikely(PageTransHuge(page))) {
			/* Split file THP */
			if (split_huge_page_to_list(page, page_list))
				goto keep_locked;
		}

		/*
		 * THP may get split above, need minus tail pages and update
		 * nr_pages to avoid accounting tail pages twice.
		 *
		 * The tail pages that are added into swap cache successfully
		 * reach here.
		 */
		if ((nr_pages > 1) && !PageTransHuge(page)) {
			sc->nr_scanned -= (nr_pages - 1);
			nr_pages = 1;
		}

		/*
		 * The page is mapped into the page tables of one or more
		 * processes. Try to unmap it here.
		 */
		if (page_mapped(page)) {
			enum ttu_flags flags = ttu_flags | TTU_BATCH_FLUSH;

			if (unlikely(PageTransHuge(page)))
				flags |= TTU_SPLIT_HUGE_PMD;
			if (!try_to_unmap(page, flags, sc->target_vma)) {
				stat->nr_unmap_fail += nr_pages;
				goto activate_locked;
			}
		}

		if (PageDirty(page)) {
			/*
			 * Only kswapd can writeback filesystem pages
			 * to avoid risk of stack overflow. But avoid
			 * injecting inefficient single-page IO into
			 * flusher writeback as much as possible: only
			 * write pages when we've encountered many
			 * dirty pages, and when we've already scanned
			 * the rest of the LRU for clean pages and see
			 * the same dirty pages again (PageReclaim).
			 */
			if (page_is_file_cache(page) &&
			    (!current_is_kswapd() || !PageReclaim(page) ||
			     (pgdat &&
			      !test_bit(PGDAT_DIRTY, &pgdat->flags)))) {
				/*
				 * Immediately reclaim when written back.
				 * Similar in principle to deactivate_page()
				 * except we already have the page isolated
				 * and know it's dirty
				 */
				inc_node_page_state(page, NR_VMSCAN_IMMEDIATE);
				SetPageReclaim(page);

				goto activate_locked;
			}

			if (references == PAGEREF_RECLAIM_CLEAN)
				goto keep_locked;
			if (!may_enter_fs)
				goto keep_locked;
			if (!sc->may_writepage)
				goto keep_locked;

			/*
			 * Page is dirty. Flush the TLB if a writable entry
			 * potentially exists to avoid CPU writes after IO
			 * starts and then write it out here.
			 */
			try_to_unmap_flush_dirty();
			switch (pageout(page, mapping, sc)) {
			case PAGE_KEEP:
				goto keep_locked;
			case PAGE_ACTIVATE:
				goto activate_locked;
			case PAGE_SUCCESS:
				if (PageWriteback(page))
					goto keep;
				if (PageDirty(page))
					goto keep;

				/*
				 * A synchronous write - probably a ramdisk.  Go
				 * ahead and try to reclaim the page.
				 */
				if (!trylock_page(page))
					goto keep;
				if (PageDirty(page) || PageWriteback(page))
					goto keep_locked;
				mapping = page_mapping(page);
				/* fall through */
			case PAGE_CLEAN:
				; /* try to free the page below */
			}
		}

		/*
		 * If the page has buffers, try to free the buffer mappings
		 * associated with this page. If we succeed we try to free
		 * the page as well.
		 *
		 * We do this even if the page is PageDirty().
		 * try_to_release_page() does not perform I/O, but it is
		 * possible for a page to have PageDirty set, but it is actually
		 * clean (all its buffers are clean).  This happens if the
		 * buffers were written out directly, with submit_bh(). ext3
		 * will do this, as well as the blockdev mapping.
		 * try_to_release_page() will discover that cleanness and will
		 * drop the buffers and mark the page clean - it can be freed.
		 *
		 * Rarely, pages can have buffers and no ->mapping.  These are
		 * the pages which were not successfully invalidated in
		 * truncate_complete_page().  We try to drop those buffers here
		 * and if that worked, and the page is no longer mapped into
		 * process address space (page_count == 1) it can be freed.
		 * Otherwise, leave the page on the LRU so it is swappable.
		 */
		if (page_has_private(page)) {
			if (!try_to_release_page(page, sc->gfp_mask))
				goto activate_locked;
			if (!mapping && page_count(page) == 1) {
				unlock_page(page);
				if (put_page_testzero(page))
					goto free_it;
				else {
					/*
					 * rare race with speculative reference.
					 * the speculative reference will free
					 * this page shortly, so we may
					 * increment nr_reclaimed here (and
					 * leave it off the LRU).
					 *
					 * NOTE(review): with pgdat == NULL the
					 * isolated counter is not dropped on
					 * this path -- confirm whether callers
					 * compensate.
					 */
					nr_reclaimed++;
					continue;
				}
			}
		}

		if (PageAnon(page) && !PageSwapBacked(page)) {
			/* follow __remove_mapping for reference */
			if (!page_ref_freeze(page, 1))
				goto keep_locked;
			if (PageDirty(page)) {
				page_ref_unfreeze(page, 1);
				goto keep_locked;
			}

			count_vm_event(PGLAZYFREED);
			count_memcg_page_event(page, PGLAZYFREED);
		} else if (!mapping || !__remove_mapping(mapping, page, true))
			goto keep_locked;

		unlock_page(page);
free_it:
		/*
		 * THP may get swapped out in a whole, need account
		 * all base pages.
		 */
		nr_reclaimed += nr_pages;

		/*
		 * If pagelist are from multiple nodes, we should decrease
		 * NR_ISOLATED_ANON + x on freed pages in here.
		 *
		 * This must happen before the page is handed to the compound
		 * destructor below: once a THP has been freed its flags can no
		 * longer be trusted, so page_is_file_cache() would pick the
		 * wrong counter.
		 */
		if (!pgdat)
			dec_node_page_state(page, NR_ISOLATED_ANON +
					    page_is_file_cache(page));

		/*
		 * Is there need to periodically free_page_list? It would
		 * appear not as the counts should be low
		 */
		if (unlikely(PageTransHuge(page)))
			(*get_compound_page_dtor(page))(page);
		else
			list_add(&page->lru, &free_pages);
		continue;

activate_locked_split:
		/*
		 * The tail pages that are failed to add into swap cache
		 * reach here.  Fixup nr_scanned and nr_pages.
		 */
		if (nr_pages > 1) {
			sc->nr_scanned -= (nr_pages - 1);
			nr_pages = 1;
		}
activate_locked:
		/* Not a candidate for swapping, so reclaim swap space. */
		if (PageSwapCache(page) && (mem_cgroup_swap_full(page) ||
						PageMlocked(page)))
			try_to_free_swap(page);
		VM_BUG_ON_PAGE(PageActive(page), page);
		if (!PageMlocked(page)) {
			int type = page_is_file_cache(page);

			SetPageActive(page);
			stat->nr_activate[type] += nr_pages;
			count_memcg_page_event(page, PGACTIVATE);
		}
keep_locked:
		unlock_page(page);
keep:
		list_add(&page->lru, &ret_pages);
		VM_BUG_ON_PAGE(PageLRU(page) || PageUnevictable(page), page);
	}

	pgactivate = stat->nr_activate[0] + stat->nr_activate[1];

	mem_cgroup_uncharge_list(&free_pages);
	try_to_unmap_flush();
	free_unref_page_list(&free_pages);

	/* Hand every page that was not freed back to the caller. */
	list_splice(&ret_pages, page_list);
	count_vm_events(PGACTIVATE, pgactivate);

	return nr_reclaimed;
}
unsigned long reclaim_clean_pages_from_list(struct zone *zone,
struct list_head *page_list)
{
struct scan_control sc = {
.gfp_mask = GFP_KERNEL,
.priority = DEF_PRIORITY,
.may_unmap = 1,
};
struct reclaim_stat dummy_stat;
unsigned long ret;
struct page *page, *next;
LIST_HEAD(clean_pages);
list_for_each_entry_safe(page, next, page_list, lru) {
if (page_is_file_cache(page) && !PageDirty(page) &&
!__PageMovable(page) && !PageUnevictable(page)) {
ClearPageActive(page);
list_move(&page->lru, &clean_pages);
}
}
ret = shrink_page_list(&clean_pages, zone->zone_pgdat, &sc,
TTU_IGNORE_ACCESS, &dummy_stat, true);
list_splice(&clean_pages, page_list);
mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE, -ret);
return ret;
}
#ifdef CONFIG_PROCESS_RECLAIM
unsigned long reclaim_pages_from_list(struct list_head *page_list,
struct vm_area_struct *vma)
{
struct scan_control sc = {
.gfp_mask = GFP_KERNEL,
.priority = DEF_PRIORITY,
.may_writepage = 1,
.may_unmap = 1,
.may_swap = 1,
.target_vma = vma,
};
unsigned long nr_reclaimed;
struct reclaim_stat stat;
struct page *page;
list_for_each_entry(page, page_list, lru)
ClearPageActive(page);
nr_reclaimed = shrink_page_list(page_list, NULL, &sc,
TTU_IGNORE_ACCESS, &stat, true);
while (!list_empty(page_list)) {
page = lru_to_page(page_list);
list_del(&page->lru);
dec_node_page_state(page, NR_ISOLATED_ANON +
page_is_file_cache(page));
putback_lru_page(page);
}
return nr_reclaimed;
}
#endif
/*
 * Attempt to remove the specified page from its LRU. Only take this page
 * if it is of the appropriate PageActive status. Pages which are being
 * freed elsewhere are also ignored.
 *
 * page:	page to consider
 * mode:	one of the LRU isolation modes defined above
 *
 * returns 0 on success, -ve errno on failure: -EINVAL when the page is not
 * on an LRU (or is unevictable and @mode does not allow that), -EBUSY for
 * every other refusal.
 */
int __isolate_lru_page(struct page *page, isolate_mode_t mode)
{
	/* Only take pages on the LRU. */
	if (!PageLRU(page))
		return -EINVAL;

	/* Compaction should not handle unevictable pages but CMA can do so */
	if (PageUnevictable(page) && !(mode & ISOLATE_UNEVICTABLE))
		return -EINVAL;

	/*
	 * To minimise LRU disruption, the caller can indicate that it only
	 * wants to isolate pages it will be able to operate on without
	 * blocking - clean pages for the most part.
	 *
	 * ISOLATE_ASYNC_MIGRATE is used to indicate that it only wants pages
	 * that can be migrated without blocking.
	 */
	if (mode & ISOLATE_ASYNC_MIGRATE) {
		/* All the caller can do on PageWriteback is block */
		if (PageWriteback(page))
			return -EBUSY;

		if (PageDirty(page)) {
			struct address_space *mapping;
			bool can_migrate;

			/*
			 * Only pages without mappings or that have a
			 * ->migratepage callback are possible to migrate
			 * without blocking. However, we can be racing with
			 * truncation so it's necessary to lock the page
			 * to stabilise the mapping as truncation holds
			 * the page lock until after the page is removed
			 * from the page cache.
			 */
			if (!trylock_page(page))
				return -EBUSY;

			mapping = page_mapping(page);
			can_migrate = !mapping || mapping->a_ops->migratepage;
			unlock_page(page);
			if (!can_migrate)
				return -EBUSY;
		}
	}

	if ((mode & ISOLATE_UNMAPPED) && page_mapped(page))
		return -EBUSY;

	if (likely(get_page_unless_zero(page))) {
		/*
		 * Be careful not to clear PageLRU until after we're
		 * sure the page is not being freed elsewhere -- the
		 * page release code relies on it.
		 */
		ClearPageLRU(page);
		return 0;
	}

	return -EBUSY;
}
/*
 * Update LRU sizes after isolating pages. For each zone, the LRU size update
 * must be complete before mem_cgroup_update_lru_size() is called, due to a
 * sanity check. Zones from which nothing was taken are left alone.
 */
static __always_inline void update_lru_sizes(struct lruvec *lruvec,
			enum lru_list lru, unsigned long *nr_zone_taken)
{
	int zid;

	for (zid = 0; zid < MAX_NR_ZONES; zid++) {
		unsigned long taken = nr_zone_taken[zid];

		if (taken) {
			__update_lru_size(lruvec, lru, zid, -taken);
#ifdef CONFIG_MEMCG
			mem_cgroup_update_lru_size(lruvec, lru, zid, -taken);
#endif
		}
	}
}
/**
 * pgdat->lru_lock is heavily contended.  Some of the functions that
 * shrink the lists perform better by taking out a batch of pages
 * and working on them outside the LRU lock.
 *
 * For pagecache intensive workloads, this function is the hottest
 * spot in the kernel (apart from copy_*_user functions).
 *
 * Appropriate locks must be held before calling this function.
 *
 * @nr_to_scan:	The number of eligible pages to look through on the list.
 * @lruvec:	The LRU vector to pull pages from.
 * @dst:	The temp list to put pages on to.
 * @nr_scanned:	The number of pages that were scanned.
 * @sc:		The scan_control struct for this reclaim session
 * @lru:	LRU list id for isolating
 *
 * The isolation mode is derived from @sc: ISOLATE_UNMAPPED unless
 * sc->may_unmap is set.
 *
 * returns how many pages were moved onto *@dst.
 */
static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
		struct lruvec *lruvec, struct list_head *dst,
		unsigned long *nr_scanned, struct scan_control *sc,
		enum lru_list lru)
{
	struct list_head *src = &lruvec->lists[lru];
	unsigned long nr_zone_taken[MAX_NR_ZONES] = { 0 };
	unsigned long nr_skipped[MAX_NR_ZONES] = { 0, };
	unsigned long nr_taken = 0, skipped = 0;
	unsigned long scan = 0, total_scan = 0;
	isolate_mode_t mode = (sc->may_unmap ? 0 : ISOLATE_UNMAPPED);
	LIST_HEAD(pages_skipped);

	while (scan < nr_to_scan && !list_empty(src)) {
		struct page *page = lru_to_page(src);
		unsigned long nr_pages;
		int err;

		prefetchw_prev_lru_page(page, src, flags);

		VM_BUG_ON_PAGE(!PageLRU(page), page);

		nr_pages = compound_nr(page);
		total_scan += nr_pages;

		/* Pages above the requested zone index are set aside. */
		if (page_zonenum(page) > sc->reclaim_idx) {
			list_move(&page->lru, &pages_skipped);
			nr_skipped[page_zonenum(page)] += nr_pages;
			continue;
		}

		/*
		 * Do not count skipped pages because that makes the function
		 * return with no isolated pages if the LRU mostly contains
		 * ineligible pages.  This causes the VM to not reclaim any
		 * pages, triggering a premature OOM.
		 *
		 * Account all tail pages of THP.  This would not cause
		 * premature OOM since __isolate_lru_page() returns -EBUSY
		 * only when the page is being freed somewhere else.
		 */
		scan += nr_pages;

		err = __isolate_lru_page(page, mode);
		if (err == 0) {
			nr_taken += nr_pages;
			nr_zone_taken[page_zonenum(page)] += nr_pages;
			list_move(&page->lru, dst);
		} else if (err == -EBUSY) {
			/* else it is being freed elsewhere */
			list_move(&page->lru, src);
		} else {
			BUG();
		}
	}

	/*
	 * Splice any skipped pages to the start of the LRU list. Note that
	 * this disrupts the LRU order when reclaiming for lower zones but
	 * we cannot splice to the tail. If we did then the SWAP_CLUSTER_MAX
	 * scanning would soon rescan the same pages to skip and put the
	 * system at risk of premature OOM.
	 */
	if (!list_empty(&pages_skipped)) {
		int zid;

		list_splice(&pages_skipped, src);
		for (zid = 0; zid < MAX_NR_ZONES; zid++) {
			if (nr_skipped[zid]) {
				__count_zid_vm_events(PGSCAN_SKIP, zid,
						      nr_skipped[zid]);
				skipped += nr_skipped[zid];
			}
		}
	}

	*nr_scanned = total_scan;
	trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, nr_to_scan,
				    total_scan, skipped, nr_taken, mode, lru);
	update_lru_sizes(lruvec, lru, nr_zone_taken);

	return nr_taken;
}
/**
 * isolate_lru_page - tries to isolate a page from its LRU list
 * @page: page to isolate from its LRU list
 *
 * Isolates a @page from an LRU list, clears PageLRU and adjusts the
 * vmstat statistic corresponding to whatever LRU list the page was on.
 *
 * Returns 0 if the page was removed from an LRU list.
 * Returns -EBUSY if the page was not on an LRU list.
 *
 * The returned page will have PageLRU() cleared.  If it was found on
 * the active list, it will have PageActive set.  If it was found on
 * the unevictable list, it will have the PageUnevictable bit set. That flag
 * may need to be cleared by the caller before letting the page go.
 *
 * The vmstat statistic corresponding to the list on which the page was
 * found will be decremented.
 *
 * Restrictions:
 *
 * (1) Must be called with an elevated refcount on the page. This is a
 *     fundamental difference from isolate_lru_pages (which is called
 *     without a stable reference).
 * (2) the lru_lock must not be held.
 * (3) interrupts must be enabled.
 */
int isolate_lru_page(struct page *page)
{
	struct lruvec *lruvec;
	pg_data_t *pgdat;
	int ret = -EBUSY;

	VM_BUG_ON_PAGE(!page_count(page), page);
	WARN_RATELIMIT(PageTail(page), "trying to isolate tail page");

	/* Cheap unlocked test first; it is repeated under the lock below. */
	if (!PageLRU(page))
		return ret;

	pgdat = page_pgdat(page);
	spin_lock_irq(&pgdat->lru_lock);
	lruvec = mem_cgroup_page_lruvec(page, pgdat);
	if (PageLRU(page)) {
		int lru = page_lru(page);

		get_page(page);
		ClearPageLRU(page);
		del_page_from_lru_list(page, lruvec, lru);
		ret = 0;
	}
	spin_unlock_irq(&pgdat->lru_lock);

	return ret;
}
/*
* A direct reclaimer may isolate SWAP_CLUSTER_MAX pages from the LRU list and
* then get resheduled. When there are massive number of tasks doing page
* allocation, such sleeping direct reclaimers may keep piling up on each CPU,
* the LRU list will go small and be scanned faster than necessary, leading to
* unnecessary swapping, thrashing and OOM.
*/
static int too_many_isolated(struct pglist_data *pgdat, int file,
		struct scan_control *sc)
{
	unsigned long nr_inactive, nr_isolated;

	/* kswapd, and reclaim for which sane_reclaim() is false, never wait */
	if (current_is_kswapd() || !sane_reclaim(sc))
		return 0;

	nr_inactive = node_page_state(pgdat,
			file ? NR_INACTIVE_FILE : NR_INACTIVE_ANON);
	nr_isolated = node_page_state(pgdat,
			file ? NR_ISOLATED_FILE : NR_ISOLATED_ANON);

	/*
	 * Callers that permit both IO and FS operations are held to one
	 * eighth of the inactive list. GFP_NOIO/GFP_NOFS callers keep the
	 * full limit, so they are allowed to isolate more pages and won't
	 * get blocked by normal direct-reclaimers, forming a circular
	 * deadlock.
	 */
	if ((sc->gfp_mask & __GFP_IO) && (sc->gfp_mask & __GFP_FS))
		nr_inactive >>= 3;

	return nr_isolated > nr_inactive;
}
/*
* This moves pages from @list to corresponding LRU list.
*
* We move them the other way if the page is referenced by one or more
* processes, from rmap.
*
* If the pages are mostly unmapped, the processing is fast and it is
* appropriate to hold pgdat->lru_lock across the whole operation. But if
* the pages are mapped, the processing is slow (page_referenced()) so we
* should drop pgdat->lru_lock around each page. It's impossible to balance
* this, so instead we remove the pages from the LRU while processing them.
* It is safe to rely on PG_active against the non-LRU pages in here because
* nobody will play with that bit on a non-LRU page.
*
* The downside is that we have to touch page->_refcount against each page.
* But we had to alter page->flags anyway.
*
* @lruvec: used to find the node; the lruvec each page is linked into is
* looked up again per page with mem_cgroup_page_lruvec().
* @list: pages to put back, all with PageLRU clear. On return it holds
* the non-compound pages whose reference count dropped to zero
* here, for the caller to free.
*
* The function releases and re-takes pgdat->lru_lock with
* spin_unlock_irq()/spin_lock_irq() on some paths, so it expects to be
* entered with that lock held.
*
* Returns the number of pages moved to the given lruvec, counted with
* hpage_nr_pages() and excluding pages that were freed here.
*/
static unsigned noinline_for_stack move_pages_to_lru(struct lruvec *lruvec,
struct list_head *list)
{
struct pglist_data *pgdat = lruvec_pgdat(lruvec);
int nr_pages, nr_moved = 0;
LIST_HEAD(pages_to_free);
struct page *page;
enum lru_list lru;
while (!list_empty(list)) {
page = lru_to_page(list);
VM_BUG_ON_PAGE(PageLRU(page), page);
/*
* Pages that are no longer evictable are handed to
* putback_lru_page() with the lru_lock dropped.
*/
if (unlikely(!page_evictable(page))) {
list_del(&page->lru);
spin_unlock_irq(&pgdat->lru_lock);
putback_lru_page(page);
spin_lock_irq(&pgdat->lru_lock);
continue;
}
lruvec = mem_cgroup_page_lruvec(page, pgdat);
SetPageLRU(page);
lru = page_lru(page);
nr_pages = hpage_nr_pages(page);
update_lru_size(lruvec, lru, page_zonenum(page), nr_pages);
list_move(&page->lru, &lruvec->lists[lru]);
/*
* Drop a reference on the page. If that was the last one, take
* the page off the LRU again and dispose of it.
*/
if (put_page_testzero(page)) {
__ClearPageLRU(page);
__ClearPageActive(page);
del_page_from_lru_list(page, lruvec, lru);
/*
* Compound pages are destroyed right away through their
* destructor, with the lru_lock dropped; other pages are
* collected for the caller.
*/
if (unlikely(PageCompound(page))) {
spin_unlock_irq(&pgdat->lru_lock);
(*get_compound_page_dtor(page))(page);
spin_lock_irq(&pgdat->lru_lock);
} else
list_add(&page->lru, &pages_to_free);
} else {
nr_moved += nr_pages;
}
}
/*
* To save our caller's stack, now use input list for pages to free.
*/
list_splice(&pages_to_free, list);
return nr_moved;
}
/*
* If a kernel thread (such as nfsd for loop-back mounts) services
* a backing device by writing to the page cache it sets PF_LESS_THROTTLE.
* In that case we should only throttle if the backing device it is
* writing to is congested. In other cases it is safe to throttle.
*/
static int current_may_throttle(void)
{
	/* Tasks without PF_LESS_THROTTLE may always be throttled */
	if (!(current->flags & PF_LESS_THROTTLE))
		return 1;

	/* No backing device recorded for this task: nothing to exempt */
	if (current->backing_dev_info == NULL)
		return 1;

	/* Otherwise throttle only when that backing device is congested */
	return bdi_write_congested(current->backing_dev_info) != 0;
}
/*
* shrink_inactive_list() is a helper for shrink_node(). It returns the number
* of reclaimed pages.
*
* @nr_to_scan: upper bound passed to isolate_lru_pages()
* @lruvec: lruvec to isolate pages from
* @sc: scan control; its ->nr counters are updated with the reclaim
* statistics of this call
* @lru: the LRU list to shrink
*
* Pages are isolated under pgdat->lru_lock, processed by
* shrink_page_list() with the lock dropped, and whatever was not reclaimed
* is put back with move_pages_to_lru().
*
* Returns 0 if too many pages are still isolated after one 100ms sleep or
* if nothing could be isolated, SWAP_CLUSTER_MAX if a fatal signal is
* pending while waiting, and otherwise the value returned by
* shrink_page_list().
*/
static noinline_for_stack unsigned long
shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
struct scan_control *sc, enum lru_list lru)
{
LIST_HEAD(page_list);
unsigned long nr_scanned;
unsigned long nr_reclaimed = 0;
unsigned long nr_taken;
struct reclaim_stat stat;
int file = is_file_lru(lru);
enum vm_event_item item;
struct pglist_data *pgdat = lruvec_pgdat(lruvec);
struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
bool stalled = false;
/* Wait at most once for other reclaimers to return isolated pages. */
while (unlikely(too_many_isolated(pgdat, file, sc))) {
if (stalled)
return 0;
/* We are about to die and free our memory. Return now. */
if (fatal_signal_pending(current))
return SWAP_CLUSTER_MAX;
/* wait a bit for the reclaimer. */
msleep(100);
stalled = true;
}
lru_add_drain();
spin_lock_irq(&pgdat->lru_lock);
nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &page_list,
&nr_scanned, sc, lru);
/*
* NR_ISOLATED_ANON + file selects the anon or the file isolated counter
* (relies on the file counter directly following the anon one).
*/
__mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, nr_taken);
reclaim_stat->recent_scanned[file] += nr_taken;
item = current_is_kswapd() ? PGSCAN_KSWAPD : PGSCAN_DIRECT;
if (global_reclaim(sc))
__count_vm_events(item, nr_scanned);
__count_memcg_events(lruvec_memcg(lruvec), item, nr_scanned);
spin_unlock_irq(&pgdat->lru_lock);
if (nr_taken == 0)
return 0;
/* Try to reclaim the isolated pages; done without the lru_lock. */
nr_reclaimed = shrink_page_list(&page_list, pgdat, sc, 0,
&stat, false);
spin_lock_irq(&pgdat->lru_lock);
item = current_is_kswapd() ? PGSTEAL_KSWAPD : PGSTEAL_DIRECT;
if (global_reclaim(sc))
__count_vm_events(item, nr_reclaimed);
__count_memcg_events(lruvec_memcg(lruvec), item, nr_reclaimed);
reclaim_stat->recent_rotated[0] += stat.nr_activate[0];
reclaim_stat->recent_rotated[1] += stat.nr_activate[1];
/*
* Put the pages left on page_list back on their LRU lists. Afterwards
* page_list holds only pages that move_pages_to_lru() found to be
* unreferenced; those are uncharged and freed below.
*/
move_pages_to_lru(lruvec, &page_list);
__mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken);
spin_unlock_irq(&pgdat->lru_lock);
mem_cgroup_uncharge_list(&page_list);
free_unref_page_list(&page_list);
/*
* If dirty pages are scanned that are not queued for IO, it
* implies that flushers are not doing their job. This can
* happen when memory pressure pushes dirty pages to the end of
* the LRU before the dirty limits are breached and the dirty
* data has expired. It can also happen when the proportion of
* dirty pages grows not through writes but through memory
* pressure reclaiming all the clean cache. And in some cases,
* the flushers simply cannot keep up with the allocation
* rate. Nudge the flusher threads in case they are asleep.
*/
if (stat.nr_unqueued_dirty == nr_taken)
wakeup_flusher_threads(WB_REASON_VMSCAN);
/* Accumulate this call's statistics into the scan control. */
sc->nr.dirty += stat.nr_dirty;
sc->nr.congested += stat.nr_congested;
sc->nr.unqueued_dirty += stat.nr_unqueued_dirty;
sc->nr.writeback += stat.nr_writeback;
sc->nr.immediate += stat.nr_immediate;
sc->nr.taken += nr_taken;
if (file)
sc->nr.file_taken += nr_taken;
trace_mm_vmscan_lru_shrink_inactive(pgdat->node_id,
nr_scanned, nr_reclaimed, &stat, sc->priority, file);
return nr_reclaimed;
}
/*
* shrink_active_list - scan an active LRU list and deactivate pages
* @nr_to_scan: upper bound passed to isolate_lru_pages()
* @lruvec: lruvec to isolate pages from
* @sc: scan control
* @lru: the LRU list to isolate pages from
*
* Pages are isolated under pgdat->lru_lock and examined with the lock
* dropped. A page found referenced that is file cache mapped with VM_EXEC
* keeps PG_active and goes back via l_active; every other evictable page
* has PG_active cleared, gets PG_workingset set and goes back via
* l_inactive. Pages whose last reference is dropped while being put back
* are uncharged and freed at the end.
*/
static void shrink_active_list(unsigned long nr_to_scan,
struct lruvec *lruvec,
struct scan_control *sc,
enum lru_list lru)
{
unsigned long nr_taken;
unsigned long nr_scanned;
unsigned long vm_flags;
LIST_HEAD(l_hold); /* The pages which were snipped off */
LIST_HEAD(l_active);
LIST_HEAD(l_inactive);
struct page *page;
struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
unsigned nr_deactivate, nr_activate;
unsigned nr_rotated = 0;
int file = is_file_lru(lru);
struct pglist_data *pgdat = lruvec_pgdat(lruvec);
lru_add_drain();
spin_lock_irq(&pgdat->lru_lock);
nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &l_hold,
&nr_scanned, sc, lru);
__mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, nr_taken);
reclaim_stat->recent_scanned[file] += nr_taken;
__count_vm_events(PGREFILL, nr_scanned);
__count_memcg_events(lruvec_memcg(lruvec), PGREFILL, nr_scanned);
spin_unlock_irq(&pgdat->lru_lock);
/* Sort the isolated pages into l_active and l_inactive, lock dropped. */
while (!list_empty(&l_hold)) {
cond_resched();
page = lru_to_page(&l_hold);
list_del(&page->lru);
if (unlikely(!page_evictable(page))) {
putback_lru_page(page);
continue;
}
/*
* With too many buffer heads around, try to release the page's
* private data. page_has_private() is tested again once the page
* lock is held.
*/
if (unlikely(buffer_heads_over_limit)) {
if (page_has_private(page) && trylock_page(page)) {
if (page_has_private(page))
try_to_release_page(page, 0);
unlock_page(page);
}
}
if (page_referenced(page, 0, sc->target_mem_cgroup,
&vm_flags)) {
nr_rotated += hpage_nr_pages(page);
/*
* Identify referenced, file-backed active pages and
* give them one more trip around the active list. So
* that executable code get better chances to stay in
* memory under moderate memory pressure. Anon pages
* are not likely to be evicted by use-once streaming
* IO, plus JVM can create lots of anon VM_EXEC pages,
* so we ignore them here.
*/
if ((vm_flags & VM_EXEC) && page_is_file_cache(page)) {
list_add(&page->lru, &l_active);
continue;
}
}
ClearPageActive(page); /* we are de-activating */
SetPageWorkingset(page);
list_add(&page->lru, &l_inactive);
}
/*
* Move pages back to the lru list.
*/
spin_lock_irq(&pgdat->lru_lock);
/*
* Count referenced pages from currently used mappings as rotated,
* even though only some of them are actually re-activated. This
* helps balance scan pressure between file and anonymous pages in
* get_scan_count.
*/
reclaim_stat->recent_rotated[file] += nr_rotated;
nr_activate = move_pages_to_lru(lruvec, &l_active);
nr_deactivate = move_pages_to_lru(lruvec, &l_inactive);
/* Keep all free pages in l_active list */
list_splice(&l_inactive, &l_active);
__count_vm_events(PGDEACTIVATE, nr_deactivate);
__count_memcg_events(lruvec_memcg(lruvec), PGDEACTIVATE, nr_deactivate);
__mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken);
spin_unlock_irq(&pgdat->lru_lock);
/* l_active now holds only the pages to be freed. */
mem_cgroup_uncharge_list(&l_active);
free_unref_page_list(&l_active);
trace_mm_vmscan_lru_shrink_active(pgdat->node_id, nr_taken, nr_activate,
nr_deactivate, nr_rotated, sc->priority, file);
}
/*
 * Run shrink_page_list() on @batch, whose pages all belong to node @nid,
 * then hand whatever it leaves on the list to putback_lru_page().
 * Returns the number of pages shrink_page_list() reports as reclaimed.
 */
static unsigned long reclaim_node_page_batch(struct list_head *batch, int nid,
					     struct scan_control *sc)
{
	struct reclaim_stat dummy_stat;
	unsigned long nr_reclaimed;
	struct page *page;

	nr_reclaimed = shrink_page_list(batch, NODE_DATA(nid), sc, 0,
					&dummy_stat, false);

	while (!list_empty(batch)) {
		page = lru_to_page(batch);
		list_del(&page->lru);
		putback_lru_page(page);
	}

	return nr_reclaimed;
}

/*
 * reclaim_pages - try to reclaim the pages on @page_list
 *
 * Pages are taken off @page_list in runs that share a node id. Each run
 * has PG_active cleared on its pages and is passed to shrink_page_list()
 * for that node. Returns the total number of pages reclaimed.
 */
unsigned long reclaim_pages(struct list_head *page_list)
{
	int nid = -1;
	unsigned long nr_reclaimed = 0;
	LIST_HEAD(node_page_list);
	struct page *page;
	struct scan_control sc = {
		.gfp_mask = GFP_KERNEL,
		.priority = DEF_PRIORITY,
		.may_writepage = 1,
		.may_unmap = 1,
		.may_swap = 1,
	};

	while (!list_empty(page_list)) {
		page = lru_to_page(page_list);

		/* Start a new run on this page's node */
		if (nid == -1) {
			nid = page_to_nid(page);
			INIT_LIST_HEAD(&node_page_list);
		}

		/*
		 * Node changed: flush the current run. The page stays on
		 * @page_list and opens the next run on the next iteration.
		 */
		if (nid != page_to_nid(page)) {
			nr_reclaimed += reclaim_node_page_batch(&node_page_list,
								nid, &sc);
			nid = -1;
			continue;
		}

		ClearPageActive(page);
		list_move(&page->lru, &node_page_list);
	}

	/* Flush the final run, if any */
	if (!list_empty(&node_page_list))
		nr_reclaimed += reclaim_node_page_batch(&node_page_list,
							nid, &sc);

	return nr_reclaimed;
}
/*
* The inactive anon list should be small enough that the VM never has
* to do too much work.
*
* The inactive file list should be small enough to leave most memory
* to the established workingset on the scan-resistant active list,
* but large enough to avoid thrashing the aggregate readahead window.
*
* Both inactive lists should also be large enough that each inactive
* page has a chance to be referenced again before it is reclaimed.
*
* If that fails and refaulting is observed, the inactive list grows.
*
* The inactive_ratio is the target ratio of ACTIVE to INACTIVE pages
* on this LRU, maintained by the pageout code. An inactive_ratio
* of 3 means 3:1 or 25% of the pages are kept on the inactive list.
*
* total target max
* memory ratio inactive
* -------------------------------------
* 10MB 1 5MB
* 100MB 1 50MB
* 1GB 3 250MB
* 10GB 10 0.9GB
* 100GB 31 3GB
* 1TB 101 10GB
* 10TB 320 32GB
*/
static bool inactive_list_is_low(struct lruvec *lruvec, bool file,
				 struct scan_control *sc, bool trace)
{
	enum lru_list inactive_lru = file * LRU_FILE;
	enum lru_list active_lru = file * LRU_FILE + LRU_ACTIVE;
	struct pglist_data *pgdat = lruvec_pgdat(lruvec);
	unsigned long nr_inactive, nr_active;
	unsigned long ratio;
	unsigned long refaults;
	unsigned long gb;

	/* Without swap space there is no point in deactivating anon pages */
	if (!(file || total_swap_pages))
		return false;

	nr_inactive = lruvec_lru_size(lruvec, inactive_lru, sc->reclaim_idx);
	nr_active = lruvec_lru_size(lruvec, active_lru, sc->reclaim_idx);

	/*
	 * A changed refault count on the file LRU signals that a new
	 * workingset is being established: use a ratio of 0 so that the
	 * active list gets no protection and the stale workingset goes
	 * away quickly. Otherwise the ratio is sqrt(10 * size in GB),
	 * or 1 for lists below one GB.
	 */
	refaults = lruvec_page_state_local(lruvec, WORKINGSET_ACTIVATE);
	if (file && lruvec->refaults != refaults) {
		ratio = 0;
	} else {
		gb = (nr_inactive + nr_active) >> (30 - PAGE_SHIFT);
		ratio = gb ? int_sqrt(10 * gb) : 1;
	}

	if (trace)
		trace_mm_vmscan_inactive_list_is_low(pgdat->node_id, sc->reclaim_idx,
			lruvec_lru_size(lruvec, inactive_lru, MAX_NR_ZONES), nr_inactive,
			lruvec_lru_size(lruvec, active_lru, MAX_NR_ZONES), nr_active,
			ratio, file);

	return nr_inactive * ratio < nr_active;
}
/*
 * Shrink one LRU list. Inactive lists are reclaimed from and the number of
 * reclaimed pages is returned. Active lists are only shrunk when the
 * matching inactive list is low, and always report 0.
 */
static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
				 struct lruvec *lruvec, struct scan_control *sc)
{
	if (!is_active_lru(lru))
		return shrink_inactive_list(nr_to_scan, lruvec, sc, lru);

	if (inactive_list_is_low(lruvec, is_file_lru(lru), sc, true))
		shrink_active_list(nr_to_scan, lruvec, sc, lru);

	return 0;
}
/* How get_scan_count() splits scanning between the anon and file LRUs. */
enum scan_balance {
/* Scan all lists relative to their size */
SCAN_EQUAL,
/* Scale each type by its fraction[] / denominator share */
SCAN_FRACT,
/* Scan the anon lists exclusively */
SCAN_ANON,
/* Scan the file lists exclusively */
SCAN_FILE,
};
/*
* Determine how aggressively the anon and file LRU lists should be
* scanned. The relative value of each set of LRU lists is determined
* by looking at the fraction of the pages scanned we did rotate back
* onto the active list instead of evict.
*
* nr[0] = anon inactive pages to scan; nr[1] = anon active pages to scan
* nr[2] = file inactive pages to scan; nr[3] = file active pages to scan
*
* @lruvec: lruvec whose lists are sized
* @memcg: memory cgroup supplying swappiness, swap availability,
* protection and online state
* @sc: scan control (priority, reclaim_idx, may_swap, ...)
* @nr: output, scan target per evictable LRU list
* @lru_pages: output, sum of the sizes (within sc->reclaim_idx) of the
* lists that are going to be scanned; lists excluded by
* SCAN_FILE/SCAN_ANON contribute 0
*/
static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg,
struct scan_control *sc, unsigned long *nr,
unsigned long *lru_pages)
{
int swappiness = mem_cgroup_swappiness(memcg);
struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
u64 fraction[2];
u64 denominator = 0; /* gcc */
struct pglist_data *pgdat = lruvec_pgdat(lruvec);
unsigned long anon_prio, file_prio;
enum scan_balance scan_balance;
unsigned long anon, file;
unsigned long ap, fp;
enum lru_list lru;
/* If we have no swap space, do not bother scanning anon pages. */
if (!sc->may_swap || mem_cgroup_get_nr_swap_pages(memcg) <= 0) {
scan_balance = SCAN_FILE;
goto out;
}
/*
* Global reclaim will swap to prevent OOM even with no
* swappiness, but memcg users want to use this knob to
* disable swapping for individual groups completely when
* using the memory controller's swap limit feature would be
* too expensive.
*/
if (!global_reclaim(sc) && !swappiness) {
scan_balance = SCAN_FILE;
goto out;
}
/*
* Do not apply any pressure balancing cleverness when the
* system is close to OOM, scan both anon and file equally
* (unless the swappiness setting disagrees with swapping).
*/
if (!sc->priority && swappiness) {
scan_balance = SCAN_EQUAL;
goto out;
}
/*
* Prevent the reclaimer from falling into the cache trap: as
* cache pages start out inactive, every cache fault will tip
* the scan balance towards the file LRU. And as the file LRU
* shrinks, so does the window for rotation from references.
* This means we have a runaway feedback loop where a tiny
* thrashing file LRU becomes infinitely more attractive than
* anon pages. Try to detect this based on file LRU size.
*/
if (global_reclaim(sc)) {
unsigned long pgdatfile;
unsigned long pgdatfree;
int z;
unsigned long total_high_wmark = 0;
pgdatfree = sum_zone_node_page_state(pgdat->node_id, NR_FREE_PAGES);
pgdatfile = node_page_state(pgdat, NR_ACTIVE_FILE) +
node_page_state(pgdat, NR_INACTIVE_FILE);
/* Sum the high watermarks of all managed zones of the node. */
for (z = 0; z < MAX_NR_ZONES; z++) {
struct zone *zone = &pgdat->node_zones[z];
if (!managed_zone(zone))
continue;
total_high_wmark += high_wmark_pages(zone);
}
if (unlikely(pgdatfile + pgdatfree <= total_high_wmark)) {
/*
* Force SCAN_ANON if there are enough inactive
* anonymous pages on the LRU in eligible zones.
* Otherwise, the small LRU gets thrashed.
*/
if (!inactive_list_is_low(lruvec, false, sc, false) &&
lruvec_lru_size(lruvec, LRU_INACTIVE_ANON, sc->reclaim_idx)
>> sc->priority) {
scan_balance = SCAN_ANON;
goto out;
}
}
}
/*
* If there is enough inactive page cache, i.e. if the size of the
* inactive list is greater than that of the active list *and* the
* inactive list actually has some pages to scan on this priority, we
* do not reclaim anything from the anonymous working set right now.
* Without the second condition we could end up never scanning an
* lruvec even if it has plenty of old anonymous pages unless the
* system is under heavy pressure.
*
* This shortcut is compiled out when CONFIG_BALANCE_ANON_FILE_RECLAIM
* is enabled.
*/
if (!IS_ENABLED(CONFIG_BALANCE_ANON_FILE_RECLAIM) &&
!inactive_list_is_low(lruvec, true, sc, false) &&
lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, sc->reclaim_idx) >> sc->priority) {
scan_balance = SCAN_FILE;
goto out;
}
scan_balance = SCAN_FRACT;
/*
* With swappiness at 100, anonymous and file have the same priority.
* This scanning priority is essentially the inverse of IO cost.
*/
anon_prio = swappiness;
file_prio = 200 - anon_prio;
/*
* OK, so we have swap space and a fair amount of page cache
* pages. We use the recently rotated / recently scanned
* ratios to determine how valuable each cache is.
*
* Because workloads change over time (and to avoid overflow)
* we keep these statistics as a floating average, which ends
* up weighing recent references more than old ones.
*
* anon in [0], file in [1]
*/
anon = lruvec_lru_size(lruvec, LRU_ACTIVE_ANON, MAX_NR_ZONES) +
lruvec_lru_size(lruvec, LRU_INACTIVE_ANON, MAX_NR_ZONES);
file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE, MAX_NR_ZONES) +
lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, MAX_NR_ZONES);
spin_lock_irq(&pgdat->lru_lock);
/* Halve the counters once scanned exceeds a quarter of the list size. */
if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) {
reclaim_stat->recent_scanned[0] /= 2;
reclaim_stat->recent_rotated[0] /= 2;
}
if (unlikely(reclaim_stat->recent_scanned[1] > file / 4)) {
reclaim_stat->recent_scanned[1] /= 2;
reclaim_stat->recent_rotated[1] /= 2;
}
/*
* The amount of pressure on anon vs file pages is inversely
* proportional to the fraction of recently scanned pages on
* each list that were recently referenced and in active use.
*/
ap = anon_prio * (reclaim_stat->recent_scanned[0] + 1);
ap /= reclaim_stat->recent_rotated[0] + 1;
fp = file_prio * (reclaim_stat->recent_scanned[1] + 1);
fp /= reclaim_stat->recent_rotated[1] + 1;
spin_unlock_irq(&pgdat->lru_lock);
fraction[0] = ap;
fraction[1] = fp;
denominator = ap + fp + 1;
out:
/*
* For every evictable LRU list, derive the scan target from the list
* size, the memcg protection, sc->priority and the balance chosen
* above.
*/
*lru_pages = 0;
for_each_evictable_lru(lru) {
int file = is_file_lru(lru);
unsigned long lruvec_size;
unsigned long scan;
unsigned long protection;
lruvec_size = lruvec_lru_size(lruvec, lru, sc->reclaim_idx);
protection = mem_cgroup_protection(memcg,
sc->memcg_low_reclaim);
if (protection) {
/*
* Scale a cgroup's reclaim pressure by proportioning
* its current usage to its memory.low or memory.min
* setting.
*
* This is important, as otherwise scanning aggression
* becomes extremely binary -- from nothing as we
* approach the memory protection threshold, to totally
* nominal as we exceed it. This results in requiring
* setting extremely liberal protection thresholds. It
* also means we simply get no protection at all if we
* set it too low, which is not ideal.
*
* If there is any protection in place, we reduce scan
* pressure by how much of the total memory used is
* within protection thresholds.
*
* There is one special case: in the first reclaim pass,
* we skip over all groups that are within their low
* protection. If that fails to reclaim enough pages to
* satisfy the reclaim goal, we come back and override
* the best-effort low protection. However, we still
* ideally want to honor how well-behaved groups are in
* that case instead of simply punishing them all
* equally. As such, we reclaim them based on how much
* memory they are using, reducing the scan pressure
* again by how much of the total memory used is under
* hard protection.
*/
unsigned long cgroup_size = mem_cgroup_size(memcg);
/* Avoid TOCTOU with earlier protection check */
cgroup_size = max(cgroup_size, protection);
scan = lruvec_size - lruvec_size * protection /
cgroup_size;
/*
* Minimally target SWAP_CLUSTER_MAX pages to keep
* reclaim moving forwards, avoiding decrementing
* sc->priority further than desirable.
*/
scan = max(scan, SWAP_CLUSTER_MAX);
} else {
scan = lruvec_size;
}
/* A larger sc->priority value leaves a smaller share to scan. */
scan >>= sc->priority;
/*
* If the cgroup's already been deleted, make sure to
* scrape out the remaining cache.
*/
if (!scan && !mem_cgroup_online(memcg))
scan = min(lruvec_size, SWAP_CLUSTER_MAX);
switch (scan_balance) {
case SCAN_EQUAL:
/* Scan lists relative to size */
break;
case SCAN_FRACT:
/*
* Scan types proportional to swappiness and
* their relative recent reclaim efficiency.
* Make sure we don't miss the last page on
* the offlined memory cgroups because of a
* round-off error.
*/
scan = mem_cgroup_online(memcg) ?
div64_u64(scan * fraction[file], denominator) :
DIV64_U64_ROUND_UP(scan * fraction[file],
denominator);
break;
case SCAN_FILE:
case SCAN_ANON:
/*
* Scan one type exclusively. The test below is true for
* the lists of the type that is not being scanned.
*/
if ((scan_balance == SCAN_FILE) != file) {
lruvec_size = 0;
scan = 0;
}
break;
default:
/* Look ma, no brain */
BUG();
}
*lru_pages += lruvec_size;
nr[lru] = scan;
}
}
/*
* This is a basic per-node page freer. Used by both kswapd and direct reclaim.
*
* @pgdat: node to reclaim from
* @memcg: memory cgroup whose lruvec on @pgdat is shrunk
* @sc: scan control; sc->nr_reclaimed is increased by the number of
* pages reclaimed here
* @lru_pages: output, filled in by get_scan_count()
*
* The scan targets come from get_scan_count() and are worked off in
* chunks of at most SWAP_CLUSTER_MAX pages per list and iteration.
*/
static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memcg,
struct scan_control *sc, unsigned long *lru_pages)
{
struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, memcg);
unsigned long nr[NR_LRU_LISTS];
unsigned long targets[NR_LRU_LISTS];
unsigned long nr_to_scan;
enum lru_list lru;
unsigned long nr_reclaimed = 0;
unsigned long nr_to_reclaim = sc->nr_to_reclaim;
struct blk_plug plug;
bool scan_adjusted;
get_scan_count(lruvec, memcg, sc, nr, lru_pages);
/* Record the original scan target for proportional adjustments later */
memcpy(targets, nr, sizeof(nr));
/*
* Global reclaiming within direct reclaim at DEF_PRIORITY is a normal
* event that can occur when there is little memory pressure e.g.
* multiple streaming readers/writers. Hence, we do not abort scanning
* when the requested number of pages are reclaimed when scanning at
* DEF_PRIORITY on the assumption that the fact we are direct
* reclaiming implies that kswapd is not keeping up and it is best to
* do a batch of work at once. For memcg reclaim one check is made to
* abort proportional reclaim if either the file or anon lru has already
* dropped to zero at the first pass.
*/
scan_adjusted = (global_reclaim(sc) && !current_is_kswapd() &&
sc->priority == DEF_PRIORITY);
blk_start_plug(&plug);
/*
* Note that a remaining target on LRU_ACTIVE_ANON alone does not keep
* this loop running.
*/
while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
nr[LRU_INACTIVE_FILE]) {
unsigned long nr_anon, nr_file, percentage;
unsigned long nr_scanned;
for_each_evictable_lru(lru) {
if (nr[lru]) {
nr_to_scan = min(nr[lru], SWAP_CLUSTER_MAX);
nr[lru] -= nr_to_scan;
nr_reclaimed += shrink_list(lru, nr_to_scan,
lruvec, sc);
}
}
cond_resched();
/*
* Keep going until the reclaim goal is met; once the targets have
* been adjusted below, just work off what is left.
*/
if (nr_reclaimed < nr_to_reclaim || scan_adjusted)
continue;
/*
* For kswapd and memcg, reclaim at least the number of pages
* requested. Ensure that the anon and file LRUs are scanned
* proportionally what was requested by get_scan_count(). We
* stop reclaiming one LRU and reduce the amount scanning
* proportional to the original scan target.
*/
nr_file = nr[LRU_INACTIVE_FILE] + nr[LRU_ACTIVE_FILE];
nr_anon = nr[LRU_INACTIVE_ANON] + nr[LRU_ACTIVE_ANON];
/*
* It's just vindictive to attack the larger once the smaller
* has gone to zero. And given the way we stop scanning the
* smaller below, this makes sure that we only make one nudge
* towards proportionality once we've got nr_to_reclaim.
*/
if (!nr_file || !nr_anon)
break;
/*
* percentage is the share of the smaller type's original target
* that is still unscanned; the + 1 keeps the divisor non-zero.
*/
if (nr_file > nr_anon) {
unsigned long scan_target = targets[LRU_INACTIVE_ANON] +
targets[LRU_ACTIVE_ANON] + 1;
lru = LRU_BASE;
percentage = nr_anon * 100 / scan_target;
} else {
unsigned long scan_target = targets[LRU_INACTIVE_FILE] +
targets[LRU_ACTIVE_FILE] + 1;
lru = LRU_FILE;
percentage = nr_file * 100 / scan_target;
}
/* Stop scanning the smaller of the LRU */
nr[lru] = 0;
nr[lru + LRU_ACTIVE] = 0;
/*
* Recalculate the other LRU scan count based on its original
* scan target and the percentage scanning already complete
*/
lru = (lru == LRU_FILE) ? LRU_BASE : LRU_FILE;
nr_scanned = targets[lru] - nr[lru];
nr[lru] = targets[lru] * (100 - percentage) / 100;
nr[lru] -= min(nr[lru], nr_scanned);
/* Same adjustment for the active list of that type. */
lru += LRU_ACTIVE;
nr_scanned = targets[lru] - nr[lru];
nr[lru] = targets[lru] * (100 - percentage) / 100;
nr[lru] -= min(nr[lru], nr_scanned);
scan_adjusted = true;
}
blk_finish_plug(&plug);
sc->nr_reclaimed += nr_reclaimed;
/*
* Even if we did not try to evict anon pages at all, we want to
* rebalance the anon lru active/inactive ratio.
*/
if (inactive_list_is_low(lruvec, false, sc, true))
shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
sc, LRU_ACTIVE_ANON);
}
/* Use reclaim/compaction for costly allocs or under memory pressure */
static bool in_reclaim_compaction(struct scan_control *sc)
{
	/* Needs compaction support and a high-order (order > 0) request */
	if (!IS_ENABLED(CONFIG_COMPACTION) || !sc->order)
		return false;

	/* Costly order, or reclaim priority already below DEF_PRIORITY - 2 */
	return sc->order > PAGE_ALLOC_COSTLY_ORDER ||
	       sc->priority < DEF_PRIORITY - 2;
}
/*
* Reclaim/compaction is used for high-order allocation requests. It reclaims
* order-0 pages before compacting the zone. should_continue_reclaim() returns
* true if more pages should be reclaimed such that when the page allocator
* calls try_to_compact_zone() that it will have enough free pages to succeed.
* It will give up earlier than that if there is difficulty reclaiming pages.
*/
static inline bool should_continue_reclaim(struct pglist_data *pgdat,
					unsigned long nr_reclaimed,
					struct scan_control *sc)
{
	unsigned long pages_for_compaction;
	unsigned long inactive_lru_pages;
	int z;

	/*
	 * Stop when not in reclaim/compaction mode, and stop when the last
	 * round failed to reclaim anything. The latter returns to the
	 * caller with the risk that reclaim/compaction and the resulting
	 * allocation attempt fail. Earlier attempts to try harder for
	 * __GFP_RETRY_MAYFAIL by treating a zero delta of sc->nr_scanned
	 * as "full LRU scanned" were wrong: there were corner cases where
	 * a non-zero number of pages was always scanned.
	 */
	if (!in_reclaim_compaction(sc) || !nr_reclaimed)
		return false;

	/* Stop if compaction would go ahead or the allocation would succeed */
	for (z = 0; z <= sc->reclaim_idx; z++) {
		struct zone *zone = &pgdat->node_zones[z];

		if (!managed_zone(zone))
			continue;

		switch (compaction_suitable(zone, sc->order, 0, sc->reclaim_idx)) {
		case COMPACT_SUCCESS:
		case COMPACT_CONTINUE:
			return false;
		default:
			/* check next zone */
			break;
		}
	}

	/*
	 * Not enough reclaimed for compaction yet: continue only while the
	 * inactive lists (anon counted only when swap is available) are
	 * larger than the gap compaction needs.
	 */
	inactive_lru_pages = node_page_state(pgdat, NR_INACTIVE_FILE);
	if (get_nr_swap_pages() > 0)
		inactive_lru_pages += node_page_state(pgdat, NR_INACTIVE_ANON);
	pages_for_compaction = compact_gap(sc->order);

	return inactive_lru_pages > pages_for_compaction;
}
/*
 * True when the node carries PGDAT_CONGESTED, or when @memcg is non-NULL
 * and memcg_congested() reports it congested on this node.
 */
static bool pgdat_memcg_congested(pg_data_t *pgdat, struct mem_cgroup *memcg)
{
	if (test_bit(PGDAT_CONGESTED, &pgdat->flags))
		return true;

	return memcg && memcg_congested(pgdat, memcg);
}
/*
* shrink_node - reclaim pages from one node
* @pgdat: node to reclaim from
* @sc: scan control; ->nr is reset at the start of every pass and
* ->nr_reclaimed accumulates the result
*
* Each pass walks the memcgs below sc->target_mem_cgroup with
* mem_cgroup_iter(), shrinking the LRU lists and the slab caches of every
* memcg that is not skipped because of its protection. Passes repeat for
* as long as should_continue_reclaim() asks for more.
*
* Returns true if sc->nr_reclaimed advanced during at least one pass.
*/
static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
{
struct reclaim_state *reclaim_state = current->reclaim_state;
unsigned long nr_reclaimed, nr_scanned;
bool reclaimable = false;
do {
struct mem_cgroup *root = sc->target_mem_cgroup;
unsigned long node_lru_pages = 0;
struct mem_cgroup *memcg;
memset(&sc->nr, 0, sizeof(sc->nr));
/* Snapshot the counters to compute this pass's deltas later. */
nr_reclaimed = sc->nr_reclaimed;
nr_scanned = sc->nr_scanned;
memcg = mem_cgroup_iter(root, NULL, NULL);
do {
unsigned long lru_pages;
unsigned long reclaimed;
unsigned long scanned;
/*
* This loop can become CPU-bound when target memcgs
* aren't eligible for reclaim - either because they
* don't have any reclaimable pages, or because their
* memory is explicitly protected. Avoid soft lockups.
*/
cond_resched();
/*
* A "continue" below jumps to the loop condition, which
* advances to the next memcg.
*/
switch (mem_cgroup_protected(root, memcg)) {
case MEMCG_PROT_MIN:
/*
* Hard protection.
* If there is no reclaimable memory, OOM.
*/
continue;
case MEMCG_PROT_LOW:
/*
* Soft protection.
* Respect the protection only as long as
* there is an unprotected supply
* of reclaimable memory from other cgroups.
*/
if (!sc->memcg_low_reclaim) {
sc->memcg_low_skipped = 1;
continue;
}
memcg_memory_event(memcg, MEMCG_LOW);
break;
case MEMCG_PROT_NONE:
/*
* All protection thresholds breached. We may
* still choose to vary the scan pressure
* applied based on by how much the cgroup in
* question has exceeded its protection
* thresholds (see get_scan_count).
*/
break;
}
reclaimed = sc->nr_reclaimed;
scanned = sc->nr_scanned;
shrink_node_memcg(pgdat, memcg, sc, &lru_pages);
node_lru_pages += lru_pages;
shrink_slab(sc->gfp_mask, pgdat->node_id, memcg,
sc->priority);
/* Record the group's reclaim efficiency */
vmpressure(sc->gfp_mask, memcg, false,
sc->nr_scanned - scanned,
sc->nr_reclaimed - reclaimed);
} while ((memcg = mem_cgroup_iter(root, memcg, NULL)));
/* Credit slab pages recorded in the task's reclaim_state. */
if (reclaim_state) {
sc->nr_reclaimed += reclaim_state->reclaimed_slab;
reclaim_state->reclaimed_slab = 0;
}
/* Record the subtree's reclaim efficiency */
vmpressure(sc->gfp_mask, sc->target_mem_cgroup, true,
sc->nr_scanned - nr_scanned,
sc->nr_reclaimed - nr_reclaimed);
if (sc->nr_reclaimed - nr_reclaimed)
reclaimable = true;
if (current_is_kswapd()) {
/*
* If reclaim is isolating dirty pages under writeback,
* it implies that the long-lived page allocation rate
* is exceeding the page laundering rate. Either the
* global limits are not being effective at throttling
* processes due to the page distribution throughout
* zones or there is heavy usage of a slow backing
* device. The only option is to throttle from reclaim
* context which is not ideal as there is no guarantee
* the dirtying process is throttled in the same way
* balance_dirty_pages() manages.
*
* Once a node is flagged PGDAT_WRITEBACK, kswapd will
* count the number of pages under pages flagged for
* immediate reclaim and stall if any are encountered
* in the nr_immediate check below.
*/
if (sc->nr.writeback && sc->nr.writeback == sc->nr.taken)
set_bit(PGDAT_WRITEBACK, &pgdat->flags);
/*
* Tag a node as congested if all the dirty pages
* scanned were backed by a congested BDI and
* wait_iff_congested will stall.
*/
if (sc->nr.dirty && sc->nr.dirty == sc->nr.congested)
set_bit(PGDAT_CONGESTED, &pgdat->flags);
/* Allow kswapd to start writing pages during reclaim.*/
if (sc->nr.unqueued_dirty == sc->nr.file_taken)
set_bit(PGDAT_DIRTY, &pgdat->flags);
/*
* If kswapd scans pages marked for immediate
* reclaim and under writeback (nr_immediate), it
* implies that pages are cycling through the LRU
* faster than they are written so also forcibly stall.
*/
if (sc->nr.immediate)
congestion_wait(BLK_RW_ASYNC, HZ/10);
}
/*
* Legacy memcg will stall in page writeback so avoid forcibly
* stalling in wait_iff_congested().
*/
if (!global_reclaim(sc) && sane_reclaim(sc) &&
sc->nr.dirty && sc->nr.dirty == sc->nr.congested)
set_memcg_congestion(pgdat, root, true);
/*
* Stall direct reclaim for IO completions if underlying BDIs
* and node is congested. Allow kswapd to continue until it
* starts encountering unqueued dirty pages or cycling through
* the LRU too quickly.
*/
if (!sc->hibernation_mode && !current_is_kswapd() &&
current_may_throttle() && pgdat_memcg_congested(pgdat, root))
wait_iff_congested(BLK_RW_ASYNC, HZ/10);
} while (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed,
sc));
/*
* Kswapd gives up on balancing particular nodes after too
* many failures to reclaim anything from them and goes to
* sleep. On reclaim progress, reset the failure counter. A
* successful direct reclaim run will revive a dormant kswapd.
*/
if (reclaimable)
pgdat->kswapd_failures = 0;
return reclaimable;
}
/*
* Returns true if compaction should go ahead for a costly-order request, or
* the allocation would already succeed without compaction. Return false if we
* should reclaim first.
*/
static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
{
	unsigned long watermark;
	enum compact_result suitable;

	suitable = compaction_suitable(zone, sc->order, 0, sc->reclaim_idx);
	switch (suitable) {
	case COMPACT_SUCCESS:
		/* Allocation should succeed already. Don't reclaim. */
		return true;
	case COMPACT_SKIPPED:
		/* Compaction cannot yet proceed. Do reclaim. */
		return false;
	default:
		break;
	}

	/*
	 * Compaction could run now, but it takes time and other callers
	 * may grab the pages just freed. Keep reclaiming to build a buffer
	 * of free pages that gives compaction a reasonable chance of
	 * completing and allocating the page. The whole buffer is not
	 * reclaimed in one attempt, as the target watermark in
	 * should_continue_reclaim() is lower. If the zone is already above
	 * the high watermark plus the compaction gap, don't reclaim at all.
	 */
	watermark = high_wmark_pages(zone) + compact_gap(sc->order);

	return zone_watermark_ok_safe(zone, 0, watermark, sc->reclaim_idx);
}
/*
 * Direct reclaim path, used by page-allocating processes. Only zones that can
 * satisfy the caller's allocation request (as described by @zonelist,
 * sc->reclaim_idx and sc->nodemask) are considered.
 *
 * If a zone is deemed to be full of pinned pages then just give it a light
 * scan then give up on it.
 */
static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
{
	pg_data_t *last_pgdat = NULL;
	gfp_t saved_mask = sc->gfp_mask;
	struct zoneref *z;
	struct zone *zone;

	/*
	 * When the machine holds more buffer_heads than allowed, make direct
	 * reclaim scan the highmem zone too: highmem pages could be pinning
	 * lowmem pages that store buffer_heads.
	 */
	if (buffer_heads_over_limit) {
		sc->gfp_mask |= __GFP_HIGHMEM;
		sc->reclaim_idx = gfp_zone(sc->gfp_mask);
	}

	for_each_zone_zonelist_nodemask(zone, z, zonelist,
					sc->reclaim_idx, sc->nodemask) {
		pg_data_t *pgdat = zone->zone_pgdat;

		/*
		 * Take care memory controller reclaiming has small influence
		 * to global LRU.
		 */
		if (global_reclaim(sc)) {
			unsigned long soft_scanned = 0;
			unsigned long soft_reclaimed;

			if (!cpuset_zone_allowed(zone,
						 GFP_KERNEL | __GFP_HARDWALL))
				continue;

			/*
			 * Stop freeing from a zone that already has plenty of
			 * free memory for compaction. Compaction is invoked
			 * for any non-zero order, but only frequent
			 * costly-order reclamation (e.g. transparent huge page
			 * allocations) is disruptive enough to be a noticeable
			 * problem.
			 */
			if (IS_ENABLED(CONFIG_COMPACTION) &&
			    sc->order > PAGE_ALLOC_COSTLY_ORDER &&
			    compaction_ready(zone, sc)) {
				sc->compaction_ready = true;
				continue;
			}

			/*
			 * Shrink each node in the zonelist once. If the
			 * zonelist is ordered by zone (not the default) then a
			 * node may be shrunk multiple times but in that case
			 * the user prefers lower zones being preserved.
			 */
			if (pgdat == last_pgdat)
				continue;

			/*
			 * Steal pages from memory cgroups that are over their
			 * soft limit and account both the reclaimed and the
			 * scanned pages. This serves global memory pressure
			 * and balancing, not a memcg's own limit.
			 */
			soft_reclaimed = mem_cgroup_soft_limit_reclaim(pgdat,
							sc->order, sc->gfp_mask,
							&soft_scanned);
			sc->nr_reclaimed += soft_reclaimed;
			sc->nr_scanned += soft_scanned;
			/* need some check for avoid more shrink_zone() */
		}

		/* Same once-per-node rule as in the global reclaim branch */
		if (pgdat == last_pgdat)
			continue;

		last_pgdat = pgdat;
		shrink_node(pgdat, sc);
	}

	/*
	 * Put back the caller's mask so a promotion to __GFP_HIGHMEM above
	 * does not leak out of this function.
	 */
	sc->gfp_mask = saved_mask;
}
/*
 * For every memcg in the hierarchy walked from @root_memcg, store the
 * lruvec's current local WORKINGSET_ACTIVATE count for @pgdat in
 * lruvec->refaults.
 */
static void snapshot_refaults(struct mem_cgroup *root_memcg, pg_data_t *pgdat)
{
	struct mem_cgroup *memcg = mem_cgroup_iter(root_memcg, NULL, NULL);

	/* The body runs at least once, even if the first lookup gave NULL */
	for (;;) {
		struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, memcg);

		lruvec->refaults = lruvec_page_state_local(lruvec,
							   WORKINGSET_ACTIVATE);

		memcg = mem_cgroup_iter(root_memcg, memcg, NULL);
		if (!memcg)
			break;
	}
}
/*
* This is the main entry point to direct page reclaim.
*
* shrink_zones() is run over @zonelist repeatedly, decrementing sc->priority
* after each pass, until sc->nr_to_reclaim pages were reclaimed, compaction
* is deemed ready, or the priority drops below zero.
*
* If a full scan of the inactive list fails to free enough memory then we
* are "out of memory" and something needs to be killed.
*
* If the caller is !__GFP_FS then the probability of a failure is reasonably
* high - the zone may be full of dirty or under-writeback pages, which this
* caller can't do much about. We kick the writeback threads and take explicit
* naps in the hope that some of these pages can be written. But if the
* allocating task holds filesystem locks which prevent writeout this might not
* work, and the allocation attempt will fail.
*
* returns: the number of pages reclaimed, if any were reclaimed
* 1, if nothing was reclaimed but reclaim was aborted so that
* compaction can be tried (keeps the caller from going OOM)
* 0, if no pages reclaimed
*/
static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
struct scan_control *sc)
{
/* Remembered so that a retry starts again from the caller's priority */
int initial_priority = sc->priority;
pg_data_t *last_pgdat;
struct zoneref *z;
struct zone *zone;
retry:
delayacct_freepages_start();
if (global_reclaim(sc))
__count_zid_vm_events(ALLOCSTALL, sc->reclaim_idx, 1);
do {
vmpressure_prio(sc->gfp_mask, sc->target_mem_cgroup,
sc->priority);
/* nr_scanned is tracked per pass; nr_reclaimed keeps accumulating */
sc->nr_scanned = 0;
shrink_zones(zonelist, sc);
if (sc->nr_reclaimed >= sc->nr_to_reclaim)
break;
if (sc->compaction_ready)
break;
/*
* If we're getting trouble reclaiming, start doing
* writepage even in laptop mode.
*/
if (sc->priority < DEF_PRIORITY - 2)
sc->may_writepage = 1;
} while (--sc->priority >= 0);
/*
* Visit each node of the zonelist once: snapshot its refault counters
* and clear the memcg congestion state for the reclaim target.
*/
last_pgdat = NULL;
for_each_zone_zonelist_nodemask(zone, z, zonelist, sc->reclaim_idx,
sc->nodemask) {
if (zone->zone_pgdat == last_pgdat)
continue;
last_pgdat = zone->zone_pgdat;
snapshot_refaults(sc->target_mem_cgroup, zone->zone_pgdat);
set_memcg_congestion(last_pgdat, sc->target_mem_cgroup, false);
}
delayacct_freepages_end();
if (sc->nr_reclaimed)
return sc->nr_reclaimed;
/* Aborted reclaim to try compaction? don't OOM, then */
if (sc->compaction_ready)
return 1;
/* Untapped cgroup reserves? Don't OOM, retry. */
if (sc->memcg_low_skipped) {
/* Start over from the initial priority with memcg_low_reclaim set */
sc->priority = initial_priority;
sc->memcg_low_reclaim = 1;
sc->memcg_low_skipped = 0;
goto retry;
}
return 0;
}
/*
 * Tell whether direct reclaimers may proceed on @pgdat without throttling.
 *
 * The free pages of all managed, reclaimable zones up to ZONE_NORMAL are
 * compared against half of the sum of their min watermarks. Returns true if
 * free memory is above that level, if kswapd has already failed
 * MAX_RECLAIM_RETRIES times on this node, or if there is no reserve at all.
 * When returning false, kswapd is woken if it is waiting on its wait queue.
 */
static bool allow_direct_reclaim(pg_data_t *pgdat)
{
	unsigned long reserve = 0;
	unsigned long nr_free = 0;
	bool wmark_ok;
	int zid;

	if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES)
		return true;

	for (zid = 0; zid <= ZONE_NORMAL; zid++) {
		struct zone *zone = &pgdat->node_zones[zid];

		if (!managed_zone(zone) || !zone_reclaimable_pages(zone))
			continue;

		reserve += min_wmark_pages(zone);
		nr_free += zone_page_state(zone, NR_FREE_PAGES);
	}

	/* If there are no reserves (unexpected config) then do not throttle */
	if (!reserve)
		return true;

	wmark_ok = nr_free > reserve / 2;
	if (wmark_ok)
		return wmark_ok;

	/* kswapd must be awake if processes are being throttled */
	if (waitqueue_active(&pgdat->kswapd_wait)) {
		if (READ_ONCE(pgdat->kswapd_classzone_idx) > ZONE_NORMAL)
			WRITE_ONCE(pgdat->kswapd_classzone_idx, ZONE_NORMAL);

		wake_up_interruptible(&pgdat->kswapd_wait);
	}

	return wmark_ok;
}
/*
 * Throttle direct reclaimers if backing storage is backed by the network
 * and the PFMEMALLOC reserve for the preferred node is getting dangerously
 * depleted. kswapd will continue to make progress and wake the processes
 * when the low watermark is reached.
 *
 * Returns true if a fatal signal was delivered during throttling. If this
 * happens, the page allocator should not consider triggering the OOM killer.
 * Returns false in every other case, including when no throttling happened.
 */
static bool throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist,
				    nodemask_t *nodemask)
{
	pg_data_t *pgdat = NULL;
	struct zoneref *z;
	struct zone *zone;

	/*
	 * Kernel threads should not be throttled as they may be indirectly
	 * responsible for cleaning pages necessary for reclaim to make forward
	 * progress. kjournald for example may enter direct reclaim while
	 * committing a transaction, where throttling it could force other
	 * processes to block on log_wait_commit().
	 */
	if (current->flags & PF_KTHREAD)
		return false;

	/*
	 * A task with a fatal signal pending should not throttle: let it
	 * return quickly so it can exit and free its memory.
	 */
	if (fatal_signal_pending(current))
		return false;

	/*
	 * Check if the pfmemalloc reserves are ok by finding the first node
	 * with a usable ZONE_NORMAL or lower zone. The expectation is that
	 * GFP_KERNEL will be required for allocating network buffers when
	 * swapping over the network so ZONE_HIGHMEM is unusable.
	 *
	 * Throttling is based on the first usable node and throttled processes
	 * wait on a queue until kswapd makes progress and wakes them. There
	 * is an affinity then between processes waking up and where reclaim
	 * progress has been made assuming the process wakes on the same node.
	 * More importantly, processes running on remote nodes will not compete
	 * for remote pfmemalloc reserves and processes on different nodes
	 * should make reasonable progress.
	 */
	for_each_zone_zonelist_nodemask(zone, z, zonelist,
					gfp_zone(gfp_mask), nodemask) {
		if (zone_idx(zone) > ZONE_NORMAL)
			continue;

		/* Throttle based on the first usable node */
		pgdat = zone->zone_pgdat;
		if (allow_direct_reclaim(pgdat))
			return false;
		break;
	}

	/* If no zone was usable by the allocation flags then do not throttle */
	if (!pgdat)
		return false;

	/* Account for the throttling */
	count_vm_event(PGSCAN_DIRECT_THROTTLE);

	if (gfp_mask & __GFP_FS) {
		/* Throttle until kswapd wakes the process */
		wait_event_killable(pgdat->pfmemalloc_wait,
				    allow_direct_reclaim(pgdat));
	} else {
		/*
		 * If the caller cannot enter the filesystem, it's possible
		 * that it is due to the caller holding an FS lock or
		 * performing a journal transaction in the case of a filesystem
		 * like ext[3|4]. In this case, it is not safe to block on
		 * pfmemalloc_wait as kswapd could be blocked waiting on the
		 * same lock. Instead, throttle for up to a second before
		 * continuing.
		 */
		wait_event_interruptible_timeout(pgdat->pfmemalloc_wait,
						 allow_direct_reclaim(pgdat),
						 HZ);
	}

	if (fatal_signal_pending(current))
		return true;

	return false;
}
/*
 * Direct reclaim on behalf of an allocation of the given @order and
 * @gfp_mask over @zonelist / @nodemask.
 *
 * Returns the value of do_try_to_free_pages(), or 1 if a fatal signal
 * arrived while the caller was throttled.
 */
unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
				gfp_t gfp_mask, nodemask_t *nodemask)
{
	struct scan_control sc = {
		.gfp_mask = current_gfp_context(gfp_mask),
		.reclaim_idx = gfp_zone(gfp_mask),
		.nodemask = nodemask,
		.order = order,
		.priority = DEF_PRIORITY,
		.nr_to_reclaim = SWAP_CLUSTER_MAX,
		.may_writepage = !laptop_mode,
		.may_unmap = 1,
		.may_swap = 1,
	};
	unsigned long freed;

	/*
	 * scan_control uses s8 fields for order, priority, and reclaim_idx.
	 * Confirm they are large enough for max values.
	 */
	BUILD_BUG_ON(MAX_ORDER > S8_MAX);
	BUILD_BUG_ON(DEF_PRIORITY > S8_MAX);
	BUILD_BUG_ON(MAX_NR_ZONES > S8_MAX);

	/*
	 * A fatal signal delivered while throttled means reclaim is not
	 * entered at all. Report 1 so the page allocator does not OOM kill
	 * at this point.
	 */
	if (throttle_direct_reclaim(sc.gfp_mask, zonelist, nodemask))
		return 1;

	set_task_reclaim_state(current, &sc.reclaim_state);
	trace_mm_vmscan_direct_reclaim_begin(order, sc.gfp_mask);

	freed = do_try_to_free_pages(zonelist, &sc);

	trace_mm_vmscan_direct_reclaim_end(freed);
	set_task_reclaim_state(current, NULL);

	return freed;
}
#ifdef CONFIG_MEMCG
/*
 * Only used by soft limit reclaim. Do not reuse for anything else.
 *
 * Shrinks @memcg's LRUs on @pgdat once via shrink_node_memcg(). The number of
 * pages scanned is stored in *@nr_scanned and the number of pages reclaimed
 * is returned. Swapping is disabled when @noswap is true.
 */
unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg,
				     gfp_t gfp_mask, bool noswap,
				     pg_data_t *pgdat,
				     unsigned long *nr_scanned)
{
	unsigned long lru_pages;
	struct scan_control sc = {
		.nr_to_reclaim = SWAP_CLUSTER_MAX,
		.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
			    (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK),
		.target_mem_cgroup = memcg,
		.reclaim_idx = MAX_NR_ZONES - 1,
		.may_writepage = !laptop_mode,
		.may_unmap = 1,
		.may_swap = !noswap,
	};

	WARN_ON_ONCE(!current->reclaim_state);

	trace_mm_vmscan_memcg_softlimit_reclaim_begin(sc.order,
						      sc.gfp_mask);

	/*
	 * NOTE: sc.priority is deliberately left at zero (it is not set in
	 * the initialiser above). A real priority could be obtained, but
	 * using it would limit the pages that can be scanned, and if nothing
	 * is reclaimed here the shrink_node from balance_pgdat will pick up
	 * pages from other mem cgroups as well.
	 */
	shrink_node_memcg(pgdat, memcg, &sc, &lru_pages);

	trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);

	*nr_scanned = sc.nr_scanned;

	return sc.nr_reclaimed;
}
/*
 * Reclaim on behalf of @memcg. The target is the larger of @nr_pages and
 * SWAP_CLUSTER_MAX; @may_swap controls whether swapping is allowed.
 *
 * Returns the value of do_try_to_free_pages().
 */
unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
					   unsigned long nr_pages,
					   gfp_t gfp_mask,
					   bool may_swap)
{
	struct scan_control sc = {
		.nr_to_reclaim = max(nr_pages, SWAP_CLUSTER_MAX),
		.gfp_mask = (current_gfp_context(gfp_mask) & GFP_RECLAIM_MASK) |
				(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK),
		.reclaim_idx = MAX_NR_ZONES - 1,
		.target_mem_cgroup = memcg,
		.priority = DEF_PRIORITY,
		.may_writepage = !laptop_mode,
		.may_unmap = 1,
		.may_swap = may_swap,
	};
	struct zonelist *zonelist;
	unsigned long pflags;
	unsigned long freed;
	unsigned int saved_noreclaim;
	int nid;

	set_task_reclaim_state(current, &sc.reclaim_state);

	/*
	 * Unlike direct reclaim via alloc_pages(), memcg reclaim does not
	 * care where the pages come from, so the scan need not start on the
	 * current node.
	 */
	nid = mem_cgroup_select_victim_node(memcg);
	zonelist = &NODE_DATA(nid)->node_zonelists[ZONELIST_FALLBACK];

	trace_mm_vmscan_memcg_reclaim_begin(0, sc.gfp_mask);

	psi_memstall_enter(&pflags);
	saved_noreclaim = memalloc_noreclaim_save();

	freed = do_try_to_free_pages(zonelist, &sc);

	memalloc_noreclaim_restore(saved_noreclaim);
	psi_memstall_leave(&pflags);

	trace_mm_vmscan_memcg_reclaim_end(freed);
	set_task_reclaim_state(current, NULL);

	return freed;
}
#endif
/*
 * Walk every memcg and, where the inactive anon list of its lruvec on @pgdat
 * is low, shrink the active anon list by up to SWAP_CLUSTER_MAX pages.
 * Does nothing when total_swap_pages is zero.
 */
static void age_active_anon(struct pglist_data *pgdat,
			    struct scan_control *sc)
{
	struct mem_cgroup *memcg;

	if (!total_swap_pages)
		return;

	memcg = mem_cgroup_iter(NULL, NULL, NULL);
	/* The body runs at least once, even if the first lookup gave NULL */
	for (;;) {
		struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, memcg);

		if (inactive_list_is_low(lruvec, false, sc, true))
			shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
					   sc, LRU_ACTIVE_ANON);

		memcg = mem_cgroup_iter(NULL, memcg, NULL);
		if (!memcg)
			break;
	}
}
/*
 * Returns true if any managed zone of @pgdat at or below @classzone_idx has a
 * non-zero watermark_boost.
 */
static bool pgdat_watermark_boosted(pg_data_t *pgdat, int classzone_idx)
{
	int zid = classzone_idx;

	/*
	 * Check for watermark boosts top-down as the higher zones
	 * are more likely to be boosted. Both watermarks and boosts
	 * should not be checked at the same time as reclaim would
	 * start prematurely when there is no boosting and a lower
	 * zone is balanced.
	 */
	while (zid >= 0) {
		struct zone *zone = pgdat->node_zones + zid;

		zid--;
		if (managed_zone(zone) && zone->watermark_boost)
			return true;
	}

	return false;
}
/*
 * Returns true if there is an eligible zone balanced for the request order
 * and classzone_idx, i.e. a managed zone at or below @classzone_idx that
 * meets its high watermark for @order.
 */
static bool pgdat_balanced(pg_data_t *pgdat, int order, int classzone_idx)
{
	/* Stays at -1 for as long as no managed zone has been seen */
	unsigned long mark = -1;
	int zid;

	/*
	 * Check watermarks bottom-up as lower zones are more likely to
	 * meet watermarks.
	 */
	for (zid = 0; zid <= classzone_idx; zid++) {
		struct zone *zone = pgdat->node_zones + zid;

		if (managed_zone(zone)) {
			mark = high_wmark_pages(zone);
			if (zone_watermark_ok_safe(zone, order, mark,
						   classzone_idx))
				return true;
		}
	}

	/*
	 * If a node has no populated zone within classzone_idx, it does not
	 * need balancing by definition. This can happen if a zone-restricted
	 * allocation tries to wake a remote kswapd.
	 */
	return mark == -1;
}
/*
 * Drop the PGDAT_CONGESTED, PGDAT_DIRTY and PGDAT_WRITEBACK bits from
 * @pgdat's flags.
 */
static void clear_pgdat_congested(pg_data_t *pgdat)
{
	unsigned long *node_flags = &pgdat->flags;

	clear_bit(PGDAT_CONGESTED, node_flags);
	clear_bit(PGDAT_DIRTY, node_flags);
	clear_bit(PGDAT_WRITEBACK, node_flags);
}
/*
 * Prepare kswapd for sleeping. This verifies that there are no processes
 * waiting in throttle_direct_reclaim() and that watermarks have been met.
 *
 * Returns true if kswapd is ready to sleep
 */
static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, int classzone_idx)
{
	/*
	 * Throttled processes are normally woken from balance_pgdat() as soon
	 * as allow_direct_reclaim() is true, but two races exist:
	 *
	 *  - a process may get throttled after kswapd has checked the
	 *    watermarks;
	 *  - processes get throttled, kswapd wakes, and a large process exits
	 *    and thereby balances the zones, so kswapd leaves balance_pgdat()
	 *    before it reaches the wake-up checks.
	 *
	 * If kswapd is going to sleep, nobody should remain asleep on
	 * pfmemalloc_wait, so wake them here if necessary. Should the wake-up
	 * be premature, the processes will wake kswapd and get throttled
	 * again. Unlike the wake-ups in balance_pgdat(), this one happens
	 * under prepare_to_wait().
	 */
	if (waitqueue_active(&pgdat->pfmemalloc_wait))
		wake_up_all(&pgdat->pfmemalloc_wait);

	/* Hopeless node, leave it to direct reclaim */
	if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES)
		return true;

	if (!pgdat_balanced(pgdat, order, classzone_idx))
		return false;

	clear_pgdat_congested(pgdat);
	return true;
}
/*
 * kswapd shrinks a node of pages that are at or below the highest usable
 * zone that is currently unbalanced.
 *
 * Returns true if kswapd scanned at least the requested number of pages to
 * reclaim or if the lack of progress was due to pages under writeback.
 * This is used to determine if the scanning priority needs to be raised.
 */
static bool kswapd_shrink_node(pg_data_t *pgdat,
			       struct scan_control *sc)
{
	unsigned long target = 0;
	int zid;

	/* Reclaim a number of pages proportional to the number of zones */
	for (zid = 0; zid <= sc->reclaim_idx; zid++) {
		struct zone *zone = pgdat->node_zones + zid;

		if (managed_zone(zone))
			target += max(high_wmark_pages(zone), SWAP_CLUSTER_MAX);
	}
	sc->nr_to_reclaim = target;

	/*
	 * Historically care was taken to put equal pressure on all zones but
	 * now pressure is applied based on node LRU order.
	 */
	shrink_node(pgdat, sc);

	/*
	 * Fragmentation may make it impossible to rebalance the system for
	 * high-order allocations. Once compact_gap(order) pages have been
	 * reclaimed, recheck watermarks only at order-0 to prevent excessive
	 * reclaim, on the assumption that a process requesting a high order
	 * can direct reclaim/compact.
	 */
	if (sc->order && sc->nr_reclaimed >= compact_gap(sc->order))
		sc->order = 0;

	return sc->nr_scanned >= sc->nr_to_reclaim;
}
/*
* For kswapd, balance_pgdat() will reclaim pages across a node from zones
* that are eligible for use by the caller until at least one zone is
* balanced.
*
* @pgdat: node to balance
* @order: allocation order kswapd was woken for
* @classzone_idx: highest zone index the balancing applies to
*
* Returns the order kswapd finished reclaiming at.
*
* kswapd scans the zones in the highmem->normal->dma direction. It skips
* zones which have free_pages > high_wmark_pages(zone), but once a zone is
* found to have free_pages <= high_wmark_pages(zone), any page in that zone
* or lower is eligible for reclaim until at least one usable zone is
* balanced.
*/
static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx)
{
int i;
unsigned long nr_soft_reclaimed;
unsigned long nr_soft_scanned;
unsigned long pflags;
unsigned long nr_boost_reclaim;
/* Per-zone boost values sampled on entry, subtracted again at "out" */
unsigned long zone_boosts[MAX_NR_ZONES] = { 0, };
bool boosted;
struct zone *zone;
struct scan_control sc = {
.gfp_mask = GFP_KERNEL,
.order = order,
.may_unmap = 1,
};
set_task_reclaim_state(current, &sc.reclaim_state);
psi_memstall_enter(&pflags);
__fs_reclaim_acquire();
count_vm_event(PAGEOUTRUN);
/*
* Account for the reclaim boost. Note that the zone boost is left in
* place so that parallel allocations that are near the watermark will
* stall or direct reclaim until kswapd is finished.
*/
nr_boost_reclaim = 0;
for (i = 0; i <= classzone_idx; i++) {
zone = pgdat->node_zones + i;
if (!managed_zone(zone))
continue;
nr_boost_reclaim += zone->watermark_boost;
zone_boosts[i] = zone->watermark_boost;
}
/* Remember whether any boost was present; nr_boost_reclaim is consumed below */
boosted = nr_boost_reclaim;
restart:
sc.priority = DEF_PRIORITY;
do {
/* Snapshot so that the progress of this iteration can be computed */
unsigned long nr_reclaimed = sc.nr_reclaimed;
bool raise_priority = true;
bool balanced;
bool ret;
sc.reclaim_idx = classzone_idx;
/*
* If the number of buffer_heads exceeds the maximum allowed
* then consider reclaiming from all zones. This has a dual
* purpose -- on 64-bit systems it is expected that
* buffer_heads are stripped during active rotation. On 32-bit
* systems, highmem pages can pin lowmem memory and shrinking
* buffers can relieve lowmem pressure. Reclaim may still not
* go ahead if all eligible zones for the original allocation
* request are balanced to avoid excessive reclaim from kswapd.
*/
if (buffer_heads_over_limit) {
/* Use the highest managed zone of the node as reclaim_idx */
for (i = MAX_NR_ZONES - 1; i >= 0; i--) {
zone = pgdat->node_zones + i;
if (!managed_zone(zone))
continue;
sc.reclaim_idx = i;
break;
}
}
/*
* If the pgdat is imbalanced then ignore boosting and preserve
* the watermarks for a later time and restart. Note that the
* zone watermarks will be still reset at the end of balancing
* on the grounds that the normal reclaim should be enough to
* re-evaluate if boosting is required when kswapd next wakes.
*/
balanced = pgdat_balanced(pgdat, sc.order, classzone_idx);
if (!balanced && nr_boost_reclaim) {
nr_boost_reclaim = 0;
goto restart;
}
/*
* If boosting is not active then only reclaim if there are no
* eligible zones. Note that sc.reclaim_idx is not used as
* buffer_heads_over_limit may have adjusted it.
*/
if (!nr_boost_reclaim && balanced)
goto out;
/* Limit the priority of boosting to avoid reclaim writeback */
if (nr_boost_reclaim && sc.priority == DEF_PRIORITY - 2)
raise_priority = false;
/*
* Do not writeback or swap pages for boosted reclaim. The
* intent is to relieve pressure not issue sub-optimal IO
* from reclaim context. If no pages are reclaimed, the
* reclaim will be aborted.
*/
sc.may_writepage = !laptop_mode && !nr_boost_reclaim;
sc.may_swap = !nr_boost_reclaim;
/*
* Do some background aging of the anon list, to give
* pages a chance to be referenced before reclaiming. All
* pages are rotated regardless of classzone as this is
* about consistent aging.
*/
age_active_anon(pgdat, &sc);
/*
* If we're getting trouble reclaiming, start doing writepage
* even in laptop mode.
*/
if (sc.priority < DEF_PRIORITY - 2)
sc.may_writepage = 1;
/* Call soft limit reclaim before calling shrink_node. */
sc.nr_scanned = 0;
nr_soft_scanned = 0;
nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(pgdat, sc.order,
sc.gfp_mask, &nr_soft_scanned);
sc.nr_reclaimed += nr_soft_reclaimed;
/*
* There should be no need to raise the scanning priority if
* enough pages are already being scanned that the high
* watermark would be met at 100% efficiency.
*/
if (kswapd_shrink_node(pgdat, &sc))
raise_priority = false;
/*
* If the low watermark is met there is no need for processes
* to be throttled on pfmemalloc_wait as they should now be
* able to safely make forward progress. Wake them
*/
if (waitqueue_active(&pgdat->pfmemalloc_wait) &&
allow_direct_reclaim(pgdat))
wake_up_all(&pgdat->pfmemalloc_wait);
/* Check if kswapd should be suspending */
__fs_reclaim_release();
ret = try_to_freeze();
__fs_reclaim_acquire();
if (ret || kthread_should_stop())
break;
/*
* Raise priority if scanning rate is too low or there was no
* progress in reclaiming pages
*/
/* From here on nr_reclaimed holds only this iteration's progress */
nr_reclaimed = sc.nr_reclaimed - nr_reclaimed;
nr_boost_reclaim -= min(nr_boost_reclaim, nr_reclaimed);
/*
* If reclaim made no progress for a boost, stop reclaim as
* IO cannot be queued and it could be an infinite loop in
* extreme circumstances.
*/
if (nr_boost_reclaim && !nr_reclaimed)
break;
if (raise_priority || !nr_reclaimed)
sc.priority--;
} while (sc.priority >= 1);
/* Nothing reclaimed in the whole run counts as a kswapd failure */
if (!sc.nr_reclaimed)
pgdat->kswapd_failures++;
out:
/* If reclaim was boosted, account for the reclaim done in this pass */
if (boosted) {
unsigned long flags;
for (i = 0; i <= classzone_idx; i++) {
if (!zone_boosts[i])
continue;
/* Increments are under the zone lock */
zone = pgdat->node_zones + i;
spin_lock_irqsave(&zone->lock, flags);
zone->watermark_boost -= min(zone->watermark_boost, zone_boosts[i]);
spin_unlock_irqrestore(&zone->lock, flags);
}
/*
* As there is now likely space, wakeup kcompact to defragment
* pageblocks.
*/
wakeup_kcompactd(pgdat, pageblock_order, classzone_idx);
}
snapshot_refaults(NULL, pgdat);
__fs_reclaim_release();
psi_memstall_leave(&pflags);
set_task_reclaim_state(current, NULL);
/*
* Return the order kswapd stopped reclaiming at as
* prepare_kswapd_sleep() takes it into account. If another caller
* entered the allocator slow path while kswapd was awake, order will
* remain at the higher level.
*/
return sc.order;
}
/*
 * pgdat->kswapd_classzone_idx carries the highest zone index to be reclaimed
 * by kswapd, as requested by the waker. MAX_NR_ZONES is not a valid index and
 * means that either kswapd runs for the first time or it could not sleep
 * after the previous reclaim attempt (node is still unbalanced). In that case
 * the zone index of the previous kswapd reclaim cycle, @prev_classzone_idx,
 * is returned instead.
 */
static enum zone_type kswapd_classzone_idx(pg_data_t *pgdat,
					   enum zone_type prev_classzone_idx)
{
	enum zone_type requested = READ_ONCE(pgdat->kswapd_classzone_idx);

	if (requested == MAX_NR_ZONES)
		return prev_classzone_idx;

	return requested;
}
/*
* Put kswapd to sleep on pgdat->kswapd_wait if the node allows it.
*
* A short sleep of HZ/10 is tried first; only if that sleep ran its full
* length and prepare_kswapd_sleep() still succeeds does kswapd sleep until
* explicitly woken. kcompactd is woken for @alloc_order before the short
* sleep, while the sleep checks themselves use @reclaim_order.
*
* Returns immediately if the task is freezing or the kthread should stop.
*/
static void kswapd_try_to_sleep(pg_data_t *pgdat, int alloc_order, int reclaim_order,
unsigned int classzone_idx)
{
/* Non-zero if the short sleep below was cut short */
long remaining = 0;
DEFINE_WAIT(wait);
if (freezing(current) || kthread_should_stop())
return;
prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
/*
* Try to sleep for a short interval. Note that kcompactd will only be
* woken if it is possible to sleep for a short interval. This is
* deliberate on the assumption that if reclaim cannot keep an
* eligible zone balanced that it's also unlikely that compaction will
* succeed.
*/
if (prepare_kswapd_sleep(pgdat, reclaim_order, classzone_idx)) {
/*
* Compaction records what page blocks it recently failed to
* isolate pages from and skips them in the future scanning.
* When kswapd is going to sleep, it is reasonable to assume
* that pages and compaction may succeed so reset the cache.
*/
reset_isolation_suitable(pgdat);
/*
* We have freed the memory, now we should compact it to make
* allocation of the requested order possible.
*/
wakeup_kcompactd(pgdat, alloc_order, classzone_idx);
remaining = schedule_timeout(HZ/10);
/*
* If woken prematurely then reset kswapd_classzone_idx and
* order. The values will either be from a wakeup request or
* the previous request that slept prematurely.
*/
if (remaining) {
WRITE_ONCE(pgdat->kswapd_classzone_idx,
kswapd_classzone_idx(pgdat, classzone_idx));
if (READ_ONCE(pgdat->kswapd_order) < reclaim_order)
WRITE_ONCE(pgdat->kswapd_order, reclaim_order);
}
/* Re-arm the wait entry before the second sleep check below */
finish_wait(&pgdat->kswapd_wait, &wait);
prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
}
/*
* After a short sleep, check if it was a premature sleep. If not, then
* go fully to sleep until explicitly woken up.
*/
if (!remaining &&
prepare_kswapd_sleep(pgdat, reclaim_order, classzone_idx)) {
trace_mm_vmscan_kswapd_sleep(pgdat->node_id);
/*
* vmstat counters are not perfectly accurate and the estimated
* value for counters such as NR_FREE_PAGES can deviate from the
* true value by nr_online_cpus * threshold. To avoid the zone
* watermarks being breached while under pressure, we reduce the
* per-cpu vmstat threshold while kswapd is awake and restore
* them before going back to sleep.
*/
set_pgdat_percpu_threshold(pgdat, calculate_normal_threshold);
if (!kthread_should_stop())
schedule();
set_pgdat_percpu_threshold(pgdat, calculate_pressure_threshold);
} else {
/* No full sleep: count which of the two checks prevented it */
if (remaining)
count_vm_event(KSWAPD_LOW_WMARK_HIT_QUICKLY);
else
count_vm_event(KSWAPD_HIGH_WMARK_HIT_QUICKLY);
}
finish_wait(&pgdat->kswapd_wait, &wait);
}
/*
* The background pageout daemon, started as a kernel thread
* from the init process.
*
* This basically trickles out pages so that we have _some_
* free memory available even if there is no other activity
* that frees anything up. This is needed for things like routing
* etc, where we otherwise might have all activity going on in
* asynchronous contexts that cannot page things out.
*
* If there are applications that are active memory-allocators
* (most normal use), this basically shouldn't matter.
*
* @p is the pg_data_t of the node this thread works on. Returns 0 once
* kthread_should_stop() is observed.
*/
static int kswapd(void *p)
{
unsigned int alloc_order, reclaim_order;
unsigned int classzone_idx = MAX_NR_ZONES - 1;
pg_data_t *pgdat = (pg_data_t*)p;
struct task_struct *tsk = current;
const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
/* Bind the thread to the node's CPUs when the node has any */
if (!cpumask_empty(cpumask))
set_cpus_allowed_ptr(tsk, cpumask);
/*
* Tell the memory management that we're a "memory allocator",
* and that if we need more memory we should get access to it
* regardless (see "__alloc_pages()"). "kswapd" should
* never get caught in the normal page freeing logic.
*
* (Kswapd normally doesn't need memory anyway, but sometimes
* you need a small amount of memory in order to be able to
* page out something else, and this flag essentially protects
* us from recursively trying to free more memory as we're
* trying to free the first piece of memory in the first place).
*/
tsk->flags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
set_freezable();
/* Start with no pending request: order 0 and an invalid classzone index */
WRITE_ONCE(pgdat->kswapd_order, 0);
WRITE_ONCE(pgdat->kswapd_classzone_idx, MAX_NR_ZONES);
for ( ; ; ) {
bool ret;
alloc_order = reclaim_order = READ_ONCE(pgdat->kswapd_order);
classzone_idx = kswapd_classzone_idx(pgdat, classzone_idx);
kswapd_try_sleep:
kswapd_try_to_sleep(pgdat, alloc_order, reclaim_order,
classzone_idx);
/* Read the new order and classzone_idx */
alloc_order = reclaim_order = READ_ONCE(pgdat->kswapd_order);
classzone_idx = kswapd_classzone_idx(pgdat, classzone_idx);
/* Reset the request fields now that the request has been picked up */
WRITE_ONCE(pgdat->kswapd_order, 0);
WRITE_ONCE(pgdat->kswapd_classzone_idx, MAX_NR_ZONES);
ret = try_to_freeze();
if (kthread_should_stop())
break;
/*
* We can speed up thawing tasks if we don't call balance_pgdat
* after returning from the refrigerator
*/
if (ret)
continue;
/*
* Reclaim begins at the requested order but if a high-order
* reclaim fails then kswapd falls back to reclaiming for
* order-0. If that happens, kswapd will consider sleeping
* for the order it finished reclaiming at (reclaim_order)
* but kcompactd is woken to compact for the original
* request (alloc_order).
*/
trace_mm_vmscan_kswapd_wake(pgdat->node_id, classzone_idx,
alloc_order);
reclaim_order = balance_pgdat(pgdat, alloc_order, classzone_idx);
if (reclaim_order < alloc_order)
goto kswapd_try_sleep;
}
/* Drop the flags set on entry before the thread returns */
tsk->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD);
return 0;
}
/*
 * A zone is low on free memory or too fragmented for high-order memory. If
 * kswapd should reclaim (direct reclaim is deferred), wake it up for the zone's
 * pgdat. It will wake up kcompactd after reclaiming memory. If kswapd reclaim
 * has failed or is not needed, still wake up kcompactd if only compaction is
 * needed.
 *
 * The requested @order and @classzone_idx are recorded in the pgdat (only ever
 * raising the pending values) before the wait queue is examined.
 */
void wakeup_kswapd(struct zone *zone, gfp_t gfp_flags, int order,
		   enum zone_type classzone_idx)
{
	enum zone_type pending_idx;
	pg_data_t *pgdat;

	if (!managed_zone(zone) || !cpuset_zone_allowed(zone, gfp_flags))
		return;

	pgdat = zone->zone_pgdat;

	pending_idx = READ_ONCE(pgdat->kswapd_classzone_idx);
	if (pending_idx == MAX_NR_ZONES || pending_idx < classzone_idx)
		WRITE_ONCE(pgdat->kswapd_classzone_idx, classzone_idx);

	if (READ_ONCE(pgdat->kswapd_order) < order)
		WRITE_ONCE(pgdat->kswapd_order, order);

	/* Nobody is sleeping on the queue, so there is nothing to wake */
	if (!waitqueue_active(&pgdat->kswapd_wait))
		return;

	/* Hopeless node, leave it to direct reclaim if possible */
	if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES ||
	    (pgdat_balanced(pgdat, order, classzone_idx) &&
	     !pgdat_watermark_boosted(pgdat, classzone_idx))) {
		/*
		 * Plenty of memory may be free and yet too fragmented for
		 * high-order allocations. Wake kcompactd and let
		 * compaction_suitable() decide whether it is needed. If it
		 * fails, it will defer subsequent attempts to ratelimit its
		 * work.
		 */
		if (!(gfp_flags & __GFP_DIRECT_RECLAIM))
			wakeup_kcompactd(pgdat, order, classzone_idx);
		return;
	}

	trace_mm_vmscan_wakeup_kswapd(pgdat->node_id, classzone_idx, order,
				      gfp_flags);
	wake_up_interruptible(&pgdat->kswapd_wait);
}
#ifdef CONFIG_HIBERNATION
/*
 * Try to free `nr_to_reclaim' of memory, system-wide, and return the number of
 * freed pages.
 *
 * Rather than trying to age LRUs the aim is to preserve the overall
 * LRU order by reclaiming preferentially
 * inactive > active > active referenced > active mapped
 */
unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
{
	struct scan_control sc = {
		.nr_to_reclaim = nr_to_reclaim,
		.gfp_mask = GFP_HIGHUSER_MOVABLE,
		.reclaim_idx = MAX_NR_ZONES - 1,
		.priority = DEF_PRIORITY,
		.may_writepage = 1,
		.may_unmap = 1,
		.may_swap = 1,
		.hibernation_mode = 1,
	};
	struct zonelist *zonelist;
	unsigned long freed;
	unsigned int saved_noreclaim;

	/* Reclaim starts from the zonelist of the node this CPU belongs to */
	zonelist = node_zonelist(numa_node_id(), sc.gfp_mask);

	fs_reclaim_acquire(sc.gfp_mask);
	saved_noreclaim = memalloc_noreclaim_save();
	set_task_reclaim_state(current, &sc.reclaim_state);

	freed = do_try_to_free_pages(zonelist, &sc);

	set_task_reclaim_state(current, NULL);
	memalloc_noreclaim_restore(saved_noreclaim);
	fs_reclaim_release(sc.gfp_mask);

	return freed;
}
#endif /* CONFIG_HIBERNATION */
#ifdef CONFIG_MULTIPLE_KSWAPD
/*
 * Bring the number of kswapd threads of node @nid from
 * kswapd_threads_current to kswapd_threads.
 *
 * When shrinking, the highest-numbered threads are stopped and their
 * mkswapd[] slots cleared. When growing, new threads are started in the
 * slots following the current ones; the first failure ends the loop and
 * leaves that slot NULL.
 */
static void update_kswapd_threads_node(int nid)
{
	int nr_running = kswapd_threads_current;
	pg_data_t *pgdat = NODE_DATA(nid);
	int top = nr_running - 1;
	int nr_delta;
	int hid;

	if (kswapd_threads < nr_running) {
		nr_delta = nr_running - kswapd_threads;

		/* Walk down from the last slot, stopping nr_delta threads. */
		for (hid = top; hid > top - nr_delta; hid--) {
			if (!pgdat->mkswapd[hid])
				continue;

			kthread_stop(pgdat->mkswapd[hid]);
			pgdat->mkswapd[hid] = NULL;
		}
		return;
	}

	nr_delta = kswapd_threads - nr_running;

	/* Fill the slots immediately after the currently used ones. */
	for (hid = top + 1; hid < top + 1 + nr_delta; hid++) {
		pgdat->mkswapd[hid] = kthread_run(kswapd, pgdat,
						  "kswapd%d:%d", nid, hid);
		if (!IS_ERR(pgdat->mkswapd[hid]))
			continue;

		pr_err("Failed to start kswapd%d on node %d\n",
		       hid, nid);
		pgdat->mkswapd[hid] = NULL;
		/* We are out of resources. Do not start any more threads. */
		break;
	}
}
/*
 * Apply a changed kswapd_threads value to every node with memory and
 * record it in kswapd_threads_current. Does nothing when the two already
 * agree.
 */
void update_kswapd_threads(void)
{
	int node;

	if (kswapd_threads == kswapd_threads_current)
		return;

	/*
	 * Take the memory hotplug lock so that this does not race with
	 * updates initiated by memory hotplug.
	 */
	mem_hotplug_begin();

	for_each_node_state(node, N_MEMORY)
		update_kswapd_threads_node(node);

	pr_info("kswapd_thread count changed, old:%d new:%d\n",
		kswapd_threads_current, kswapd_threads);
	kswapd_threads_current = kswapd_threads;

	mem_hotplug_done();
}
/*
 * Start the additional kswapd threads (slots 1 .. kswapd_threads - 1) for
 * node @nid. Slot 0 is set to the node's primary kswapd thread.
 *
 * A thread that fails to start leaves its slot NULL and the loop carries
 * on with the next slot. Returns 0, or the error of the last thread that
 * failed to start. kswapd_threads_current is updated in either case.
 */
static int multi_kswapd_run(int nid)
{
	pg_data_t *pgdat = NODE_DATA(nid);
	int nr_threads = kswapd_threads;
	int err = 0;
	int hid;

	pgdat->mkswapd[0] = pgdat->kswapd;

	for (hid = 1; hid < nr_threads; hid++) {
		pgdat->mkswapd[hid] = kthread_run(kswapd, pgdat,
						  "kswapd%d:%d", nid, hid);
		if (!IS_ERR(pgdat->mkswapd[hid]))
			continue;

		/* A failure during boot is unexpected: warn about it. */
		WARN_ON(system_state < SYSTEM_RUNNING);
		pr_err("Failed to start kswapd%d on node %d\n",
		       hid, nid);
		err = PTR_ERR(pgdat->mkswapd[hid]);
		pgdat->mkswapd[hid] = NULL;
	}

	kswapd_threads_current = nr_threads;

	return err;
}
static void multi_kswapd_stop(int nid)
{
int hid = 0;
int nr_threads = kswapd_threads_current;
struct task_struct *kswapd;
NODE_DATA(nid)->mkswapd[hid] = NULL;
for (hid = 1; hid < nr_threads; hid++) {
kswapd = NODE_DATA(nid)->mkswapd[hid];
if (kswapd) {
kthread_stop(kswapd);
NODE_DATA(nid)->mkswapd[hid] = NULL;
}
}
}
/*
 * Re-apply @mask as the allowed CPU set of the additional kswapd threads
 * (slots 1 .. kswapd_threads_current - 1) of @pgdat.
 *
 * A slot may be NULL: multi_kswapd_run() and update_kswapd_threads_node()
 * store NULL when a thread fails to start, while kswapd_threads_current
 * still counts that slot. Such slots are skipped so that a NULL task is
 * never handed to set_cpus_allowed_ptr().
 */
static void multi_kswapd_cpu_online(pg_data_t *pgdat,
				    const struct cpumask *mask)
{
	int nr_threads = kswapd_threads_current;
	int hid;

	for (hid = 1; hid < nr_threads; hid++) {
		struct task_struct *task = pgdat->mkswapd[hid];

		if (task)
			set_cpus_allowed_ptr(task, mask);
	}
}
#endif
/*
 * CPU hotplug "online" callback.
 *
 * It's optimal to keep kswapds on the same CPUs as their memory, but
 * not required for correctness. So if the last cpu in a node goes
 * away, we get changed to run anywhere: as the first one comes back,
 * restore their cpu bindings.
 *
 * @cpu is not used: every node with memory that has at least one of its
 * CPUs online gets its mask re-applied. Always returns 0.
 */
static int kswapd_cpu_online(unsigned int cpu)
{
	int nid;

	for_each_node_state(nid, N_MEMORY) {
		pg_data_t *pgdat = NODE_DATA(nid);
		const struct cpumask *mask = cpumask_of_node(pgdat->node_id);

		/* None of this node's CPUs is online: nothing to restore. */
		if (cpumask_any_and(cpu_online_mask, mask) >= nr_cpu_ids)
			continue;

		/*
		 * One of our CPUs online: restore mask. pgdat->kswapd is NULL
		 * when kswapd_run() failed or after kswapd_stop(), so do not
		 * hand a NULL task to set_cpus_allowed_ptr().
		 */
		if (pgdat->kswapd)
			set_cpus_allowed_ptr(pgdat->kswapd, mask);
		multi_kswapd_cpu_online(pgdat, mask);
	}

	return 0;
}
/*
 * This kswapd start function will be called by init and node-hot-add.
 * On node-hot-add, kswapd will be moved to the proper cpus if cpus are
 * hot-added.
 *
 * Returns 0 if the node already has a kswapd thread, the error code if the
 * primary thread could not be created, and otherwise the result of
 * multi_kswapd_run().
 */
int kswapd_run(int nid)
{
	pg_data_t *pgdat = NODE_DATA(nid);
	int err;

	/* Already running on this node. */
	if (pgdat->kswapd)
		return 0;

	pgdat->kswapd = kthread_run(kswapd, pgdat, "kswapd%d:0", nid);
	if (!IS_ERR(pgdat->kswapd))
		return multi_kswapd_run(nid);

	/* failure at boot is fatal */
	BUG_ON(system_state < SYSTEM_RUNNING);
	pr_err("Failed to start kswapd on node %d\n", nid);
	err = PTR_ERR(pgdat->kswapd);
	pgdat->kswapd = NULL;

	return err;
}
/*
* Called by memory hotplug when all memory in a node is offlined. Caller must
* hold mem_hotplug_begin/end().
*/
void kswapd_stop(int nid)
{
struct task_struct *kswapd = NODE_DATA(nid)->kswapd;
if (kswapd) {
kthread_stop(kswapd);
NODE_DATA(nid)->kswapd = NULL;
}
multi_kswapd_stop(nid);
}
/*
 * Boot-time setup: run swap_setup(), start kswapd on every node that has
 * memory and register the CPU-online callback that restores kswapd CPU
 * bindings. The results of kswapd_run() are not checked here, and a
 * failure to register the callback only triggers a warning. Always
 * returns 0.
 */
static int __init kswapd_init(void)
{
	int node;
	int err;

	swap_setup();

	for_each_node_state(node, N_MEMORY)
		kswapd_run(node);

	err = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
					"mm/vmscan:online",
					kswapd_cpu_online, NULL);
	WARN_ON(err < 0);

	return 0;
}
module_init(kswapd_init)
#ifdef CONFIG_NUMA
/*
 * Node reclaim mode
 *
 * If non-zero call node_reclaim when the number of free pages falls below
 * the watermarks.
 *
 * The value is tested as a bitmask against the RECLAIM_* flags defined
 * below.
 */
int node_reclaim_mode __read_mostly;
#define RECLAIM_OFF 0
#define RECLAIM_ZONE (1<<0) /* Run shrink_inactive_list on the zone */
#define RECLAIM_WRITE (1<<1) /* Writeout pages during reclaim */
#define RECLAIM_UNMAP (1<<2) /* Unmap pages during reclaim */
/*
 * Priority for NODE_RECLAIM. This determines the fraction of pages
 * of a node considered for each zone_reclaim. 4 scans 1/16th of
 * a zone.
 *
 * __node_reclaim() starts scanning at this priority and decrements it
 * after each pass that has not yet reclaimed enough pages.
 */
#define NODE_RECLAIM_PRIORITY 4
/*
 * Percentage of pages in a zone that must be unmapped for node_reclaim to
 * occur.
 *
 * NOTE(review): presumably the source of pgdat->min_unmapped_pages, which
 * is what the reclaim code here compares against; confirm at the sysctl
 * handler.
 */
int sysctl_min_unmapped_ratio = 1;
/*
 * If the number of slab pages in a zone grows beyond this percentage then
 * slab reclaim needs to occur.
 *
 * NOTE(review): presumably the source of pgdat->min_slab_pages; confirm at
 * the sysctl handler.
 */
int sysctl_min_slab_ratio = 5;
/*
 * Number of file LRU pages (inactive + active) of @pgdat in excess of its
 * NR_FILE_MAPPED count, or 0 if there is no excess.
 */
static inline unsigned long node_unmapped_file_pages(struct pglist_data *pgdat)
{
	unsigned long nr_mapped = node_page_state(pgdat, NR_FILE_MAPPED);
	unsigned long nr_file_lru = node_page_state(pgdat, NR_INACTIVE_FILE) +
				    node_page_state(pgdat, NR_ACTIVE_FILE);

	/*
	 * It's possible for there to be more file mapped pages than
	 * accounted for by the pages on the file LRU lists because
	 * tmpfs pages accounted for as ANON can also be FILE_MAPPED
	 */
	if (nr_file_lru <= nr_mapped)
		return 0;

	return nr_file_lru - nr_mapped;
}
/*
 * Estimate how many page cache pages of @pgdat can be reclaimed under the
 * current node_reclaim_mode.
 */
static unsigned long node_pagecache_reclaimable(struct pglist_data *pgdat)
{
	unsigned long nr_candidates;
	unsigned long nr_excluded = 0;

	/*
	 * If RECLAIM_UNMAP is set, then all file pages are considered
	 * potentially reclaimable. Otherwise, we have to worry about
	 * pages like swapcache and node_unmapped_file_pages() provides
	 * a better estimate
	 */
	nr_candidates = (node_reclaim_mode & RECLAIM_UNMAP) ?
			node_page_state(pgdat, NR_FILE_PAGES) :
			node_unmapped_file_pages(pgdat);

	/* Without RECLAIM_WRITE dirty pages cannot be cleaned: exclude them. */
	if (!(node_reclaim_mode & RECLAIM_WRITE))
		nr_excluded += node_page_state(pgdat, NR_FILE_DIRTY);

	/* Clamp so that the subtraction below cannot underflow. */
	if (unlikely(nr_excluded > nr_candidates))
		nr_excluded = nr_candidates;

	return nr_candidates - nr_excluded;
}
/*
 * Try to free up some pages from this node through reclaim.
 *
 * Returns non-zero when at least 1 << @order pages were reclaimed,
 * 0 otherwise.
 */
static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned int order)
{
	/* Minimum pages needed in order to stay on node */
	const unsigned long nr_pages = 1 << order;
	struct task_struct *p = current;
	unsigned int saved_noreclaim;
	struct scan_control sc = {
		.nr_to_reclaim = max(nr_pages, SWAP_CLUSTER_MAX),
		.gfp_mask = current_gfp_context(gfp_mask),
		.order = order,
		.priority = NODE_RECLAIM_PRIORITY,
		.may_writepage = !!(node_reclaim_mode & RECLAIM_WRITE),
		.may_unmap = !!(node_reclaim_mode & RECLAIM_UNMAP),
		.may_swap = 1,
		.reclaim_idx = gfp_zone(gfp_mask),
	};

	trace_mm_vmscan_node_reclaim_begin(pgdat->node_id, order,
					   sc.gfp_mask);

	cond_resched();
	fs_reclaim_acquire(sc.gfp_mask);

	/*
	 * We need to be able to allocate from the reserves for RECLAIM_UNMAP
	 * and we also need to be able to write out pages for RECLAIM_WRITE
	 * and RECLAIM_UNMAP.
	 */
	saved_noreclaim = memalloc_noreclaim_save();
	p->flags |= PF_SWAPWRITE;
	set_task_reclaim_state(p, &sc.reclaim_state);

	if (node_pagecache_reclaimable(pgdat) > pgdat->min_unmapped_pages) {
		/*
		 * Free memory by calling shrink node with increasing
		 * priorities until we have enough memory freed.
		 */
		for (;;) {
			shrink_node(pgdat, &sc);
			if (sc.nr_reclaimed >= nr_pages)
				break;
			if (--sc.priority < 0)
				break;
		}
	}

	/* Undo the task setup above in reverse order. */
	set_task_reclaim_state(p, NULL);
	p->flags &= ~PF_SWAPWRITE;
	memalloc_noreclaim_restore(saved_noreclaim);
	fs_reclaim_release(sc.gfp_mask);

	trace_mm_vmscan_node_reclaim_end(sc.nr_reclaimed);

	return sc.nr_reclaimed >= nr_pages;
}
/*
 * node_reclaim - decide whether to reclaim from @pgdat and, if so, do it
 * @pgdat: node to reclaim from
 * @gfp_mask: allocation flags of the triggering request
 * @order: allocation order of the triggering request
 *
 * Returns NODE_RECLAIM_FULL when the node is below both reclaim limits,
 * NODE_RECLAIM_NOSCAN when no scan was attempted, and otherwise the result
 * of __node_reclaim().
 */
int node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned int order)
{
	int reclaimed;

	/*
	 * Node reclaim reclaims unmapped file backed pages and
	 * slab pages if we are over the defined limits.
	 *
	 * A small portion of unmapped file backed pages is needed for
	 * file I/O otherwise pages read by file I/O will be immediately
	 * thrown out if the node is overallocated. So we do not reclaim
	 * if less than a specified percentage of the node is used by
	 * unmapped file backed pages.
	 */
	if (node_pagecache_reclaimable(pgdat) <= pgdat->min_unmapped_pages &&
	    node_page_state(pgdat, NR_SLAB_RECLAIMABLE) <= pgdat->min_slab_pages)
		return NODE_RECLAIM_FULL;

	/* Do not scan if the allocation should not be delayed. */
	if (!gfpflags_allow_blocking(gfp_mask))
		return NODE_RECLAIM_NOSCAN;
	if (current->flags & PF_MEMALLOC)
		return NODE_RECLAIM_NOSCAN;

	/*
	 * Only run node reclaim on the local node or on nodes that do not
	 * have associated processors. This will favor the local processor
	 * over remote processors and spread off node memory allocations
	 * as wide as possible.
	 */
	if (node_state(pgdat->node_id, N_CPU) &&
	    pgdat->node_id != numa_node_id())
		return NODE_RECLAIM_NOSCAN;

	/* The bit was already set: do not scan this node now. */
	if (test_and_set_bit(PGDAT_RECLAIM_LOCKED, &pgdat->flags))
		return NODE_RECLAIM_NOSCAN;

	reclaimed = __node_reclaim(pgdat, gfp_mask, order);
	clear_bit(PGDAT_RECLAIM_LOCKED, &pgdat->flags);

	if (!reclaimed)
		count_vm_event(PGSCAN_ZONE_RECLAIM_FAILED);

	return reclaimed;
}
#endif
/*
 * page_evictable - test whether a page is evictable
 * @page: the page to test
 *
 * Test whether page is evictable--i.e., should be placed on active/inactive
 * lists vs unevictable list.
 *
 * Reasons page might not be evictable:
 * (1) page's mapping marked unevictable
 * (2) page is part of an mlocked VMA
 *
 * Returns 1 if the page is evictable, 0 otherwise.
 */
int page_evictable(struct page *page)
{
	int evictable;

	/* Prevent address_space of inode and swap cache from being freed */
	rcu_read_lock();
	evictable = !(mapping_unevictable(page_mapping(page)) ||
		      PageMlocked(page));
	rcu_read_unlock();

	return evictable;
}
/**
 * check_move_unevictable_pages - check pages for evictability and move to
 * appropriate zone lru list
 * @pvec: pagevec with lru pages to check
 *
 * Checks pages for evictability, if an evictable page is in the unevictable
 * lru list, moves it to the appropriate evictable lru list. This function
 * should be only used for lru pages.
 */
void check_move_unevictable_pages(struct pagevec *pvec)
{
	struct pglist_data *locked = NULL;
	struct lruvec *lruvec;
	int nr_scanned = 0;
	int nr_rescued = 0;
	int idx;

	for (idx = 0; idx < pvec->nr; idx++) {
		struct page *page = pvec->pages[idx];
		struct pglist_data *owner = page_pgdat(page);
		enum lru_list lru;

		nr_scanned++;

		/* Switch lru_lock when this page is on a different node. */
		if (owner != locked) {
			if (locked)
				spin_unlock_irq(&locked->lru_lock);
			locked = owner;
			spin_lock_irq(&locked->lru_lock);
		}
		lruvec = mem_cgroup_page_lruvec(page, locked);

		/* Only pages sitting on the unevictable LRU are candidates. */
		if (!(PageLRU(page) && PageUnevictable(page)))
			continue;
		if (!page_evictable(page))
			continue;

		lru = page_lru_base_type(page);
		VM_BUG_ON_PAGE(PageActive(page), page);
		ClearPageUnevictable(page);
		del_page_from_lru_list(page, lruvec, LRU_UNEVICTABLE);
		add_page_to_lru_list(page, lruvec, lru);
		nr_rescued++;
	}

	/* No lock is held when the pagevec was empty. */
	if (!locked)
		return;

	__count_vm_events(UNEVICTABLE_PGRESCUED, nr_rescued);
	__count_vm_events(UNEVICTABLE_PGSCANNED, nr_scanned);
	spin_unlock_irq(&locked->lru_lock);
}
EXPORT_SYMBOL_GPL(check_move_unevictable_pages);