android_kernel_samsung_sm8650/kernel/time/tick-sched.c
Greg Kroah-Hartman dbb69752f7 This is the 6.1.53 stable release
-----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEEZH8oZUiU471FcZm+ONu9yGCSaT4FAmUBaBUACgkQONu9yGCS
 aT6OkBAArqBSUyCYQJrhoUlFYBnBqF7BLSkj0GwINGSUOlt5ilJ3kZwH9ftjvpWp
 ZtO0Rp/1yH2H5PpcsaLljPl055Sf30e0oCkz6vX16vy17NGnzI4rJi55+nRZbFRH
 tBMhMjblgIJoTiTPEQPSGghENok+QzJ9Imffo4/Wru3w5ytkBnGcPPXreHJw+8V5
 Pjhzg5tcjhz23rk2wzVtR4VfEqWaHQaapv49rKB1Yls578WYn4QXl4jgUyB7rCo7
 9vBB7xy77H1hr9m8ifB/9v1ToV/vw6L1xGPWWWbhsSikFAMBoq34SCsq+6RdeURo
 43CCcFsx1s5acM7NQWvxkoV5Hgl8Hc3WgFsx5eVBlNd+vS6ezkgdYuGmN76t+dF/
 hZ7XGEoEFuoz9NKQC/5rKjdBd2p/IQYx6vf8EpK0IxFPD4h+DY9pn0FvwuAmxAcA
 M41xLYGbXX5l/QJR016B1AYiB3DqVxRRRyQT0yNip+PDAh2N06MOJ84KgMSR9lg7
 jyeFKZM2vQ619RopMIspuHTWxNiMw7x94aUhBnY1oD+fDzaRn+VNL8po6QYHLK8U
 QTDhrWplTbTuGIF72h+1IyX1aUj6ozoCewl9Y9ry1u9jBb7LZoupVd0s1dwqORIk
 2OSo74pDu5F2BT+4hEcCpDRcYvWlfKbZWBunRrMqvHN8BON0Mks=
 =aFyS
 -----END PGP SIGNATURE-----

Merge 6.1.53 into android14-6.1-lts

Changes in 6.1.53
	Revert "bridge: Add extack warning when enabling STP in netns."
	Partially revert "drm/amd/display: Fix possible underflow for displays with large vblank"
	scsi: ufs: Try harder to change the power mode
	Revert "Revert drm/amd/display: Enable Freesync Video Mode by default"
	ARM: dts: imx: Set default tuning step for imx7d usdhc
	ALSA: hda/realtek: Enable 4 amplifiers instead of 2 on a HP platform
	powerpc/boot: Disable power10 features after BOOTAFLAGS assignment
	media: uapi: HEVC: Add num_delta_pocs_of_ref_rps_idx field
	Revert "MIPS: unhide PATA_PLATFORM"
	phy: qcom-snps-femto-v2: use qcom_snps_hsphy_suspend/resume error code
	media: amphion: use dev_err_probe
	media: pulse8-cec: handle possible ping error
	media: pci: cx23885: fix error handling for cx23885 ATSC boards
	9p: virtio: fix unlikely null pointer deref in handle_rerror
	9p: virtio: make sure 'offs' is initialized in zc_request
	ksmbd: fix out of bounds in smb3_decrypt_req()
	ksmbd: validate session id and tree id in compound request
	ksmbd: no response from compound read
	ksmbd: fix out of bounds in init_smb2_rsp_hdr()
	ASoC: da7219: Flush pending AAD IRQ when suspending
	ASoC: da7219: Check for failure reading AAD IRQ events
	ASoC: nau8821: Add DMI quirk mechanism for active-high jack-detect
	ethernet: atheros: fix return value check in atl1c_tso_csum()
	m68k: Fix invalid .section syntax
	s390/dasd: use correct number of retries for ERP requests
	s390/dasd: fix hanging device after request requeue
	fs/nls: make load_nls() take a const parameter
	ASoC: rt5682-sdw: fix for JD event handling in ClockStop Mode0
	ASoc: codecs: ES8316: Fix DMIC config
	ASoC: rt711: fix for JD event handling in ClockStop Mode0
	ASoC: rt711-sdca: fix for JD event handling in ClockStop Mode0
	ASoC: atmel: Fix the 8K sample parameter in I2SC master
	ALSA: usb-audio: Add quirk for Microsoft Modern Wireless Headset
	platform/x86: intel: hid: Always call BTNL ACPI method
	platform/x86/intel/hid: Add HP Dragonfly G2 to VGBS DMI quirks
	platform/x86: think-lmi: Use kfree_sensitive instead of kfree
	platform/x86: asus-wmi: Fix setting RGB mode on some TUF laptops
	platform/x86: huawei-wmi: Silence ambient light sensor
	drm/amd/smu: use AverageGfxclkFrequency* to replace previous GFX Curr Clock
	drm/amd/display: Guard DCN31 PHYD32CLK logic against chip family
	drm/amd/display: Exit idle optimizations before attempt to access PHY
	ovl: Always reevaluate the file signature for IMA
	ata: pata_arasan_cf: Use dev_err_probe() instead dev_err() in data_xfer()
	ALSA: usb-audio: Update for native DSD support quirks
	staging: fbtft: ili9341: use macro FBTFT_REGISTER_SPI_DRIVER
	security: keys: perform capable check only on privileged operations
	kprobes: Prohibit probing on CFI preamble symbol
	clk: fixed-mmio: make COMMON_CLK_FIXED_MMIO depend on HAS_IOMEM
	vmbus_testing: fix wrong python syntax for integer value comparison
	Revert "wifi: ath6k: silence false positive -Wno-dangling-pointer warning on GCC 12"
	net: dsa: microchip: KSZ9477 register regmap alignment to 32 bit boundaries
	net: annotate data-races around sk->sk_{rcv|snd}timeo
	net: usb: qmi_wwan: add Quectel EM05GV2
	wifi: brcmfmac: Fix field-spanning write in brcmf_scan_params_v2_to_v1()
	powerpc/powermac: Use early_* IO variants in via_calibrate_decr()
	idmaengine: make FSL_EDMA and INTEL_IDMA64 depends on HAS_IOMEM
	platform/x86/amd/pmf: Fix unsigned comparison with less than zero
	scsi: lpfc: Remove reftag check in DIF paths
	scsi: qedi: Fix potential deadlock on &qedi_percpu->p_work_lock
	net: hns3: restore user pause configure when disable autoneg
	drm/amdgpu: Match against exact bootloader status
	wifi: cfg80211: remove links only on AP
	wifi: mac80211: Use active_links instead of valid_links in Tx
	netlabel: fix shift wrapping bug in netlbl_catmap_setlong()
	bnx2x: fix page fault following EEH recovery
	cifs: fix sockaddr comparison in iface_cmp
	cifs: fix max_credits implementation
	sctp: handle invalid error codes without calling BUG()
	scsi: aacraid: Reply queue mapping to CPUs based on IRQ affinity
	scsi: storvsc: Always set no_report_opcodes
	scsi: lpfc: Fix incorrect big endian type assignment in bsg loopback path
	LoongArch: Let pmd_present() return true when splitting pmd
	LoongArch: Fix the write_fcsr() macro
	ALSA: seq: oss: Fix racy open/close of MIDI devices
	net: sfp: handle 100G/25G active optical cables in sfp_parse_support
	tracing: Introduce pipe_cpumask to avoid race on trace_pipes
	platform/mellanox: Fix mlxbf-tmfifo not handling all virtio CONSOLE notifications
	of: property: Simplify of_link_to_phandle()
	cpufreq: intel_pstate: set stale CPU frequency to minimum
	crypto: rsa-pkcs1pad - Use helper to set reqsize
	tpm: Enable hwrng only for Pluton on AMD CPUs
	KVM: x86/mmu: Use kstrtobool() instead of strtobool()
	KVM: x86/mmu: Add "never" option to allow sticky disabling of nx_huge_pages
	net: Avoid address overwrite in kernel_connect
	drm/amd/display: ensure async flips are only accepted for fast updates
	udf: Check consistency of Space Bitmap Descriptor
	udf: Handle error when adding extent to a file
	Input: i8042 - add quirk for TUXEDO Gemini 17 Gen1/Clevo PD70PN
	Revert "PCI: tegra194: Enable support for 256 Byte payload"
	Revert "net: macsec: preserve ingress frame ordering"
	tools lib subcmd: Add install target
	tools lib subcmd: Make install_headers clearer
	tools lib subcmd: Add dependency test to install_headers
	tools/resolve_btfids: Use pkg-config to locate libelf
	tools/resolve_btfids: Install subcmd headers
	tools/resolve_btfids: Alter how HOSTCC is forced
	tools/resolve_btfids: Compile resolve_btfids as host program
	tools/resolve_btfids: Tidy HOST_OVERRIDES
	tools/resolve_btfids: Pass HOSTCFLAGS as EXTRA_CFLAGS to prepare targets
	tools/resolve_btfids: Fix setting HOSTCFLAGS
	reiserfs: Check the return value from __getblk()
	eventfd: prevent underflow for eventfd semaphores
	fs: Fix error checking for d_hash_and_lookup()
	iomap: Remove large folio handling in iomap_invalidate_folio()
	tmpfs: verify {g,u}id mount options correctly
	selftests/harness: Actually report SKIP for signal tests
	vfs, security: Fix automount superblock LSM init problem, preventing NFS sb sharing
	ARM: ptrace: Restore syscall restart tracing
	ARM: ptrace: Restore syscall skipping for tracers
	refscale: Fix uninitalized use of wait_queue_head_t
	OPP: Fix passing 0 to PTR_ERR in _opp_attach_genpd()
	selftests/resctrl: Add resctrl.h into build deps
	selftests/resctrl: Don't leak buffer in fill_cache()
	selftests/resctrl: Unmount resctrl FS if child fails to run benchmark
	selftests/resctrl: Close perf value read fd on errors
	arm64/ptrace: Clean up error handling path in sve_set_common()
	sched/psi: Select KERNFS as needed
	x86/decompressor: Don't rely on upper 32 bits of GPRs being preserved
	arm64/sme: Don't use streaming mode to probe the maximum SME VL
	arm64/fpsimd: Only provide the length to cpufeature for xCR registers
	sched/rt: Fix sysctl_sched_rr_timeslice intial value
	perf/imx_ddr: don't enable counter0 if none of 4 counters are used
	selftests/futex: Order calls to futex_lock_pi
	s390/pkey: fix/harmonize internal keyblob headers
	s390/pkey: fix PKEY_TYPE_EP11_AES handling in PKEY_GENSECK2 IOCTL
	s390/pkey: fix PKEY_TYPE_EP11_AES handling for sysfs attributes
	s390/paes: fix PKEY_TYPE_EP11_AES handling for secure keyblobs
	irqchip/loongson-eiointc: Fix return value checking of eiointc_index
	ACPI: x86: s2idle: Post-increment variables when getting constraints
	ACPI: x86: s2idle: Fix a logic error parsing AMD constraints table
	thermal/of: Fix potential uninitialized value access
	cpufreq: amd-pstate-ut: Remove module parameter access
	cpufreq: amd-pstate-ut: Fix kernel panic when loading the driver
	x86/efistub: Fix PCI ROM preservation in mixed mode
	cpufreq: powernow-k8: Use related_cpus instead of cpus in driver.exit()
	selftests/bpf: Fix bpf_nf failure upon test rerun
	bpftool: use a local copy of perf_event to fix accessing :: Bpf_cookie
	bpftool: Define a local bpf_perf_link to fix accessing its fields
	bpftool: Use a local copy of BPF_LINK_TYPE_PERF_EVENT in pid_iter.bpf.c
	bpftool: Use a local bpf_perf_event_value to fix accessing its fields
	libbpf: Fix realloc API handling in zero-sized edge cases
	bpf: Clear the probe_addr for uprobe
	bpf: Fix an error in verifying a field in a union
	crypto: qat - change value of default idle filter
	tcp: tcp_enter_quickack_mode() should be static
	hwrng: nomadik - keep clock enabled while hwrng is registered
	hwrng: pic32 - use devm_clk_get_enabled
	regmap: rbtree: Use alloc_flags for memory allocations
	wifi: rtw89: debug: Fix error handling in rtw89_debug_priv_btc_manual_set()
	wifi: mt76: mt7921: fix non-PSC channel scan fail
	udp: re-score reuseport groups when connected sockets are present
	bpf: reject unhashed sockets in bpf_sk_assign
	wifi: mt76: testmode: add nla_policy for MT76_TM_ATTR_TX_LENGTH
	spi: tegra20-sflash: fix to check return value of platform_get_irq() in tegra_sflash_probe()
	can: gs_usb: gs_usb_receive_bulk_callback(): count RX overflow errors also in case of OOM
	wifi: mt76: mt7915: fix power-limits while chan_switch
	wifi: mwifiex: Fix OOB and integer underflow when rx packets
	wifi: mwifiex: fix error recovery in PCIE buffer descriptor management
	selftests/bpf: fix static assert compilation issue for test_cls_*.c
	kbuild: rust_is_available: remove -v option
	kbuild: rust_is_available: fix version check when CC has multiple arguments
	kbuild: rust_is_available: add check for `bindgen` invocation
	kbuild: rust_is_available: fix confusion when a version appears in the path
	crypto: stm32 - Properly handle pm_runtime_get failing
	crypto: api - Use work queue in crypto_destroy_instance
	Bluetooth: nokia: fix value check in nokia_bluetooth_serdev_probe()
	Bluetooth: Fix potential use-after-free when clear keys
	Bluetooth: hci_sync: Don't double print name in add/remove adv_monitor
	Bluetooth: hci_sync: Avoid use-after-free in dbg for hci_add_adv_monitor()
	net: tcp: fix unexcepted socket die when snd_wnd is 0
	selftests/bpf: Fix repeat option when kfunc_call verification fails
	selftests/bpf: Clean up fmod_ret in bench_rename test script
	net-memcg: Fix scope of sockmem pressure indicators
	ice: ice_aq_check_events: fix off-by-one check when filling buffer
	crypto: caam - fix unchecked return value error
	hwrng: iproc-rng200 - Implement suspend and resume calls
	lwt: Fix return values of BPF xmit ops
	lwt: Check LWTUNNEL_XMIT_CONTINUE strictly
	fs: ocfs2: namei: check return value of ocfs2_add_entry()
	net: annotate data-races around sk->sk_lingertime
	wifi: mwifiex: fix memory leak in mwifiex_histogram_read()
	wifi: mwifiex: Fix missed return in oob checks failed path
	ARM: dts: Add .dts files missing from the build
	samples/bpf: fix bio latency check with tracepoint
	samples/bpf: fix broken map lookup probe
	wifi: ath9k: fix races between ath9k_wmi_cmd and ath9k_wmi_ctrl_rx
	wifi: ath9k: protect WMI command response buffer replacement with a lock
	wifi: nl80211/cfg80211: add forgotten nla_policy for BSS color attribute
	mac80211: make ieee80211_tx_info padding explicit
	wifi: mwifiex: avoid possible NULL skb pointer dereference
	Bluetooth: btusb: Do not call kfree_skb() under spin_lock_irqsave()
	arm64: mm: use ptep_clear() instead of pte_clear() in clear_flush()
	wifi: ath9k: use IS_ERR() with debugfs_create_dir()
	ice: avoid executing commands on other ports when driving sync
	net: arcnet: Do not call kfree_skb() under local_irq_disable()
	mlxsw: i2c: Fix chunk size setting in output mailbox buffer
	mlxsw: i2c: Limit single transaction buffer size
	mlxsw: core_hwmon: Adjust module label names based on MTCAP sensor counter
	hwmon: (tmp513) Fix the channel number in tmp51x_is_visible()
	octeontx2-pf: Refactor schedular queue alloc/free calls
	octeontx2-pf: Fix PFC TX scheduler free
	cteonxt2-pf: Fix backpressure config for multiple PFC priorities to work simultaneously
	sfc: Check firmware supports Ethernet PTP filter
	net/sched: sch_hfsc: Ensure inner classes have fsc curve
	netrom: Deny concurrent connect().
	drm/bridge: tc358764: Fix debug print parameter order
	ASoC: cs43130: Fix numerator/denominator mixup
	quota: factor out dquot_write_dquot()
	quota: rename dquot_active() to inode_quota_active()
	quota: add new helper dquot_active()
	quota: fix dqput() to follow the guarantees dquot_srcu should provide
	drm/amd/display: Do not set drr on pipe commit
	drm/hyperv: Fix a compilation issue because of not including screen_info.h
	ASoC: stac9766: fix build errors with REGMAP_AC97
	soc: qcom: ocmem: Add OCMEM hardware version print
	soc: qcom: ocmem: Fix NUM_PORTS & NUM_MACROS macros
	arm64: dts: qcom: sm6350: Fix ZAP region
	arm64: dts: qcom: sm8250: correct dynamic power coefficients
	arm64: dts: qcom: msm8916-l8150: correct light sensor VDDIO supply
	arm64: dts: qcom: sm8250-edo: Add gpio line names for TLMM
	arm64: dts: qcom: sm8250-edo: Add GPIO line names for PMIC GPIOs
	arm64: dts: qcom: sm8250-edo: Rectify gpio-keys
	arm64: dts: qcom: sc8280xp-crd: Correct vreg_misc_3p3 GPIO
	arm64: dts: qcom: sc8280xp: Add missing SCM interconnect
	arm64: dts: qcom: msm8996: Add missing interrupt to the USB2 controller
	arm64: dts: qcom: sdm845-tama: Set serial indices and stdout-path
	arm64: dts: qcom: sm8350: Fix CPU idle state residency times
	arm64: dts: qcom: sm8350: Add missing LMH interrupts to cpufreq
	arm64: dts: qcom: sm8350: Use proper CPU compatibles
	arm64: dts: qcom: pm8350: fix thermal zone name
	arm64: dts: qcom: pm8350b: fix thermal zone name
	arm64: dts: qcom: pmr735b: fix thermal zone name
	arm64: dts: qcom: pmk8350: fix ADC-TM compatible string
	arm64: dts: qcom: sm8250: Mark PCIe hosts as DMA coherent
	ARM: dts: stm32: Rename mdio0 to mdio
	ARM: dts: stm32: YAML validation fails for Argon Boards
	ARM: dts: stm32: adopt generic iio bindings for adc channels on emstamp-argon
	ARM: dts: stm32: Add missing detach mailbox for emtrion emSBC-Argon
	ARM: dts: stm32: YAML validation fails for Odyssey Boards
	ARM: dts: stm32: Add missing detach mailbox for Odyssey SoM
	ARM: dts: stm32: Update to generic ADC channel binding on DHSOM systems
	ARM: dts: stm32: Add missing detach mailbox for DHCOM SoM
	firmware: ti_sci: Use system_state to determine polling
	drm/amdgpu: avoid integer overflow warning in amdgpu_device_resize_fb_bar()
	ARM: dts: BCM53573: Drop nonexistent #usb-cells
	ARM: dts: BCM53573: Add cells sizes to PCIe node
	ARM: dts: BCM53573: Use updated "spi-gpio" binding properties
	arm64: tegra: Fix HSUART for Jetson AGX Orin
	arm64: dts: qcom: sm8250-sony-xperia: correct GPIO keys wakeup again
	arm64: dts: qcom: pm6150l: Add missing short interrupt
	arm64: dts: qcom: pm660l: Add missing short interrupt
	arm64: dts: qcom: pmi8994: Add missing OVP interrupt
	arm64: tegra: Fix HSUART for Smaug
	drm/etnaviv: fix dumping of active MMU context
	block: cleanup queue_wc_store
	block: don't allow enabling a cache on devices that don't support it
	x86/mm: Fix PAT bit missing from page protection modify mask
	drm/bridge: anx7625: Use common macros for DP power sequencing commands
	drm/bridge: anx7625: Use common macros for HDCP capabilities
	ARM: dts: samsung: s3c6410-mini6410: correct ethernet reg addresses (split)
	ARM: dts: s5pv210: add dummy 5V regulator for backlight on SMDKv210
	ARM: dts: samsung: s5pv210-smdkv210: correct ethernet reg addresses (split)
	drm: adv7511: Fix low refresh rate register for ADV7533/5
	ARM: dts: BCM53573: Fix Ethernet info for Luxul devices
	arm64: dts: qcom: sdm845: Add missing RPMh power domain to GCC
	arm64: dts: qcom: sdm845: Fix the min frequency of "ice_core_clk"
	arm64: dts: qcom: msm8996-gemini: fix touchscreen VIO supply
	drm/amdgpu: Update min() to min_t() in 'amdgpu_info_ioctl'
	md: Factor out is_md_suspended helper
	md: Change active_io to percpu
	md: restore 'noio_flag' for the last mddev_resume()
	md/raid10: factor out dereference_rdev_and_rrdev()
	md/raid10: use dereference_rdev_and_rrdev() to get devices
	md/md-bitmap: remove unnecessary local variable in backlog_store()
	md/md-bitmap: hold 'reconfig_mutex' in backlog_store()
	drm/msm: Update dev core dump to not print backwards
	drm/tegra: dpaux: Fix incorrect return value of platform_get_irq
	of: unittest: fix null pointer dereferencing in of_unittest_find_node_by_name()
	arm64: dts: qcom: sm8150: Fix the I2C7 interrupt
	ARM: dts: BCM53573: Fix Tenda AC9 switch CPU port
	drm/armada: Fix off-by-one error in armada_overlay_get_property()
	drm/repaper: Reduce temporary buffer size in repaper_fb_dirty()
	drm/panel: simple: Add missing connector type and pixel format for AUO T215HVN01
	ima: Remove deprecated IMA_TRUSTED_KEYRING Kconfig
	drm: xlnx: zynqmp_dpsub: Add missing check for dma_set_mask
	soc: qcom: smem: Fix incompatible types in comparison
	drm/msm/mdp5: Don't leak some plane state
	firmware: meson_sm: fix to avoid potential NULL pointer dereference
	drm/msm/dpu: fix the irq index in dpu_encoder_phys_wb_wait_for_commit_done
	smackfs: Prevent underflow in smk_set_cipso()
	drm/amd/pm: fix variable dereferenced issue in amdgpu_device_attr_create()
	drm/msm/a2xx: Call adreno_gpu_init() earlier
	audit: fix possible soft lockup in __audit_inode_child()
	block/mq-deadline: use correct way to throttling write requests
	io_uring: fix drain stalls by invalid SQE
	drm/mediatek: dp: Add missing error checks in mtk_dp_parse_capabilities
	bus: ti-sysc: Fix build warning for 64-bit build
	drm/mediatek: Remove freeing not dynamic allocated memory
	ARM: dts: qcom: ipq4019: correct SDHCI XO clock
	drm/mediatek: Fix potential memory leak if vmap() fail
	arm64: dts: qcom: apq8016-sbc: Fix ov5640 regulator supply names
	arm64: dts: qcom: msm8998: Drop bus clock reference from MMSS SMMU
	arm64: dts: qcom: msm8998: Add missing power domain to MMSS SMMU
	arm64: dts: qcom: msm8996: Fix dsi1 interrupts
	arm64: dts: qcom: sc8280xp-x13s: Unreserve NC pins
	bus: ti-sysc: Fix cast to enum warning
	md/raid5-cache: fix a deadlock in r5l_exit_log()
	md/raid5-cache: fix null-ptr-deref for r5l_flush_stripe_to_raid()
	firmware: cs_dsp: Fix new control name check
	md: add error_handlers for raid0 and linear
	md/raid0: Factor out helper for mapping and submitting a bio
	md/raid0: Fix performance regression for large sequential writes
	md: raid0: account for split bio in iostat accounting
	ASoC: SOF: amd: clear dsp to host interrupt status
	of: overlay: Call of_changeset_init() early
	of: unittest: Fix overlay type in apply/revert check
	ALSA: ac97: Fix possible error value of *rac97
	ipmi:ssif: Add check for kstrdup
	ipmi:ssif: Fix a memory leak when scanning for an adapter
	clk: qcom: gpucc-sm6350: Introduce index-based clk lookup
	clk: qcom: gpucc-sm6350: Fix clock source names
	clk: qcom: gcc-sc8280xp: Add EMAC GDSCs
	clk: qcom: gcc-sc8280xp: Add missing GDSC flags
	dt-bindings: clock: qcom,gcc-sc8280xp: Add missing GDSCs
	clk: qcom: gcc-sc8280xp: Add missing GDSCs
	clk: rockchip: rk3568: Fix PLL rate setting for 78.75MHz
	PCI: apple: Initialize pcie->nvecs before use
	PCI: qcom-ep: Switch MHI bus master clock off during L1SS
	drivers: clk: keystone: Fix parameter judgment in _of_pll_clk_init()
	PCI/DOE: Fix destroy_work_on_stack() race
	clk: sunxi-ng: Modify mismatched function name
	clk: qcom: gcc-sc7180: Fix up gcc_sdcc2_apps_clk_src
	EDAC/igen6: Fix the issue of no error events
	ext4: correct grp validation in ext4_mb_good_group
	ext4: avoid potential data overflow in next_linear_group
	clk: qcom: gcc-sm8250: Fix gcc_sdcc2_apps_clk_src
	kvm/vfio: Prepare for accepting vfio device fd
	kvm/vfio: ensure kvg instance stays around in kvm_vfio_group_add()
	clk: qcom: reset: Use the correct type of sleep/delay based on length
	clk: qcom: gcc-sm6350: Fix gcc_sdcc2_apps_clk_src
	PCI: microchip: Correct the DED and SEC interrupt bit offsets
	PCI: Mark NVIDIA T4 GPUs to avoid bus reset
	pinctrl: mcp23s08: check return value of devm_kasprintf()
	PCI: Allow drivers to request exclusive config regions
	PCI: Add locking to RMW PCI Express Capability Register accessors
	PCI: pciehp: Use RMW accessors for changing LNKCTL
	PCI/ASPM: Use RMW accessors for changing LNKCTL
	clk: qcom: gcc-sm8450: Use floor ops for SDCC RCGs
	clk: imx: pllv4: Fix SPLL2 MULT range
	clk: imx: imx8ulp: update SPLL2 type
	clk: imx8mp: fix sai4 clock
	clk: imx: composite-8m: fix clock pauses when set_rate would be a no-op
	powerpc/radix: Move some functions into #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
	vfio/type1: fix cap_migration information leak
	nvdimm: Fix memleak of pmu attr_groups in unregister_nvdimm_pmu()
	nvdimm: Fix dereference after free in register_nvdimm_pmu()
	powerpc/fadump: reset dump area size if fadump memory reserve fails
	powerpc/perf: Convert fsl_emb notifier to state machine callbacks
	drm/amdgpu: Use RMW accessors for changing LNKCTL
	drm/radeon: Use RMW accessors for changing LNKCTL
	net/mlx5: Use RMW accessors for changing LNKCTL
	wifi: ath11k: Use RMW accessors for changing LNKCTL
	wifi: ath10k: Use RMW accessors for changing LNKCTL
	NFSv4.2: Rework scratch handling for READ_PLUS
	NFSv4.2: Fix READ_PLUS smatch warnings
	NFSv4.2: Fix up READ_PLUS alignment
	NFSv4.2: Fix READ_PLUS size calculations
	powerpc: Don't include lppaca.h in paca.h
	powerpc/pseries: Rework lppaca_shared_proc() to avoid DEBUG_PREEMPT
	nfs/blocklayout: Use the passed in gfp flags
	powerpc/pseries: Fix hcall tracepoints with JUMP_LABEL=n
	powerpc/mpc5xxx: Add missing fwnode_handle_put()
	powerpc/iommu: Fix notifiers being shared by PCI and VIO buses
	ext4: fix unttached inode after power cut with orphan file feature enabled
	jfs: validate max amount of blocks before allocation.
	fs: lockd: avoid possible wrong NULL parameter
	NFSD: da_addr_body field missing in some GETDEVICEINFO replies
	NFS: Guard against READDIR loop when entry names exceed MAXNAMELEN
	NFSv4.2: fix handling of COPY ERR_OFFLOAD_NO_REQ
	pNFS: Fix assignment of xprtdata.cred
	cgroup/cpuset: Inherit parent's load balance state in v2
	RDMA/qedr: Remove a duplicate assignment in irdma_query_ah()
	media: ov5640: fix low resolution image abnormal issue
	media: ad5820: Drop unsupported ad5823 from i2c_ and of_device_id tables
	media: i2c: tvp5150: check return value of devm_kasprintf()
	media: v4l2-core: Fix a potential resource leak in v4l2_fwnode_parse_link()
	iommu/amd/iommu_v2: Fix pasid_state refcount dec hit 0 warning on pasid unbind
	iommu: rockchip: Fix directory table address encoding
	drivers: usb: smsusb: fix error handling code in smsusb_init_device
	media: dib7000p: Fix potential division by zero
	media: dvb-usb: m920x: Fix a potential memory leak in m920x_i2c_xfer()
	media: cx24120: Add retval check for cx24120_message_send()
	RDMA/siw: Fabricate a GID on tun and loopback devices
	scsi: hisi_sas: Fix warnings detected by sparse
	scsi: hisi_sas: Fix normally completed I/O analysed as failed
	dt-bindings: extcon: maxim,max77843: restrict connector properties
	media: amphion: reinit vpu if reqbufs output 0
	media: amphion: add helper function to get id name
	media: mtk-jpeg: Fix use after free bug due to uncanceled work
	media: rkvdec: increase max supported height for H.264
	media: amphion: fix CHECKED_RETURN issues reported by coverity
	media: amphion: fix REVERSE_INULL issues reported by coverity
	media: amphion: fix UNINIT issues reported by coverity
	media: amphion: fix UNUSED_VALUE issue reported by coverity
	media: amphion: ensure the bitops don't cross boundaries
	media: mediatek: vcodec: Return NULL if no vdec_fb is found
	media: mediatek: vcodec: fix potential double free
	media: mediatek: vcodec: fix resource leaks in vdec_msg_queue_init()
	usb: phy: mxs: fix getting wrong state with mxs_phy_is_otg_host()
	scsi: RDMA/srp: Fix residual handling
	scsi: iscsi: Rename iscsi_set_param() to iscsi_if_set_param()
	scsi: iscsi: Add length check for nlattr payload
	scsi: iscsi: Add strlen() check in iscsi_if_set{_host}_param()
	scsi: be2iscsi: Add length check when parsing nlattrs
	scsi: qla4xxx: Add length check when parsing nlattrs
	iio: accel: adxl313: Fix adxl313_i2c_id[] table
	serial: sprd: Assign sprd_port after initialized to avoid wrong access
	serial: sprd: Fix DMA buffer leak issue
	x86/APM: drop the duplicate APM_MINOR_DEV macro
	RDMA/rxe: Split rxe_run_task() into two subroutines
	RDMA/rxe: Fix incomplete state save in rxe_requester
	scsi: qedf: Do not touch __user pointer in qedf_dbg_stop_io_on_error_cmd_read() directly
	scsi: qedf: Do not touch __user pointer in qedf_dbg_debug_cmd_read() directly
	scsi: qedf: Do not touch __user pointer in qedf_dbg_fp_int_cmd_read() directly
	RDMA/irdma: Replace one-element array with flexible-array member
	coresight: tmc: Explicit type conversions to prevent integer overflow
	interconnect: qcom: qcm2290: Enable sync state
	dma-buf/sync_file: Fix docs syntax
	driver core: test_async: fix an error code
	driver core: Call dma_cleanup() on the test_remove path
	kernfs: add stub helper for kernfs_generic_poll()
	extcon: cht_wc: add POWER_SUPPLY dependency
	iommu/mediatek: Remove unused "mapping" member from mtk_iommu_data
	iommu/mediatek: Fix two IOMMU share pagetable issue
	iommu/sprd: Add missing force_aperture
	RDMA/hns: Fix port active speed
	RDMA/hns: Fix incorrect post-send with direct wqe of wr-list
	RDMA/hns: Fix inaccurate error label name in init instance
	RDMA/hns: Fix CQ and QP cache affinity
	IB/uverbs: Fix an potential error pointer dereference
	fsi: aspeed: Reset master errors after CFAM reset
	iommu/qcom: Disable and reset context bank before programming
	iommu/vt-d: Fix to flush cache of PASID directory table
	platform/x86: dell-sysman: Fix reference leak
	media: cec: core: add adap_nb_transmit_canceled() callback
	media: cec: core: add adap_unconfigured() callback
	media: go7007: Remove redundant if statement
	media: venus: hfi_venus: Only consider sys_idle_indicator on V1
	docs: ABI: fix spelling/grammar in SBEFIFO timeout interface
	USB: gadget: core: Add missing kerneldoc for vbus_work
	USB: gadget: f_mass_storage: Fix unused variable warning
	drivers: base: Free devm resources when unregistering a device
	HID: input: Support devices sending Eraser without Invert
	media: ov5640: Enable MIPI interface in ov5640_set_power_mipi()
	media: ov5640: Fix initial RESETB state and annotate timings
	media: i2c: ov2680: Set V4L2_CTRL_FLAG_MODIFY_LAYOUT on flips
	media: ov2680: Remove auto-gain and auto-exposure controls
	media: ov2680: Fix ov2680_bayer_order()
	media: ov2680: Fix vflip / hflip set functions
	media: ov2680: Remove VIDEO_V4L2_SUBDEV_API ifdef-s
	media: ov2680: Don't take the lock for try_fmt calls
	media: ov2680: Add ov2680_fill_format() helper function
	media: ov2680: Fix ov2680_set_fmt() which == V4L2_SUBDEV_FORMAT_TRY not working
	media: ov2680: Fix regulators being left enabled on ov2680_power_on() errors
	media: i2c: rdacm21: Fix uninitialized value
	f2fs: fix to avoid mmap vs set_compress_option case
	f2fs: judge whether discard_unit is section only when have CONFIG_BLK_DEV_ZONED
	f2fs: Only lfs mode is allowed with zoned block device feature
	Revert "f2fs: fix to do sanity check on extent cache correctly"
	cgroup:namespace: Remove unused cgroup_namespaces_init()
	coresight: trbe: Fix TRBE potential sleep in atomic context
	RDMA/irdma: Prevent zero-length STAG registration
	scsi: core: Use 32-bit hostnum in scsi_host_lookup()
	scsi: fcoe: Fix potential deadlock on &fip->ctlr_lock
	interconnect: qcom: sm8450: Enable sync_state
	interconnect: qcom: bcm-voter: Improve enable_mask handling
	interconnect: qcom: bcm-voter: Use enable_maks for keepalive voting
	serial: tegra: handle clk prepare error in tegra_uart_hw_init()
	amba: bus: fix refcount leak
	Revert "IB/isert: Fix incorrect release of isert connection"
	RDMA/siw: Balance the reference of cep->kref in the error path
	RDMA/siw: Correct wrong debug message
	RDMA/efa: Fix wrong resources deallocation order
	HID: logitech-dj: Fix error handling in logi_dj_recv_switch_to_dj_mode()
	HID: uclogic: Correct devm device reference for hidinput input_dev name
	HID: multitouch: Correct devm device reference for hidinput input_dev name
	platform/x86/amd/pmf: Fix a missing cleanup path
	tick/rcu: Fix false positive "softirq work is pending" messages
	x86/speculation: Mark all Skylake CPUs as vulnerable to GDS
	tracing: Remove extra space at the end of hwlat_detector/mode
	tracing: Fix race issue between cpu buffer write and swap
	mtd: rawnand: brcmnand: Fix mtd oobsize
	dmaengine: idxd: Modify the dependence of attribute pasid_enabled
	phy/rockchip: inno-hdmi: use correct vco_div_5 macro on rk3328
	phy/rockchip: inno-hdmi: round fractal pixclock in rk3328 recalc_rate
	phy/rockchip: inno-hdmi: do not power on rk3328 post pll on reg write
	rpmsg: glink: Add check for kstrdup
	leds: pwm: Fix error code in led_pwm_create_fwnode()
	leds: multicolor: Use rounded division when calculating color components
	leds: Fix BUG_ON check for LED_COLOR_ID_MULTI that is always false
	leds: trigger: tty: Do not use LED_ON/OFF constants, use led_blink_set_oneshot instead
	mtd: spi-nor: Check bus width while setting QE bit
	mtd: rawnand: fsmc: handle clk prepare error in fsmc_nand_resume()
	um: Fix hostaudio build errors
	dmaengine: ste_dma40: Add missing IRQ check in d40_probe
	Drivers: hv: vmbus: Don't dereference ACPI root object handle
	cpufreq: Fix the race condition while updating the transition_task of policy
	virtio_ring: fix avail_wrap_counter in virtqueue_add_packed
	igmp: limit igmpv3_newpack() packet size to IP_MAX_MTU
	netfilter: ipset: add the missing IP_SET_HASH_WITH_NET0 macro for ip_set_hash_netportnet.c
	netfilter: nft_exthdr: Fix non-linear header modification
	netfilter: xt_u32: validate user space input
	netfilter: xt_sctp: validate the flag_info count
	skbuff: skb_segment, Call zero copy functions before using skbuff frags
	igb: set max size RX buffer when store bad packet is enabled
	PM / devfreq: Fix leak in devfreq_dev_release()
	ALSA: pcm: Fix missing fixup call in compat hw_refine ioctl
	rcu: dump vmalloc memory info safely
	printk: ringbuffer: Fix truncating buffer size min_t cast
	scsi: core: Fix the scsi_set_resid() documentation
	mm/vmalloc: add a safer version of find_vm_area() for debug
	cpu/hotplug: Prevent self deadlock on CPU hot-unplug
	media: i2c: ccs: Check rules is non-NULL
	media: i2c: Add a camera sensor top level menu
	PCI: rockchip: Use 64-bit mask on MSI 64-bit PCI address
	ipmi_si: fix a memleak in try_smi_init()
	ARM: OMAP2+: Fix -Warray-bounds warning in _pwrdm_state_switch()
	XArray: Do not return sibling entries from xa_load()
	io_uring: break iopolling on signal
	backlight/gpio_backlight: Compare against struct fb_info.device
	backlight/bd6107: Compare against struct fb_info.device
	backlight/lv5207lp: Compare against struct fb_info.device
	drm/amd/display: register edp_backlight_control() for DCN301
	xtensa: PMU: fix base address for the newer hardware
	LoongArch: mm: Add p?d_leaf() definitions
	i3c: master: svc: fix probe failure when no i3c device exist
	arm64: csum: Fix OoB access in IP checksum code for negative lengths
	ALSA: hda/cirrus: Fix broken audio on hardware with two CS42L42 codecs.
	media: dvb: symbol fixup for dvb_attach()
	media: venus: hfi_venus: Write to VIDC_CTRL_INIT after unmasking interrupts
	Revert "scsi: qla2xxx: Fix buffer overrun"
	scsi: mpt3sas: Perform additional retries if doorbell read returns 0
	PCI: Free released resource after coalescing
	PCI: hv: Fix a crash in hv_pci_restore_msi_msg() during hibernation
	PCI/PM: Only read PCI_PM_CTRL register when available
	ntb: Drop packets when qp link is down
	ntb: Clean up tx tail index on link down
	ntb: Fix calculation ntb_transport_tx_free_entry()
	Revert "PCI: Mark NVIDIA T4 GPUs to avoid bus reset"
	block: don't add or resize partition on the disk with GENHD_FL_NO_PART
	procfs: block chmod on /proc/thread-self/comm
	parisc: Fix /proc/cpuinfo output for lscpu
	drm/amd/display: Add smu write msg id fail retry process
	bpf: Fix issue in verifying allow_ptr_leaks
	dlm: fix plock lookup when using multiple lockspaces
	dccp: Fix out of bounds access in DCCP error handler
	x86/sev: Make enc_dec_hypercall() accept a size instead of npages
	r8169: fix ASPM-related issues on a number of systems with NIC version from RTL8168h
	X.509: if signature is unsupported skip validation
	net: handle ARPHRD_PPP in dev_is_mac_header_xmit()
	fsverity: skip PKCS#7 parser when keyring is empty
	x86/MCE: Always save CS register on AMD Zen IF Poison errors
	platform/chrome: chromeos_acpi: print hex string for ACPI_TYPE_BUFFER
	mmc: renesas_sdhi: register irqs before registering controller
	pstore/ram: Check start of empty przs during init
	arm64: sdei: abort running SDEI handlers during crash
	s390/dcssblk: fix kernel crash with list_add corruption
	s390/ipl: add missing secure/has_secure file to ipl type 'unknown'
	s390/dasd: fix string length handling
	crypto: stm32 - fix loop iterating through scatterlist for DMA
	cpufreq: brcmstb-avs-cpufreq: Fix -Warray-bounds bug
	of: property: fw_devlink: Add a devlink for panel followers
	usb: typec: tcpm: set initial svdm version based on pd revision
	usb: typec: bus: verify partner exists in typec_altmode_attention
	USB: core: Unite old scheme and new scheme descriptor reads
	USB: core: Change usb_get_device_descriptor() API
	USB: core: Fix race by not overwriting udev->descriptor in hub_port_init()
	USB: core: Fix oversight in SuperSpeed initialization
	x86/sgx: Break up long non-preemptible delays in sgx_vepc_release()
	perf/x86/uncore: Correct the number of CHAs on EMR
	serial: sc16is7xx: remove obsolete out_thread label
	serial: sc16is7xx: fix regression with GPIO configuration
	tracing: Zero the pipe cpumask on alloc to avoid spurious -EBUSY
	Revert "drm/amd/display: Do not set drr on pipe commit"
	md: Free resources in __md_stop
	NFSv4.2: Fix a potential double free with READ_PLUS
	NFSv4.2: Rework scratch handling for READ_PLUS (again)
	md: fix regression for null-ptr-deference in __md_stop()
	clk: Mark a fwnode as initialized when using CLK_OF_DECLARE() macro
	treewide: Fix probing of devices in DT overlays
	clk: Avoid invalid function names in CLK_OF_DECLARE()
	udf: initialize newblock to 0
	Linux 6.1.53

Change-Id: I6f5858bce0f20963ae42515eac36ac14cb686f24
Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
2023-09-18 09:57:37 +00:00

1622 lines
40 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
* Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
* Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner
*
* No idle tick implementation for low and high resolution timers
*
* Started by: Thomas Gleixner and Ingo Molnar
*/
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
#include <linux/percpu.h>
#include <linux/nmi.h>
#include <linux/profile.h>
#include <linux/sched/signal.h>
#include <linux/sched/clock.h>
#include <linux/sched/stat.h>
#include <linux/sched/nohz.h>
#include <linux/sched/loadavg.h>
#include <linux/module.h>
#include <linux/irq_work.h>
#include <linux/posix-timers.h>
#include <linux/context_tracking.h>
#include <linux/mm.h>
#include <trace/hooks/sched.h>
#include <asm/irq_regs.h>
#include "tick-internal.h"
#include <trace/events/timer.h>
/*
* Per-CPU nohz control structure
*/
static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched);
struct tick_sched *tick_get_tick_sched(int cpu)
{
return &per_cpu(tick_cpu_sched, cpu);
}
#if defined(CONFIG_NO_HZ_COMMON) || defined(CONFIG_HIGH_RES_TIMERS)
/*
* The time, when the last jiffy update happened. Write access must hold
* jiffies_lock and jiffies_seq. tick_nohz_next_event() needs to get a
* consistent view of jiffies and last_jiffies_update.
*/
static ktime_t last_jiffies_update;
/*
* Must be called with interrupts disabled !
*/
static void tick_do_update_jiffies64(ktime_t now)
{
unsigned long ticks = 1;
ktime_t delta, nextp;
/*
* 64bit can do a quick check without holding jiffies lock and
* without looking at the sequence count. The smp_load_acquire()
* pairs with the update done later in this function.
*
* 32bit cannot do that because the store of tick_next_period
* consists of two 32bit stores and the first store could move it
* to a random point in the future.
*/
if (IS_ENABLED(CONFIG_64BIT)) {
if (ktime_before(now, smp_load_acquire(&tick_next_period)))
return;
} else {
unsigned int seq;
/*
* Avoid contention on jiffies_lock and protect the quick
* check with the sequence count.
*/
do {
seq = read_seqcount_begin(&jiffies_seq);
nextp = tick_next_period;
} while (read_seqcount_retry(&jiffies_seq, seq));
if (ktime_before(now, nextp))
return;
}
/* Quick check failed, i.e. update is required. */
raw_spin_lock(&jiffies_lock);
/*
* Reevaluate with the lock held. Another CPU might have done the
* update already.
*/
if (ktime_before(now, tick_next_period)) {
raw_spin_unlock(&jiffies_lock);
return;
}
write_seqcount_begin(&jiffies_seq);
delta = ktime_sub(now, tick_next_period);
if (unlikely(delta >= TICK_NSEC)) {
/* Slow path for long idle sleep times */
s64 incr = TICK_NSEC;
ticks += ktime_divns(delta, incr);
last_jiffies_update = ktime_add_ns(last_jiffies_update,
incr * ticks);
} else {
last_jiffies_update = ktime_add_ns(last_jiffies_update,
TICK_NSEC);
}
/* Advance jiffies to complete the jiffies_seq protected job */
jiffies_64 += ticks;
/*
* Keep the tick_next_period variable up to date.
*/
nextp = ktime_add_ns(last_jiffies_update, TICK_NSEC);
if (IS_ENABLED(CONFIG_64BIT)) {
/*
* Pairs with smp_load_acquire() in the lockless quick
* check above and ensures that the update to jiffies_64 is
* not reordered vs. the store to tick_next_period, neither
* by the compiler nor by the CPU.
*/
smp_store_release(&tick_next_period, nextp);
} else {
/*
* A plain store is good enough on 32bit as the quick check
* above is protected by the sequence count.
*/
tick_next_period = nextp;
}
/*
* Release the sequence count. calc_global_load() below is not
* protected by it, but jiffies_lock needs to be held to prevent
* concurrent invocations.
*/
write_seqcount_end(&jiffies_seq);
calc_global_load();
raw_spin_unlock(&jiffies_lock);
update_wall_time();
}
/*
* Initialize and return retrieve the jiffies update.
*/
static ktime_t tick_init_jiffy_update(void)
{
ktime_t period;
raw_spin_lock(&jiffies_lock);
write_seqcount_begin(&jiffies_seq);
/* Did we start the jiffies update yet ? */
if (last_jiffies_update == 0) {
u32 rem;
/*
* Ensure that the tick is aligned to a multiple of
* TICK_NSEC.
*/
div_u64_rem(tick_next_period, TICK_NSEC, &rem);
if (rem)
tick_next_period += TICK_NSEC - rem;
last_jiffies_update = tick_next_period;
}
period = last_jiffies_update;
write_seqcount_end(&jiffies_seq);
raw_spin_unlock(&jiffies_lock);
return period;
}
#define MAX_STALLED_JIFFIES 5
static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
{
int cpu = smp_processor_id();
#ifdef CONFIG_NO_HZ_COMMON
/*
* Check if the do_timer duty was dropped. We don't care about
* concurrency: This happens only when the CPU in charge went
* into a long sleep. If two CPUs happen to assign themselves to
* this duty, then the jiffies update is still serialized by
* jiffies_lock.
*
* If nohz_full is enabled, this should not happen because the
* tick_do_timer_cpu never relinquishes.
*/
if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) {
#ifdef CONFIG_NO_HZ_FULL
WARN_ON_ONCE(tick_nohz_full_running);
#endif
tick_do_timer_cpu = cpu;
}
#endif
/* Check, if the jiffies need an update */
if (tick_do_timer_cpu == cpu) {
tick_do_update_jiffies64(now);
trace_android_vh_jiffies_update(NULL);
}
/*
* If jiffies update stalled for too long (timekeeper in stop_machine()
* or VMEXIT'ed for several msecs), force an update.
*/
if (ts->last_tick_jiffies != jiffies) {
ts->stalled_jiffies = 0;
ts->last_tick_jiffies = READ_ONCE(jiffies);
} else {
if (++ts->stalled_jiffies == MAX_STALLED_JIFFIES) {
tick_do_update_jiffies64(now);
ts->stalled_jiffies = 0;
ts->last_tick_jiffies = READ_ONCE(jiffies);
}
}
if (ts->inidle)
ts->got_idle_tick = 1;
}
static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
{
#ifdef CONFIG_NO_HZ_COMMON
/*
* When we are idle and the tick is stopped, we have to touch
* the watchdog as we might not schedule for a really long
* time. This happens on complete idle SMP systems while
* waiting on the login prompt. We also increment the "start of
* idle" jiffy stamp so the idle accounting adjustment we do
* when we go busy again does not account too much ticks.
*/
if (ts->tick_stopped) {
touch_softlockup_watchdog_sched();
if (is_idle_task(current))
ts->idle_jiffies++;
/*
* In case the current tick fired too early past its expected
* expiration, make sure we don't bypass the next clock reprogramming
* to the same deadline.
*/
ts->next_tick = 0;
}
#endif
update_process_times(user_mode(regs));
profile_tick(CPU_PROFILING);
}
#endif
#ifdef CONFIG_NO_HZ_FULL
cpumask_var_t tick_nohz_full_mask;
EXPORT_SYMBOL_GPL(tick_nohz_full_mask);
bool tick_nohz_full_running;
EXPORT_SYMBOL_GPL(tick_nohz_full_running);
static atomic_t tick_dep_mask;
static bool check_tick_dependency(atomic_t *dep)
{
int val = atomic_read(dep);
if (val & TICK_DEP_MASK_POSIX_TIMER) {
trace_tick_stop(0, TICK_DEP_MASK_POSIX_TIMER);
return true;
}
if (val & TICK_DEP_MASK_PERF_EVENTS) {
trace_tick_stop(0, TICK_DEP_MASK_PERF_EVENTS);
return true;
}
if (val & TICK_DEP_MASK_SCHED) {
trace_tick_stop(0, TICK_DEP_MASK_SCHED);
return true;
}
if (val & TICK_DEP_MASK_CLOCK_UNSTABLE) {
trace_tick_stop(0, TICK_DEP_MASK_CLOCK_UNSTABLE);
return true;
}
if (val & TICK_DEP_MASK_RCU) {
trace_tick_stop(0, TICK_DEP_MASK_RCU);
return true;
}
if (val & TICK_DEP_MASK_RCU_EXP) {
trace_tick_stop(0, TICK_DEP_MASK_RCU_EXP);
return true;
}
return false;
}
static bool can_stop_full_tick(int cpu, struct tick_sched *ts)
{
lockdep_assert_irqs_disabled();
if (unlikely(!cpu_online(cpu)))
return false;
if (check_tick_dependency(&tick_dep_mask))
return false;
if (check_tick_dependency(&ts->tick_dep_mask))
return false;
if (check_tick_dependency(&current->tick_dep_mask))
return false;
if (check_tick_dependency(&current->signal->tick_dep_mask))
return false;
return true;
}
static void nohz_full_kick_func(struct irq_work *work)
{
/* Empty, the tick restart happens on tick_nohz_irq_exit() */
}
static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) =
IRQ_WORK_INIT_HARD(nohz_full_kick_func);
/*
* Kick this CPU if it's full dynticks in order to force it to
* re-evaluate its dependency on the tick and restart it if necessary.
* This kick, unlike tick_nohz_full_kick_cpu() and tick_nohz_full_kick_all(),
* is NMI safe.
*/
static void tick_nohz_full_kick(void)
{
if (!tick_nohz_full_cpu(smp_processor_id()))
return;
irq_work_queue(this_cpu_ptr(&nohz_full_kick_work));
}
/*
* Kick the CPU if it's full dynticks in order to force it to
* re-evaluate its dependency on the tick and restart it if necessary.
*/
void tick_nohz_full_kick_cpu(int cpu)
{
if (!tick_nohz_full_cpu(cpu))
return;
irq_work_queue_on(&per_cpu(nohz_full_kick_work, cpu), cpu);
}
static void tick_nohz_kick_task(struct task_struct *tsk)
{
int cpu;
/*
* If the task is not running, run_posix_cpu_timers()
* has nothing to elapse, IPI can then be spared.
*
* activate_task() STORE p->tick_dep_mask
* STORE p->on_rq
* __schedule() (switch to task 'p') smp_mb() (atomic_fetch_or())
* LOCK rq->lock LOAD p->on_rq
* smp_mb__after_spin_lock()
* tick_nohz_task_switch()
* LOAD p->tick_dep_mask
*/
if (!sched_task_on_rq(tsk))
return;
/*
* If the task concurrently migrates to another CPU,
* we guarantee it sees the new tick dependency upon
* schedule.
*
* set_task_cpu(p, cpu);
* STORE p->cpu = @cpu
* __schedule() (switch to task 'p')
* LOCK rq->lock
* smp_mb__after_spin_lock() STORE p->tick_dep_mask
* tick_nohz_task_switch() smp_mb() (atomic_fetch_or())
* LOAD p->tick_dep_mask LOAD p->cpu
*/
cpu = task_cpu(tsk);
preempt_disable();
if (cpu_online(cpu))
tick_nohz_full_kick_cpu(cpu);
preempt_enable();
}
/*
* Kick all full dynticks CPUs in order to force these to re-evaluate
* their dependency on the tick and restart it if necessary.
*/
static void tick_nohz_full_kick_all(void)
{
int cpu;
if (!tick_nohz_full_running)
return;
preempt_disable();
for_each_cpu_and(cpu, tick_nohz_full_mask, cpu_online_mask)
tick_nohz_full_kick_cpu(cpu);
preempt_enable();
}
static void tick_nohz_dep_set_all(atomic_t *dep,
enum tick_dep_bits bit)
{
int prev;
prev = atomic_fetch_or(BIT(bit), dep);
if (!prev)
tick_nohz_full_kick_all();
}
/*
* Set a global tick dependency. Used by perf events that rely on freq and
* by unstable clock.
*/
void tick_nohz_dep_set(enum tick_dep_bits bit)
{
tick_nohz_dep_set_all(&tick_dep_mask, bit);
}
void tick_nohz_dep_clear(enum tick_dep_bits bit)
{
atomic_andnot(BIT(bit), &tick_dep_mask);
}
/*
* Set per-CPU tick dependency. Used by scheduler and perf events in order to
* manage events throttling.
*/
void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit)
{
int prev;
struct tick_sched *ts;
ts = per_cpu_ptr(&tick_cpu_sched, cpu);
prev = atomic_fetch_or(BIT(bit), &ts->tick_dep_mask);
if (!prev) {
preempt_disable();
/* Perf needs local kick that is NMI safe */
if (cpu == smp_processor_id()) {
tick_nohz_full_kick();
} else {
/* Remote irq work not NMI-safe */
if (!WARN_ON_ONCE(in_nmi()))
tick_nohz_full_kick_cpu(cpu);
}
preempt_enable();
}
}
EXPORT_SYMBOL_GPL(tick_nohz_dep_set_cpu);
void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit)
{
struct tick_sched *ts = per_cpu_ptr(&tick_cpu_sched, cpu);
atomic_andnot(BIT(bit), &ts->tick_dep_mask);
}
EXPORT_SYMBOL_GPL(tick_nohz_dep_clear_cpu);
/*
* Set a per-task tick dependency. RCU need this. Also posix CPU timers
* in order to elapse per task timers.
*/
void tick_nohz_dep_set_task(struct task_struct *tsk, enum tick_dep_bits bit)
{
if (!atomic_fetch_or(BIT(bit), &tsk->tick_dep_mask))
tick_nohz_kick_task(tsk);
}
EXPORT_SYMBOL_GPL(tick_nohz_dep_set_task);
void tick_nohz_dep_clear_task(struct task_struct *tsk, enum tick_dep_bits bit)
{
atomic_andnot(BIT(bit), &tsk->tick_dep_mask);
}
EXPORT_SYMBOL_GPL(tick_nohz_dep_clear_task);
/*
* Set a per-taskgroup tick dependency. Posix CPU timers need this in order to elapse
* per process timers.
*/
void tick_nohz_dep_set_signal(struct task_struct *tsk,
enum tick_dep_bits bit)
{
int prev;
struct signal_struct *sig = tsk->signal;
prev = atomic_fetch_or(BIT(bit), &sig->tick_dep_mask);
if (!prev) {
struct task_struct *t;
lockdep_assert_held(&tsk->sighand->siglock);
__for_each_thread(sig, t)
tick_nohz_kick_task(t);
}
}
void tick_nohz_dep_clear_signal(struct signal_struct *sig, enum tick_dep_bits bit)
{
atomic_andnot(BIT(bit), &sig->tick_dep_mask);
}
/*
* Re-evaluate the need for the tick as we switch the current task.
* It might need the tick due to per task/process properties:
* perf events, posix CPU timers, ...
*/
void __tick_nohz_task_switch(void)
{
struct tick_sched *ts;
if (!tick_nohz_full_cpu(smp_processor_id()))
return;
ts = this_cpu_ptr(&tick_cpu_sched);
if (ts->tick_stopped) {
if (atomic_read(&current->tick_dep_mask) ||
atomic_read(&current->signal->tick_dep_mask))
tick_nohz_full_kick();
}
}
/* Get the boot-time nohz CPU list from the kernel parameters. */
void __init tick_nohz_full_setup(cpumask_var_t cpumask)
{
alloc_bootmem_cpumask_var(&tick_nohz_full_mask);
cpumask_copy(tick_nohz_full_mask, cpumask);
tick_nohz_full_running = true;
}
bool tick_nohz_cpu_hotpluggable(unsigned int cpu)
{
/*
* The tick_do_timer_cpu CPU handles housekeeping duty (unbound
* timers, workqueues, timekeeping, ...) on behalf of full dynticks
* CPUs. It must remain online when nohz full is enabled.
*/
if (tick_nohz_full_running && tick_do_timer_cpu == cpu)
return false;
return true;
}
static int tick_nohz_cpu_down(unsigned int cpu)
{
return tick_nohz_cpu_hotpluggable(cpu) ? 0 : -EBUSY;
}
void __init tick_nohz_init(void)
{
int cpu, ret;
if (!tick_nohz_full_running)
return;
/*
* Full dynticks uses irq work to drive the tick rescheduling on safe
* locking contexts. But then we need irq work to raise its own
* interrupts to avoid circular dependency on the tick
*/
if (!arch_irq_work_has_interrupt()) {
pr_warn("NO_HZ: Can't run full dynticks because arch doesn't support irq work self-IPIs\n");
cpumask_clear(tick_nohz_full_mask);
tick_nohz_full_running = false;
return;
}
if (IS_ENABLED(CONFIG_PM_SLEEP_SMP) &&
!IS_ENABLED(CONFIG_PM_SLEEP_SMP_NONZERO_CPU)) {
cpu = smp_processor_id();
if (cpumask_test_cpu(cpu, tick_nohz_full_mask)) {
pr_warn("NO_HZ: Clearing %d from nohz_full range "
"for timekeeping\n", cpu);
cpumask_clear_cpu(cpu, tick_nohz_full_mask);
}
}
for_each_cpu(cpu, tick_nohz_full_mask)
ct_cpu_track_user(cpu);
ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
"kernel/nohz:predown", NULL,
tick_nohz_cpu_down);
WARN_ON(ret < 0);
pr_info("NO_HZ: Full dynticks CPUs: %*pbl.\n",
cpumask_pr_args(tick_nohz_full_mask));
}
#endif
/*
* NOHZ - aka dynamic tick functionality
*/
#ifdef CONFIG_NO_HZ_COMMON
/*
* NO HZ enabled ?
*/
bool tick_nohz_enabled __read_mostly = true;
unsigned long tick_nohz_active __read_mostly;
/*
* Enable / Disable tickless mode
*/
static int __init setup_tick_nohz(char *str)
{
return (kstrtobool(str, &tick_nohz_enabled) == 0);
}
__setup("nohz=", setup_tick_nohz);
bool tick_nohz_tick_stopped(void)
{
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
return ts->tick_stopped;
}
bool tick_nohz_tick_stopped_cpu(int cpu)
{
struct tick_sched *ts = per_cpu_ptr(&tick_cpu_sched, cpu);
return ts->tick_stopped;
}
/**
* tick_nohz_update_jiffies - update jiffies when idle was interrupted
*
* Called from interrupt entry when the CPU was idle
*
* In case the sched_tick was stopped on this CPU, we have to check if jiffies
* must be updated. Otherwise an interrupt handler could use a stale jiffy
* value. We do this unconditionally on any CPU, as we don't know whether the
* CPU, which has the update task assigned is in a long sleep.
*/
static void tick_nohz_update_jiffies(ktime_t now)
{
unsigned long flags;
__this_cpu_write(tick_cpu_sched.idle_waketime, now);
local_irq_save(flags);
tick_do_update_jiffies64(now);
local_irq_restore(flags);
touch_softlockup_watchdog_sched();
}
/*
* Updates the per-CPU time idle statistics counters
*/
static void
update_ts_time_stats(int cpu, struct tick_sched *ts, ktime_t now, u64 *last_update_time)
{
ktime_t delta;
if (ts->idle_active) {
delta = ktime_sub(now, ts->idle_entrytime);
if (nr_iowait_cpu(cpu) > 0)
ts->iowait_sleeptime = ktime_add(ts->iowait_sleeptime, delta);
else
ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
ts->idle_entrytime = now;
}
if (last_update_time)
*last_update_time = ktime_to_us(now);
}
static void tick_nohz_stop_idle(struct tick_sched *ts, ktime_t now)
{
update_ts_time_stats(smp_processor_id(), ts, now, NULL);
ts->idle_active = 0;
sched_clock_idle_wakeup_event();
}
static void tick_nohz_start_idle(struct tick_sched *ts)
{
ts->idle_entrytime = ktime_get();
ts->idle_active = 1;
sched_clock_idle_sleep_event();
}
/**
* get_cpu_idle_time_us - get the total idle time of a CPU
* @cpu: CPU number to query
* @last_update_time: variable to store update time in. Do not update
* counters if NULL.
*
* Return the cumulative idle time (since boot) for a given
* CPU, in microseconds.
*
* This time is measured via accounting rather than sampling,
* and is as accurate as ktime_get() is.
*
* This function returns -1 if NOHZ is not enabled.
*/
u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time)
{
struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
ktime_t now, idle;
if (!tick_nohz_active)
return -1;
now = ktime_get();
if (last_update_time) {
update_ts_time_stats(cpu, ts, now, last_update_time);
idle = ts->idle_sleeptime;
} else {
if (ts->idle_active && !nr_iowait_cpu(cpu)) {
ktime_t delta = ktime_sub(now, ts->idle_entrytime);
idle = ktime_add(ts->idle_sleeptime, delta);
} else {
idle = ts->idle_sleeptime;
}
}
return ktime_to_us(idle);
}
EXPORT_SYMBOL_GPL(get_cpu_idle_time_us);
/**
* get_cpu_iowait_time_us - get the total iowait time of a CPU
* @cpu: CPU number to query
* @last_update_time: variable to store update time in. Do not update
* counters if NULL.
*
* Return the cumulative iowait time (since boot) for a given
* CPU, in microseconds.
*
* This time is measured via accounting rather than sampling,
* and is as accurate as ktime_get() is.
*
* This function returns -1 if NOHZ is not enabled.
*/
u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time)
{
struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
ktime_t now, iowait;
if (!tick_nohz_active)
return -1;
now = ktime_get();
if (last_update_time) {
update_ts_time_stats(cpu, ts, now, last_update_time);
iowait = ts->iowait_sleeptime;
} else {
if (ts->idle_active && nr_iowait_cpu(cpu) > 0) {
ktime_t delta = ktime_sub(now, ts->idle_entrytime);
iowait = ktime_add(ts->iowait_sleeptime, delta);
} else {
iowait = ts->iowait_sleeptime;
}
}
return ktime_to_us(iowait);
}
EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us);
static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
{
hrtimer_cancel(&ts->sched_timer);
hrtimer_set_expires(&ts->sched_timer, ts->last_tick);
/* Forward the time to expire in the future */
hrtimer_forward(&ts->sched_timer, now, TICK_NSEC);
if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
hrtimer_start_expires(&ts->sched_timer,
HRTIMER_MODE_ABS_PINNED_HARD);
} else {
tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
}
/*
* Reset to make sure next tick stop doesn't get fooled by past
* cached clock deadline.
*/
ts->next_tick = 0;
}
static inline bool local_timer_softirq_pending(void)
{
return local_softirq_pending() & BIT(TIMER_SOFTIRQ);
}
static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
{
u64 basemono, next_tick, delta, expires;
unsigned long basejiff;
unsigned int seq;
/* Read jiffies and the time when jiffies were updated last */
do {
seq = read_seqcount_begin(&jiffies_seq);
basemono = last_jiffies_update;
basejiff = jiffies;
} while (read_seqcount_retry(&jiffies_seq, seq));
ts->last_jiffies = basejiff;
ts->timer_expires_base = basemono;
/*
* Keep the periodic tick, when RCU, architecture or irq_work
* requests it.
* Aside of that check whether the local timer softirq is
* pending. If so its a bad idea to call get_next_timer_interrupt()
* because there is an already expired timer, so it will request
* immediate expiry, which rearms the hardware timer with a
* minimal delta which brings us back to this place
* immediately. Lather, rinse and repeat...
*/
if (rcu_needs_cpu() || arch_needs_cpu() ||
irq_work_needs_cpu() || local_timer_softirq_pending()) {
next_tick = basemono + TICK_NSEC;
} else {
/*
* Get the next pending timer. If high resolution
* timers are enabled this only takes the timer wheel
* timers into account. If high resolution timers are
* disabled this also looks at the next expiring
* hrtimer.
*/
next_tick = get_next_timer_interrupt(basejiff, basemono);
ts->next_timer = next_tick;
}
/*
* If the tick is due in the next period, keep it ticking or
* force prod the timer.
*/
delta = next_tick - basemono;
if (delta <= (u64)TICK_NSEC) {
/*
* Tell the timer code that the base is not idle, i.e. undo
* the effect of get_next_timer_interrupt():
*/
timer_clear_idle();
/*
* We've not stopped the tick yet, and there's a timer in the
* next period, so no point in stopping it either, bail.
*/
if (!ts->tick_stopped) {
ts->timer_expires = 0;
goto out;
}
}
/*
* If this CPU is the one which had the do_timer() duty last, we limit
* the sleep time to the timekeeping max_deferment value.
* Otherwise we can sleep as long as we want.
*/
delta = timekeeping_max_deferment();
if (cpu != tick_do_timer_cpu &&
(tick_do_timer_cpu != TICK_DO_TIMER_NONE || !ts->do_timer_last))
delta = KTIME_MAX;
/* Calculate the next expiry time */
if (delta < (KTIME_MAX - basemono))
expires = basemono + delta;
else
expires = KTIME_MAX;
ts->timer_expires = min_t(u64, expires, next_tick);
out:
return ts->timer_expires;
}
static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
{
struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
u64 basemono = ts->timer_expires_base;
u64 expires = ts->timer_expires;
ktime_t tick = expires;
/* Make sure we won't be trying to stop it twice in a row. */
ts->timer_expires_base = 0;
/*
* If this CPU is the one which updates jiffies, then give up
* the assignment and let it be taken by the CPU which runs
* the tick timer next, which might be this CPU as well. If we
* don't drop this here the jiffies might be stale and
* do_timer() never invoked. Keep track of the fact that it
* was the one which had the do_timer() duty last.
*/
if (cpu == tick_do_timer_cpu) {
tick_do_timer_cpu = TICK_DO_TIMER_NONE;
ts->do_timer_last = 1;
} else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
ts->do_timer_last = 0;
}
/* Skip reprogram of event if its not changed */
if (ts->tick_stopped && (expires == ts->next_tick)) {
/* Sanity check: make sure clockevent is actually programmed */
if (tick == KTIME_MAX || ts->next_tick == hrtimer_get_expires(&ts->sched_timer))
return;
WARN_ON_ONCE(1);
printk_once("basemono: %llu ts->next_tick: %llu dev->next_event: %llu timer->active: %d timer->expires: %llu\n",
basemono, ts->next_tick, dev->next_event,
hrtimer_active(&ts->sched_timer), hrtimer_get_expires(&ts->sched_timer));
}
/*
* nohz_stop_sched_tick can be called several times before
* the nohz_restart_sched_tick is called. This happens when
* interrupts arrive which do not cause a reschedule. In the
* first call we save the current tick time, so we can restart
* the scheduler tick in nohz_restart_sched_tick.
*/
if (!ts->tick_stopped) {
calc_load_nohz_start();
quiet_vmstat();
ts->last_tick = hrtimer_get_expires(&ts->sched_timer);
ts->tick_stopped = 1;
trace_tick_stop(1, TICK_DEP_MASK_NONE);
}
ts->next_tick = tick;
/*
* If the expiration time == KTIME_MAX, then we simply stop
* the tick timer.
*/
if (unlikely(expires == KTIME_MAX)) {
if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
hrtimer_cancel(&ts->sched_timer);
else
tick_program_event(KTIME_MAX, 1);
return;
}
if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
hrtimer_start(&ts->sched_timer, tick,
HRTIMER_MODE_ABS_PINNED_HARD);
} else {
hrtimer_set_expires(&ts->sched_timer, tick);
tick_program_event(tick, 1);
}
}
static void tick_nohz_retain_tick(struct tick_sched *ts)
{
ts->timer_expires_base = 0;
}
#ifdef CONFIG_NO_HZ_FULL
static void tick_nohz_stop_sched_tick(struct tick_sched *ts, int cpu)
{
if (tick_nohz_next_event(ts, cpu))
tick_nohz_stop_tick(ts, cpu);
else
tick_nohz_retain_tick(ts);
}
#endif /* CONFIG_NO_HZ_FULL */
static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
{
/* Update jiffies first */
tick_do_update_jiffies64(now);
/*
* Clear the timer idle flag, so we avoid IPIs on remote queueing and
* the clock forward checks in the enqueue path:
*/
timer_clear_idle();
calc_load_nohz_stop();
touch_softlockup_watchdog_sched();
/*
* Cancel the scheduled timer and restore the tick
*/
ts->tick_stopped = 0;
tick_nohz_restart(ts, now);
}
static void __tick_nohz_full_update_tick(struct tick_sched *ts,
ktime_t now)
{
#ifdef CONFIG_NO_HZ_FULL
int cpu = smp_processor_id();
if (can_stop_full_tick(cpu, ts))
tick_nohz_stop_sched_tick(ts, cpu);
else if (ts->tick_stopped)
tick_nohz_restart_sched_tick(ts, now);
#endif
}
static void tick_nohz_full_update_tick(struct tick_sched *ts)
{
if (!tick_nohz_full_cpu(smp_processor_id()))
return;
if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE)
return;
__tick_nohz_full_update_tick(ts, ktime_get());
}
/*
* A pending softirq outside an IRQ (or softirq disabled section) context
* should be waiting for ksoftirqd to handle it. Therefore we shouldn't
* reach here due to the need_resched() early check in can_stop_idle_tick().
*
* However if we are between CPUHP_AP_SMPBOOT_THREADS and CPU_TEARDOWN_CPU on the
* cpu_down() process, softirqs can still be raised while ksoftirqd is parked,
* triggering the below since wakep_softirqd() is ignored.
*
*/
static bool report_idle_softirq(void)
{
static int ratelimit;
unsigned int pending = local_softirq_pending();
if (likely(!pending))
return false;
/* Some softirqs claim to be safe against hotplug and ksoftirqd parking */
if (!cpu_active(smp_processor_id())) {
pending &= ~SOFTIRQ_HOTPLUG_SAFE_MASK;
if (!pending)
return false;
}
if (ratelimit >= 10)
return false;
/* On RT, softirqs handling may be waiting on some lock */
if (local_bh_blocked())
return false;
pr_warn("NOHZ tick-stop error: local softirq work is pending, handler #%02x!!!\n",
pending);
ratelimit++;
return true;
}
static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
{
/*
* If this CPU is offline and it is the one which updates
* jiffies, then give up the assignment and let it be taken by
* the CPU which runs the tick timer next. If we don't drop
* this here the jiffies might be stale and do_timer() never
* invoked.
*/
if (unlikely(!cpu_online(cpu))) {
if (cpu == tick_do_timer_cpu)
tick_do_timer_cpu = TICK_DO_TIMER_NONE;
/*
* Make sure the CPU doesn't get fooled by obsolete tick
* deadline if it comes back online later.
*/
ts->next_tick = 0;
return false;
}
if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
return false;
if (need_resched())
return false;
if (unlikely(report_idle_softirq()))
return false;
if (tick_nohz_full_enabled()) {
/*
* Keep the tick alive to guarantee timekeeping progression
* if there are full dynticks CPUs around
*/
if (tick_do_timer_cpu == cpu)
return false;
/* Should not happen for nohz-full */
if (WARN_ON_ONCE(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
return false;
}
return true;
}
static void __tick_nohz_idle_stop_tick(struct tick_sched *ts)
{
ktime_t expires;
int cpu = smp_processor_id();
/*
* If tick_nohz_get_sleep_length() ran tick_nohz_next_event(), the
* tick timer expiration time is known already.
*/
if (ts->timer_expires_base)
expires = ts->timer_expires;
else if (can_stop_idle_tick(cpu, ts))
expires = tick_nohz_next_event(ts, cpu);
else
return;
ts->idle_calls++;
if (expires > 0LL) {
int was_stopped = ts->tick_stopped;
tick_nohz_stop_tick(ts, cpu);
ts->idle_sleeps++;
ts->idle_expires = expires;
if (!was_stopped && ts->tick_stopped) {
ts->idle_jiffies = ts->last_jiffies;
nohz_balance_enter_idle(cpu);
}
} else {
tick_nohz_retain_tick(ts);
}
}
/**
* tick_nohz_idle_stop_tick - stop the idle tick from the idle task
*
* When the next event is more than a tick into the future, stop the idle tick
*/
void tick_nohz_idle_stop_tick(void)
{
__tick_nohz_idle_stop_tick(this_cpu_ptr(&tick_cpu_sched));
}
void tick_nohz_idle_retain_tick(void)
{
tick_nohz_retain_tick(this_cpu_ptr(&tick_cpu_sched));
/*
* Undo the effect of get_next_timer_interrupt() called from
* tick_nohz_next_event().
*/
timer_clear_idle();
}
/**
* tick_nohz_idle_enter - prepare for entering idle on the current CPU
*
* Called when we start the idle loop.
*/
void tick_nohz_idle_enter(void)
{
struct tick_sched *ts;
lockdep_assert_irqs_enabled();
local_irq_disable();
ts = this_cpu_ptr(&tick_cpu_sched);
WARN_ON_ONCE(ts->timer_expires_base);
ts->inidle = 1;
tick_nohz_start_idle(ts);
local_irq_enable();
}
/**
* tick_nohz_irq_exit - update next tick event from interrupt exit
*
* When an interrupt fires while we are idle and it doesn't cause
* a reschedule, it may still add, modify or delete a timer, enqueue
* an RCU callback, etc...
* So we need to re-calculate and reprogram the next tick event.
*/
void tick_nohz_irq_exit(void)
{
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
if (ts->inidle)
tick_nohz_start_idle(ts);
else
tick_nohz_full_update_tick(ts);
}
/**
* tick_nohz_idle_got_tick - Check whether or not the tick handler has run
*/
bool tick_nohz_idle_got_tick(void)
{
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
if (ts->got_idle_tick) {
ts->got_idle_tick = 0;
return true;
}
return false;
}
/**
* tick_nohz_get_next_hrtimer - return the next expiration time for the hrtimer
* or the tick, whatever that expires first. Note that, if the tick has been
* stopped, it returns the next hrtimer.
*
* Called from power state control code with interrupts disabled
*/
ktime_t tick_nohz_get_next_hrtimer(void)
{
return __this_cpu_read(tick_cpu_device.evtdev)->next_event;
}
/**
* tick_nohz_get_sleep_length - return the expected length of the current sleep
* @delta_next: duration until the next event if the tick cannot be stopped
*
* Called from power state control code with interrupts disabled.
*
* The return value of this function and/or the value returned by it through the
* @delta_next pointer can be negative which must be taken into account by its
* callers.
*/
ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next)
{
struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
int cpu = smp_processor_id();
/*
* The idle entry time is expected to be a sufficient approximation of
* the current time at this point.
*/
ktime_t now = ts->idle_entrytime;
ktime_t next_event;
WARN_ON_ONCE(!ts->inidle);
*delta_next = ktime_sub(dev->next_event, now);
if (!can_stop_idle_tick(cpu, ts))
return *delta_next;
next_event = tick_nohz_next_event(ts, cpu);
if (!next_event)
return *delta_next;
/*
* If the next highres timer to expire is earlier than next_event, the
* idle governor needs to know that.
*/
next_event = min_t(u64, next_event,
hrtimer_next_event_without(&ts->sched_timer));
return ktime_sub(next_event, now);
}
EXPORT_SYMBOL_GPL(tick_nohz_get_sleep_length);
/**
* tick_nohz_get_idle_calls_cpu - return the current idle calls counter value
* for a particular CPU.
*
* Called from the schedutil frequency scaling governor in scheduler context.
*/
unsigned long tick_nohz_get_idle_calls_cpu(int cpu)
{
struct tick_sched *ts = tick_get_tick_sched(cpu);
return ts->idle_calls;
}
EXPORT_SYMBOL_GPL(tick_nohz_get_idle_calls_cpu);
/**
* tick_nohz_get_idle_calls - return the current idle calls counter value
*
* Called from the schedutil frequency scaling governor in scheduler context.
*/
unsigned long tick_nohz_get_idle_calls(void)
{
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
return ts->idle_calls;
}
static void tick_nohz_account_idle_time(struct tick_sched *ts,
ktime_t now)
{
unsigned long ticks;
ts->idle_exittime = now;
if (vtime_accounting_enabled_this_cpu())
return;
/*
* We stopped the tick in idle. Update process times would miss the
* time we slept as update_process_times does only a 1 tick
* accounting. Enforce that this is accounted to idle !
*/
ticks = jiffies - ts->idle_jiffies;
/*
* We might be one off. Do not randomly account a huge number of ticks!
*/
if (ticks && ticks < LONG_MAX)
account_idle_ticks(ticks);
}
void tick_nohz_idle_restart_tick(void)
{
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
if (ts->tick_stopped) {
ktime_t now = ktime_get();
tick_nohz_restart_sched_tick(ts, now);
tick_nohz_account_idle_time(ts, now);
}
}
static void tick_nohz_idle_update_tick(struct tick_sched *ts, ktime_t now)
{
if (tick_nohz_full_cpu(smp_processor_id()))
__tick_nohz_full_update_tick(ts, now);
else
tick_nohz_restart_sched_tick(ts, now);
tick_nohz_account_idle_time(ts, now);
}
/**
* tick_nohz_idle_exit - restart the idle tick from the idle task
*
* Restart the idle tick when the CPU is woken up from idle
* This also exit the RCU extended quiescent state. The CPU
* can use RCU again after this function is called.
*/
void tick_nohz_idle_exit(void)
{
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
bool idle_active, tick_stopped;
ktime_t now;
local_irq_disable();
WARN_ON_ONCE(!ts->inidle);
WARN_ON_ONCE(ts->timer_expires_base);
ts->inidle = 0;
idle_active = ts->idle_active;
tick_stopped = ts->tick_stopped;
if (idle_active || tick_stopped)
now = ktime_get();
if (idle_active)
tick_nohz_stop_idle(ts, now);
if (tick_stopped)
tick_nohz_idle_update_tick(ts, now);
local_irq_enable();
}
/*
* The nohz low res interrupt handler
*/
static void tick_nohz_handler(struct clock_event_device *dev)
{
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
struct pt_regs *regs = get_irq_regs();
ktime_t now = ktime_get();
dev->next_event = KTIME_MAX;
tick_sched_do_timer(ts, now);
tick_sched_handle(ts, regs);
if (unlikely(ts->tick_stopped)) {
/*
* The clockevent device is not reprogrammed, so change the
* clock event device to ONESHOT_STOPPED to avoid spurious
* interrupts on devices which might not be truly one shot.
*/
tick_program_event(KTIME_MAX, 1);
return;
}
hrtimer_forward(&ts->sched_timer, now, TICK_NSEC);
tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
}
static inline void tick_nohz_activate(struct tick_sched *ts, int mode)
{
if (!tick_nohz_enabled)
return;
ts->nohz_mode = mode;
/* One update is enough */
if (!test_and_set_bit(0, &tick_nohz_active))
timers_update_nohz();
}
/**
* tick_nohz_switch_to_nohz - switch to nohz mode
*/
static void tick_nohz_switch_to_nohz(void)
{
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
ktime_t next;
if (!tick_nohz_enabled)
return;
if (tick_switch_to_oneshot(tick_nohz_handler))
return;
/*
* Recycle the hrtimer in ts, so we can share the
* hrtimer_forward with the highres code.
*/
hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
/* Get the next period */
next = tick_init_jiffy_update();
hrtimer_set_expires(&ts->sched_timer, next);
hrtimer_forward_now(&ts->sched_timer, TICK_NSEC);
tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
tick_nohz_activate(ts, NOHZ_MODE_LOWRES);
}
static inline void tick_nohz_irq_enter(void)
{
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
ktime_t now;
if (!ts->idle_active && !ts->tick_stopped)
return;
now = ktime_get();
if (ts->idle_active)
tick_nohz_stop_idle(ts, now);
/*
* If all CPUs are idle. We may need to update a stale jiffies value.
* Note nohz_full is a special case: a timekeeper is guaranteed to stay
* alive but it might be busy looping with interrupts disabled in some
* rare case (typically stop machine). So we must make sure we have a
* last resort.
*/
if (ts->tick_stopped)
tick_nohz_update_jiffies(now);
}
#else
static inline void tick_nohz_switch_to_nohz(void) { }
static inline void tick_nohz_irq_enter(void) { }
static inline void tick_nohz_activate(struct tick_sched *ts, int mode) { }
#endif /* CONFIG_NO_HZ_COMMON */
/*
* Called from irq_enter to notify about the possible interruption of idle()
*/
void tick_irq_enter(void)
{
tick_check_oneshot_broadcast_this_cpu();
tick_nohz_irq_enter();
}
/*
* High resolution timer specific code
*/
#ifdef CONFIG_HIGH_RES_TIMERS
/*
* We rearm the timer until we get disabled by the idle code.
* Called with interrupts disabled.
*/
static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
{
struct tick_sched *ts =
container_of(timer, struct tick_sched, sched_timer);
struct pt_regs *regs = get_irq_regs();
ktime_t now = ktime_get();
tick_sched_do_timer(ts, now);
/*
* Do not call, when we are not in irq context and have
* no valid regs pointer
*/
if (regs)
tick_sched_handle(ts, regs);
else
ts->next_tick = 0;
/* No need to reprogram if we are in idle or full dynticks mode */
if (unlikely(ts->tick_stopped))
return HRTIMER_NORESTART;
hrtimer_forward(timer, now, TICK_NSEC);
return HRTIMER_RESTART;
}
static int sched_skew_tick;
static int __init skew_tick(char *str)
{
get_option(&str, &sched_skew_tick);
return 0;
}
early_param("skew_tick", skew_tick);
/**
* tick_setup_sched_timer - setup the tick emulation timer
*/
void tick_setup_sched_timer(void)
{
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
ktime_t now = ktime_get();
/*
* Emulate tick processing via per-CPU hrtimers:
*/
hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
ts->sched_timer.function = tick_sched_timer;
/* Get the next period (per-CPU) */
hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update());
/* Offset the tick to avert jiffies_lock contention. */
if (sched_skew_tick) {
u64 offset = TICK_NSEC >> 1;
do_div(offset, num_possible_cpus());
offset *= smp_processor_id();
hrtimer_add_expires_ns(&ts->sched_timer, offset);
}
hrtimer_forward(&ts->sched_timer, now, TICK_NSEC);
hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED_HARD);
tick_nohz_activate(ts, NOHZ_MODE_HIGHRES);
}
#endif /* HIGH_RES_TIMERS */
#if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS
void tick_cancel_sched_timer(int cpu)
{
struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
# ifdef CONFIG_HIGH_RES_TIMERS
if (ts->sched_timer.base)
hrtimer_cancel(&ts->sched_timer);
# endif
memset(ts, 0, sizeof(*ts));
}
#endif
/*
* Async notification about clocksource changes
*/
void tick_clock_notify(void)
{
int cpu;
for_each_possible_cpu(cpu)
set_bit(0, &per_cpu(tick_cpu_sched, cpu).check_clocks);
}
/*
* Async notification about clock event changes
*/
void tick_oneshot_notify(void)
{
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
set_bit(0, &ts->check_clocks);
}
/*
* Check, if a change happened, which makes oneshot possible.
*
* Called cyclic from the hrtimer softirq (driven by the timer
* softirq) allow_nohz signals, that we can switch into low-res nohz
* mode, because high resolution timers are disabled (either compile
* or runtime). Called with interrupts disabled.
*/
int tick_check_oneshot_change(int allow_nohz)
{
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
if (!test_and_clear_bit(0, &ts->check_clocks))
return 0;
if (ts->nohz_mode != NOHZ_MODE_INACTIVE)
return 0;
if (!timekeeping_valid_for_hres() || !tick_is_oneshot_available())
return 0;
if (!allow_nohz)
return 1;
tick_nohz_switch_to_nohz();
return 0;
}