android_kernel_xiaomi_sm8450/kernel/watchdog.c
Greg Kroah-Hartman 76002c201f Merge 5.10.38 into android12-5.10
Changes in 5.10.38
	KEYS: trusted: Fix memory leak on object td
	tpm: fix error return code in tpm2_get_cc_attrs_tbl()
	tpm, tpm_tis: Extend locality handling to TPM2 in tpm_tis_gen_interrupt()
	tpm, tpm_tis: Reserve locality in tpm_tis_resume()
	KVM: x86/mmu: Remove the defunct update_pte() paging hook
	KVM/VMX: Invoke NMI non-IST entry instead of IST entry
	ACPI: PM: Add ACPI ID of Alder Lake Fan
	PM: runtime: Fix unpaired parent child_count for force_resume
	cpufreq: intel_pstate: Use HWP if enabled by platform firmware
	kvm: Cap halt polling at kvm->max_halt_poll_ns
	ath11k: fix thermal temperature read
	fs: dlm: fix debugfs dump
	fs: dlm: add errno handling to check callback
	fs: dlm: check on minimum msglen size
	fs: dlm: flush swork on shutdown
	tipc: convert dest node's address to network order
	ASoC: Intel: bytcr_rt5640: Enable jack-detect support on Asus T100TAF
	net/mlx5e: Use net_prefetchw instead of prefetchw in MPWQE TX datapath
	net: stmmac: Set FIFO sizes for ipq806x
	ASoC: rsnd: core: Check convert rate in rsnd_hw_params
	Bluetooth: Fix incorrect status handling in LE PHY UPDATE event
	i2c: bail out early when RDWR parameters are wrong
	ALSA: hdsp: don't disable if not enabled
	ALSA: hdspm: don't disable if not enabled
	ALSA: rme9652: don't disable if not enabled
	ALSA: bebob: enable to deliver MIDI messages for multiple ports
	Bluetooth: Set CONF_NOT_COMPLETE as l2cap_chan default
	Bluetooth: initialize skb_queue_head at l2cap_chan_create()
	net/sched: cls_flower: use ntohs for struct flow_dissector_key_ports
	net: bridge: when suppression is enabled exclude RARP packets
	Bluetooth: check for zapped sk before connecting
	selftests/powerpc: Fix L1D flushing tests for Power10
	powerpc/32: Statically initialise first emergency context
	net: hns3: remediate a potential overflow risk of bd_num_list
	net: hns3: add handling for xmit skb with recursive fraglist
	ip6_vti: proper dev_{hold|put} in ndo_[un]init methods
	ASoC: Intel: bytcr_rt5640: Add quirk for the Chuwi Hi8 tablet
	ice: handle increasing Tx or Rx ring sizes
	Bluetooth: btusb: Enable quirk boolean flag for Mediatek Chip.
	ASoC: rt5670: Add a quirk for the Dell Venue 10 Pro 5055
	i2c: Add I2C_AQ_NO_REP_START adapter quirk
	MIPS: Loongson64: Use _CACHE_UNCACHED instead of _CACHE_UNCACHED_ACCELERATED
	coresight: Do not scan for graph if none is present
	IB/hfi1: Correct oversized ring allocation
	mac80211: clear the beacon's CRC after channel switch
	pinctrl: samsung: use 'int' for register masks in Exynos
	rtw88: 8822c: add LC calibration for RTL8822C
	mt76: mt7615: support loading EEPROM for MT7613BE
	mt76: mt76x0: disable GTK offloading
	mt76: mt7915: fix txpower init for TSSI off chips
	fuse: invalidate attrs when page writeback completes
	virtiofs: fix userns
	cuse: prevent clone
	iwlwifi: pcie: make cfg vs. trans_cfg more robust
	powerpc/mm: Add cond_resched() while removing hpte mappings
	ASoC: rsnd: call rsnd_ssi_master_clk_start() from rsnd_ssi_init()
	Revert "iommu/amd: Fix performance counter initialization"
	iommu/amd: Remove performance counter pre-initialization test
	drm/amd/display: Force vsync flip when reconfiguring MPCC
	selftests: Set CC to clang in lib.mk if LLVM is set
	kconfig: nconf: stop endless search loops
	ALSA: hda/realtek: Add quirk for Lenovo Ideapad S740
	ASoC: Intel: sof_sdw: add quirk for new ADL-P Rvp
	ALSA: hda/hdmi: fix race in handling acomp ELD notification at resume
	sctp: Fix out-of-bounds warning in sctp_process_asconf_param()
	flow_dissector: Fix out-of-bounds warning in __skb_flow_bpf_to_target()
	powerpc/smp: Set numa node before updating mask
	ASoC: rt286: Generalize support for ALC3263 codec
	ethtool: ioctl: Fix out-of-bounds warning in store_link_ksettings_for_user()
	net: sched: tapr: prevent cycle_time == 0 in parse_taprio_schedule
	samples/bpf: Fix broken tracex1 due to kprobe argument change
	powerpc/pseries: Stop calling printk in rtas_stop_self()
	drm/amd/display: fixed divide by zero kernel crash during dsc enablement
	drm/amd/display: add handling for hdcp2 rx id list validation
	drm/amdgpu: Add mem sync flag for IB allocated by SA
	mt76: mt7615: fix entering driver-own state on mt7663
	crypto: ccp: Free SEV device if SEV init fails
	wl3501_cs: Fix out-of-bounds warnings in wl3501_send_pkt
	wl3501_cs: Fix out-of-bounds warnings in wl3501_mgmt_join
	qtnfmac: Fix possible buffer overflow in qtnf_event_handle_external_auth
	powerpc/iommu: Annotate nested lock for lockdep
	iavf: remove duplicate free resources calls
	net: ethernet: mtk_eth_soc: fix RX VLAN offload
	selftests: mlxsw: Increase the tolerance of backlog buildup
	selftests: mlxsw: Fix mausezahn invocation in ERSPAN scale test
	kbuild: generate Module.symvers only when vmlinux exists
	bnxt_en: Add PCI IDs for Hyper-V VF devices.
	ia64: module: fix symbolizer crash on fdescr
	watchdog: rename __touch_watchdog() to a better descriptive name
	watchdog: explicitly update timestamp when reporting softlockup
	watchdog/softlockup: remove logic that tried to prevent repeated reports
	watchdog: fix barriers when printing backtraces from all CPUs
	ASoC: rt286: Make RT286_SET_GPIO_* readable and writable
	thermal: thermal_of: Fix error return code of thermal_of_populate_bind_params()
	f2fs: move ioctl interface definitions to separated file
	f2fs: fix compat F2FS_IOC_{MOVE,GARBAGE_COLLECT}_RANGE
	f2fs: fix to allow migrating fully valid segment
	f2fs: fix panic during f2fs_resize_fs()
	f2fs: fix a redundant call to f2fs_balance_fs if an error occurs
	remoteproc: qcom_q6v5_mss: Replace ioremap with memremap
	remoteproc: qcom_q6v5_mss: Validate p_filesz in ELF loader
	PCI: iproc: Fix return value of iproc_msi_irq_domain_alloc()
	PCI: Release OF node in pci_scan_device()'s error path
	ARM: 9064/1: hw_breakpoint: Do not directly check the event's overflow_handler hook
	f2fs: fix to align to section for fallocate() on pinned file
	f2fs: fix to update last i_size if fallocate partially succeeds
	PCI: endpoint: Make *_get_first_free_bar() take into account 64 bit BAR
	PCI: endpoint: Add helper API to get the 'next' unreserved BAR
	PCI: endpoint: Make *_free_bar() to return error codes on failure
	PCI: endpoint: Fix NULL pointer dereference for ->get_features()
	f2fs: fix to avoid touching checkpointed data in get_victim()
	f2fs: fix to cover __allocate_new_section() with curseg_lock
	f2fs: Fix a hungtask problem in atomic write
	f2fs: fix to avoid accessing invalid fio in f2fs_allocate_data_block()
	rpmsg: qcom_glink_native: fix error return code of qcom_glink_rx_data()
	NFS: nfs4_bitmask_adjust() must not change the server global bitmasks
	NFS: Fix attribute bitmask in _nfs42_proc_fallocate()
	NFSv4.2: Always flush out writes in nfs42_proc_fallocate()
	NFS: Deal correctly with attribute generation counter overflow
	PCI: endpoint: Fix missing destroy_workqueue()
	pNFS/flexfiles: fix incorrect size check in decode_nfs_fh()
	NFSv4.2 fix handling of sr_eof in SEEK's reply
	SUNRPC: Move fault injection call sites
	SUNRPC: Remove trace_xprt_transmit_queued
	SUNRPC: Handle major timeout in xprt_adjust_timeout()
	thermal/drivers/tsens: Fix missing put_device error
	NFSv4.x: Don't return NFS4ERR_NOMATCHING_LAYOUT if we're unmounting
	nfsd: ensure new clients break delegations
	rtc: fsl-ftm-alarm: add MODULE_TABLE()
	dmaengine: idxd: Fix potential null dereference on pointer status
	dmaengine: idxd: fix dma device lifetime
	dmaengine: idxd: fix cdev setup and free device lifetime issues
	SUNRPC: fix ternary sign expansion bug in tracing
	pwm: atmel: Fix duty cycle calculation in .get_state()
	xprtrdma: Avoid Receive Queue wrapping
	xprtrdma: Fix cwnd update ordering
	xprtrdma: rpcrdma_mr_pop() already does list_del_init()
	swiotlb: Fix the type of index
	ceph: fix inode leak on getattr error in __fh_to_dentry
	scsi: qla2xxx: Prevent PRLI in target mode
	scsi: ufs: core: Do not put UFS power into LPM if link is broken
	scsi: ufs: core: Cancel rpm_dev_flush_recheck_work during system suspend
	scsi: ufs: core: Narrow down fast path in system suspend path
	rtc: ds1307: Fix wday settings for rx8130
	net: hns3: fix incorrect configuration for igu_egu_hw_err
	net: hns3: initialize the message content in hclge_get_link_mode()
	net: hns3: add check for HNS3_NIC_STATE_INITED in hns3_reset_notify_up_enet()
	net: hns3: fix for vxlan gpe tx checksum bug
	net: hns3: use netif_tx_disable to stop the transmit queue
	net: hns3: disable phy loopback setting in hclge_mac_start_phy
	sctp: do asoc update earlier in sctp_sf_do_dupcook_a
	RISC-V: Fix error code returned by riscv_hartid_to_cpuid()
	sunrpc: Fix misplaced barrier in call_decode
	libbpf: Fix signed overflow in ringbuf_process_ring
	block/rnbd-clt: Change queue_depth type in rnbd_clt_session to size_t
	block/rnbd-clt: Check the return value of the function rtrs_clt_query
	ethernet:enic: Fix a use after free bug in enic_hard_start_xmit
	sctp: fix a SCTP_MIB_CURRESTAB leak in sctp_sf_do_dupcook_b
	netfilter: xt_SECMARK: add new revision to fix structure layout
	xsk: Fix for xp_aligned_validate_desc() when len == chunk_size
	net: stmmac: Clear receive all(RA) bit when promiscuous mode is off
	drm/radeon: Fix off-by-one power_state index heap overwrite
	drm/radeon: Avoid power table parsing memory leaks
	arm64: entry: factor irq triage logic into macros
	arm64: entry: always set GIC_PRIO_PSR_I_SET during entry
	khugepaged: fix wrong result value for trace_mm_collapse_huge_page_isolate()
	mm/hugeltb: handle the error case in hugetlb_fix_reserve_counts()
	mm/migrate.c: fix potential indeterminate pte entry in migrate_vma_insert_page()
	ksm: fix potential missing rmap_item for stable_node
	mm/gup: check every subpage of a compound page during isolation
	mm/gup: return an error on migration failure
	mm/gup: check for isolation errors
	ethtool: fix missing NLM_F_MULTI flag when dumping
	net: fix nla_strcmp to handle more then one trailing null character
	smc: disallow TCP_ULP in smc_setsockopt()
	netfilter: nfnetlink_osf: Fix a missing skb_header_pointer() NULL check
	netfilter: nftables: Fix a memleak from userdata error path in new objects
	can: mcp251xfd: mcp251xfd_probe(): add missing can_rx_offload_del() in error path
	can: mcp251x: fix resume from sleep before interface was brought up
	can: m_can: m_can_tx_work_queue(): fix tx_skb race condition
	sched: Fix out-of-bound access in uclamp
	sched/fair: Fix unfairness caused by missing load decay
	fs/proc/generic.c: fix incorrect pde_is_permanent check
	kernel: kexec_file: fix error return code of kexec_calculate_store_digests()
	kernel/resource: make walk_system_ram_res() find all busy IORESOURCE_SYSTEM_RAM resources
	kernel/resource: make walk_mem_res() find all busy IORESOURCE_MEM resources
	netfilter: nftables: avoid overflows in nft_hash_buckets()
	i40e: fix broken XDP support
	i40e: Fix use-after-free in i40e_client_subtask()
	i40e: fix the restart auto-negotiation after FEC modified
	i40e: Fix PHY type identifiers for 2.5G and 5G adapters
	mptcp: fix splat when closing unaccepted socket
	f2fs: avoid unneeded data copy in f2fs_ioc_move_range()
	ARC: entry: fix off-by-one error in syscall number validation
	ARC: mm: PAE: use 40-bit physical page mask
	ARC: mm: Use max_high_pfn as a HIGHMEM zone border
	powerpc/64s: Fix crashes when toggling stf barrier
	powerpc/64s: Fix crashes when toggling entry flush barrier
	hfsplus: prevent corruption in shrinking truncate
	squashfs: fix divide error in calculate_skip()
	userfaultfd: release page in error path to avoid BUG_ON
	kasan: fix unit tests with CONFIG_UBSAN_LOCAL_BOUNDS enabled
	mm/hugetlb: fix F_SEAL_FUTURE_WRITE
	blk-iocost: fix weight updates of inner active iocgs
	arm64: mte: initialize RGSR_EL1.SEED in __cpu_setup
	arm64: Fix race condition on PG_dcache_clean in __sync_icache_dcache()
	btrfs: fix race leading to unpersisted data and metadata on fsync
	drm/radeon/dpm: Disable sclk switching on Oland when two 4K 60Hz monitors are connected
	drm/amd/display: Initialize attribute for hdcp_srm sysfs file
	drm/i915: Avoid div-by-zero on gen2
	kvm: exit halt polling on need_resched() as well
	KVM: LAPIC: Accurately guarantee busy wait for timer to expire when using hv_timer
	drm/msm/dp: initialize audio_comp when audio starts
	KVM: x86: Cancel pvclock_gtod_work on module removal
	KVM: x86: Prevent deadlock against tk_core.seq
	dax: Add an enum for specifying dax wakup mode
	dax: Add a wakeup mode parameter to put_unlocked_entry()
	dax: Wake up all waiters after invalidating dax entry
	xen/unpopulated-alloc: consolidate pgmap manipulation
	xen/unpopulated-alloc: fix error return code in fill_list()
	perf tools: Fix dynamic libbpf link
	usb: dwc3: gadget: Free gadget structure only after freeing endpoints
	iio: light: gp2ap002: Fix rumtime PM imbalance on error
	iio: proximity: pulsedlight: Fix rumtime PM imbalance on error
	iio: hid-sensors: select IIO_TRIGGERED_BUFFER under HID_SENSOR_IIO_TRIGGER
	usb: fotg210-hcd: Fix an error message
	hwmon: (occ) Fix poll rate limiting
	usb: musb: Fix an error message
	ACPI: scan: Fix a memory leak in an error handling path
	kyber: fix out of bounds access when preempted
	nvmet: add lba to sect conversion helpers
	nvmet: fix inline bio check for bdev-ns
	nvmet-rdma: Fix NULL deref when SEND is completed with error
	f2fs: compress: fix to free compress page correctly
	f2fs: compress: fix race condition of overwrite vs truncate
	f2fs: compress: fix to assign cc.cluster_idx correctly
	nbd: Fix NULL pointer in flush_workqueue
	blk-mq: plug request for shared sbitmap
	blk-mq: Swap two calls in blk_mq_exit_queue()
	usb: dwc3: omap: improve extcon initialization
	usb: dwc3: pci: Enable usb2-gadget-lpm-disable for Intel Merrifield
	usb: xhci: Increase timeout for HC halt
	usb: dwc2: Fix gadget DMA unmap direction
	usb: core: hub: fix race condition about TRSMRCY of resume
	usb: dwc3: gadget: Enable suspend events
	usb: dwc3: gadget: Return success always for kick transfer in ep queue
	usb: typec: ucsi: Retrieve all the PDOs instead of just the first 4
	usb: typec: ucsi: Put fwnode in any case during ->probe()
	xhci-pci: Allow host runtime PM as default for Intel Alder Lake xHCI
	xhci: Do not use GFP_KERNEL in (potentially) atomic context
	xhci: Add reset resume quirk for AMD xhci controller.
	iio: gyro: mpu3050: Fix reported temperature value
	iio: tsl2583: Fix division by a zero lux_val
	cdc-wdm: untangle a circular dependency between callback and softint
	xen/gntdev: fix gntdev_mmap() error exit path
	KVM: x86: Emulate RDPID only if RDTSCP is supported
	KVM: x86: Move RDPID emulation intercept to its own enum
	KVM: nVMX: Always make an attempt to map eVMCS after migration
	KVM: VMX: Do not advertise RDPID if ENABLE_RDTSCP control is unsupported
	KVM: VMX: Disable preemption when probing user return MSRs
	Revert "iommu/vt-d: Remove WO permissions on second-level paging entries"
	Revert "iommu/vt-d: Preset Access/Dirty bits for IOVA over FL"
	iommu/vt-d: Preset Access/Dirty bits for IOVA over FL
	iommu/vt-d: Remove WO permissions on second-level paging entries
	mm: fix struct page layout on 32-bit systems
	MIPS: Reinstate platform `__div64_32' handler
	MIPS: Avoid DIVU in `__div64_32' is result would be zero
	MIPS: Avoid handcoded DIVU in `__div64_32' altogether
	clocksource/drivers/timer-ti-dm: Prepare to handle dra7 timer wrap issue
	clocksource/drivers/timer-ti-dm: Handle dra7 timer wrap errata i940
	ARM: 9011/1: centralize phys-to-virt conversion of DT/ATAGS address
	ARM: 9012/1: move device tree mapping out of linear region
	ARM: 9020/1: mm: use correct section size macro to describe the FDT virtual address
	ARM: 9027/1: head.S: explicitly map DT even if it lives in the first physical section
	usb: typec: tcpm: Fix error while calculating PPS out values
	kobject_uevent: remove warning in init_uevent_argv()
	drm/i915/gt: Fix a double free in gen8_preallocate_top_level_pdp
	drm/i915: Read C0DRB3/C1DRB3 as 16 bits again
	drm/i915/overlay: Fix active retire callback alignment
	drm/i915: Fix crash in auto_retire
	clk: exynos7: Mark aclk_fsys1_200 as critical
	media: rkvdec: Remove of_match_ptr()
	i2c: mediatek: Fix send master code at more than 1MHz
	dt-bindings: media: renesas,vin: Make resets optional on R-Car Gen1
	dt-bindings: serial: 8250: Remove duplicated compatible strings
	debugfs: Make debugfs_allow RO after init
	ext4: fix debug format string warning
	nvme: do not try to reconfigure APST when the controller is not live
	ASoC: rsnd: check all BUSIF status when error
	Linux 5.10.38

Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
Change-Id: Ia32e01283b488a38be48015c58a0e481f09aaf65
2021-05-20 15:35:25 +02:00

745 lines
20 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* Detect hard and soft lockups on a system
*
* started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
*
* Note: Most of this code is borrowed heavily from the original softlockup
* detector, so thanks to Ingo for the initial implementation.
* Some chunks also taken from the old x86-specific nmi watchdog code, thanks
* to those contributors as well.
*/
#define pr_fmt(fmt) "watchdog: " fmt
#include <linux/mm.h>
#include <linux/cpu.h>
#include <linux/nmi.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/sysctl.h>
#include <linux/tick.h>
#include <linux/sched/clock.h>
#include <linux/sched/debug.h>
#include <linux/sched/isolation.h>
#include <linux/stop_machine.h>
#include <asm/irq_regs.h>
#include <linux/kvm_para.h>
#include <trace/hooks/softlockup.h>
static DEFINE_MUTEX(watchdog_mutex);
#if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HAVE_NMI_WATCHDOG)
# define WATCHDOG_DEFAULT (SOFT_WATCHDOG_ENABLED | NMI_WATCHDOG_ENABLED)
# define NMI_WATCHDOG_DEFAULT 1
#else
# define WATCHDOG_DEFAULT (SOFT_WATCHDOG_ENABLED)
# define NMI_WATCHDOG_DEFAULT 0
#endif
unsigned long __read_mostly watchdog_enabled;
int __read_mostly watchdog_user_enabled = 1;
int __read_mostly nmi_watchdog_user_enabled = NMI_WATCHDOG_DEFAULT;
int __read_mostly soft_watchdog_user_enabled = 1;
int __read_mostly watchdog_thresh = 10;
static int __read_mostly nmi_watchdog_available;
struct cpumask watchdog_cpumask __read_mostly;
unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
#ifdef CONFIG_HARDLOCKUP_DETECTOR
# ifdef CONFIG_SMP
int __read_mostly sysctl_hardlockup_all_cpu_backtrace;
# endif /* CONFIG_SMP */
/*
* Should we panic when a soft-lockup or hard-lockup occurs:
*/
unsigned int __read_mostly hardlockup_panic =
CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;
/*
* We may not want to enable hard lockup detection by default in all cases,
* for example when running the kernel as a guest on a hypervisor. In these
* cases this function can be called to disable hard lockup detection. This
* function should only be executed once by the boot processor before the
* kernel command line parameters are parsed, because otherwise it is not
* possible to override this in hardlockup_panic_setup().
*/
void __init hardlockup_detector_disable(void)
{
nmi_watchdog_user_enabled = 0;
}
static int __init hardlockup_panic_setup(char *str)
{
if (!strncmp(str, "panic", 5))
hardlockup_panic = 1;
else if (!strncmp(str, "nopanic", 7))
hardlockup_panic = 0;
else if (!strncmp(str, "0", 1))
nmi_watchdog_user_enabled = 0;
else if (!strncmp(str, "1", 1))
nmi_watchdog_user_enabled = 1;
return 1;
}
__setup("nmi_watchdog=", hardlockup_panic_setup);
#endif /* CONFIG_HARDLOCKUP_DETECTOR */
/*
* These functions can be overridden if an architecture implements its
* own hardlockup detector.
*
* watchdog_nmi_enable/disable can be implemented to start and stop when
* softlockup watchdog threads start and stop. The arch must select the
* SOFTLOCKUP_DETECTOR Kconfig.
*/
int __weak watchdog_nmi_enable(unsigned int cpu)
{
hardlockup_detector_perf_enable();
return 0;
}
void __weak watchdog_nmi_disable(unsigned int cpu)
{
hardlockup_detector_perf_disable();
}
/* Return 0, if a NMI watchdog is available. Error code otherwise */
int __weak __init watchdog_nmi_probe(void)
{
return hardlockup_detector_perf_init();
}
/**
* watchdog_nmi_stop - Stop the watchdog for reconfiguration
*
* The reconfiguration steps are:
* watchdog_nmi_stop();
* update_variables();
* watchdog_nmi_start();
*/
void __weak watchdog_nmi_stop(void) { }
/**
* watchdog_nmi_start - Start the watchdog after reconfiguration
*
* Counterpart to watchdog_nmi_stop().
*
* The following variables have been updated in update_variables() and
* contain the currently valid configuration:
* - watchdog_enabled
* - watchdog_thresh
* - watchdog_cpumask
*/
void __weak watchdog_nmi_start(void) { }
/**
* lockup_detector_update_enable - Update the sysctl enable bit
*
* Caller needs to make sure that the NMI/perf watchdogs are off, so this
* can't race with watchdog_nmi_disable().
*/
static void lockup_detector_update_enable(void)
{
watchdog_enabled = 0;
if (!watchdog_user_enabled)
return;
if (nmi_watchdog_available && nmi_watchdog_user_enabled)
watchdog_enabled |= NMI_WATCHDOG_ENABLED;
if (soft_watchdog_user_enabled)
watchdog_enabled |= SOFT_WATCHDOG_ENABLED;
}
#ifdef CONFIG_SOFTLOCKUP_DETECTOR
#define SOFTLOCKUP_RESET ULONG_MAX
#ifdef CONFIG_SMP
int __read_mostly sysctl_softlockup_all_cpu_backtrace;
#endif
static struct cpumask watchdog_allowed_mask __read_mostly;
/* Global variables, exported for sysctl */
unsigned int __read_mostly softlockup_panic =
CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
static bool softlockup_initialized __read_mostly;
static u64 __read_mostly sample_period;
static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
static DEFINE_PER_CPU(bool, softlockup_touch_sync);
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
static unsigned long soft_lockup_nmi_warn;
static int __init nowatchdog_setup(char *str)
{
watchdog_user_enabled = 0;
return 1;
}
__setup("nowatchdog", nowatchdog_setup);
static int __init nosoftlockup_setup(char *str)
{
soft_watchdog_user_enabled = 0;
return 1;
}
__setup("nosoftlockup", nosoftlockup_setup);
static int __init watchdog_thresh_setup(char *str)
{
get_option(&str, &watchdog_thresh);
return 1;
}
__setup("watchdog_thresh=", watchdog_thresh_setup);
static void __lockup_detector_cleanup(void);
/*
* Hard-lockup warnings should be triggered after just a few seconds. Soft-
* lockups can have false positives under extreme conditions. So we generally
* want a higher threshold for soft lockups than for hard lockups. So we couple
* the thresholds with a factor: we make the soft threshold twice the amount of
* time the hard threshold is.
*/
static int get_softlockup_thresh(void)
{
return watchdog_thresh * 2;
}
/*
* Returns seconds, approximately. We don't need nanosecond
* resolution, and we don't need to waste time with a big divide when
* 2^30ns == 1.074s.
*/
static unsigned long get_timestamp(void)
{
return running_clock() >> 30LL; /* 2^30 ~= 10^9 */
}
static void set_sample_period(void)
{
/*
* convert watchdog_thresh from seconds to ns
* the divide by 5 is to give hrtimer several chances (two
* or three with the current relation between the soft
* and hard thresholds) to increment before the
* hardlockup detector generates a warning
*/
sample_period = get_softlockup_thresh() * ((u64)NSEC_PER_SEC / 5);
watchdog_update_hrtimer_threshold(sample_period);
}
/* Commands for resetting the watchdog */
static void update_touch_ts(void)
{
__this_cpu_write(watchdog_touch_ts, get_timestamp());
}
/**
* touch_softlockup_watchdog_sched - touch watchdog on scheduler stalls
*
* Call when the scheduler may have stalled for legitimate reasons
* preventing the watchdog task from executing - e.g. the scheduler
* entering idle state. This should only be used for scheduler events.
* Use touch_softlockup_watchdog() for everything else.
*/
notrace void touch_softlockup_watchdog_sched(void)
{
/*
* Preemption can be enabled. It doesn't matter which CPU's timestamp
* gets zeroed here, so use the raw_ operation.
*/
raw_cpu_write(watchdog_touch_ts, SOFTLOCKUP_RESET);
}
notrace void touch_softlockup_watchdog(void)
{
touch_softlockup_watchdog_sched();
wq_watchdog_touch(raw_smp_processor_id());
}
EXPORT_SYMBOL(touch_softlockup_watchdog);
void touch_all_softlockup_watchdogs(void)
{
int cpu;
/*
* watchdog_mutex cannpt be taken here, as this might be called
* from (soft)interrupt context, so the access to
* watchdog_allowed_cpumask might race with a concurrent update.
*
* The watchdog time stamp can race against a concurrent real
* update as well, the only side effect might be a cycle delay for
* the softlockup check.
*/
for_each_cpu(cpu, &watchdog_allowed_mask)
per_cpu(watchdog_touch_ts, cpu) = SOFTLOCKUP_RESET;
wq_watchdog_touch(-1);
}
void touch_softlockup_watchdog_sync(void)
{
__this_cpu_write(softlockup_touch_sync, true);
__this_cpu_write(watchdog_touch_ts, SOFTLOCKUP_RESET);
}
static int is_softlockup(unsigned long touch_ts)
{
unsigned long now = get_timestamp();
if ((watchdog_enabled & SOFT_WATCHDOG_ENABLED) && watchdog_thresh){
/* Warn about unreasonable delays. */
if (time_after(now, touch_ts + get_softlockup_thresh()))
return now - touch_ts;
}
return 0;
}
/* watchdog detector functions */
bool is_hardlockup(void)
{
unsigned long hrint = __this_cpu_read(hrtimer_interrupts);
if (__this_cpu_read(hrtimer_interrupts_saved) == hrint)
return true;
__this_cpu_write(hrtimer_interrupts_saved, hrint);
return false;
}
static void watchdog_interrupt_count(void)
{
__this_cpu_inc(hrtimer_interrupts);
}
static DEFINE_PER_CPU(struct completion, softlockup_completion);
static DEFINE_PER_CPU(struct cpu_stop_work, softlockup_stop_work);
/*
* The watchdog thread function - touches the timestamp.
*
* It only runs once every sample_period seconds (4 seconds by
* default) to reset the softlockup timestamp. If this gets delayed
* for more than 2*watchdog_thresh seconds then the debug-printout
* triggers in watchdog_timer_fn().
*/
static int softlockup_fn(void *data)
{
update_touch_ts();
complete(this_cpu_ptr(&softlockup_completion));
return 0;
}
/* watchdog kicker functions */
static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
{
unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts);
struct pt_regs *regs = get_irq_regs();
int duration;
int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace;
if (!watchdog_enabled)
return HRTIMER_NORESTART;
/* kick the hardlockup detector */
watchdog_interrupt_count();
/* kick the softlockup detector */
if (completion_done(this_cpu_ptr(&softlockup_completion))) {
reinit_completion(this_cpu_ptr(&softlockup_completion));
stop_one_cpu_nowait(smp_processor_id(),
softlockup_fn, NULL,
this_cpu_ptr(&softlockup_stop_work));
}
/* .. and repeat */
hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));
if (touch_ts == SOFTLOCKUP_RESET) {
if (unlikely(__this_cpu_read(softlockup_touch_sync))) {
/*
* If the time stamp was touched atomically
* make sure the scheduler tick is up to date.
*/
__this_cpu_write(softlockup_touch_sync, false);
sched_clock_tick();
}
/* Clear the guest paused flag on watchdog reset */
kvm_check_and_clear_guest_paused();
update_touch_ts();
return HRTIMER_RESTART;
}
/* check for a softlockup
* This is done by making sure a high priority task is
* being scheduled. The task touches the watchdog to
* indicate it is getting cpu time. If it hasn't then
* this is a good indication some task is hogging the cpu
*/
duration = is_softlockup(touch_ts);
if (unlikely(duration)) {
/*
* If a virtual machine is stopped by the host it can look to
* the watchdog like a soft lockup, check to see if the host
* stopped the vm before we issue the warning
*/
if (kvm_check_and_clear_guest_paused())
return HRTIMER_RESTART;
/*
* Prevent multiple soft-lockup reports if one cpu is already
* engaged in dumping all cpu back traces.
*/
if (softlockup_all_cpu_backtrace) {
if (test_and_set_bit_lock(0, &soft_lockup_nmi_warn))
return HRTIMER_RESTART;
}
/* Start period for the next softlockup warning. */
update_touch_ts();
pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
smp_processor_id(), duration,
current->comm, task_pid_nr(current));
print_modules();
print_irqtrace_events(current);
if (regs)
show_regs(regs);
else
dump_stack();
if (softlockup_all_cpu_backtrace) {
trigger_allbutself_cpu_backtrace();
clear_bit_unlock(0, &soft_lockup_nmi_warn);
}
trace_android_vh_watchdog_timer_softlockup(duration, regs, !!softlockup_panic);
add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
if (softlockup_panic)
panic("softlockup: hung tasks");
}
return HRTIMER_RESTART;
}
static void watchdog_enable(unsigned int cpu)
{
struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer);
struct completion *done = this_cpu_ptr(&softlockup_completion);
WARN_ON_ONCE(cpu != smp_processor_id());
init_completion(done);
complete(done);
/*
* Start the timer first to prevent the NMI watchdog triggering
* before the timer has a chance to fire.
*/
hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
hrtimer->function = watchdog_timer_fn;
hrtimer_start(hrtimer, ns_to_ktime(sample_period),
HRTIMER_MODE_REL_PINNED_HARD);
/* Initialize timestamp */
update_touch_ts();
/* Enable the perf event */
if (watchdog_enabled & NMI_WATCHDOG_ENABLED)
watchdog_nmi_enable(cpu);
}
static void watchdog_disable(unsigned int cpu)
{
struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer);
WARN_ON_ONCE(cpu != smp_processor_id());
/*
* Disable the perf event first. That prevents that a large delay
* between disabling the timer and disabling the perf event causes
* the perf NMI to detect a false positive.
*/
watchdog_nmi_disable(cpu);
hrtimer_cancel(hrtimer);
wait_for_completion(this_cpu_ptr(&softlockup_completion));
}
static int softlockup_stop_fn(void *data)
{
watchdog_disable(smp_processor_id());
return 0;
}
static void softlockup_stop_all(void)
{
int cpu;
if (!softlockup_initialized)
return;
for_each_cpu(cpu, &watchdog_allowed_mask)
smp_call_on_cpu(cpu, softlockup_stop_fn, NULL, false);
cpumask_clear(&watchdog_allowed_mask);
}
static int softlockup_start_fn(void *data)
{
watchdog_enable(smp_processor_id());
return 0;
}
static void softlockup_start_all(void)
{
int cpu;
cpumask_copy(&watchdog_allowed_mask, &watchdog_cpumask);
for_each_cpu(cpu, &watchdog_allowed_mask)
smp_call_on_cpu(cpu, softlockup_start_fn, NULL, false);
}
int lockup_detector_online_cpu(unsigned int cpu)
{
if (cpumask_test_cpu(cpu, &watchdog_allowed_mask))
watchdog_enable(cpu);
return 0;
}
int lockup_detector_offline_cpu(unsigned int cpu)
{
if (cpumask_test_cpu(cpu, &watchdog_allowed_mask))
watchdog_disable(cpu);
return 0;
}
static void lockup_detector_reconfigure(void)
{
cpus_read_lock();
watchdog_nmi_stop();
softlockup_stop_all();
set_sample_period();
lockup_detector_update_enable();
if (watchdog_enabled && watchdog_thresh)
softlockup_start_all();
watchdog_nmi_start();
cpus_read_unlock();
/*
* Must be called outside the cpus locked section to prevent
* recursive locking in the perf code.
*/
__lockup_detector_cleanup();
}
/*
* Create the watchdog thread infrastructure and configure the detector(s).
*
* The threads are not unparked as watchdog_allowed_mask is empty. When
* the threads are successfully initialized, take the proper locks and
* unpark the threads in the watchdog_cpumask if the watchdog is enabled.
*/
static __init void lockup_detector_setup(void)
{
/*
* If sysctl is off and watchdog got disabled on the command line,
* nothing to do here.
*/
lockup_detector_update_enable();
if (!IS_ENABLED(CONFIG_SYSCTL) &&
!(watchdog_enabled && watchdog_thresh))
return;
mutex_lock(&watchdog_mutex);
lockup_detector_reconfigure();
softlockup_initialized = true;
mutex_unlock(&watchdog_mutex);
}
#else /* CONFIG_SOFTLOCKUP_DETECTOR */
static void lockup_detector_reconfigure(void)
{
cpus_read_lock();
watchdog_nmi_stop();
lockup_detector_update_enable();
watchdog_nmi_start();
cpus_read_unlock();
}
static inline void lockup_detector_setup(void)
{
lockup_detector_reconfigure();
}
#endif /* !CONFIG_SOFTLOCKUP_DETECTOR */
static void __lockup_detector_cleanup(void)
{
lockdep_assert_held(&watchdog_mutex);
hardlockup_detector_perf_cleanup();
}
/**
* lockup_detector_cleanup - Cleanup after cpu hotplug or sysctl changes
*
* Caller must not hold the cpu hotplug rwsem.
*/
void lockup_detector_cleanup(void)
{
mutex_lock(&watchdog_mutex);
__lockup_detector_cleanup();
mutex_unlock(&watchdog_mutex);
}
/**
* lockup_detector_soft_poweroff - Interface to stop lockup detector(s)
*
* Special interface for parisc. It prevents lockup detector warnings from
* the default pm_poweroff() function which busy loops forever.
*/
void lockup_detector_soft_poweroff(void)
{
watchdog_enabled = 0;
}
#ifdef CONFIG_SYSCTL
/* Propagate any changes to the watchdog threads */
static void proc_watchdog_update(void)
{
/* Remove impossible cpus to keep sysctl output clean. */
cpumask_and(&watchdog_cpumask, &watchdog_cpumask, cpu_possible_mask);
lockup_detector_reconfigure();
}
/*
* common function for watchdog, nmi_watchdog and soft_watchdog parameter
*
* caller | table->data points to | 'which'
* -------------------|----------------------------|--------------------------
* proc_watchdog | watchdog_user_enabled | NMI_WATCHDOG_ENABLED |
* | | SOFT_WATCHDOG_ENABLED
* -------------------|----------------------------|--------------------------
* proc_nmi_watchdog | nmi_watchdog_user_enabled | NMI_WATCHDOG_ENABLED
* -------------------|----------------------------|--------------------------
* proc_soft_watchdog | soft_watchdog_user_enabled | SOFT_WATCHDOG_ENABLED
*/
static int proc_watchdog_common(int which, struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int err, old, *param = table->data;
mutex_lock(&watchdog_mutex);
if (!write) {
/*
* On read synchronize the userspace interface. This is a
* racy snapshot.
*/
*param = (watchdog_enabled & which) != 0;
err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
} else {
old = READ_ONCE(*param);
err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (!err && old != READ_ONCE(*param))
proc_watchdog_update();
}
mutex_unlock(&watchdog_mutex);
return err;
}
/*
* /proc/sys/kernel/watchdog
*/
int proc_watchdog(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
return proc_watchdog_common(NMI_WATCHDOG_ENABLED|SOFT_WATCHDOG_ENABLED,
table, write, buffer, lenp, ppos);
}
/*
* /proc/sys/kernel/nmi_watchdog
*/
int proc_nmi_watchdog(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
if (!nmi_watchdog_available && write)
return -ENOTSUPP;
return proc_watchdog_common(NMI_WATCHDOG_ENABLED,
table, write, buffer, lenp, ppos);
}
/*
* /proc/sys/kernel/soft_watchdog
*/
int proc_soft_watchdog(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
return proc_watchdog_common(SOFT_WATCHDOG_ENABLED,
table, write, buffer, lenp, ppos);
}
/*
* /proc/sys/kernel/watchdog_thresh
*/
int proc_watchdog_thresh(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int err, old;
mutex_lock(&watchdog_mutex);
old = READ_ONCE(watchdog_thresh);
err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (!err && write && old != READ_ONCE(watchdog_thresh))
proc_watchdog_update();
mutex_unlock(&watchdog_mutex);
return err;
}
/*
* The cpumask is the mask of possible cpus that the watchdog can run
* on, not the mask of cpus it is actually running on. This allows the
* user to specify a mask that will include cpus that have not yet
* been brought online, if desired.
*/
int proc_watchdog_cpumask(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int err;
mutex_lock(&watchdog_mutex);
err = proc_do_large_bitmap(table, write, buffer, lenp, ppos);
if (!err && write)
proc_watchdog_update();
mutex_unlock(&watchdog_mutex);
return err;
}
#endif /* CONFIG_SYSCTL */
void __init lockup_detector_init(void)
{
if (tick_nohz_full_enabled())
pr_info("Disabling watchdog on nohz_full cores by default\n");
cpumask_copy(&watchdog_cpumask,
housekeeping_cpumask(HK_FLAG_TIMER));
if (!watchdog_nmi_probe())
nmi_watchdog_available = true;
lockup_detector_setup();
}