android_kernel_xiaomi_sm8450/kernel/watchdog.c
Greg Kroah-Hartman 7999a9a70d This is the 5.10.202 stable release
-----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEEZH8oZUiU471FcZm+ONu9yGCSaT4FAmVmG20ACgkQONu9yGCS
 aT6dzg/7BnCP2SpVmgEaD7FdPvGO/A6O5VrC9zu3sQE6g2gAwirZhdgE8NRn+ggm
 WSQ1kIA+HEcY23FKpq46pBED4P1irudiW7DkLw8nyOGp+XLb4wGkF5lBBP5z+B2P
 ga2RgwqKvYWeDaUW4n1Uy7m2Cz+wqCg/EvnITo40glSWPh20gM532/CSnA5akoje
 9mjZYZ0rKHKTZGu65aNScNR7XnXHIivJU6C1jF6L9N1+Xn679nUHKQP4KM/RcjpX
 g1WQMWFC3mGIn5IX28W1wvKS320D5HLmTLnLqJvFpJN9+13DUnUoXcX469zvQoxJ
 GL3S94goWN/0BPOgr5KcKvTj00b4O+EWhQuQt+x8NLdydzRQuyFu2UpLNhIKKSou
 sT+BcxzeuqJhEh1tZItcZkZBptpLEkb0ezT11u5McnU5FjPzzzP8CtEetKKmEaBU
 AUoEP/lQQlVyk1I6xAeuzu53smncNQt6CqnXJxYXOBGgJ2txAM5kroMKXPin5C8k
 BCpUIqghhKmBd1hwuKyaOBKF99eLKKZsuvXppoPD0Yz7/Nq5TgdBw0qbNt2iLr05
 XSM7WIIeCBROaV+ZiVxgtcXDR51FpMr7CLTbkBQ6IgLwircHeHSK7rQn7kFO3fCg
 OezhWAuh72qDZ2PCJ84fj21IhZ49a5oCLbUdBew+KzZervVpSo0=
 =eW67
 -----END PGP SIGNATURE-----

Merge 5.10.202 into android12-5.10-lts

Changes in 5.10.202
	locking/ww_mutex/test: Fix potential workqueue corruption
	perf/core: Bail out early if the request AUX area is out of bound
	clocksource/drivers/timer-imx-gpt: Fix potential memory leak
	clocksource/drivers/timer-atmel-tcb: Fix initialization on SAM9 hardware
	x86/mm: Drop the 4 MB restriction on minimal NUMA node memory size
	wifi: mac80211_hwsim: fix clang-specific fortify warning
	wifi: mac80211: don't return unset power in ieee80211_get_tx_power()
	bpf: Detect IP == ksym.end as part of BPF program
	wifi: ath9k: fix clang-specific fortify warnings
	wifi: ath10k: fix clang-specific fortify warning
	net: annotate data-races around sk->sk_tx_queue_mapping
	net: annotate data-races around sk->sk_dst_pending_confirm
	wifi: ath10k: Don't touch the CE interrupt registers after power up
	Bluetooth: btusb: Add date->evt_skb is NULL check
	Bluetooth: Fix double free in hci_conn_cleanup
	platform/x86: thinkpad_acpi: Add battery quirk for Thinkpad X120e
	drm/komeda: drop all currently held locks if deadlock happens
	drm/msm/dp: skip validity check for DP CTS EDID checksum
	drm/amd: Fix UBSAN array-index-out-of-bounds for SMU7
	drm/amd: Fix UBSAN array-index-out-of-bounds for Polaris and Tonga
	drm/amdgpu: Fix potential null pointer derefernce
	drm/panel: fix a possible null pointer dereference
	drm/panel/panel-tpo-tpg110: fix a possible null pointer dereference
	drm/panel: st7703: Pick different reset sequence
	drm/amdgpu: Fix a null pointer access when the smc_rreg pointer is NULL
	selftests/efivarfs: create-read: fix a resource leak
	ASoC: soc-card: Add storage for PCI SSID
	crypto: pcrypt - Fix hungtask for PADATA_RESET
	RDMA/hfi1: Use FIELD_GET() to extract Link Width
	fs/jfs: Add check for negative db_l2nbperpage
	fs/jfs: Add validity check for db_maxag and db_agpref
	jfs: fix array-index-out-of-bounds in dbFindLeaf
	jfs: fix array-index-out-of-bounds in diAlloc
	HID: lenovo: Detect quirk-free fw on cptkbd and stop applying workaround
	ARM: 9320/1: fix stack depot IRQ stack filter
	ALSA: hda: Fix possible null-ptr-deref when assigning a stream
	PCI: tegra194: Use FIELD_GET()/FIELD_PREP() with Link Width fields
	atm: iphase: Do PCI error checks on own line
	scsi: libfc: Fix potential NULL pointer dereference in fc_lport_ptp_setup()
	misc: pci_endpoint_test: Add Device ID for R-Car S4-8 PCIe controller
	HID: Add quirk for Dell Pro Wireless Keyboard and Mouse KM5221W
	exfat: support handle zero-size directory
	tty: vcc: Add check for kstrdup() in vcc_probe()
	usb: gadget: f_ncm: Always set current gadget in ncm_bind()
	9p/trans_fd: Annotate data-racy writes to file::f_flags
	i2c: sun6i-p2wi: Prevent potential division by zero
	media: gspca: cpia1: shift-out-of-bounds in set_flicker
	media: vivid: avoid integer overflow
	gfs2: ignore negated quota changes
	gfs2: fix an oops in gfs2_permission
	media: cobalt: Use FIELD_GET() to extract Link Width
	media: imon: fix access to invalid resource for the second interface
	drm/amd/display: Avoid NULL dereference of timing generator
	kgdb: Flush console before entering kgdb on panic
	ASoC: ti: omap-mcbsp: Fix runtime PM underflow warnings
	drm/amdgpu: fix software pci_unplug on some chips
	pwm: Fix double shift bug
	wifi: iwlwifi: Use FW rate for non-data frames
	xhci: turn cancelled td cleanup to its own function
	SUNRPC: ECONNRESET might require a rebind
	SUNRPC: Add an IS_ERR() check back to where it was
	NFSv4.1: fix SP4_MACH_CRED protection for pnfs IO
	SUNRPC: Fix RPC client cleaned up the freed pipefs dentries
	gfs2: Silence "suspicious RCU usage in gfs2_permission" warning
	ipvlan: add ipvlan_route_v6_outbound() helper
	tty: Fix uninit-value access in ppp_sync_receive()
	net: hns3: fix variable may not initialized problem in hns3_init_mac_addr()
	net: hns3: fix VF reset fail issue
	tipc: Fix kernel-infoleak due to uninitialized TLV value
	ppp: limit MRU to 64K
	xen/events: fix delayed eoi list handling
	ptp: annotate data-race around q->head and q->tail
	bonding: stop the device in bond_setup_by_slave()
	net: ethernet: cortina: Fix max RX frame define
	net: ethernet: cortina: Handle large frames
	net: ethernet: cortina: Fix MTU max setting
	netfilter: nf_conntrack_bridge: initialize err to 0
	net: stmmac: fix rx budget limit check
	net/mlx5e: fix double free of encap_header
	net/mlx5_core: Clean driver version and name
	net/mlx5e: Check return value of snprintf writing to fw_version buffer for representors
	macvlan: Don't propagate promisc change to lower dev in passthru
	tools/power/turbostat: Fix a knl bug
	cifs: spnego: add ';' in HOST_KEY_LEN
	cifs: fix check of rc in function generate_smb3signingkey
	media: venus: hfi: add checks to perform sanity on queue pointers
	powerpc/perf: Fix disabling BHRB and instruction sampling
	randstruct: Fix gcc-plugin performance mode to stay in group
	bpf: Fix check_stack_write_fixed_off() to correctly spill imm
	bpf: Fix precision tracking for BPF_ALU | BPF_TO_BE | BPF_END
	scsi: mpt3sas: Fix loop logic
	scsi: megaraid_sas: Increase register read retry rount from 3 to 30 for selected registers
	x86/cpu/hygon: Fix the CPU topology evaluation for real
	KVM: x86: hyper-v: Don't auto-enable stimer on write from user-space
	KVM: x86: Ignore MSR_AMD64_TW_CFG access
	audit: don't take task_lock() in audit_exe_compare() code path
	audit: don't WARN_ON_ONCE(!current->mm) in audit_exe_compare()
	tty/sysrq: replace smp_processor_id() with get_cpu()
	hvc/xen: fix console unplug
	hvc/xen: fix error path in xen_hvc_init() to always register frontend driver
	PCI/sysfs: Protect driver's D3cold preference from user space
	watchdog: move softlockup_panic back to early_param
	ACPI: resource: Do IRQ override on TongFang GMxXGxx
	arm64: Restrict CPU_BIG_ENDIAN to GNU as or LLVM IAS 15.x or newer
	parisc/pdc: Add width field to struct pdc_model
	clk: qcom: ipq8074: drop the CLK_SET_RATE_PARENT flag from PLL clocks
	clk: qcom: ipq6018: drop the CLK_SET_RATE_PARENT flag from PLL clocks
	mmc: vub300: fix an error code
	mmc: sdhci_am654: fix start loop index for TAP value parsing
	PCI/ASPM: Fix L1 substate handling in aspm_attr_store_common()
	arm64: dts: qcom: ipq6018: Fix hwlock index for SMEM
	PM: hibernate: Use __get_safe_page() rather than touching the list
	PM: hibernate: Clean up sync_read handling in snapshot_write_next()
	rcu: kmemleak: Ignore kmemleak false positives when RCU-freeing objects
	btrfs: don't arbitrarily slow down delalloc if we're committing
	firmware: qcom_scm: use 64-bit calling convention only when client is 64-bit
	ima: detect changes to the backing overlay file
	wifi: ath11k: fix temperature event locking
	wifi: ath11k: fix dfs radar event locking
	wifi: ath11k: fix htt pktlog locking
	mmc: meson-gx: Remove setting of CMD_CFG_ERROR
	genirq/generic_chip: Make irq_remove_generic_chip() irqdomain aware
	PCI: keystone: Don't discard .remove() callback
	PCI: keystone: Don't discard .probe() callback
	jbd2: fix potential data lost in recovering journal raced with synchronizing fs bdev
	quota: explicitly forbid quota files from being encrypted
	kernel/reboot: emergency_restart: Set correct system_state
	i2c: core: Run atomic i2c xfer when !preemptible
	mcb: fix error handling for different scenarios when parsing
	dmaengine: stm32-mdma: correct desc prep when channel running
	mm/cma: use nth_page() in place of direct struct page manipulation
	mm/memory_hotplug: use pfn math in place of direct struct page manipulation
	mtd: cfi_cmdset_0001: Byte swap OTP info
	i3c: master: cdns: Fix reading status register
	parisc: Prevent booting 64-bit kernels on PA1.x machines
	parisc/pgtable: Do not drop upper 5 address bits of physical address
	xhci: Enable RPM on controllers that support low-power states
	ALSA: info: Fix potential deadlock at disconnection
	ALSA: hda/realtek - Add Dell ALC295 to pin fall back table
	ALSA: hda/realtek - Enable internal speaker of ASUS K6500ZC
	serial: meson: remove redundant initialization of variable id
	tty: serial: meson: retrieve port FIFO size from DT
	serial: meson: Use platform_get_irq() to get the interrupt
	tty: serial: meson: fix hard LOCKUP on crtscts mode
	cpufreq: stats: Fix buffer overflow detection in trans_stats()
	Bluetooth: btusb: Add Realtek RTL8852BE support ID 0x0cb8:0xc559
	bluetooth: Add device 0bda:887b to device tables
	bluetooth: Add device 13d3:3571 to device tables
	Bluetooth: btusb: Add RTW8852BE device 13d3:3570 to device tables
	Bluetooth: btusb: Add 0bda:b85b for Fn-Link RTL8852BE
	PCI: exynos: Don't discard .remove() callback
	arm64: dts: qcom: ipq6018: switch TCSR mutex to MMIO
	arm64: dts: qcom: ipq6018: Fix tcsr_mutex register size
	Revert ncsi: Propagate carrier gain/loss events to the NCSI controller
	lsm: fix default return value for vm_enough_memory
	lsm: fix default return value for inode_getsecctx
	i2c: designware: Disable TX_EMPTY irq while waiting for block length byte
	net: dsa: lan9303: consequently nested-lock physical MDIO
	net: phylink: initialize carrier state at creation
	i2c: i801: fix potential race in i801_block_transaction_byte_by_byte
	f2fs: avoid format-overflow warning
	media: lirc: drop trailing space from scancode transmit
	media: sharp: fix sharp encoding
	media: venus: hfi_parser: Add check to keep the number of codecs within range
	media: venus: hfi: fix the check to handle session buffer requirement
	media: venus: hfi: add checks to handle capabilities from firmware
	nfsd: fix file memleak on client_opens_release
	mm: kmem: drop __GFP_NOFAIL when allocating objcg vectors
	media: qcom: camss: Fix vfe_get() error jump
	Revert "net: r8169: Disable multicast filter for RTL8168H and RTL8107E"
	ext4: apply umask if ACL support is disabled
	ext4: correct offset of gdb backup in non meta_bg group to update_backups
	ext4: correct return value of ext4_convert_meta_bg
	ext4: correct the start block of counting reserved clusters
	ext4: remove gdb backup copy for meta bg in setup_new_flex_group_blocks
	drm/amd/pm: Handle non-terminated overdrive commands.
	drm/amdgpu: fix error handling in amdgpu_bo_list_get()
	drm/amd/display: Change the DMCUB mailbox memory location from FB to inbox
	io_uring/fdinfo: lock SQ thread while retrieving thread cpu/pid
	tracing: Have trace_event_file have ref counters
	netfilter: nftables: update table flags from the commit phase
	netfilter: nf_tables: fix table flag updates
	netfilter: nf_tables: disable toggling dormant table state more than once
	interconnect: qcom: Add support for mask-based BCMs
	Linux 5.10.202

Change-Id: I762bcd4848d9b87cbb4efe4104fe1685999dc0f7
Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
2023-12-08 16:26:36 +00:00

763 lines
20 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* Detect hard and soft lockups on a system
*
* started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
*
* Note: Most of this code is borrowed heavily from the original softlockup
* detector, so thanks to Ingo for the initial implementation.
* Some chunks also taken from the old x86-specific nmi watchdog code, thanks
* to those contributors as well.
*/
#define pr_fmt(fmt) "watchdog: " fmt
#include <linux/mm.h>
#include <linux/cpu.h>
#include <linux/nmi.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/sysctl.h>
#include <linux/tick.h>
#include <linux/sched/clock.h>
#include <linux/sched/debug.h>
#include <linux/sched/isolation.h>
#include <linux/stop_machine.h>
#include <asm/irq_regs.h>
#include <linux/kvm_para.h>
#include <trace/hooks/softlockup.h>
static DEFINE_MUTEX(watchdog_mutex);
#if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HAVE_NMI_WATCHDOG)
# define WATCHDOG_DEFAULT (SOFT_WATCHDOG_ENABLED | NMI_WATCHDOG_ENABLED)
# define NMI_WATCHDOG_DEFAULT 1
#else
# define WATCHDOG_DEFAULT (SOFT_WATCHDOG_ENABLED)
# define NMI_WATCHDOG_DEFAULT 0
#endif
unsigned long __read_mostly watchdog_enabled;
int __read_mostly watchdog_user_enabled = 1;
int __read_mostly nmi_watchdog_user_enabled = NMI_WATCHDOG_DEFAULT;
int __read_mostly soft_watchdog_user_enabled = 1;
int __read_mostly watchdog_thresh = 10;
static int __read_mostly nmi_watchdog_available;
struct cpumask watchdog_cpumask __read_mostly;
unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
#ifdef CONFIG_HARDLOCKUP_DETECTOR
# ifdef CONFIG_SMP
int __read_mostly sysctl_hardlockup_all_cpu_backtrace;
# endif /* CONFIG_SMP */
/*
* Should we panic when a soft-lockup or hard-lockup occurs:
*/
unsigned int __read_mostly hardlockup_panic =
CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;
/*
* We may not want to enable hard lockup detection by default in all cases,
* for example when running the kernel as a guest on a hypervisor. In these
* cases this function can be called to disable hard lockup detection. This
* function should only be executed once by the boot processor before the
* kernel command line parameters are parsed, because otherwise it is not
* possible to override this in hardlockup_panic_setup().
*/
void __init hardlockup_detector_disable(void)
{
nmi_watchdog_user_enabled = 0;
}
static int __init hardlockup_panic_setup(char *str)
{
if (!strncmp(str, "panic", 5))
hardlockup_panic = 1;
else if (!strncmp(str, "nopanic", 7))
hardlockup_panic = 0;
else if (!strncmp(str, "0", 1))
nmi_watchdog_user_enabled = 0;
else if (!strncmp(str, "1", 1))
nmi_watchdog_user_enabled = 1;
return 1;
}
__setup("nmi_watchdog=", hardlockup_panic_setup);
#endif /* CONFIG_HARDLOCKUP_DETECTOR */
/*
* These functions can be overridden if an architecture implements its
* own hardlockup detector.
*
* watchdog_nmi_enable/disable can be implemented to start and stop when
* softlockup watchdog threads start and stop. The arch must select the
* SOFTLOCKUP_DETECTOR Kconfig.
*/
int __weak watchdog_nmi_enable(unsigned int cpu)
{
hardlockup_detector_perf_enable();
return 0;
}
void __weak watchdog_nmi_disable(unsigned int cpu)
{
hardlockup_detector_perf_disable();
}
/* Return 0, if a NMI watchdog is available. Error code otherwise */
int __weak __init watchdog_nmi_probe(void)
{
return hardlockup_detector_perf_init();
}
/**
* watchdog_nmi_stop - Stop the watchdog for reconfiguration
*
* The reconfiguration steps are:
* watchdog_nmi_stop();
* update_variables();
* watchdog_nmi_start();
*/
void __weak watchdog_nmi_stop(void) { }
/**
* watchdog_nmi_start - Start the watchdog after reconfiguration
*
* Counterpart to watchdog_nmi_stop().
*
* The following variables have been updated in update_variables() and
* contain the currently valid configuration:
* - watchdog_enabled
* - watchdog_thresh
* - watchdog_cpumask
*/
void __weak watchdog_nmi_start(void) { }
/**
* lockup_detector_update_enable - Update the sysctl enable bit
*
* Caller needs to make sure that the NMI/perf watchdogs are off, so this
* can't race with watchdog_nmi_disable().
*/
static void lockup_detector_update_enable(void)
{
watchdog_enabled = 0;
if (!watchdog_user_enabled)
return;
if (nmi_watchdog_available && nmi_watchdog_user_enabled)
watchdog_enabled |= NMI_WATCHDOG_ENABLED;
if (soft_watchdog_user_enabled)
watchdog_enabled |= SOFT_WATCHDOG_ENABLED;
}
#ifdef CONFIG_SOFTLOCKUP_DETECTOR
#define SOFTLOCKUP_RESET ULONG_MAX
#ifdef CONFIG_SMP
int __read_mostly sysctl_softlockup_all_cpu_backtrace;
#endif
static struct cpumask watchdog_allowed_mask __read_mostly;
/* Global variables, exported for sysctl */
unsigned int __read_mostly softlockup_panic =
CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
static bool softlockup_initialized __read_mostly;
static u64 __read_mostly sample_period;
static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
static DEFINE_PER_CPU(bool, softlockup_touch_sync);
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
static unsigned long soft_lockup_nmi_warn;
static int __init softlockup_panic_setup(char *str)
{
softlockup_panic = simple_strtoul(str, NULL, 0);
return 1;
}
__setup("softlockup_panic=", softlockup_panic_setup);
static int __init nowatchdog_setup(char *str)
{
watchdog_user_enabled = 0;
return 1;
}
__setup("nowatchdog", nowatchdog_setup);
static int __init nosoftlockup_setup(char *str)
{
soft_watchdog_user_enabled = 0;
return 1;
}
__setup("nosoftlockup", nosoftlockup_setup);
static int __init watchdog_thresh_setup(char *str)
{
get_option(&str, &watchdog_thresh);
return 1;
}
__setup("watchdog_thresh=", watchdog_thresh_setup);
static void __lockup_detector_cleanup(void);
/*
* Hard-lockup warnings should be triggered after just a few seconds. Soft-
* lockups can have false positives under extreme conditions. So we generally
* want a higher threshold for soft lockups than for hard lockups. So we couple
* the thresholds with a factor: we make the soft threshold twice the amount of
* time the hard threshold is.
*/
static int get_softlockup_thresh(void)
{
return watchdog_thresh * 2;
}
/*
* Returns seconds, approximately. We don't need nanosecond
* resolution, and we don't need to waste time with a big divide when
* 2^30ns == 1.074s.
*/
static unsigned long get_timestamp(void)
{
return running_clock() >> 30LL; /* 2^30 ~= 10^9 */
}
static void set_sample_period(void)
{
/*
* convert watchdog_thresh from seconds to ns
* the divide by 5 is to give hrtimer several chances (two
* or three with the current relation between the soft
* and hard thresholds) to increment before the
* hardlockup detector generates a warning
*/
sample_period = get_softlockup_thresh() * ((u64)NSEC_PER_SEC / 5);
watchdog_update_hrtimer_threshold(sample_period);
}
/* Commands for resetting the watchdog */
static void update_touch_ts(void)
{
__this_cpu_write(watchdog_touch_ts, get_timestamp());
}
/**
* touch_softlockup_watchdog_sched - touch watchdog on scheduler stalls
*
* Call when the scheduler may have stalled for legitimate reasons
* preventing the watchdog task from executing - e.g. the scheduler
* entering idle state. This should only be used for scheduler events.
* Use touch_softlockup_watchdog() for everything else.
*/
notrace void touch_softlockup_watchdog_sched(void)
{
/*
* Preemption can be enabled. It doesn't matter which CPU's timestamp
* gets zeroed here, so use the raw_ operation.
*/
raw_cpu_write(watchdog_touch_ts, SOFTLOCKUP_RESET);
}
notrace void touch_softlockup_watchdog(void)
{
touch_softlockup_watchdog_sched();
wq_watchdog_touch(raw_smp_processor_id());
}
EXPORT_SYMBOL(touch_softlockup_watchdog);
void touch_all_softlockup_watchdogs(void)
{
int cpu;
/*
* watchdog_mutex cannpt be taken here, as this might be called
* from (soft)interrupt context, so the access to
* watchdog_allowed_cpumask might race with a concurrent update.
*
* The watchdog time stamp can race against a concurrent real
* update as well, the only side effect might be a cycle delay for
* the softlockup check.
*/
for_each_cpu(cpu, &watchdog_allowed_mask)
per_cpu(watchdog_touch_ts, cpu) = SOFTLOCKUP_RESET;
wq_watchdog_touch(-1);
}
void touch_softlockup_watchdog_sync(void)
{
__this_cpu_write(softlockup_touch_sync, true);
__this_cpu_write(watchdog_touch_ts, SOFTLOCKUP_RESET);
}
static int is_softlockup(unsigned long touch_ts)
{
unsigned long now = get_timestamp();
if ((watchdog_enabled & SOFT_WATCHDOG_ENABLED) && watchdog_thresh){
/* Warn about unreasonable delays. */
if (time_after(now, touch_ts + get_softlockup_thresh()))
return now - touch_ts;
}
return 0;
}
/* watchdog detector functions */
bool is_hardlockup(void)
{
unsigned long hrint = __this_cpu_read(hrtimer_interrupts);
if (__this_cpu_read(hrtimer_interrupts_saved) == hrint)
return true;
__this_cpu_write(hrtimer_interrupts_saved, hrint);
return false;
}
static void watchdog_interrupt_count(void)
{
__this_cpu_inc(hrtimer_interrupts);
}
static DEFINE_PER_CPU(struct completion, softlockup_completion);
static DEFINE_PER_CPU(struct cpu_stop_work, softlockup_stop_work);
/*
* The watchdog thread function - touches the timestamp.
*
* It only runs once every sample_period seconds (4 seconds by
* default) to reset the softlockup timestamp. If this gets delayed
* for more than 2*watchdog_thresh seconds then the debug-printout
* triggers in watchdog_timer_fn().
*/
static int softlockup_fn(void *data)
{
update_touch_ts();
complete(this_cpu_ptr(&softlockup_completion));
return 0;
}
/* watchdog kicker functions */
static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
{
unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts);
struct pt_regs *regs = get_irq_regs();
int duration;
int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace;
if (!watchdog_enabled)
return HRTIMER_NORESTART;
/* kick the hardlockup detector */
watchdog_interrupt_count();
/* kick the softlockup detector */
if (completion_done(this_cpu_ptr(&softlockup_completion))) {
reinit_completion(this_cpu_ptr(&softlockup_completion));
stop_one_cpu_nowait(smp_processor_id(),
softlockup_fn, NULL,
this_cpu_ptr(&softlockup_stop_work));
}
/* .. and repeat */
hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));
if (touch_ts == SOFTLOCKUP_RESET) {
if (unlikely(__this_cpu_read(softlockup_touch_sync))) {
/*
* If the time stamp was touched atomically
* make sure the scheduler tick is up to date.
*/
__this_cpu_write(softlockup_touch_sync, false);
sched_clock_tick();
}
/* Clear the guest paused flag on watchdog reset */
kvm_check_and_clear_guest_paused();
update_touch_ts();
return HRTIMER_RESTART;
}
/* check for a softlockup
* This is done by making sure a high priority task is
* being scheduled. The task touches the watchdog to
* indicate it is getting cpu time. If it hasn't then
* this is a good indication some task is hogging the cpu
*/
duration = is_softlockup(touch_ts);
if (unlikely(duration)) {
/*
* If a virtual machine is stopped by the host it can look to
* the watchdog like a soft lockup, check to see if the host
* stopped the vm before we issue the warning
*/
if (kvm_check_and_clear_guest_paused())
return HRTIMER_RESTART;
/*
* Prevent multiple soft-lockup reports if one cpu is already
* engaged in dumping all cpu back traces.
*/
if (softlockup_all_cpu_backtrace) {
if (test_and_set_bit_lock(0, &soft_lockup_nmi_warn))
return HRTIMER_RESTART;
}
/* Start period for the next softlockup warning. */
update_touch_ts();
pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
smp_processor_id(), duration,
current->comm, task_pid_nr(current));
print_modules();
print_irqtrace_events(current);
if (regs)
show_regs(regs);
else
dump_stack();
if (softlockup_all_cpu_backtrace) {
trigger_allbutself_cpu_backtrace();
clear_bit_unlock(0, &soft_lockup_nmi_warn);
}
trace_android_vh_watchdog_timer_softlockup(duration, regs, !!softlockup_panic);
add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
if (softlockup_panic)
panic("softlockup: hung tasks");
}
return HRTIMER_RESTART;
}
static void watchdog_enable(unsigned int cpu)
{
struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer);
struct completion *done = this_cpu_ptr(&softlockup_completion);
WARN_ON_ONCE(cpu != smp_processor_id());
init_completion(done);
complete(done);
/*
* Start the timer first to prevent the NMI watchdog triggering
* before the timer has a chance to fire.
*/
hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
hrtimer->function = watchdog_timer_fn;
hrtimer_start(hrtimer, ns_to_ktime(sample_period),
HRTIMER_MODE_REL_PINNED_HARD);
/* Initialize timestamp */
update_touch_ts();
/* Enable the perf event */
if (watchdog_enabled & NMI_WATCHDOG_ENABLED)
watchdog_nmi_enable(cpu);
}
static void watchdog_disable(unsigned int cpu)
{
struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer);
WARN_ON_ONCE(cpu != smp_processor_id());
/*
* Disable the perf event first. That prevents that a large delay
* between disabling the timer and disabling the perf event causes
* the perf NMI to detect a false positive.
*/
watchdog_nmi_disable(cpu);
hrtimer_cancel(hrtimer);
wait_for_completion(this_cpu_ptr(&softlockup_completion));
}
static int softlockup_stop_fn(void *data)
{
watchdog_disable(smp_processor_id());
return 0;
}
static void softlockup_stop_all(void)
{
int cpu;
if (!softlockup_initialized)
return;
for_each_cpu(cpu, &watchdog_allowed_mask)
smp_call_on_cpu(cpu, softlockup_stop_fn, NULL, false);
cpumask_clear(&watchdog_allowed_mask);
}
static int softlockup_start_fn(void *data)
{
watchdog_enable(smp_processor_id());
return 0;
}
static void softlockup_start_all(void)
{
int cpu;
cpumask_copy(&watchdog_allowed_mask, &watchdog_cpumask);
for_each_cpu(cpu, &watchdog_allowed_mask)
smp_call_on_cpu(cpu, softlockup_start_fn, NULL, false);
}
int lockup_detector_online_cpu(unsigned int cpu)
{
if (cpumask_test_cpu(cpu, &watchdog_allowed_mask))
watchdog_enable(cpu);
return 0;
}
int lockup_detector_offline_cpu(unsigned int cpu)
{
if (cpumask_test_cpu(cpu, &watchdog_allowed_mask))
watchdog_disable(cpu);
return 0;
}
static void __lockup_detector_reconfigure(void)
{
cpus_read_lock();
watchdog_nmi_stop();
softlockup_stop_all();
set_sample_period();
lockup_detector_update_enable();
if (watchdog_enabled && watchdog_thresh)
softlockup_start_all();
watchdog_nmi_start();
cpus_read_unlock();
/*
* Must be called outside the cpus locked section to prevent
* recursive locking in the perf code.
*/
__lockup_detector_cleanup();
}
void lockup_detector_reconfigure(void)
{
mutex_lock(&watchdog_mutex);
__lockup_detector_reconfigure();
mutex_unlock(&watchdog_mutex);
}
/*
* Create the watchdog thread infrastructure and configure the detector(s).
*
* The threads are not unparked as watchdog_allowed_mask is empty. When
* the threads are successfully initialized, take the proper locks and
* unpark the threads in the watchdog_cpumask if the watchdog is enabled.
*/
static __init void lockup_detector_setup(void)
{
/*
* If sysctl is off and watchdog got disabled on the command line,
* nothing to do here.
*/
lockup_detector_update_enable();
if (!IS_ENABLED(CONFIG_SYSCTL) &&
!(watchdog_enabled && watchdog_thresh))
return;
mutex_lock(&watchdog_mutex);
__lockup_detector_reconfigure();
softlockup_initialized = true;
mutex_unlock(&watchdog_mutex);
}
#else /* CONFIG_SOFTLOCKUP_DETECTOR */
static void __lockup_detector_reconfigure(void)
{
cpus_read_lock();
watchdog_nmi_stop();
lockup_detector_update_enable();
watchdog_nmi_start();
cpus_read_unlock();
}
void lockup_detector_reconfigure(void)
{
__lockup_detector_reconfigure();
}
static inline void lockup_detector_setup(void)
{
__lockup_detector_reconfigure();
}
#endif /* !CONFIG_SOFTLOCKUP_DETECTOR */
static void __lockup_detector_cleanup(void)
{
lockdep_assert_held(&watchdog_mutex);
hardlockup_detector_perf_cleanup();
}
/**
* lockup_detector_cleanup - Cleanup after cpu hotplug or sysctl changes
*
* Caller must not hold the cpu hotplug rwsem.
*/
void lockup_detector_cleanup(void)
{
mutex_lock(&watchdog_mutex);
__lockup_detector_cleanup();
mutex_unlock(&watchdog_mutex);
}
/**
* lockup_detector_soft_poweroff - Interface to stop lockup detector(s)
*
* Special interface for parisc. It prevents lockup detector warnings from
* the default pm_poweroff() function which busy loops forever.
*/
void lockup_detector_soft_poweroff(void)
{
watchdog_enabled = 0;
}
#ifdef CONFIG_SYSCTL
/* Propagate any changes to the watchdog threads */
static void proc_watchdog_update(void)
{
/* Remove impossible cpus to keep sysctl output clean. */
cpumask_and(&watchdog_cpumask, &watchdog_cpumask, cpu_possible_mask);
__lockup_detector_reconfigure();
}
/*
* common function for watchdog, nmi_watchdog and soft_watchdog parameter
*
* caller | table->data points to | 'which'
* -------------------|----------------------------|--------------------------
* proc_watchdog | watchdog_user_enabled | NMI_WATCHDOG_ENABLED |
* | | SOFT_WATCHDOG_ENABLED
* -------------------|----------------------------|--------------------------
* proc_nmi_watchdog | nmi_watchdog_user_enabled | NMI_WATCHDOG_ENABLED
* -------------------|----------------------------|--------------------------
* proc_soft_watchdog | soft_watchdog_user_enabled | SOFT_WATCHDOG_ENABLED
*/
static int proc_watchdog_common(int which, struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int err, old, *param = table->data;
mutex_lock(&watchdog_mutex);
if (!write) {
/*
* On read synchronize the userspace interface. This is a
* racy snapshot.
*/
*param = (watchdog_enabled & which) != 0;
err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
} else {
old = READ_ONCE(*param);
err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (!err && old != READ_ONCE(*param))
proc_watchdog_update();
}
mutex_unlock(&watchdog_mutex);
return err;
}
/*
* /proc/sys/kernel/watchdog
*/
int proc_watchdog(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
return proc_watchdog_common(NMI_WATCHDOG_ENABLED|SOFT_WATCHDOG_ENABLED,
table, write, buffer, lenp, ppos);
}
/*
* /proc/sys/kernel/nmi_watchdog
*/
int proc_nmi_watchdog(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
if (!nmi_watchdog_available && write)
return -ENOTSUPP;
return proc_watchdog_common(NMI_WATCHDOG_ENABLED,
table, write, buffer, lenp, ppos);
}
/*
* /proc/sys/kernel/soft_watchdog
*/
int proc_soft_watchdog(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
return proc_watchdog_common(SOFT_WATCHDOG_ENABLED,
table, write, buffer, lenp, ppos);
}
/*
* /proc/sys/kernel/watchdog_thresh
*/
int proc_watchdog_thresh(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int err, old;
mutex_lock(&watchdog_mutex);
old = READ_ONCE(watchdog_thresh);
err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (!err && write && old != READ_ONCE(watchdog_thresh))
proc_watchdog_update();
mutex_unlock(&watchdog_mutex);
return err;
}
/*
* The cpumask is the mask of possible cpus that the watchdog can run
* on, not the mask of cpus it is actually running on. This allows the
* user to specify a mask that will include cpus that have not yet
* been brought online, if desired.
*/
int proc_watchdog_cpumask(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int err;
mutex_lock(&watchdog_mutex);
err = proc_do_large_bitmap(table, write, buffer, lenp, ppos);
if (!err && write)
proc_watchdog_update();
mutex_unlock(&watchdog_mutex);
return err;
}
#endif /* CONFIG_SYSCTL */
void __init lockup_detector_init(void)
{
if (tick_nohz_full_enabled())
pr_info("Disabling watchdog on nohz_full cores by default\n");
cpumask_copy(&watchdog_cpumask,
housekeeping_cpumask(HK_FLAG_TIMER));
if (!watchdog_nmi_probe())
nmi_watchdog_available = true;
lockup_detector_setup();
}