android_kernel_xiaomi_sm8450/net/sunrpc/sched.c
Greg Kroah-Hartman b84ad15be5 This is the 5.10.224 stable release
-----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEEZH8oZUiU471FcZm+ONu9yGCSaT4FAmbCv24ACgkQONu9yGCS
 aT7lNRAAzP2lSCUHROaMTldoQdahqoWqwFSiMI9p32HYLTerpg1GHVsi1IUvD+pv
 zhmUG9w+ACbSbZ9337G61FeEDCIBzgqaIXLCtbK2Be9nWMa9I1ZtMSFUKoSmVJBw
 YbrI/UOscJmAf44G6DeMp+N+/S2o7INK463u51SYjufo/zhFF8KsYElm23p06kgn
 lTkkUAoo9mSVvEr64zbjwLrWyBWTlcvYH/xrkWeJWXl+hBv0K5Ig9IBm0sc0DSQR
 fErADzDLFkmD9pduZbMwbzUUzC8ST41KKjTgClaHQhSMeoLoWT8CJM5Swwds4XVE
 JkoClkqnj3+stYFpLFm9UUgZ12wu/9slzgRCN6fTraSNT8gE9F9BRJXFGL+3S5OO
 oHKZYEEPTZDsD3PihgufJ4Ft27+KpMUzAgQUmVH/y47wrVJ2pf4fCK8LKT0MbjBi
 pjZaDRCxwo1aORL3+jYJBVRecrNqQ0DhacYOKznhb2KKeaHojIwLaE6k/W/0Q8U5
 1uMYv+NJ3LWDNzGcNUTCfNtuDELOpkp24Xc8RN0MK2iMMMyfjMpgKssjSBZtz0QW
 NH0UVpfiWKECKH+m03NeFnYdMuK8/VyM8vatkcemz0FfgJP2UazeiVwSujfS2r2S
 0TtsCMPP3kgKa9mAnni7lQs4wkG+OTNDNZqbuDqFZ1rHUS2Usrg=
 =8i2e
 -----END PGP SIGNATURE-----

Merge 5.10.224 into android12-5.10-lts

Changes in 5.10.224
	EDAC/skx_common: Add new ADXL components for 2-level memory
	EDAC, i10nm: make skx_common.o a separate module
	platform/chrome: cros_ec_debugfs: fix wrong EC message version
	hfsplus: fix to avoid false alarm of circular locking
	x86/of: Return consistent error type from x86_of_pci_irq_enable()
	x86/pci/intel_mid_pci: Fix PCIBIOS_* return code handling
	x86/pci/xen: Fix PCIBIOS_* return code handling
	x86/platform/iosf_mbi: Convert PCIBIOS_* return codes to errnos
	hwmon: (adt7475) Fix default duty on fan is disabled
	pwm: stm32: Always do lazy disabling
	hwmon: (max6697) Fix underflow when writing limit attributes
	hwmon: (max6697) Fix swapped temp{1,8} critical alarms
	arm64: dts: qcom: sdm845: add power-domain to UFS PHY
	soc: qcom: rpmh-rsc: Ensure irqs aren't disabled by rpmh_rsc_send_data() callers
	arm64: dts: qcom: msm8996: specify UFS core_clk frequencies
	soc: qcom: pdr: protect locator_addr with the main mutex
	soc: qcom: pdr: fix parsing of domains lists
	arm64: dts: rockchip: Increase VOP clk rate on RK3328
	ARM: dts: imx6qdl-kontron-samx6i: move phy reset into phy-node
	ARM: dts: imx6qdl-kontron-samx6i: fix PHY reset
	ARM: dts: imx6qdl-kontron-samx6i: fix board reset
	ARM: dts: imx6qdl-kontron-samx6i: fix SPI0 chip selects
	ARM: dts: imx6qdl-kontron-samx6i: fix PCIe reset polarity
	arm64: dts: mediatek: mt8183-kukui: Drop bogus output-enable property
	arm64: dts: mediatek: mt7622: fix "emmc" pinctrl mux
	arm64: dts: amlogic: gx: correct hdmi clocks
	m68k: atari: Fix TT bootup freeze / unexpected (SCU) interrupt messages
	x86/xen: Convert comma to semicolon
	m68k: cmpxchg: Fix return value for default case in __arch_xchg()
	ARM: pxa: spitz: use gpio descriptors for audio
	ARM: spitz: fix GPIO assignment for backlight
	firmware: turris-mox-rwtm: Fix checking return value of wait_for_completion_timeout()
	firmware: turris-mox-rwtm: Initialize completion before mailbox
	wifi: brcmsmac: LCN PHY code is used for BCM4313 2G-only device
	selftests/bpf: Fix prog numbers in test_sockmap
	net: esp: cleanup esp_output_tail_tcp() in case of unsupported ESPINTCP
	net/smc: Allow SMC-D 1MB DMB allocations
	net/smc: set rmb's SG_MAX_SINGLE_ALLOC limitation only when CONFIG_ARCH_NO_SG_CHAIN is defined
	selftests/bpf: Check length of recv in test_sockmap
	lib: objagg: Fix general protection fault
	mlxsw: spectrum_acl_erp: Fix object nesting warning
	mlxsw: spectrum_acl_bloom_filter: Make mlxsw_sp_acl_bf_key_encode() more flexible
	mlxsw: spectrum_acl: Fix ACL scale regression and firmware errors
	ath11k: dp: stop rx pktlog before suspend
	wifi: ath11k: fix wrong handling of CCMP256 and GCMP ciphers
	wifi: cfg80211: fix typo in cfg80211_calculate_bitrate_he()
	wifi: cfg80211: handle 2x996 RU allocation in cfg80211_calculate_bitrate_he()
	net: fec: Refactor: #define magic constants
	net: fec: Fix FEC_ECR_EN1588 being cleared on link-down
	ipvs: Avoid unnecessary calls to skb_is_gso_sctp
	netfilter: nf_tables: rise cap on SELinux secmark context
	perf/x86/intel/pt: Fix pt_topa_entry_for_page() address calculation
	perf: Fix perf_aux_size() for greater-than 32-bit size
	perf: Prevent passing zero nr_pages to rb_alloc_aux()
	qed: Improve the stack space of filter_config()
	wifi: virt_wifi: avoid reporting connection success with wrong SSID
	gss_krb5: Fix the error handling path for crypto_sync_skcipher_setkey
	wifi: virt_wifi: don't use strlen() in const context
	selftests/bpf: Close fd in error path in drop_on_reuseport
	bpf: annotate BTF show functions with __printf
	bna: adjust 'name' buf size of bna_tcb and bna_ccb structures
	bpf: Eliminate remaining "make W=1" warnings in kernel/bpf/btf.o
	selftests: forwarding: devlink_lib: Wait for udev events after reloading
	xdp: fix invalid wait context of page_pool_destroy()
	drm/panel: boe-tv101wum-nl6: If prepare fails, disable GPIO before regulators
	drm/panel: boe-tv101wum-nl6: Check for errors on the NOP in prepare()
	media: dvb-usb: Fix unexpected infinite loop in dvb_usb_read_remote_control()
	media: imon: Fix race getting ictx->lock
	saa7134: Unchecked i2c_transfer function result fixed
	media: uvcvideo: Allow entity-defined get_info and get_cur
	media: uvcvideo: Override default flags
	media: renesas: vsp1: Fix _irqsave and _irq mix
	media: renesas: vsp1: Store RPF partition configuration per RPF instance
	leds: trigger: Unregister sysfs attributes before calling deactivate()
	perf report: Fix condition in sort__sym_cmp()
	drm/etnaviv: fix DMA direction handling for cached RW buffers
	drm/qxl: Add check for drm_cvt_mode
	Revert "leds: led-core: Fix refcount leak in of_led_get()"
	ext4: fix infinite loop when replaying fast_commit
	media: venus: flush all buffers in output plane streamoff
	mfd: omap-usb-tll: Use struct_size to allocate tll
	xprtrdma: Rename frwr_release_mr()
	xprtrdma: Fix rpcrdma_reqs_reset()
	SUNRPC: avoid soft lockup when transmitting UDP to reachable server.
	ext4: avoid writing unitialized memory to disk in EA inodes
	sparc64: Fix incorrect function signature and add prototype for prom_cif_init
	SUNRPC: Fixup gss_status tracepoint error output
	PCI: Fix resource double counting on remove & rescan
	coresight: Fix ref leak when of_coresight_parse_endpoint() fails
	Input: qt1050 - handle CHIP_ID reading error
	RDMA/mlx4: Fix truncated output warning in mad.c
	RDMA/mlx4: Fix truncated output warning in alias_GUID.c
	RDMA/rxe: Don't set BTH_ACK_MASK for UC or UD QPs
	ASoC: max98088: Check for clk_prepare_enable() error
	mtd: make mtd_test.c a separate module
	RDMA/device: Return error earlier if port in not valid
	Input: elan_i2c - do not leave interrupt disabled on suspend failure
	MIPS: Octeron: remove source file executable bit
	powerpc/xmon: Fix disassembly CPU feature checks
	macintosh/therm_windtunnel: fix module unload.
	RDMA/hns: Fix missing pagesize and alignment check in FRMR
	bnxt_re: Fix imm_data endianness
	netfilter: ctnetlink: use helper function to calculate expect ID
	net: dsa: mv88e6xxx: Limit chip-wide frame size config to CPU ports
	net: dsa: b53: Limit chip-wide jumbo frame config to CPU ports
	pinctrl: rockchip: update rk3308 iomux routes
	pinctrl: core: fix possible memory leak when pinctrl_enable() fails
	pinctrl: single: fix possible memory leak when pinctrl_enable() fails
	pinctrl: ti: ti-iodelay: Drop if block with always false condition
	pinctrl: ti: ti-iodelay: fix possible memory leak when pinctrl_enable() fails
	pinctrl: freescale: mxs: Fix refcount of child
	fs/proc/task_mmu: indicate PM_FILE for PMD-mapped file THP
	fs/nilfs2: remove some unused macros to tame gcc
	nilfs2: avoid undefined behavior in nilfs_cnt32_ge macro
	rtc: interface: Add RTC offset to alarm after fix-up
	dt-bindings: thermal: correct thermal zone node name limit
	tick/broadcast: Make takeover of broadcast hrtimer reliable
	net: netconsole: Disable target before netpoll cleanup
	af_packet: Handle outgoing VLAN packets without hardware offloading
	ipv6: take care of scope when choosing the src addr
	sched/fair: set_load_weight() must also call reweight_task() for SCHED_IDLE tasks
	char: tpm: Fix possible memory leak in tpm_bios_measurements_open()
	media: venus: fix use after free in vdec_close
	hfs: fix to initialize fields of hfs_inode_info after hfs_alloc_inode()
	ext2: Verify bitmap and itable block numbers before using them
	drm/gma500: fix null pointer dereference in cdv_intel_lvds_get_modes
	drm/gma500: fix null pointer dereference in psb_intel_lvds_get_modes
	scsi: qla2xxx: Fix optrom version displayed in FDMI
	drm/amd/display: Check for NULL pointer
	sched/fair: Use all little CPUs for CPU-bound workloads
	apparmor: use kvfree_sensitive to free data->data
	task_work: s/task_work_cancel()/task_work_cancel_func()/
	task_work: Introduce task_work_cancel() again
	udf: Avoid using corrupted block bitmap buffer
	m68k: amiga: Turn off Warp1260 interrupts during boot
	ext4: check dot and dotdot of dx_root before making dir indexed
	ext4: make sure the first directory block is not a hole
	wifi: mwifiex: Fix interface type change
	leds: ss4200: Convert PCIBIOS_* return codes to errnos
	jbd2: make jbd2_journal_get_max_txn_bufs() internal
	KVM: VMX: Split out the non-virtualization part of vmx_interrupt_blocked()
	tools/memory-model: Fix bug in lock.cat
	hwrng: amd - Convert PCIBIOS_* return codes to errnos
	PCI: hv: Return zero, not garbage, when reading PCI_INTERRUPT_PIN
	PCI: rockchip: Use GPIOD_OUT_LOW flag while requesting ep_gpio
	binder: fix hang of unregistered readers
	dev/parport: fix the array out-of-bounds risk
	scsi: qla2xxx: Return ENOBUFS if sg_cnt is more than one for ELS cmds
	f2fs: fix to don't dirty inode for readonly filesystem
	clk: davinci: da8xx-cfgchip: Initialize clk_init_data before use
	ubi: eba: properly rollback inside self_check_eba
	decompress_bunzip2: fix rare decompression failure
	kbuild: Fix '-S -c' in x86 stack protector scripts
	kobject_uevent: Fix OOB access within zap_modalias_env()
	devres: Fix devm_krealloc() wasting memory
	rtc: cmos: Fix return value of nvmem callbacks
	scsi: qla2xxx: During vport delete send async logout explicitly
	scsi: qla2xxx: Fix for possible memory corruption
	scsi: qla2xxx: Fix flash read failure
	scsi: qla2xxx: Complete command early within lock
	scsi: qla2xxx: validate nvme_local_port correctly
	perf/x86/intel/pt: Fix topa_entry base length
	perf/x86/intel/pt: Fix a topa_entry base address calculation
	rtc: isl1208: Fix return value of nvmem callbacks
	watchdog/perf: properly initialize the turbo mode timestamp and rearm counter
	platform: mips: cpu_hwmon: Disable driver on unsupported hardware
	RDMA/iwcm: Fix a use-after-free related to destroying CM IDs
	selftests/sigaltstack: Fix ppc64 GCC build
	rbd: don't assume rbd_is_lock_owner() for exclusive mappings
	MIPS: ip30: ip30-console: Add missing include
	MIPS: Loongson64: env: Hook up Loongsson-2K
	drm/panfrost: Mark simple_ondemand governor as softdep
	rbd: rename RBD_LOCK_STATE_RELEASING and releasing_wait
	rbd: don't assume RBD_LOCK_STATE_LOCKED for exclusive mappings
	Bluetooth: btusb: Add RTL8852BE device 0489:e125 to device tables
	Bluetooth: btusb: Add Realtek RTL8852BE support ID 0x13d3:0x3591
	nilfs2: handle inconsistent state in nilfs_btnode_create_block()
	io_uring/io-wq: limit retrying worker initialisation
	kernel: rerun task_work while freezing in get_signal()
	kdb: address -Wformat-security warnings
	kdb: Use the passed prompt in kdb_position_cursor()
	jfs: Fix array-index-out-of-bounds in diFree
	um: time-travel: fix time-travel-start option
	f2fs: fix start segno of large section
	libbpf: Fix no-args func prototype BTF dumping syntax
	dma: fix call order in dmam_free_coherent
	MIPS: SMP-CPS: Fix address for GCR_ACCESS register for CM3 and later
	ipv4: Fix incorrect source address in Record Route option
	net: bonding: correctly annotate RCU in bond_should_notify_peers()
	netfilter: nft_set_pipapo_avx2: disable softinterrupts
	tipc: Return non-zero value from tipc_udp_addr2str() on error
	net: stmmac: Correct byte order of perfect_match
	net: nexthop: Initialize all fields in dumped nexthops
	bpf: Fix a segment issue when downgrading gso_size
	mISDN: Fix a use after free in hfcmulti_tx()
	apparmor: Fix null pointer deref when receiving skb during sock creation
	powerpc: fix a file leak in kvm_vcpu_ioctl_enable_cap()
	lirc: rc_dev_get_from_fd(): fix file leak
	ASoC: Intel: use soc_intel_is_byt_cr() only when IOSF_MBI is reachable
	ceph: fix incorrect kmalloc size of pagevec mempool
	nvme: split command copy into a helper
	nvme-pci: add missing condition check for existence of mapped data
	fs: don't allow non-init s_user_ns for filesystems without FS_USERNS_MOUNT
	powerpc/configs: Update defconfig with now user-visible CONFIG_FSL_IFC
	fuse: name fs_context consistently
	fuse: verify {g,u}id mount options correctly
	sysctl: always initialize i_uid/i_gid
	ext4: factor out a common helper to query extent map
	ext4: check the extent status again before inserting delalloc block
	soc: xilinx: move PM_INIT_FINALIZE to zynqmp_pm_domains driver
	drivers: soc: xilinx: check return status of get_api_version()
	driver core: Cast to (void *) with __force for __percpu pointer
	devres: Fix memory leakage caused by driver API devm_free_percpu()
	genirq: Allow the PM device to originate from irq domain
	irqchip/imx-irqsteer: Constify irq_chip struct
	irqchip/imx-irqsteer: Add runtime PM support
	irqchip/imx-irqsteer: Handle runtime power management correctly
	remoteproc: imx_rproc: ignore mapping vdev regions
	remoteproc: imx_rproc: Fix ignoring mapping vdev regions
	remoteproc: imx_rproc: Skip over memory region when node value is NULL
	drm/nouveau: prime: fix refcount underflow
	drm/vmwgfx: Fix overlay when using Screen Targets
	sched: act_ct: take care of padding in struct zones_ht_key
	net/iucv: fix use after free in iucv_sock_close()
	net/mlx5e: Add a check for the return value from mlx5_port_set_eth_ptys
	ipv6: fix ndisc_is_useropt() handling for PIO
	riscv/mm: Add handling for VM_FAULT_SIGSEGV in mm_fault_error()
	platform/chrome: cros_ec_proto: Lock device when updating MKBP version
	HID: wacom: Modify pen IDs
	protect the fetch of ->fd[fd] in do_dup2() from mispredictions
	ALSA: usb-audio: Correct surround channels in UAC1 channel map
	ALSA: hda/realtek: Add quirk for Acer Aspire E5-574G
	net: usb: sr9700: fix uninitialized variable use in sr_mdio_read
	r8169: don't increment tx_dropped in case of NETDEV_TX_BUSY
	mptcp: fix duplicate data handling
	netfilter: ipset: Add list flush to cancel_gc
	genirq: Allow irq_chip registration functions to take a const irq_chip
	irqchip/mbigen: Fix mbigen node address layout
	x86/mm: Fix pti_clone_pgtable() alignment assumption
	x86/mm: Fix pti_clone_entry_text() for i386
	sctp: move hlist_node and hashent out of sctp_ep_common
	sctp: Fix null-ptr-deref in reuseport_add_sock().
	net: usb: qmi_wwan: fix memory leak for not ip packets
	net: linkwatch: use system_unbound_wq
	Bluetooth: l2cap: always unlock channel in l2cap_conless_channel()
	net: dsa: bcm_sf2: Fix a possible memory leak in bcm_sf2_mdio_register()
	l2tp: fix lockdep splat
	net: fec: Stop PPS on driver remove
	rcutorture: Fix rcu_torture_fwd_cb_cr() data race
	md: do not delete safemode_timer in mddev_suspend
	md/raid5: avoid BUG_ON() while continue reshape after reassembling
	clocksource/drivers/sh_cmt: Address race condition for clock events
	ACPI: battery: create alarm sysfs attribute atomically
	ACPI: SBS: manage alarm sysfs attribute through psy core
	selftests/bpf: Fix send_signal test with nested CONFIG_PARAVIRT
	PCI: Add Edimax Vendor ID to pci_ids.h
	udf: prevent integer overflow in udf_bitmap_free_blocks()
	wifi: nl80211: don't give key data to userspace
	btrfs: fix bitmap leak when loading free space cache on duplicate entry
	drm/amdgpu: Fix the null pointer dereference to ras_manager
	drm/amdgpu/pm: Fix the null pointer dereference in apply_state_adjust_rules
	media: uvcvideo: Ignore empty TS packets
	media: uvcvideo: Fix the bandwdith quirk on USB 3.x
	jbd2: avoid memleak in jbd2_journal_write_metadata_buffer
	s390/sclp: Prevent release of buffer in I/O
	SUNRPC: Fix a race to wake a sync task
	sched/cputime: Fix mul_u64_u64_div_u64() precision for cputime
	ext4: fix wrong unit use in ext4_mb_find_by_goal
	arm64: cpufeature: Force HWCAP to be based on the sysreg visible to user-space
	arm64: Add Neoverse-V2 part
	arm64: cputype: Add Cortex-X4 definitions
	arm64: cputype: Add Neoverse-V3 definitions
	arm64: errata: Add workaround for Arm errata 3194386 and 3312417
	arm64: cputype: Add Cortex-X3 definitions
	arm64: cputype: Add Cortex-A720 definitions
	arm64: cputype: Add Cortex-X925 definitions
	arm64: errata: Unify speculative SSBS errata logic
	arm64: errata: Expand speculative SSBS workaround
	arm64: cputype: Add Cortex-X1C definitions
	arm64: cputype: Add Cortex-A725 definitions
	arm64: errata: Expand speculative SSBS workaround (again)
	i2c: smbus: Improve handling of stuck alerts
	ASoC: codecs: wsa881x: Correct Soundwire ports mask
	i2c: smbus: Send alert notifications to all devices if source not found
	bpf: kprobe: remove unused declaring of bpf_kprobe_override
	kprobes: Fix to check symbol prefixes correctly
	spi: spi-fsl-lpspi: Fix scldiv calculation
	ALSA: usb-audio: Re-add ScratchAmp quirk entries
	drm/client: fix null pointer dereference in drm_client_modeset_probe
	ALSA: line6: Fix racy access to midibuf
	ALSA: hda: Add HP MP9 G4 Retail System AMS to force connect list
	ALSA: hda/hdmi: Yet more pin fix for HP EliteDesk 800 G4
	usb: vhci-hcd: Do not drop references before new references are gained
	USB: serial: debug: do not echo input by default
	usb: gadget: core: Check for unset descriptor
	usb: gadget: u_serial: Set start_delayed during suspend
	scsi: ufs: core: Fix hba->last_dme_cmd_tstamp timestamp updating logic
	tick/broadcast: Move per CPU pointer access into the atomic section
	ntp: Clamp maxerror and esterror to operating range
	driver core: Fix uevent_show() vs driver detach race
	ntp: Safeguard against time_constant overflow
	scsi: mpt3sas: Remove scsi_dma_map() error messages
	scsi: mpt3sas: Avoid IOMMU page faults on REPORT ZONES
	irqchip/meson-gpio: support more than 8 channels gpio irq
	irqchip/meson-gpio: Convert meson_gpio_irq_controller::lock to 'raw_spinlock_t'
	serial: core: check uartclk for zero to avoid divide by zero
	irqchip/xilinx: Fix shift out of bounds
	genirq/irqdesc: Honor caller provided affinity in alloc_desc()
	power: supply: axp288_charger: Fix constant_charge_voltage writes
	power: supply: axp288_charger: Round constant_charge_voltage writes down
	tracing: Fix overflow in get_free_elt()
	padata: Fix possible divide-by-0 panic in padata_mt_helper()
	x86/mtrr: Check if fixed MTRRs exist before saving them
	drm/bridge: analogix_dp: properly handle zero sized AUX transactions
	drm/mgag200: Set DDC timeout in milliseconds
	mptcp: sched: check both directions for backup
	mptcp: distinguish rcv vs sent backup flag in requests
	mptcp: fix NL PM announced address accounting
	mptcp: mib: count MPJ with backup flag
	mptcp: export local_address
	mptcp: pm: fix backup support in signal endpoints
	samples: Add fs error monitoring example
	samples: Make fs-monitor depend on libc and headers
	Add gitignore file for samples/fanotify/ subdirectory
	Fix gcc 4.9 build issue in 5.10.y
	PCI/DPC: Fix use-after-free on concurrent DPC and hot-removal
	netfilter: nf_tables: set element extended ACK reporting support
	netfilter: nf_tables: use timestamp to check for set element timeout
	netfilter: nf_tables: allow clone callbacks to sleep
	netfilter: nf_tables: prefer nft_chain_validate
	drm/i915/gem: Fix Virtual Memory mapping boundaries calculation
	powerpc: Avoid nmi_enter/nmi_exit in real mode interrupt.
	arm64: cpufeature: Fix the visibility of compat hwcaps
	media: uvcvideo: Use entity get_cur in uvc_ctrl_set
	exec: Fix ToCToU between perm check and set-uid/gid usage
	nvme/pci: Add APST quirk for Lenovo N60z laptop
	vdpa: Make use of PFN_PHYS/PFN_UP/PFN_DOWN helper macro
	vhost-vdpa: switch to use vmf_insert_pfn() in the fault handler
	wifi: cfg80211: restrict NL80211_ATTR_TXQ_QUANTUM values
	ARM: dts: imx6qdl-kontron-samx6i: fix phy-mode
	media: Revert "media: dvb-usb: Fix unexpected infinite loop in dvb_usb_read_remote_control()"
	Linux 5.10.224

Change-Id: I7cd19d506c4c86df918a280598946060a494a161
Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
2024-09-04 11:06:25 +00:00

1305 lines
33 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/*
* linux/net/sunrpc/sched.c
*
* Scheduling for synchronous and asynchronous RPC requests.
*
* Copyright (C) 1996 Olaf Kirch, <okir@monad.swb.de>
*
* TCP NFS related read + write fixes
* (C) 1999 Dave Airlie, University of Limerick, Ireland <airlied@linux.ie>
*/
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/mempool.h>
#include <linux/smp.h>
#include <linux/spinlock.h>
#include <linux/mutex.h>
#include <linux/freezer.h>
#include <linux/sched/mm.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/metrics.h>
#include "sunrpc.h"
#define CREATE_TRACE_POINTS
#include <trace/events/sunrpc.h>
/*
* RPC slabs and memory pools
*/
#define RPC_BUFFER_MAXSIZE (2048)
#define RPC_BUFFER_POOLSIZE (8)
#define RPC_TASK_POOLSIZE (8)
static struct kmem_cache *rpc_task_slabp __read_mostly;
static struct kmem_cache *rpc_buffer_slabp __read_mostly;
static mempool_t *rpc_task_mempool __read_mostly;
static mempool_t *rpc_buffer_mempool __read_mostly;
static void rpc_async_schedule(struct work_struct *);
static void rpc_release_task(struct rpc_task *task);
static void __rpc_queue_timer_fn(struct work_struct *);
/*
* RPC tasks sit here while waiting for conditions to improve.
*/
static struct rpc_wait_queue delay_queue;
/*
* rpciod-related stuff
*/
struct workqueue_struct *rpciod_workqueue __read_mostly;
struct workqueue_struct *xprtiod_workqueue __read_mostly;
EXPORT_SYMBOL_GPL(xprtiod_workqueue);
unsigned long
rpc_task_timeout(const struct rpc_task *task)
{
unsigned long timeout = READ_ONCE(task->tk_timeout);
if (timeout != 0) {
unsigned long now = jiffies;
if (time_before(now, timeout))
return timeout - now;
}
return 0;
}
EXPORT_SYMBOL_GPL(rpc_task_timeout);
/*
* Disable the timer for a given RPC task. Should be called with
* queue->lock and bh_disabled in order to avoid races within
* rpc_run_timer().
*/
static void
__rpc_disable_timer(struct rpc_wait_queue *queue, struct rpc_task *task)
{
if (list_empty(&task->u.tk_wait.timer_list))
return;
task->tk_timeout = 0;
list_del(&task->u.tk_wait.timer_list);
if (list_empty(&queue->timer_list.list))
cancel_delayed_work(&queue->timer_list.dwork);
}
static void
rpc_set_queue_timer(struct rpc_wait_queue *queue, unsigned long expires)
{
unsigned long now = jiffies;
queue->timer_list.expires = expires;
if (time_before_eq(expires, now))
expires = 0;
else
expires -= now;
mod_delayed_work(rpciod_workqueue, &queue->timer_list.dwork, expires);
}
/*
* Set up a timer for the current task.
*/
static void
__rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task,
unsigned long timeout)
{
task->tk_timeout = timeout;
if (list_empty(&queue->timer_list.list) || time_before(timeout, queue->timer_list.expires))
rpc_set_queue_timer(queue, timeout);
list_add(&task->u.tk_wait.timer_list, &queue->timer_list.list);
}
static void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int priority)
{
if (queue->priority != priority) {
queue->priority = priority;
queue->nr = 1U << priority;
}
}
static void rpc_reset_waitqueue_priority(struct rpc_wait_queue *queue)
{
rpc_set_waitqueue_priority(queue, queue->maxpriority);
}
/*
* Add a request to a queue list
*/
static void
__rpc_list_enqueue_task(struct list_head *q, struct rpc_task *task)
{
struct rpc_task *t;
list_for_each_entry(t, q, u.tk_wait.list) {
if (t->tk_owner == task->tk_owner) {
list_add_tail(&task->u.tk_wait.links,
&t->u.tk_wait.links);
/* Cache the queue head in task->u.tk_wait.list */
task->u.tk_wait.list.next = q;
task->u.tk_wait.list.prev = NULL;
return;
}
}
INIT_LIST_HEAD(&task->u.tk_wait.links);
list_add_tail(&task->u.tk_wait.list, q);
}
/*
* Remove request from a queue list
*/
static void
__rpc_list_dequeue_task(struct rpc_task *task)
{
struct list_head *q;
struct rpc_task *t;
if (task->u.tk_wait.list.prev == NULL) {
list_del(&task->u.tk_wait.links);
return;
}
if (!list_empty(&task->u.tk_wait.links)) {
t = list_first_entry(&task->u.tk_wait.links,
struct rpc_task,
u.tk_wait.links);
/* Assume __rpc_list_enqueue_task() cached the queue head */
q = t->u.tk_wait.list.next;
list_add_tail(&t->u.tk_wait.list, q);
list_del(&task->u.tk_wait.links);
}
list_del(&task->u.tk_wait.list);
}
/*
* Add new request to a priority queue.
*/
static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue,
struct rpc_task *task,
unsigned char queue_priority)
{
if (unlikely(queue_priority > queue->maxpriority))
queue_priority = queue->maxpriority;
__rpc_list_enqueue_task(&queue->tasks[queue_priority], task);
}
/*
* Add new request to wait queue.
*/
static void __rpc_add_wait_queue(struct rpc_wait_queue *queue,
struct rpc_task *task,
unsigned char queue_priority)
{
INIT_LIST_HEAD(&task->u.tk_wait.timer_list);
if (RPC_IS_PRIORITY(queue))
__rpc_add_wait_queue_priority(queue, task, queue_priority);
else
list_add_tail(&task->u.tk_wait.list, &queue->tasks[0]);
task->tk_waitqueue = queue;
queue->qlen++;
/* barrier matches the read in rpc_wake_up_task_queue_locked() */
smp_wmb();
rpc_set_queued(task);
}
/*
* Remove request from a priority queue.
*/
static void __rpc_remove_wait_queue_priority(struct rpc_task *task)
{
__rpc_list_dequeue_task(task);
}
/*
* Remove request from queue.
* Note: must be called with spin lock held.
*/
static void __rpc_remove_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task)
{
__rpc_disable_timer(queue, task);
if (RPC_IS_PRIORITY(queue))
__rpc_remove_wait_queue_priority(task);
else
list_del(&task->u.tk_wait.list);
queue->qlen--;
}
static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qname, unsigned char nr_queues)
{
int i;
spin_lock_init(&queue->lock);
for (i = 0; i < ARRAY_SIZE(queue->tasks); i++)
INIT_LIST_HEAD(&queue->tasks[i]);
queue->maxpriority = nr_queues - 1;
rpc_reset_waitqueue_priority(queue);
queue->qlen = 0;
queue->timer_list.expires = 0;
INIT_DELAYED_WORK(&queue->timer_list.dwork, __rpc_queue_timer_fn);
INIT_LIST_HEAD(&queue->timer_list.list);
rpc_assign_waitqueue_name(queue, qname);
}
void rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qname)
{
__rpc_init_priority_wait_queue(queue, qname, RPC_NR_PRIORITY);
}
EXPORT_SYMBOL_GPL(rpc_init_priority_wait_queue);
void rpc_init_wait_queue(struct rpc_wait_queue *queue, const char *qname)
{
__rpc_init_priority_wait_queue(queue, qname, 1);
}
EXPORT_SYMBOL_GPL(rpc_init_wait_queue);
void rpc_destroy_wait_queue(struct rpc_wait_queue *queue)
{
cancel_delayed_work_sync(&queue->timer_list.dwork);
}
EXPORT_SYMBOL_GPL(rpc_destroy_wait_queue);
static int rpc_wait_bit_killable(struct wait_bit_key *key, int mode)
{
freezable_schedule_unsafe();
if (signal_pending_state(mode, current))
return -ERESTARTSYS;
return 0;
}
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) || IS_ENABLED(CONFIG_TRACEPOINTS)
static void rpc_task_set_debuginfo(struct rpc_task *task)
{
static atomic_t rpc_pid;
task->tk_pid = atomic_inc_return(&rpc_pid);
}
#else
static inline void rpc_task_set_debuginfo(struct rpc_task *task)
{
}
#endif
static void rpc_set_active(struct rpc_task *task)
{
rpc_task_set_debuginfo(task);
set_bit(RPC_TASK_ACTIVE, &task->tk_runstate);
trace_rpc_task_begin(task, NULL);
}
/*
* Mark an RPC call as having completed by clearing the 'active' bit
* and then waking up all tasks that were sleeping.
*/
static int rpc_complete_task(struct rpc_task *task)
{
void *m = &task->tk_runstate;
wait_queue_head_t *wq = bit_waitqueue(m, RPC_TASK_ACTIVE);
struct wait_bit_key k = __WAIT_BIT_KEY_INITIALIZER(m, RPC_TASK_ACTIVE);
unsigned long flags;
int ret;
trace_rpc_task_complete(task, NULL);
spin_lock_irqsave(&wq->lock, flags);
clear_bit(RPC_TASK_ACTIVE, &task->tk_runstate);
ret = atomic_dec_and_test(&task->tk_count);
if (waitqueue_active(wq))
__wake_up_locked_key(wq, TASK_NORMAL, &k);
spin_unlock_irqrestore(&wq->lock, flags);
return ret;
}
/*
* Allow callers to wait for completion of an RPC call
*
* Note the use of out_of_line_wait_on_bit() rather than wait_on_bit()
* to enforce taking of the wq->lock and hence avoid races with
* rpc_complete_task().
*/
int __rpc_wait_for_completion_task(struct rpc_task *task, wait_bit_action_f *action)
{
if (action == NULL)
action = rpc_wait_bit_killable;
return out_of_line_wait_on_bit(&task->tk_runstate, RPC_TASK_ACTIVE,
action, TASK_KILLABLE);
}
EXPORT_SYMBOL_GPL(__rpc_wait_for_completion_task);
/*
* Make an RPC task runnable.
*
* Note: If the task is ASYNC, and is being made runnable after sitting on an
* rpc_wait_queue, this must be called with the queue spinlock held to protect
* the wait queue operation.
* Note the ordering of rpc_test_and_set_running() and rpc_clear_queued(),
* which is needed to ensure that __rpc_execute() doesn't loop (due to the
* lockless RPC_IS_QUEUED() test) before we've had a chance to test
* the RPC_TASK_RUNNING flag.
*/
static void rpc_make_runnable(struct workqueue_struct *wq,
struct rpc_task *task)
{
bool need_wakeup = !rpc_test_and_set_running(task);
rpc_clear_queued(task);
if (!need_wakeup)
return;
if (RPC_IS_ASYNC(task)) {
INIT_WORK(&task->u.tk_work, rpc_async_schedule);
queue_work(wq, &task->u.tk_work);
} else {
smp_mb__after_atomic();
wake_up_bit(&task->tk_runstate, RPC_TASK_QUEUED);
}
}
/*
* Prepare for sleeping on a wait queue.
* By always appending tasks to the list we ensure FIFO behavior.
* NB: An RPC task will only receive interrupt-driven events as long
* as it's on a wait queue.
*/
static void __rpc_do_sleep_on_priority(struct rpc_wait_queue *q,
struct rpc_task *task,
unsigned char queue_priority)
{
trace_rpc_task_sleep(task, q);
__rpc_add_wait_queue(q, task, queue_priority);
}
static void __rpc_sleep_on_priority(struct rpc_wait_queue *q,
struct rpc_task *task,
unsigned char queue_priority)
{
if (WARN_ON_ONCE(RPC_IS_QUEUED(task)))
return;
__rpc_do_sleep_on_priority(q, task, queue_priority);
}
static void __rpc_sleep_on_priority_timeout(struct rpc_wait_queue *q,
struct rpc_task *task, unsigned long timeout,
unsigned char queue_priority)
{
if (WARN_ON_ONCE(RPC_IS_QUEUED(task)))
return;
if (time_is_after_jiffies(timeout)) {
__rpc_do_sleep_on_priority(q, task, queue_priority);
__rpc_add_timer(q, task, timeout);
} else
task->tk_status = -ETIMEDOUT;
}
static void rpc_set_tk_callback(struct rpc_task *task, rpc_action action)
{
if (action && !WARN_ON_ONCE(task->tk_callback != NULL))
task->tk_callback = action;
}
static bool rpc_sleep_check_activated(struct rpc_task *task)
{
/* We shouldn't ever put an inactive task to sleep */
if (WARN_ON_ONCE(!RPC_IS_ACTIVATED(task))) {
task->tk_status = -EIO;
rpc_put_task_async(task);
return false;
}
return true;
}
void rpc_sleep_on_timeout(struct rpc_wait_queue *q, struct rpc_task *task,
rpc_action action, unsigned long timeout)
{
if (!rpc_sleep_check_activated(task))
return;
rpc_set_tk_callback(task, action);
/*
* Protect the queue operations.
*/
spin_lock(&q->lock);
__rpc_sleep_on_priority_timeout(q, task, timeout, task->tk_priority);
spin_unlock(&q->lock);
}
EXPORT_SYMBOL_GPL(rpc_sleep_on_timeout);
void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
rpc_action action)
{
if (!rpc_sleep_check_activated(task))
return;
rpc_set_tk_callback(task, action);
WARN_ON_ONCE(task->tk_timeout != 0);
/*
* Protect the queue operations.
*/
spin_lock(&q->lock);
__rpc_sleep_on_priority(q, task, task->tk_priority);
spin_unlock(&q->lock);
}
EXPORT_SYMBOL_GPL(rpc_sleep_on);
void rpc_sleep_on_priority_timeout(struct rpc_wait_queue *q,
struct rpc_task *task, unsigned long timeout, int priority)
{
if (!rpc_sleep_check_activated(task))
return;
priority -= RPC_PRIORITY_LOW;
/*
* Protect the queue operations.
*/
spin_lock(&q->lock);
__rpc_sleep_on_priority_timeout(q, task, timeout, priority);
spin_unlock(&q->lock);
}
EXPORT_SYMBOL_GPL(rpc_sleep_on_priority_timeout);
void rpc_sleep_on_priority(struct rpc_wait_queue *q, struct rpc_task *task,
int priority)
{
if (!rpc_sleep_check_activated(task))
return;
WARN_ON_ONCE(task->tk_timeout != 0);
priority -= RPC_PRIORITY_LOW;
/*
* Protect the queue operations.
*/
spin_lock(&q->lock);
__rpc_sleep_on_priority(q, task, priority);
spin_unlock(&q->lock);
}
EXPORT_SYMBOL_GPL(rpc_sleep_on_priority);
/**
* __rpc_do_wake_up_task_on_wq - wake up a single rpc_task
* @wq: workqueue on which to run task
* @queue: wait queue
* @task: task to be woken up
*
* Caller must hold queue->lock, and have cleared the task queued flag.
*/
static void __rpc_do_wake_up_task_on_wq(struct workqueue_struct *wq,
struct rpc_wait_queue *queue,
struct rpc_task *task)
{
/* Has the task been executed yet? If not, we cannot wake it up! */
if (!RPC_IS_ACTIVATED(task)) {
printk(KERN_ERR "RPC: Inactive task (%p) being woken up!\n", task);
return;
}
trace_rpc_task_wakeup(task, queue);
__rpc_remove_wait_queue(queue, task);
rpc_make_runnable(wq, task);
}
/*
* Wake up a queued task while the queue lock is being held
*/
static struct rpc_task *
rpc_wake_up_task_on_wq_queue_action_locked(struct workqueue_struct *wq,
struct rpc_wait_queue *queue, struct rpc_task *task,
bool (*action)(struct rpc_task *, void *), void *data)
{
if (RPC_IS_QUEUED(task)) {
smp_rmb();
if (task->tk_waitqueue == queue) {
if (action == NULL || action(task, data)) {
__rpc_do_wake_up_task_on_wq(wq, queue, task);
return task;
}
}
}
return NULL;
}
/*
* Wake up a queued task while the queue lock is being held
*/
static void rpc_wake_up_task_queue_locked(struct rpc_wait_queue *queue,
struct rpc_task *task)
{
rpc_wake_up_task_on_wq_queue_action_locked(rpciod_workqueue, queue,
task, NULL, NULL);
}
/*
* Wake up a task on a specific queue
*/
void rpc_wake_up_queued_task(struct rpc_wait_queue *queue, struct rpc_task *task)
{
if (!RPC_IS_QUEUED(task))
return;
spin_lock(&queue->lock);
rpc_wake_up_task_queue_locked(queue, task);
spin_unlock(&queue->lock);
}
EXPORT_SYMBOL_GPL(rpc_wake_up_queued_task);
static bool rpc_task_action_set_status(struct rpc_task *task, void *status)
{
task->tk_status = *(int *)status;
return true;
}
static void
rpc_wake_up_task_queue_set_status_locked(struct rpc_wait_queue *queue,
struct rpc_task *task, int status)
{
rpc_wake_up_task_on_wq_queue_action_locked(rpciod_workqueue, queue,
task, rpc_task_action_set_status, &status);
}
/**
* rpc_wake_up_queued_task_set_status - wake up a task and set task->tk_status
* @queue: pointer to rpc_wait_queue
* @task: pointer to rpc_task
* @status: integer error value
*
* If @task is queued on @queue, then it is woken up, and @task->tk_status is
* set to the value of @status.
*/
void
rpc_wake_up_queued_task_set_status(struct rpc_wait_queue *queue,
struct rpc_task *task, int status)
{
if (!RPC_IS_QUEUED(task))
return;
spin_lock(&queue->lock);
rpc_wake_up_task_queue_set_status_locked(queue, task, status);
spin_unlock(&queue->lock);
}
/*
* Wake up the next task on a priority queue.
*/
static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *queue)
{
struct list_head *q;
struct rpc_task *task;
/*
* Service the privileged queue.
*/
q = &queue->tasks[RPC_NR_PRIORITY - 1];
if (queue->maxpriority > RPC_PRIORITY_PRIVILEGED && !list_empty(q)) {
task = list_first_entry(q, struct rpc_task, u.tk_wait.list);
goto out;
}
/*
* Service a batch of tasks from a single owner.
*/
q = &queue->tasks[queue->priority];
if (!list_empty(q) && queue->nr) {
queue->nr--;
task = list_first_entry(q, struct rpc_task, u.tk_wait.list);
goto out;
}
/*
* Service the next queue.
*/
do {
if (q == &queue->tasks[0])
q = &queue->tasks[queue->maxpriority];
else
q = q - 1;
if (!list_empty(q)) {
task = list_first_entry(q, struct rpc_task, u.tk_wait.list);
goto new_queue;
}
} while (q != &queue->tasks[queue->priority]);
rpc_reset_waitqueue_priority(queue);
return NULL;
new_queue:
rpc_set_waitqueue_priority(queue, (unsigned int)(q - &queue->tasks[0]));
out:
return task;
}
static struct rpc_task *__rpc_find_next_queued(struct rpc_wait_queue *queue)
{
if (RPC_IS_PRIORITY(queue))
return __rpc_find_next_queued_priority(queue);
if (!list_empty(&queue->tasks[0]))
return list_first_entry(&queue->tasks[0], struct rpc_task, u.tk_wait.list);
return NULL;
}
/*
* Wake up the first task on the wait queue.
*/
struct rpc_task *rpc_wake_up_first_on_wq(struct workqueue_struct *wq,
struct rpc_wait_queue *queue,
bool (*func)(struct rpc_task *, void *), void *data)
{
struct rpc_task *task = NULL;
spin_lock(&queue->lock);
task = __rpc_find_next_queued(queue);
if (task != NULL)
task = rpc_wake_up_task_on_wq_queue_action_locked(wq, queue,
task, func, data);
spin_unlock(&queue->lock);
return task;
}
/*
* Wake up the first task on the wait queue.
*/
struct rpc_task *rpc_wake_up_first(struct rpc_wait_queue *queue,
bool (*func)(struct rpc_task *, void *), void *data)
{
return rpc_wake_up_first_on_wq(rpciod_workqueue, queue, func, data);
}
EXPORT_SYMBOL_GPL(rpc_wake_up_first);
static bool rpc_wake_up_next_func(struct rpc_task *task, void *data)
{
return true;
}
/*
* Wake up the next task on the wait queue.
*/
struct rpc_task *rpc_wake_up_next(struct rpc_wait_queue *queue)
{
return rpc_wake_up_first(queue, rpc_wake_up_next_func, NULL);
}
EXPORT_SYMBOL_GPL(rpc_wake_up_next);
/**
* rpc_wake_up_locked - wake up all rpc_tasks
* @queue: rpc_wait_queue on which the tasks are sleeping
*
*/
static void rpc_wake_up_locked(struct rpc_wait_queue *queue)
{
struct rpc_task *task;
for (;;) {
task = __rpc_find_next_queued(queue);
if (task == NULL)
break;
rpc_wake_up_task_queue_locked(queue, task);
}
}
/**
* rpc_wake_up - wake up all rpc_tasks
* @queue: rpc_wait_queue on which the tasks are sleeping
*
* Grabs queue->lock
*/
void rpc_wake_up(struct rpc_wait_queue *queue)
{
spin_lock(&queue->lock);
rpc_wake_up_locked(queue);
spin_unlock(&queue->lock);
}
EXPORT_SYMBOL_GPL(rpc_wake_up);
/**
* rpc_wake_up_status_locked - wake up all rpc_tasks and set their status value.
* @queue: rpc_wait_queue on which the tasks are sleeping
* @status: status value to set
*/
static void rpc_wake_up_status_locked(struct rpc_wait_queue *queue, int status)
{
struct rpc_task *task;
for (;;) {
task = __rpc_find_next_queued(queue);
if (task == NULL)
break;
rpc_wake_up_task_queue_set_status_locked(queue, task, status);
}
}
/**
* rpc_wake_up_status - wake up all rpc_tasks and set their status value.
* @queue: rpc_wait_queue on which the tasks are sleeping
* @status: status value to set
*
* Grabs queue->lock
*/
void rpc_wake_up_status(struct rpc_wait_queue *queue, int status)
{
spin_lock(&queue->lock);
rpc_wake_up_status_locked(queue, status);
spin_unlock(&queue->lock);
}
EXPORT_SYMBOL_GPL(rpc_wake_up_status);
static void __rpc_queue_timer_fn(struct work_struct *work)
{
struct rpc_wait_queue *queue = container_of(work,
struct rpc_wait_queue,
timer_list.dwork.work);
struct rpc_task *task, *n;
unsigned long expires, now, timeo;
spin_lock(&queue->lock);
expires = now = jiffies;
list_for_each_entry_safe(task, n, &queue->timer_list.list, u.tk_wait.timer_list) {
timeo = task->tk_timeout;
if (time_after_eq(now, timeo)) {
trace_rpc_task_timeout(task, task->tk_action);
task->tk_status = -ETIMEDOUT;
rpc_wake_up_task_queue_locked(queue, task);
continue;
}
if (expires == now || time_after(expires, timeo))
expires = timeo;
}
if (!list_empty(&queue->timer_list.list))
rpc_set_queue_timer(queue, expires);
spin_unlock(&queue->lock);
}
static void __rpc_atrun(struct rpc_task *task)
{
if (task->tk_status == -ETIMEDOUT)
task->tk_status = 0;
}
/*
* Run a task at a later time
*/
void rpc_delay(struct rpc_task *task, unsigned long delay)
{
rpc_sleep_on_timeout(&delay_queue, task, __rpc_atrun, jiffies + delay);
}
EXPORT_SYMBOL_GPL(rpc_delay);
/*
* Helper to call task->tk_ops->rpc_call_prepare
*/
void rpc_prepare_task(struct rpc_task *task)
{
task->tk_ops->rpc_call_prepare(task, task->tk_calldata);
}
static void
rpc_init_task_statistics(struct rpc_task *task)
{
/* Initialize retry counters */
task->tk_garb_retry = 2;
task->tk_cred_retry = 2;
/* starting timestamp */
task->tk_start = ktime_get();
}
static void
rpc_reset_task_statistics(struct rpc_task *task)
{
task->tk_timeouts = 0;
task->tk_flags &= ~(RPC_CALL_MAJORSEEN|RPC_TASK_SENT);
rpc_init_task_statistics(task);
}
/*
* Helper that calls task->tk_ops->rpc_call_done if it exists
*/
void rpc_exit_task(struct rpc_task *task)
{
trace_rpc_task_end(task, task->tk_action);
task->tk_action = NULL;
if (task->tk_ops->rpc_count_stats)
task->tk_ops->rpc_count_stats(task, task->tk_calldata);
else if (task->tk_client)
rpc_count_iostats(task, task->tk_client->cl_metrics);
if (task->tk_ops->rpc_call_done != NULL) {
task->tk_ops->rpc_call_done(task, task->tk_calldata);
if (task->tk_action != NULL) {
/* Always release the RPC slot and buffer memory */
xprt_release(task);
rpc_reset_task_statistics(task);
}
}
}
void rpc_signal_task(struct rpc_task *task)
{
struct rpc_wait_queue *queue;
if (!RPC_IS_ACTIVATED(task))
return;
trace_rpc_task_signalled(task, task->tk_action);
set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate);
smp_mb__after_atomic();
queue = READ_ONCE(task->tk_waitqueue);
if (queue)
rpc_wake_up_queued_task_set_status(queue, task, -ERESTARTSYS);
}
void rpc_exit(struct rpc_task *task, int status)
{
task->tk_status = status;
task->tk_action = rpc_exit_task;
rpc_wake_up_queued_task(task->tk_waitqueue, task);
}
EXPORT_SYMBOL_GPL(rpc_exit);
void rpc_release_calldata(const struct rpc_call_ops *ops, void *calldata)
{
if (ops->rpc_release != NULL)
ops->rpc_release(calldata);
}
/*
* This is the RPC `scheduler' (or rather, the finite state machine).
*/
static void __rpc_execute(struct rpc_task *task)
{
struct rpc_wait_queue *queue;
int task_is_async = RPC_IS_ASYNC(task);
int status = 0;
WARN_ON_ONCE(RPC_IS_QUEUED(task));
if (RPC_IS_QUEUED(task))
return;
for (;;) {
void (*do_action)(struct rpc_task *);
/*
* Perform the next FSM step or a pending callback.
*
* tk_action may be NULL if the task has been killed.
* In particular, note that rpc_killall_tasks may
* do this at any time, so beware when dereferencing.
*/
do_action = task->tk_action;
if (task->tk_callback) {
do_action = task->tk_callback;
task->tk_callback = NULL;
}
if (!do_action)
break;
trace_rpc_task_run_action(task, do_action);
do_action(task);
/*
* Lockless check for whether task is sleeping or not.
*/
if (!RPC_IS_QUEUED(task))
continue;
/*
* Signalled tasks should exit rather than sleep.
*/
if (RPC_SIGNALLED(task)) {
task->tk_rpc_status = -ERESTARTSYS;
rpc_exit(task, -ERESTARTSYS);
}
/*
* The queue->lock protects against races with
* rpc_make_runnable().
*
* Note that once we clear RPC_TASK_RUNNING on an asynchronous
* rpc_task, rpc_make_runnable() can assign it to a
* different workqueue. We therefore cannot assume that the
* rpc_task pointer may still be dereferenced.
*/
queue = task->tk_waitqueue;
spin_lock(&queue->lock);
if (!RPC_IS_QUEUED(task)) {
spin_unlock(&queue->lock);
continue;
}
rpc_clear_running(task);
spin_unlock(&queue->lock);
if (task_is_async)
return;
/* sync task: sleep here */
trace_rpc_task_sync_sleep(task, task->tk_action);
status = out_of_line_wait_on_bit(&task->tk_runstate,
RPC_TASK_QUEUED, rpc_wait_bit_killable,
TASK_KILLABLE);
if (status < 0) {
/*
* When a sync task receives a signal, it exits with
* -ERESTARTSYS. In order to catch any callbacks that
* clean up after sleeping on some queue, we don't
* break the loop here, but go around once more.
*/
trace_rpc_task_signalled(task, task->tk_action);
set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate);
task->tk_rpc_status = -ERESTARTSYS;
rpc_exit(task, -ERESTARTSYS);
}
trace_rpc_task_sync_wake(task, task->tk_action);
}
/* Release all resources associated with the task */
rpc_release_task(task);
}
/*
* User-visible entry point to the scheduler.
*
* This may be called recursively if e.g. an async NFS task updates
* the attributes and finds that dirty pages must be flushed.
* NOTE: Upon exit of this function the task is guaranteed to be
* released. In particular note that tk_release() will have
* been called, so your task memory may have been freed.
*/
void rpc_execute(struct rpc_task *task)
{
bool is_async = RPC_IS_ASYNC(task);
rpc_set_active(task);
rpc_make_runnable(rpciod_workqueue, task);
if (!is_async) {
unsigned int pflags = memalloc_nofs_save();
__rpc_execute(task);
memalloc_nofs_restore(pflags);
}
}
static void rpc_async_schedule(struct work_struct *work)
{
unsigned int pflags = memalloc_nofs_save();
__rpc_execute(container_of(work, struct rpc_task, u.tk_work));
memalloc_nofs_restore(pflags);
}
/**
* rpc_malloc - allocate RPC buffer resources
* @task: RPC task
*
* A single memory region is allocated, which is split between the
* RPC call and RPC reply that this task is being used for. When
* this RPC is retired, the memory is released by calling rpc_free.
*
* To prevent rpciod from hanging, this allocator never sleeps,
* returning -ENOMEM and suppressing warning if the request cannot
* be serviced immediately. The caller can arrange to sleep in a
* way that is safe for rpciod.
*
* Most requests are 'small' (under 2KiB) and can be serviced from a
* mempool, ensuring that NFS reads and writes can always proceed,
* and that there is good locality of reference for these buffers.
*/
int rpc_malloc(struct rpc_task *task)
{
struct rpc_rqst *rqst = task->tk_rqstp;
size_t size = rqst->rq_callsize + rqst->rq_rcvsize;
struct rpc_buffer *buf;
gfp_t gfp = GFP_NOFS;
if (RPC_IS_ASYNC(task))
gfp = GFP_NOWAIT | __GFP_NOWARN;
if (RPC_IS_SWAPPER(task))
gfp |= __GFP_MEMALLOC;
size += sizeof(struct rpc_buffer);
if (size <= RPC_BUFFER_MAXSIZE)
buf = mempool_alloc(rpc_buffer_mempool, gfp);
else
buf = kmalloc(size, gfp);
if (!buf)
return -ENOMEM;
buf->len = size;
rqst->rq_buffer = buf->data;
rqst->rq_rbuffer = (char *)rqst->rq_buffer + rqst->rq_callsize;
return 0;
}
EXPORT_SYMBOL_GPL(rpc_malloc);
/**
* rpc_free - free RPC buffer resources allocated via rpc_malloc
* @task: RPC task
*
*/
void rpc_free(struct rpc_task *task)
{
void *buffer = task->tk_rqstp->rq_buffer;
size_t size;
struct rpc_buffer *buf;
buf = container_of(buffer, struct rpc_buffer, data);
size = buf->len;
if (size <= RPC_BUFFER_MAXSIZE)
mempool_free(buf, rpc_buffer_mempool);
else
kfree(buf);
}
EXPORT_SYMBOL_GPL(rpc_free);
/*
* Creation and deletion of RPC task structures
*/
static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *task_setup_data)
{
memset(task, 0, sizeof(*task));
atomic_set(&task->tk_count, 1);
task->tk_flags = task_setup_data->flags;
task->tk_ops = task_setup_data->callback_ops;
task->tk_calldata = task_setup_data->callback_data;
INIT_LIST_HEAD(&task->tk_task);
task->tk_priority = task_setup_data->priority - RPC_PRIORITY_LOW;
task->tk_owner = current->tgid;
/* Initialize workqueue for async tasks */
task->tk_workqueue = task_setup_data->workqueue;
task->tk_xprt = rpc_task_get_xprt(task_setup_data->rpc_client,
xprt_get(task_setup_data->rpc_xprt));
task->tk_op_cred = get_rpccred(task_setup_data->rpc_op_cred);
if (task->tk_ops->rpc_call_prepare != NULL)
task->tk_action = rpc_prepare_task;
rpc_init_task_statistics(task);
}
static struct rpc_task *
rpc_alloc_task(void)
{
return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOFS);
}
/*
* Create a new task for the specified client.
*/
struct rpc_task *rpc_new_task(const struct rpc_task_setup *setup_data)
{
struct rpc_task *task = setup_data->task;
unsigned short flags = 0;
if (task == NULL) {
task = rpc_alloc_task();
flags = RPC_TASK_DYNAMIC;
}
rpc_init_task(task, setup_data);
task->tk_flags |= flags;
return task;
}
/*
* rpc_free_task - release rpc task and perform cleanups
*
* Note that we free up the rpc_task _after_ rpc_release_calldata()
* in order to work around a workqueue dependency issue.
*
* Tejun Heo states:
* "Workqueue currently considers two work items to be the same if they're
* on the same address and won't execute them concurrently - ie. it
* makes a work item which is queued again while being executed wait
* for the previous execution to complete.
*
* If a work function frees the work item, and then waits for an event
* which should be performed by another work item and *that* work item
* recycles the freed work item, it can create a false dependency loop.
* There really is no reliable way to detect this short of verifying
* every memory free."
*
*/
static void rpc_free_task(struct rpc_task *task)
{
unsigned short tk_flags = task->tk_flags;
put_rpccred(task->tk_op_cred);
rpc_release_calldata(task->tk_ops, task->tk_calldata);
if (tk_flags & RPC_TASK_DYNAMIC)
mempool_free(task, rpc_task_mempool);
}
static void rpc_async_release(struct work_struct *work)
{
unsigned int pflags = memalloc_nofs_save();
rpc_free_task(container_of(work, struct rpc_task, u.tk_work));
memalloc_nofs_restore(pflags);
}
static void rpc_release_resources_task(struct rpc_task *task)
{
xprt_release(task);
if (task->tk_msg.rpc_cred) {
if (!(task->tk_flags & RPC_TASK_CRED_NOREF))
put_cred(task->tk_msg.rpc_cred);
task->tk_msg.rpc_cred = NULL;
}
rpc_task_release_client(task);
}
static void rpc_final_put_task(struct rpc_task *task,
struct workqueue_struct *q)
{
if (q != NULL) {
INIT_WORK(&task->u.tk_work, rpc_async_release);
queue_work(q, &task->u.tk_work);
} else
rpc_free_task(task);
}
static void rpc_do_put_task(struct rpc_task *task, struct workqueue_struct *q)
{
if (atomic_dec_and_test(&task->tk_count)) {
rpc_release_resources_task(task);
rpc_final_put_task(task, q);
}
}
void rpc_put_task(struct rpc_task *task)
{
rpc_do_put_task(task, NULL);
}
EXPORT_SYMBOL_GPL(rpc_put_task);
void rpc_put_task_async(struct rpc_task *task)
{
rpc_do_put_task(task, task->tk_workqueue);
}
EXPORT_SYMBOL_GPL(rpc_put_task_async);
static void rpc_release_task(struct rpc_task *task)
{
WARN_ON_ONCE(RPC_IS_QUEUED(task));
rpc_release_resources_task(task);
/*
* Note: at this point we have been removed from rpc_clnt->cl_tasks,
* so it should be safe to use task->tk_count as a test for whether
* or not any other processes still hold references to our rpc_task.
*/
if (atomic_read(&task->tk_count) != 1 + !RPC_IS_ASYNC(task)) {
/* Wake up anyone who may be waiting for task completion */
if (!rpc_complete_task(task))
return;
} else {
if (!atomic_dec_and_test(&task->tk_count))
return;
}
rpc_final_put_task(task, task->tk_workqueue);
}
int rpciod_up(void)
{
return try_module_get(THIS_MODULE) ? 0 : -EINVAL;
}
void rpciod_down(void)
{
module_put(THIS_MODULE);
}
/*
* Start up the rpciod workqueue.
*/
static int rpciod_start(void)
{
struct workqueue_struct *wq;
/*
* Create the rpciod thread and wait for it to start.
*/
wq = alloc_workqueue("rpciod", WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
if (!wq)
goto out_failed;
rpciod_workqueue = wq;
/* Note: highpri because network receive is latency sensitive */
wq = alloc_workqueue("xprtiod", WQ_UNBOUND|WQ_MEM_RECLAIM|WQ_HIGHPRI, 0);
if (!wq)
goto free_rpciod;
xprtiod_workqueue = wq;
return 1;
free_rpciod:
wq = rpciod_workqueue;
rpciod_workqueue = NULL;
destroy_workqueue(wq);
out_failed:
return 0;
}
static void rpciod_stop(void)
{
struct workqueue_struct *wq = NULL;
if (rpciod_workqueue == NULL)
return;
wq = rpciod_workqueue;
rpciod_workqueue = NULL;
destroy_workqueue(wq);
wq = xprtiod_workqueue;
xprtiod_workqueue = NULL;
destroy_workqueue(wq);
}
void
rpc_destroy_mempool(void)
{
rpciod_stop();
mempool_destroy(rpc_buffer_mempool);
mempool_destroy(rpc_task_mempool);
kmem_cache_destroy(rpc_task_slabp);
kmem_cache_destroy(rpc_buffer_slabp);
rpc_destroy_wait_queue(&delay_queue);
}
int
rpc_init_mempool(void)
{
/*
* The following is not strictly a mempool initialisation,
* but there is no harm in doing it here
*/
rpc_init_wait_queue(&delay_queue, "delayq");
if (!rpciod_start())
goto err_nomem;
rpc_task_slabp = kmem_cache_create("rpc_tasks",
sizeof(struct rpc_task),
0, SLAB_HWCACHE_ALIGN,
NULL);
if (!rpc_task_slabp)
goto err_nomem;
rpc_buffer_slabp = kmem_cache_create("rpc_buffers",
RPC_BUFFER_MAXSIZE,
0, SLAB_HWCACHE_ALIGN,
NULL);
if (!rpc_buffer_slabp)
goto err_nomem;
rpc_task_mempool = mempool_create_slab_pool(RPC_TASK_POOLSIZE,
rpc_task_slabp);
if (!rpc_task_mempool)
goto err_nomem;
rpc_buffer_mempool = mempool_create_slab_pool(RPC_BUFFER_POOLSIZE,
rpc_buffer_slabp);
if (!rpc_buffer_mempool)
goto err_nomem;
return 0;
err_nomem:
rpc_destroy_mempool();
return -ENOMEM;
}