android_kernel_samsung_sm8650/kernel/sched/cpupri.c

// SPDX-License-Identifier: GPL-2.0-only
/*
 *  kernel/sched/cpupri.c
 *
 *  CPU priority management
 *
 *  Copyright (C) 2007-2008 Novell
 *
 *  Author: Gregory Haskins <ghaskins@novell.com>
 *
 *  This code tracks the priority of each CPU so that global migration
 *  decisions are easy to calculate.  Each CPU can be in a state as follows:
 *
 *                 (INVALID), NORMAL, RT1, ... RT99, HIGHER
 *
 *  going from the lowest priority to the highest.  CPUs in the INVALID state
 *  are not eligible for routing.  The system maintains this state with
 *  a 2 dimensional bitmap (the first for priority class, the second for CPUs
 *  in that class).  Therefore a typical application without affinity
 *  restrictions can find a suitable CPU with O(1) complexity (e.g. two bit
 *  searches).  For tasks with affinity restrictions, the algorithm has a
 *  worst case complexity of O(min(101, nr_domcpus)), though the scenario that
 *  yields the worst case search is fairly contrived.
 */
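
/*
 * Reader's note (annotation, not part of the upstream file): the state
 * described above is held in struct cpupri, declared in
 * kernel/sched/cpupri.h.  Roughly, deferring to that header for the
 * authoritative definition:
 *
 *	struct cpupri_vec {
 *		atomic_t	count;	(number of CPUs in this class)
 *		cpumask_var_t	mask;	(which CPUs those are)
 *	};
 *
 *	struct cpupri {
 *		struct cpupri_vec	pri_to_cpu[CPUPRI_NR_PRIORITIES];
 *		int			*cpu_to_pri;
 *	};
 *
 * pri_to_cpu[] is the "2 dimensional bitmap" mentioned above: one vector
 * per priority class, each holding a count and a cpumask of the CPUs
 * currently at that class; cpu_to_pri[] is the per-CPU reverse mapping.
 */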

/*
 * p->rt_priority   p->prio   newpri   cpupri
 *
 *                               -1       -1    (CPUPRI_INVALID)
 *
 *                               99        0    (CPUPRI_NORMAL)
 *
 *             1        98       98        1
 *           ...
 *            49        50       50       49
 *            50        49       49       50
 *           ...
 *            99         0        0       99
 *
 *                              100      100    (CPUPRI_HIGHER)
 */

static int convert_prio(int prio)
{
	int cpupri;

	switch (prio) {
	case CPUPRI_INVALID:
		cpupri = CPUPRI_INVALID;	/* -1 */
		break;

	case 0 ... 98:
		cpupri = MAX_RT_PRIO-1 - prio;	/* 1 ... 99 */
		break;

	case MAX_RT_PRIO-1:
		cpupri = CPUPRI_NORMAL;		/* 0 */
		break;

	case MAX_RT_PRIO:
		cpupri = CPUPRI_HIGHER;		/* 100 */
		break;
	}

	return cpupri;
}
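
/*
 * Worked example of the table and conversion above (annotation): an RT
 * task with p->rt_priority == 50 has p->prio == MAX_RT_PRIO-1 - 50 == 49,
 * and convert_prio(49) yields MAX_RT_PRIO-1 - 49 == 50, i.e. a higher RT
 * priority maps to a higher cpupri index.  Per the table, newpri ==
 * MAX_RT_PRIO-1 (99) maps to CPUPRI_NORMAL (0) and newpri == MAX_RT_PRIO
 * (100) maps to CPUPRI_HIGHER (100).  The search in cpupri_find_fitness()
 * below only scans indices strictly below the searching task's own
 * cpupri, i.e. only CPUs running at strictly lower priority.
 */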

static inline int __cpupri_find(struct cpupri *cp, struct task_struct *p,
				struct cpumask *lowest_mask, int idx)
{
	struct cpupri_vec *vec = &cp->pri_to_cpu[idx];
	int skip = 0;

	if (!atomic_read(&(vec)->count))
		skip = 1;

	/*
	 * When looking at the vector, we need to read the counter,
	 * do a memory barrier, then read the mask.
	 *
	 * Note: This is still all racy, but we can deal with it.
	 * Ideally, we only want to look at masks that are set.
	 *
	 * If a mask is not set, then the only thing wrong is that we
	 * did a little more work than necessary.
	 *
	 * If we read a zero count but the mask is set, because of the
	 * memory barriers, that can only happen when the highest prio
	 * task for a run queue has left the run queue, in which case,
	 * it will be followed by a pull. If the task we are processing
	 * fails to find a proper place to go, that pull request will
	 * pull this task if the run queue is running at a lower
	 * priority.
	 */
	smp_rmb();

	/* Need to do the rmb for every iteration */
	if (skip)
		return 0;

	if (cpumask_any_and(&p->cpus_mask, vec->mask) >= nr_cpu_ids)
		return 0;

	if (lowest_mask) {
		cpumask_and(lowest_mask, &p->cpus_mask, vec->mask);
		cpumask_and(lowest_mask, lowest_mask, cpu_active_mask);

		/*
		 * We have to ensure that we have at least one bit
		 * still set in the array, since the map could have
		 * been concurrently emptied between the first and
		 * second reads of vec->mask.  If we hit this
		 * condition, simply act as though we never hit this
		 * priority level and continue on.
		 */
		if (cpumask_empty(lowest_mask))
			return 0;
	}

	return 1;
}
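
/*
 * Annotation: the count -> smp_rmb() -> mask read order above pairs with
 * the publication order in cpupri_set() below, which sets vec->mask,
 * issues smp_mb__before_atomic(), and only then increments vec->count
 * (and, on removal, decrements the count before clearing the mask).  A
 * reader that observes the incremented count therefore also observes the
 * mask bit after its smp_rmb(); the remaining interleavings are the
 * benign races described in the comment inside the function.
 */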

int cpupri_find(struct cpupri *cp, struct task_struct *p,
		struct cpumask *lowest_mask)
{
	return cpupri_find_fitness(cp, p, lowest_mask, NULL);
}

/**
 * cpupri_find_fitness - find the best (lowest-pri) CPU in the system
 * @cp: The cpupri context
 * @p: The task
 * @lowest_mask: A mask to fill in with selected CPUs (or NULL)
 * @fitness_fn: A pointer to a function to do custom checks whether the CPU
 *              fits specific criteria so that we only return those CPUs.
 *
 * Note: This function returns the recommended CPUs as calculated during the
 * current invocation.  By the time the call returns, the CPUs may have in
 * fact changed priorities any number of times.  While not ideal, it is not
 * an issue of correctness since the normal rebalancer logic will correct
 * any discrepancies created by racing against the uncertainty of the current
 * priority configuration.
 *
 * Return: (int)bool - CPUs were found
 */

int cpupri_find_fitness(struct cpupri *cp, struct task_struct *p,
		struct cpumask *lowest_mask,
		bool (*fitness_fn)(struct task_struct *p, int cpu))
{
	int task_pri = convert_prio(p->prio);
	int idx, cpu;

	WARN_ON_ONCE(task_pri >= CPUPRI_NR_PRIORITIES);

	for (idx = 0; idx < task_pri; idx++) {

		if (!__cpupri_find(cp, p, lowest_mask, idx))
			continue;

		if (!lowest_mask || !fitness_fn)
			return 1;

		/* Ensure the capacity of the CPUs fits the task */
		for_each_cpu(cpu, lowest_mask) {
			if (!fitness_fn(p, cpu))
				cpumask_clear_cpu(cpu, lowest_mask);
		}

		/*
		 * If no CPU at the current priority can fit the task,
		 * continue looking.
		 */
		if (cpumask_empty(lowest_mask))
			continue;

		return 1;
	}

	/*
	 * If we failed to find a fitting lowest_mask, kick off a new search
	 * but without taking into account any fitness criteria this time.
	 *
	 * This rule favours honouring priority over fitting the task in the
	 * correct CPU (Capacity Awareness being the only user now).
	 * The idea is that if a higher priority task can run, then it should
	 * run even if this ends up being on an unfitting CPU.
	 *
	 * The cost of this trade-off is not entirely clear and will probably
	 * be good for some workloads and bad for others.
	 *
	 * The main idea here is that if some CPUs were over-committed, we try
	 * to spread, which is what the scheduler traditionally did.  Sys admins
	 * must do proper RT planning to avoid overloading the system if they
	 * really care.
	 */
	if (fitness_fn)
		return cpupri_find(cp, p, lowest_mask);

	return 0;
}
EXPORT_SYMBOL_GPL(cpupri_find_fitness);
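
/*
 * Usage sketch (annotation, paraphrased rather than quoted from
 * kernel/sched/rt.c): the main caller is the RT push/pull code in
 * find_lowest_rq().  On asymmetric-capacity systems it passes a fitness
 * callback so only CPUs that can fit the task are returned, and falls
 * back to a plain priority search otherwise, roughly:
 *
 *	if (sched_asym_cpucap_active())
 *		ret = cpupri_find_fitness(&task_rq(task)->rd->cpupri,
 *					  task, lowest_mask,
 *					  rt_task_fits_capacity);
 *	else
 *		ret = cpupri_find(&task_rq(task)->rd->cpupri,
 *				  task, lowest_mask);
 */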

/**
 * cpupri_set - update the CPU priority setting
 * @cp: The cpupri context
 * @cpu: The target CPU
 * @newpri: The priority (INVALID, NORMAL, RT1-RT99, HIGHER) to assign to this CPU
 *
 * Note: Assumes cpu_rq(cpu)->lock is locked
 *
 * Returns: (void)
 */
void cpupri_set(struct cpupri *cp, int cpu, int newpri)
{
	int *currpri = &cp->cpu_to_pri[cpu];
	int oldpri = *currpri;
	int do_mb = 0;

	newpri = convert_prio(newpri);

	BUG_ON(newpri >= CPUPRI_NR_PRIORITIES);

	if (newpri == oldpri)
		return;

	/*
	 * If the CPU was currently mapped to a different value, we
	 * need to map it to the new value then remove the old value.
	 * Note, we must add the new value first, otherwise we risk the
	 * cpu being missed by the priority loop in cpupri_find.
	 */
	if (likely(newpri != CPUPRI_INVALID)) {
		struct cpupri_vec *vec = &cp->pri_to_cpu[newpri];

		cpumask_set_cpu(cpu, vec->mask);
		/*
		 * When adding a new vector, we update the mask first,
		 * do a write memory barrier, and then update the count, to
		 * make sure the vector is visible when count is set.
		 */
		smp_mb__before_atomic();
		atomic_inc(&(vec)->count);
		do_mb = 1;
	}
	if (likely(oldpri != CPUPRI_INVALID)) {
		struct cpupri_vec *vec = &cp->pri_to_cpu[oldpri];

		/*
		 * Because the order of modification of the vec->count
		 * is important, we must make sure that the update
		 * of the new prio is seen before we decrement the
		 * old prio.  This makes sure that the loop sees
		 * one or the other when we raise the priority of
		 * the run queue.  We don't care about when we lower the
		 * priority, as that will trigger an rt pull anyway.
		 *
		 * We only need to do a memory barrier if we updated
		 * the new priority vec.
		 */
		if (do_mb)
			smp_mb__after_atomic();

		/*
		 * When removing from the vector, we decrement the counter
		 * first, do a memory barrier, and then clear the mask.
		 */
		atomic_dec(&(vec)->count);
		smp_mb__after_atomic();
		cpumask_clear_cpu(cpu, vec->mask);
	}

	*currpri = newpri;
}
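
/*
 * Worked example (annotation): if CPU 3 is currently accounted in
 * pri_to_cpu[20] and its run queue's top priority rises to cpupri 60,
 * cpupri_set() first publishes CPU 3 in pri_to_cpu[60] (mask, barrier,
 * count) and only afterwards retires it from pri_to_cpu[20] (count,
 * barrier, mask).  As the comments above state, this ordering is what
 * lets a concurrent cpupri_find() see the CPU in one vector or the other
 * while its priority is being raised, rather than transiently in neither.
 */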

/**
 * cpupri_init - initialize the cpupri structure
 * @cp: The cpupri context
 *
 * Return: -ENOMEM on memory allocation failure.
 */
int cpupri_init(struct cpupri *cp)
{
	int i;

	for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) {
		struct cpupri_vec *vec = &cp->pri_to_cpu[i];

		atomic_set(&vec->count, 0);
		if (!zalloc_cpumask_var(&vec->mask, GFP_KERNEL))
			goto cleanup;
	}

	cp->cpu_to_pri = kcalloc(nr_cpu_ids, sizeof(int), GFP_KERNEL);
	if (!cp->cpu_to_pri)
		goto cleanup;

	for_each_possible_cpu(i)
		cp->cpu_to_pri[i] = CPUPRI_INVALID;

	return 0;

cleanup:
	for (i--; i >= 0; i--)
		free_cpumask_var(cp->pri_to_cpu[i].mask);
	return -ENOMEM;
}

/**
 * cpupri_cleanup - clean up the cpupri structure
 * @cp: The cpupri context
 */
void cpupri_cleanup(struct cpupri *cp)
{
	int i;

	kfree(cp->cpu_to_pri);
	for (i = 0; i < CPUPRI_NR_PRIORITIES; i++)
		free_cpumask_var(cp->pri_to_cpu[i].mask);
}
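
/*
 * Lifecycle note (annotation): cpupri_init() and cpupri_cleanup() are
 * called from the root-domain setup and teardown paths in
 * kernel/sched/topology.c (init_rootdomain() and free_rootdomain()), so
 * each root domain carries its own cpupri state in rd->cpupri.
 */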