[ Upstream commit b86eb74098a92afd789da02699b4b0dd3f73b889 ]
The asm constraint does not reflect the fact that the asm statement
modifies the value of the local variable loops.
Specifying the wrong constraint may lead to undefined behavior, it may
clobber random stuff (e.g. local variable, important temporary value in
regs, etc.). This is especially dangerous when the compiler decides to
inline the function and since it doesn't know that the value gets
modified, it might decide to use it from a register directly without
reloading it.
Change the constraint to "+a" to denote that the first argument is an
input and an output argument.
[ bp: Fix typo, massage commit message. ]
Fixes: e01b70ef3e ("x86: fix bug in arch/i386/lib/delay.c file, delay_loop function")
Signed-off-by: Ammar Faizi <ammarfaizi2@gnuweeb.org>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lore.kernel.org/r/20220329104705.65256-2-ammarfaizi2@gnuweeb.org
Signed-off-by: Sasha Levin <sashal@kernel.org>
190 lines
4.0 KiB
C
190 lines
4.0 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* Precise Delay Loops for i386
|
|
*
|
|
* Copyright (C) 1993 Linus Torvalds
|
|
* Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
|
|
* Copyright (C) 2008 Jiri Hladky <hladky _dot_ jiri _at_ gmail _dot_ com>
|
|
*
|
|
* The __delay function must _NOT_ be inlined as its execution time
|
|
* depends wildly on alignment on many x86 processors. The additional
|
|
* jump magic is needed to get the timing stable on all the CPU's
|
|
* we have to worry about.
|
|
*/
|
|
|
|
#include <linux/export.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/timex.h>
|
|
#include <linux/preempt.h>
|
|
#include <linux/delay.h>
|
|
|
|
#include <asm/processor.h>
|
|
#include <asm/delay.h>
|
|
#include <asm/timer.h>
|
|
#include <asm/mwait.h>
|
|
|
|
#ifdef CONFIG_SMP
|
|
# include <asm/smp.h>
|
|
#endif
|
|
|
|
/*
 * Simple loop based delay.
 *
 * Busy-waits by counting @loops down to zero in a register with a
 * dec/jnz loop.  A zero count skips the loop entirely.
 *
 * The asm overwrites its operand: the loop decrements it to zero and the
 * final "dec" at label 3 runs on both the zero and the non-zero path.
 * The operand is therefore declared read-write ("+a") so the compiler
 * knows the register no longer holds the caller's value afterwards.
 *
 * The two ".align 16" directives and the jmp hops between them are the
 * "jump magic" mentioned in the file header: they put the counting loop
 * at label 2 on an aligned address.
 */
|
|
static void delay_loop(unsigned long loops)
|
|
{
|
|
asm volatile(
|
|
/* Nothing to count for loops == 0: go straight to the exit label. */
" test %0,%0 \n"
|
|
" jz 3f \n"
|
|
" jmp 1f \n"
|
|
|
|
/* Aligned trampoline that hops into the aligned loop below. */
".align 16 \n"
|
|
"1: jmp 2f \n"
|
|
|
|
/* The counting loop proper: decrement until the operand reaches zero. */
".align 16 \n"
|
|
"2: dec %0 \n"
|
|
" jnz 2b \n"
|
|
/* Common exit; this decrement also modifies the operand. */
"3: dec %0 \n"
|
|
|
|
/* Single read-write operand in the "a" register; no separate inputs. */
: "+a" (loops)
|
|
:
|
|
);
|
|
}
|
|
|
|
/* TSC based delay: */
|
|
static void delay_tsc(unsigned long __loops)
|
|
{
|
|
u64 bclock, now, loops = __loops;
|
|
int cpu;
|
|
|
|
preempt_disable();
|
|
cpu = smp_processor_id();
|
|
bclock = rdtsc_ordered();
|
|
for (;;) {
|
|
now = rdtsc_ordered();
|
|
if ((now - bclock) >= loops)
|
|
break;
|
|
|
|
/* Allow RT tasks to run */
|
|
preempt_enable();
|
|
rep_nop();
|
|
preempt_disable();
|
|
|
|
/*
|
|
* It is possible that we moved to another CPU, and
|
|
* since TSC's are per-cpu we need to calculate
|
|
* that. The delay must guarantee that we wait "at
|
|
* least" the amount of time. Being moved to another
|
|
* CPU could make the wait longer but we just need to
|
|
* make sure we waited long enough. Rebalance the
|
|
* counter for this CPU.
|
|
*/
|
|
if (unlikely(cpu != smp_processor_id())) {
|
|
loops -= (now - bclock);
|
|
cpu = smp_processor_id();
|
|
bclock = rdtsc_ordered();
|
|
}
|
|
}
|
|
preempt_enable();
|
|
}
|
|
|
|
/*
|
|
* On some AMD platforms, MWAITX has a configurable 32-bit timer, that
|
|
* counts with TSC frequency. The input value is the loop of the
|
|
* counter, it will exit when the timer expires.
|
|
*/
|
|
static void delay_mwaitx(unsigned long __loops)
|
|
{
|
|
u64 start, end, delay, loops = __loops;
|
|
|
|
/*
|
|
* Timer value of 0 causes MWAITX to wait indefinitely, unless there
|
|
* is a store on the memory monitored by MONITORX.
|
|
*/
|
|
if (loops == 0)
|
|
return;
|
|
|
|
start = rdtsc_ordered();
|
|
|
|
for (;;) {
|
|
delay = min_t(u64, MWAITX_MAX_LOOPS, loops);
|
|
|
|
/*
|
|
* Use cpu_tss_rw as a cacheline-aligned, seldomly
|
|
* accessed per-cpu variable as the monitor target.
|
|
*/
|
|
__monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0);
|
|
|
|
/*
|
|
* AMD, like Intel's MWAIT version, supports the EAX hint and
|
|
* EAX=0xf0 means, do not enter any deep C-state and we use it
|
|
* here in delay() to minimize wakeup latency.
|
|
*/
|
|
__mwaitx(MWAITX_DISABLE_CSTATES, delay, MWAITX_ECX_TIMER_ENABLE);
|
|
|
|
end = rdtsc_ordered();
|
|
|
|
if (loops <= end - start)
|
|
break;
|
|
|
|
loops -= end - start;
|
|
|
|
start = end;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Since we calibrate only once at boot, this
|
|
* function should be set once at boot and not changed
|
|
*/
|
|
static void (*delay_fn)(unsigned long) = delay_loop;
|
|
|
|
void use_tsc_delay(void)
|
|
{
|
|
if (delay_fn == delay_loop)
|
|
delay_fn = delay_tsc;
|
|
}
|
|
|
|
void use_mwaitx_delay(void)
|
|
{
|
|
delay_fn = delay_mwaitx;
|
|
}
|
|
|
|
int read_current_timer(unsigned long *timer_val)
|
|
{
|
|
if (delay_fn == delay_tsc) {
|
|
*timer_val = rdtsc();
|
|
return 0;
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
void __delay(unsigned long loops)
|
|
{
|
|
delay_fn(loops);
|
|
}
|
|
EXPORT_SYMBOL(__delay);
|
|
|
|
noinline void __const_udelay(unsigned long xloops)
|
|
{
|
|
unsigned long lpj = this_cpu_read(cpu_info.loops_per_jiffy) ? : loops_per_jiffy;
|
|
int d0;
|
|
|
|
xloops *= 4;
|
|
asm("mull %%edx"
|
|
:"=d" (xloops), "=&a" (d0)
|
|
:"1" (xloops), "0" (lpj * (HZ / 4)));
|
|
|
|
__delay(++xloops);
|
|
}
|
|
EXPORT_SYMBOL(__const_udelay);
|
|
|
|
/*
 * Delay for @usecs microseconds.
 *
 * Scales the argument into the 2**32-based fixed-point unit expected by
 * __const_udelay().
 */
void __udelay(unsigned long usecs)
{
	/* 2**32 / 1000000 (rounded up) */
	const unsigned long xloops_per_usec = 0x000010c7;

	__const_udelay(usecs * xloops_per_usec);
}
EXPORT_SYMBOL(__udelay);
|
|
|
|
/*
 * Delay for @nsecs nanoseconds.
 *
 * Scales the argument into the 2**32-based fixed-point unit expected by
 * __const_udelay().
 */
void __ndelay(unsigned long nsecs)
{
	/* 2**32 / 1000000000 (rounded up) */
	const unsigned long xloops_per_nsec = 0x00005;

	__const_udelay(nsecs * xloops_per_nsec);
}
EXPORT_SYMBOL(__ndelay);
|