arch/tile: fix __ndelay etc to work better
The current implementations of __ndelay and __udelay call a hypervisor service to delay, but the hypervisor service isn't actually implemented very well, and the consensus is that Linux should handle figuring this out natively and not use a hypervisor service. By converting nanoseconds to cycles, and then spinning until the cycle counter reaches the desired cycle, we get several benefits: first, we are sensitive to the actual clock speed; second, we use less power by issuing a slow SPR read once every six cycles while we delay; and third, we properly handle the case of an interrupt by exiting at the target time rather than after some number of cycles. Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
This commit is contained in:
parent
04f7a3f12e
commit
1337173148
@ -38,6 +38,9 @@ static inline cycles_t get_cycles(void)
|
|||||||
|
|
||||||
cycles_t get_clock_rate(void);
|
cycles_t get_clock_rate(void);
|
||||||
|
|
||||||
|
/* Convert nanoseconds to core clock cycles. */
|
||||||
|
cycles_t ns2cycles(unsigned long nsecs);
|
||||||
|
|
||||||
/* Called at cpu initialization to set some low-level constants. */
|
/* Called at cpu initialization to set some low-level constants. */
|
||||||
void setup_clock(void);
|
void setup_clock(void);
|
||||||
|
|
||||||
|
@ -963,6 +963,11 @@ HV_ASIDRange hv_inquire_asid(int idx);
|
|||||||
|
|
||||||
|
|
||||||
/** Waits for at least the specified number of nanoseconds then returns.
|
/** Waits for at least the specified number of nanoseconds then returns.
|
||||||
|
*
|
||||||
|
* NOTE: this deprecated function currently assumes a 750 MHz clock,
|
||||||
|
* and is thus not generally suitable for use. New code should call
|
||||||
|
* hv_sysconf(HV_SYSCONF_CPU_SPEED), compute a cycle count to wait for,
|
||||||
|
* and delay by looping while checking the cycle counter SPR.
|
||||||
*
|
*
|
||||||
* @param nanosecs The number of nanoseconds to sleep.
|
* @param nanosecs The number of nanoseconds to sleep.
|
||||||
*/
|
*/
|
||||||
|
@ -38,12 +38,6 @@ STD_ENTRY(kernel_execve)
|
|||||||
jrp lr
|
jrp lr
|
||||||
STD_ENDPROC(kernel_execve)
|
STD_ENDPROC(kernel_execve)
|
||||||
|
|
||||||
/* Delay a fixed number of cycles. */
|
|
||||||
STD_ENTRY(__delay)
|
|
||||||
{ addi r0, r0, -1; bnzt r0, . }
|
|
||||||
jrp lr
|
|
||||||
STD_ENDPROC(__delay)
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We don't run this function directly, but instead copy it to a page
|
* We don't run this function directly, but instead copy it to a page
|
||||||
* we map into every user process. See vdso_setup().
|
* we map into every user process. See vdso_setup().
|
||||||
|
@ -224,3 +224,13 @@ int setup_profiling_timer(unsigned int multiplier)
|
|||||||
{
|
{
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Use the tile timer to convert nsecs to core clock cycles, relying
|
||||||
|
* on it having the same frequency as SPR_CYCLE.
|
||||||
|
*/
|
||||||
|
cycles_t ns2cycles(unsigned long nsecs)
|
||||||
|
{
|
||||||
|
struct clock_event_device *dev = &__get_cpu_var(tile_timer);
|
||||||
|
return ((u64)nsecs * dev->mult) >> dev->shift;
|
||||||
|
}
|
||||||
|
@ -15,20 +15,31 @@
|
|||||||
#include <linux/module.h>
|
#include <linux/module.h>
|
||||||
#include <linux/delay.h>
|
#include <linux/delay.h>
|
||||||
#include <linux/thread_info.h>
|
#include <linux/thread_info.h>
|
||||||
#include <asm/fixmap.h>
|
#include <asm/timex.h>
|
||||||
#include <hv/hypervisor.h>
|
|
||||||
|
|
||||||
void __udelay(unsigned long usecs)
|
void __udelay(unsigned long usecs)
|
||||||
{
|
{
|
||||||
hv_nanosleep(usecs * 1000);
|
if (usecs > ULONG_MAX / 1000) {
|
||||||
|
WARN_ON_ONCE(usecs > ULONG_MAX / 1000);
|
||||||
|
usecs = ULONG_MAX / 1000;
|
||||||
|
}
|
||||||
|
__ndelay(usecs * 1000);
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(__udelay);
|
EXPORT_SYMBOL(__udelay);
|
||||||
|
|
||||||
void __ndelay(unsigned long nsecs)
|
void __ndelay(unsigned long nsecs)
|
||||||
{
|
{
|
||||||
hv_nanosleep(nsecs);
|
cycles_t target = get_cycles();
|
||||||
|
target += ns2cycles(nsecs);
|
||||||
|
while (get_cycles() < target)
|
||||||
|
cpu_relax();
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(__ndelay);
|
EXPORT_SYMBOL(__ndelay);
|
||||||
|
|
||||||
/* FIXME: should be declared in a header somewhere. */
|
void __delay(unsigned long cycles)
|
||||||
|
{
|
||||||
|
cycles_t target = get_cycles() + cycles;
|
||||||
|
while (get_cycles() < target)
|
||||||
|
cpu_relax();
|
||||||
|
}
|
||||||
EXPORT_SYMBOL(__delay);
|
EXPORT_SYMBOL(__delay);
|
||||||
|
Loading…
Reference in New Issue
Block a user