Merge branch 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 fpu updates from Ingo Molnar: "Three changes: - preparatory patch for AVX state tracking that computing-cluster folks would like to use for user-space batching - but we are not happy about the related ABI yet so this is only the kernel tracking side - a cleanup for CR0 handling in do_device_not_available() - plus we removed a workaround for an ancient binutils version" * 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86/fpu: Track AVX-512 usage of tasks x86/fpu: Get rid of CONFIG_AS_FXSAVEQ x86/traps: Have read_cr0() only once in the #NM handler
This commit is contained in:
commit
35a738fb5f
@ -187,7 +187,6 @@ cfi-sigframe := $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,
|
||||
cfi-sections := $(call as-instr,.cfi_sections .debug_frame,-DCONFIG_AS_CFI_SECTIONS=1)
|
||||
|
||||
# does binutils support specific instructions?
|
||||
asinstr := $(call as-instr,fxsaveq (%rax),-DCONFIG_AS_FXSAVEQ=1)
|
||||
asinstr += $(call as-instr,pshufb %xmm0$(comma)%xmm0,-DCONFIG_AS_SSSE3=1)
|
||||
avx_instr := $(call as-instr,vxorps %ymm0$(comma)%ymm1$(comma)%ymm2,-DCONFIG_AS_AVX=1)
|
||||
avx2_instr :=$(call as-instr,vpbroadcastb %xmm0$(comma)%ymm1,-DCONFIG_AS_AVX2=1)
|
||||
|
@ -137,37 +137,25 @@ static inline int copy_fxregs_to_user(struct fxregs_state __user *fx)
|
||||
{
|
||||
if (IS_ENABLED(CONFIG_X86_32))
|
||||
return user_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx));
|
||||
else if (IS_ENABLED(CONFIG_AS_FXSAVEQ))
|
||||
else
|
||||
return user_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx));
|
||||
|
||||
/* See comment in copy_fxregs_to_kernel() below. */
|
||||
return user_insn(rex64/fxsave (%[fx]), "=m" (*fx), [fx] "R" (fx));
|
||||
}
|
||||
|
||||
static inline void copy_kernel_to_fxregs(struct fxregs_state *fx)
|
||||
{
|
||||
if (IS_ENABLED(CONFIG_X86_32)) {
|
||||
if (IS_ENABLED(CONFIG_X86_32))
|
||||
kernel_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
|
||||
} else {
|
||||
if (IS_ENABLED(CONFIG_AS_FXSAVEQ)) {
|
||||
kernel_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
|
||||
} else {
|
||||
/* See comment in copy_fxregs_to_kernel() below. */
|
||||
kernel_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), "m" (*fx));
|
||||
}
|
||||
}
|
||||
else
|
||||
kernel_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
|
||||
}
|
||||
|
||||
static inline int copy_user_to_fxregs(struct fxregs_state __user *fx)
|
||||
{
|
||||
if (IS_ENABLED(CONFIG_X86_32))
|
||||
return user_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
|
||||
else if (IS_ENABLED(CONFIG_AS_FXSAVEQ))
|
||||
else
|
||||
return user_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
|
||||
|
||||
/* See comment in copy_fxregs_to_kernel() below. */
|
||||
return user_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx),
|
||||
"m" (*fx));
|
||||
}
|
||||
|
||||
static inline void copy_kernel_to_fregs(struct fregs_state *fx)
|
||||
@ -184,34 +172,8 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu)
|
||||
{
|
||||
if (IS_ENABLED(CONFIG_X86_32))
|
||||
asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state.fxsave));
|
||||
else if (IS_ENABLED(CONFIG_AS_FXSAVEQ))
|
||||
else
|
||||
asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state.fxsave));
|
||||
else {
|
||||
/* Using "rex64; fxsave %0" is broken because, if the memory
|
||||
* operand uses any extended registers for addressing, a second
|
||||
* REX prefix will be generated (to the assembler, rex64
|
||||
* followed by semicolon is a separate instruction), and hence
|
||||
* the 64-bitness is lost.
|
||||
*
|
||||
* Using "fxsaveq %0" would be the ideal choice, but is only
|
||||
* supported starting with gas 2.16.
|
||||
*
|
||||
* Using, as a workaround, the properly prefixed form below
|
||||
* isn't accepted by any binutils version so far released,
|
||||
* complaining that the same type of prefix is used twice if
|
||||
* an extended register is needed for addressing (fix submitted
|
||||
* to mainline 2005-11-21).
|
||||
*
|
||||
* asm volatile("rex64/fxsave %0" : "=m" (fpu->state.fxsave));
|
||||
*
|
||||
* This, however, we can work around by forcing the compiler to
|
||||
* select an addressing mode that doesn't require extended
|
||||
* registers.
|
||||
*/
|
||||
asm volatile( "rex64/fxsave (%[fx])"
|
||||
: "=m" (fpu->state.fxsave)
|
||||
: [fx] "R" (&fpu->state.fxsave));
|
||||
}
|
||||
}
|
||||
|
||||
/* These macros all use (%edi)/(%rdi) as the single memory argument. */
|
||||
@ -414,6 +376,13 @@ static inline int copy_fpregs_to_fpstate(struct fpu *fpu)
|
||||
{
|
||||
if (likely(use_xsave())) {
|
||||
copy_xregs_to_kernel(&fpu->state.xsave);
|
||||
|
||||
/*
|
||||
* AVX512 state is tracked here because its use is
|
||||
* known to slow the max clock speed of the core.
|
||||
*/
|
||||
if (fpu->state.xsave.header.xfeatures & XFEATURE_MASK_AVX512)
|
||||
fpu->avx512_timestamp = jiffies;
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -302,6 +302,13 @@ struct fpu {
|
||||
*/
|
||||
unsigned char initialized;
|
||||
|
||||
/*
|
||||
* @avx512_timestamp:
|
||||
*
|
||||
* Records the timestamp of AVX512 use during last context switch.
|
||||
*/
|
||||
unsigned long avx512_timestamp;
|
||||
|
||||
/*
|
||||
* @state:
|
||||
*
|
||||
|
@ -881,12 +881,12 @@ do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
|
||||
dotraplinkage void
|
||||
do_device_not_available(struct pt_regs *regs, long error_code)
|
||||
{
|
||||
unsigned long cr0;
|
||||
unsigned long cr0 = read_cr0();
|
||||
|
||||
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
|
||||
|
||||
#ifdef CONFIG_MATH_EMULATION
|
||||
if (!boot_cpu_has(X86_FEATURE_FPU) && (read_cr0() & X86_CR0_EM)) {
|
||||
if (!boot_cpu_has(X86_FEATURE_FPU) && (cr0 & X86_CR0_EM)) {
|
||||
struct math_emu_info info = { };
|
||||
|
||||
cond_local_irq_enable(regs);
|
||||
@ -898,7 +898,6 @@ do_device_not_available(struct pt_regs *regs, long error_code)
|
||||
#endif
|
||||
|
||||
/* This should not happen. */
|
||||
cr0 = read_cr0();
|
||||
if (WARN(cr0 & X86_CR0_TS, "CR0.TS was set")) {
|
||||
/* Try to fix it up and carry on. */
|
||||
write_cr0(cr0 & ~X86_CR0_TS);
|
||||
|
Loading…
Reference in New Issue
Block a user