Merge branch 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 fpu updates from Ingo Molnar:
 "Three changes:

   - preparatory patch for AVX-512 state tracking that computing-cluster
     folks would like to use for user-space batching - but we are not
     happy about the related ABI yet, so this is only the kernel tracking
     side

   - a cleanup for CR0 handling in do_device_not_available()

   - plus we removed a workaround for an ancient binutils version"

* 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/fpu: Track AVX-512 usage of tasks
  x86/fpu: Get rid of CONFIG_AS_FXSAVEQ
  x86/traps: Have read_cr0() only once in the #NM handler
This commit is contained in:
Linus Torvalds 2019-03-07 17:09:28 -08:00
commit 35a738fb5f
4 changed files with 22 additions and 48 deletions

View File

@ -187,7 +187,6 @@ cfi-sigframe := $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,
cfi-sections := $(call as-instr,.cfi_sections .debug_frame,-DCONFIG_AS_CFI_SECTIONS=1) cfi-sections := $(call as-instr,.cfi_sections .debug_frame,-DCONFIG_AS_CFI_SECTIONS=1)
# does binutils support specific instructions? # does binutils support specific instructions?
asinstr := $(call as-instr,fxsaveq (%rax),-DCONFIG_AS_FXSAVEQ=1)
asinstr += $(call as-instr,pshufb %xmm0$(comma)%xmm0,-DCONFIG_AS_SSSE3=1) asinstr += $(call as-instr,pshufb %xmm0$(comma)%xmm0,-DCONFIG_AS_SSSE3=1)
avx_instr := $(call as-instr,vxorps %ymm0$(comma)%ymm1$(comma)%ymm2,-DCONFIG_AS_AVX=1) avx_instr := $(call as-instr,vxorps %ymm0$(comma)%ymm1$(comma)%ymm2,-DCONFIG_AS_AVX=1)
avx2_instr :=$(call as-instr,vpbroadcastb %xmm0$(comma)%ymm1,-DCONFIG_AS_AVX2=1) avx2_instr :=$(call as-instr,vpbroadcastb %xmm0$(comma)%ymm1,-DCONFIG_AS_AVX2=1)

View File

@ -137,37 +137,25 @@ static inline int copy_fxregs_to_user(struct fxregs_state __user *fx)
{ {
if (IS_ENABLED(CONFIG_X86_32)) if (IS_ENABLED(CONFIG_X86_32))
return user_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx)); return user_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx));
else if (IS_ENABLED(CONFIG_AS_FXSAVEQ)) else
return user_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx)); return user_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx));
/* See comment in copy_fxregs_to_kernel() below. */
return user_insn(rex64/fxsave (%[fx]), "=m" (*fx), [fx] "R" (fx));
} }
static inline void copy_kernel_to_fxregs(struct fxregs_state *fx) static inline void copy_kernel_to_fxregs(struct fxregs_state *fx)
{ {
if (IS_ENABLED(CONFIG_X86_32)) { if (IS_ENABLED(CONFIG_X86_32))
kernel_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); kernel_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
} else { else
if (IS_ENABLED(CONFIG_AS_FXSAVEQ)) { kernel_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
kernel_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
} else {
/* See comment in copy_fxregs_to_kernel() below. */
kernel_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), "m" (*fx));
}
}
} }
static inline int copy_user_to_fxregs(struct fxregs_state __user *fx) static inline int copy_user_to_fxregs(struct fxregs_state __user *fx)
{ {
if (IS_ENABLED(CONFIG_X86_32)) if (IS_ENABLED(CONFIG_X86_32))
return user_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); return user_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
else if (IS_ENABLED(CONFIG_AS_FXSAVEQ)) else
return user_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); return user_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
/* See comment in copy_fxregs_to_kernel() below. */
return user_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx),
"m" (*fx));
} }
static inline void copy_kernel_to_fregs(struct fregs_state *fx) static inline void copy_kernel_to_fregs(struct fregs_state *fx)
@ -184,34 +172,8 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu)
{ {
if (IS_ENABLED(CONFIG_X86_32)) if (IS_ENABLED(CONFIG_X86_32))
asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state.fxsave)); asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state.fxsave));
else if (IS_ENABLED(CONFIG_AS_FXSAVEQ)) else
asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state.fxsave)); asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state.fxsave));
else {
/* Using "rex64; fxsave %0" is broken because, if the memory
* operand uses any extended registers for addressing, a second
* REX prefix will be generated (to the assembler, rex64
* followed by semicolon is a separate instruction), and hence
* the 64-bitness is lost.
*
* Using "fxsaveq %0" would be the ideal choice, but is only
* supported starting with gas 2.16.
*
* Using, as a workaround, the properly prefixed form below
* isn't accepted by any binutils version so far released,
* complaining that the same type of prefix is used twice if
* an extended register is needed for addressing (fix submitted
* to mainline 2005-11-21).
*
* asm volatile("rex64/fxsave %0" : "=m" (fpu->state.fxsave));
*
* This, however, we can work around by forcing the compiler to
* select an addressing mode that doesn't require extended
* registers.
*/
asm volatile( "rex64/fxsave (%[fx])"
: "=m" (fpu->state.fxsave)
: [fx] "R" (&fpu->state.fxsave));
}
} }
/* These macros all use (%edi)/(%rdi) as the single memory argument. */ /* These macros all use (%edi)/(%rdi) as the single memory argument. */
@ -414,6 +376,13 @@ static inline int copy_fpregs_to_fpstate(struct fpu *fpu)
{ {
if (likely(use_xsave())) { if (likely(use_xsave())) {
copy_xregs_to_kernel(&fpu->state.xsave); copy_xregs_to_kernel(&fpu->state.xsave);
/*
* AVX512 state is tracked here because its use is
* known to slow the max clock speed of the core.
*/
if (fpu->state.xsave.header.xfeatures & XFEATURE_MASK_AVX512)
fpu->avx512_timestamp = jiffies;
return 1; return 1;
} }

View File

@ -302,6 +302,13 @@ struct fpu {
*/ */
unsigned char initialized; unsigned char initialized;
/*
* @avx512_timestamp:
*
* Records the timestamp of AVX512 use during the last context switch.
*/
unsigned long avx512_timestamp;
/* /*
* @state: * @state:
* *

View File

@ -881,12 +881,12 @@ do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
dotraplinkage void dotraplinkage void
do_device_not_available(struct pt_regs *regs, long error_code) do_device_not_available(struct pt_regs *regs, long error_code)
{ {
unsigned long cr0; unsigned long cr0 = read_cr0();
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
#ifdef CONFIG_MATH_EMULATION #ifdef CONFIG_MATH_EMULATION
if (!boot_cpu_has(X86_FEATURE_FPU) && (read_cr0() & X86_CR0_EM)) { if (!boot_cpu_has(X86_FEATURE_FPU) && (cr0 & X86_CR0_EM)) {
struct math_emu_info info = { }; struct math_emu_info info = { };
cond_local_irq_enable(regs); cond_local_irq_enable(regs);
@ -898,7 +898,6 @@ do_device_not_available(struct pt_regs *regs, long error_code)
#endif #endif
/* This should not happen. */ /* This should not happen. */
cr0 = read_cr0();
if (WARN(cr0 & X86_CR0_TS, "CR0.TS was set")) { if (WARN(cr0 & X86_CR0_TS, "CR0.TS was set")) {
/* Try to fix it up and carry on. */ /* Try to fix it up and carry on. */
write_cr0(cr0 & ~X86_CR0_TS); write_cr0(cr0 & ~X86_CR0_TS);