Merge branch 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 fpu updates from Ingo Molnar:
 "Three changes:

   - preparatory patch for AVX state tracking that computing-cluster
     folks would like to use for user-space batching - but we are not
     happy about the related ABI yet so this is only the kernel
     tracking side

   - a cleanup for CR0 handling in do_device_not_available()

   - plus we removed a workaround for an ancient binutils version"

* 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/fpu: Track AVX-512 usage of tasks
  x86/fpu: Get rid of CONFIG_AS_FXSAVEQ
  x86/traps: Have read_cr0() only once in the #NM handler
This commit is contained in:
commit
35a738fb5f
@ -187,7 +187,6 @@ cfi-sigframe := $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,
|
|||||||
cfi-sections := $(call as-instr,.cfi_sections .debug_frame,-DCONFIG_AS_CFI_SECTIONS=1)
|
cfi-sections := $(call as-instr,.cfi_sections .debug_frame,-DCONFIG_AS_CFI_SECTIONS=1)
|
||||||
|
|
||||||
# does binutils support specific instructions?
|
# does binutils support specific instructions?
|
||||||
asinstr := $(call as-instr,fxsaveq (%rax),-DCONFIG_AS_FXSAVEQ=1)
|
|
||||||
asinstr += $(call as-instr,pshufb %xmm0$(comma)%xmm0,-DCONFIG_AS_SSSE3=1)
|
asinstr += $(call as-instr,pshufb %xmm0$(comma)%xmm0,-DCONFIG_AS_SSSE3=1)
|
||||||
avx_instr := $(call as-instr,vxorps %ymm0$(comma)%ymm1$(comma)%ymm2,-DCONFIG_AS_AVX=1)
|
avx_instr := $(call as-instr,vxorps %ymm0$(comma)%ymm1$(comma)%ymm2,-DCONFIG_AS_AVX=1)
|
||||||
avx2_instr :=$(call as-instr,vpbroadcastb %xmm0$(comma)%ymm1,-DCONFIG_AS_AVX2=1)
|
avx2_instr :=$(call as-instr,vpbroadcastb %xmm0$(comma)%ymm1,-DCONFIG_AS_AVX2=1)
|
||||||
|
@ -137,37 +137,25 @@ static inline int copy_fxregs_to_user(struct fxregs_state __user *fx)
|
|||||||
{
|
{
|
||||||
if (IS_ENABLED(CONFIG_X86_32))
|
if (IS_ENABLED(CONFIG_X86_32))
|
||||||
return user_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx));
|
return user_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx));
|
||||||
else if (IS_ENABLED(CONFIG_AS_FXSAVEQ))
|
else
|
||||||
return user_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx));
|
return user_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx));
|
||||||
|
|
||||||
/* See comment in copy_fxregs_to_kernel() below. */
|
|
||||||
return user_insn(rex64/fxsave (%[fx]), "=m" (*fx), [fx] "R" (fx));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void copy_kernel_to_fxregs(struct fxregs_state *fx)
|
static inline void copy_kernel_to_fxregs(struct fxregs_state *fx)
|
||||||
{
|
{
|
||||||
if (IS_ENABLED(CONFIG_X86_32)) {
|
if (IS_ENABLED(CONFIG_X86_32))
|
||||||
kernel_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
|
kernel_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
|
||||||
} else {
|
else
|
||||||
if (IS_ENABLED(CONFIG_AS_FXSAVEQ)) {
|
kernel_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
|
||||||
kernel_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
|
|
||||||
} else {
|
|
||||||
/* See comment in copy_fxregs_to_kernel() below. */
|
|
||||||
kernel_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), "m" (*fx));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int copy_user_to_fxregs(struct fxregs_state __user *fx)
|
static inline int copy_user_to_fxregs(struct fxregs_state __user *fx)
|
||||||
{
|
{
|
||||||
if (IS_ENABLED(CONFIG_X86_32))
|
if (IS_ENABLED(CONFIG_X86_32))
|
||||||
return user_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
|
return user_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
|
||||||
else if (IS_ENABLED(CONFIG_AS_FXSAVEQ))
|
else
|
||||||
return user_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
|
return user_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
|
||||||
|
|
||||||
/* See comment in copy_fxregs_to_kernel() below. */
|
|
||||||
return user_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx),
|
|
||||||
"m" (*fx));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void copy_kernel_to_fregs(struct fregs_state *fx)
|
static inline void copy_kernel_to_fregs(struct fregs_state *fx)
|
||||||
@ -184,34 +172,8 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu)
|
|||||||
{
|
{
|
||||||
if (IS_ENABLED(CONFIG_X86_32))
|
if (IS_ENABLED(CONFIG_X86_32))
|
||||||
asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state.fxsave));
|
asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state.fxsave));
|
||||||
else if (IS_ENABLED(CONFIG_AS_FXSAVEQ))
|
else
|
||||||
asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state.fxsave));
|
asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state.fxsave));
|
||||||
else {
|
|
||||||
/* Using "rex64; fxsave %0" is broken because, if the memory
|
|
||||||
* operand uses any extended registers for addressing, a second
|
|
||||||
* REX prefix will be generated (to the assembler, rex64
|
|
||||||
* followed by semicolon is a separate instruction), and hence
|
|
||||||
* the 64-bitness is lost.
|
|
||||||
*
|
|
||||||
* Using "fxsaveq %0" would be the ideal choice, but is only
|
|
||||||
* supported starting with gas 2.16.
|
|
||||||
*
|
|
||||||
* Using, as a workaround, the properly prefixed form below
|
|
||||||
* isn't accepted by any binutils version so far released,
|
|
||||||
* complaining that the same type of prefix is used twice if
|
|
||||||
* an extended register is needed for addressing (fix submitted
|
|
||||||
* to mainline 2005-11-21).
|
|
||||||
*
|
|
||||||
* asm volatile("rex64/fxsave %0" : "=m" (fpu->state.fxsave));
|
|
||||||
*
|
|
||||||
* This, however, we can work around by forcing the compiler to
|
|
||||||
* select an addressing mode that doesn't require extended
|
|
||||||
* registers.
|
|
||||||
*/
|
|
||||||
asm volatile( "rex64/fxsave (%[fx])"
|
|
||||||
: "=m" (fpu->state.fxsave)
|
|
||||||
: [fx] "R" (&fpu->state.fxsave));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* These macros all use (%edi)/(%rdi) as the single memory argument. */
|
/* These macros all use (%edi)/(%rdi) as the single memory argument. */
|
||||||
@ -414,6 +376,13 @@ static inline int copy_fpregs_to_fpstate(struct fpu *fpu)
|
|||||||
{
|
{
|
||||||
if (likely(use_xsave())) {
|
if (likely(use_xsave())) {
|
||||||
copy_xregs_to_kernel(&fpu->state.xsave);
|
copy_xregs_to_kernel(&fpu->state.xsave);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* AVX512 state is tracked here because its use is
|
||||||
|
* known to slow the max clock speed of the core.
|
||||||
|
*/
|
||||||
|
if (fpu->state.xsave.header.xfeatures & XFEATURE_MASK_AVX512)
|
||||||
|
fpu->avx512_timestamp = jiffies;
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -302,6 +302,13 @@ struct fpu {
|
|||||||
*/
|
*/
|
||||||
unsigned char initialized;
|
unsigned char initialized;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* @avx512_timestamp:
|
||||||
|
*
|
||||||
|
* Records the timestamp of AVX512 use during last context switch.
|
||||||
|
*/
|
||||||
|
unsigned long avx512_timestamp;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* @state:
|
* @state:
|
||||||
*
|
*
|
||||||
|
@ -881,12 +881,12 @@ do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
|
|||||||
dotraplinkage void
|
dotraplinkage void
|
||||||
do_device_not_available(struct pt_regs *regs, long error_code)
|
do_device_not_available(struct pt_regs *regs, long error_code)
|
||||||
{
|
{
|
||||||
unsigned long cr0;
|
unsigned long cr0 = read_cr0();
|
||||||
|
|
||||||
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
|
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
|
||||||
|
|
||||||
#ifdef CONFIG_MATH_EMULATION
|
#ifdef CONFIG_MATH_EMULATION
|
||||||
if (!boot_cpu_has(X86_FEATURE_FPU) && (read_cr0() & X86_CR0_EM)) {
|
if (!boot_cpu_has(X86_FEATURE_FPU) && (cr0 & X86_CR0_EM)) {
|
||||||
struct math_emu_info info = { };
|
struct math_emu_info info = { };
|
||||||
|
|
||||||
cond_local_irq_enable(regs);
|
cond_local_irq_enable(regs);
|
||||||
@ -898,7 +898,6 @@ do_device_not_available(struct pt_regs *regs, long error_code)
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* This should not happen. */
|
/* This should not happen. */
|
||||||
cr0 = read_cr0();
|
|
||||||
if (WARN(cr0 & X86_CR0_TS, "CR0.TS was set")) {
|
if (WARN(cr0 & X86_CR0_TS, "CR0.TS was set")) {
|
||||||
/* Try to fix it up and carry on. */
|
/* Try to fix it up and carry on. */
|
||||||
write_cr0(cr0 & ~X86_CR0_TS);
|
write_cr0(cr0 & ~X86_CR0_TS);
|
||||||
|
Loading…
Reference in New Issue
Block a user