From 6e606e681873b37aa252486d43be4cf007544e85 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 14 Jun 2023 01:39:22 +0200 Subject: [PATCH 01/40] init: Provide arch_cpu_finalize_init() commit 7725acaa4f0c04fbefb0e0d342635b967bb7d414 upstream check_bugs() has become a dumping ground for all sorts of activities to finalize the CPU initialization before running the rest of the init code. Most are empty, a few do actual bug checks, some do alternative patching and some cobble a CPU advertisement string together.... Aside of that the current implementation requires duplicated function declaration and mostly empty header files for them. Provide a new function arch_cpu_finalize_init(). Provide a generic declaration if CONFIG_ARCH_HAS_CPU_FINALIZE_INIT is selected and a stub inline otherwise. This requires a temporary #ifdef in start_kernel() which will be removed along with check_bugs() once the architectures are converted over. Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20230613224544.957805717@linutronix.de Signed-off-by: Daniel Sneddon Signed-off-by: Greg Kroah-Hartman --- arch/Kconfig | 3 +++ include/linux/cpu.h | 6 ++++++ init/main.c | 5 +++++ 3 files changed, 14 insertions(+) diff --git a/arch/Kconfig b/arch/Kconfig index 632d60e13494..240277d5626c 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -290,6 +290,9 @@ config ARCH_HAS_DMA_SET_UNCACHED config ARCH_HAS_DMA_CLEAR_UNCACHED bool +config ARCH_HAS_CPU_FINALIZE_INIT + bool + # Select if arch init_task must go in the __init_task_data section config ARCH_TASK_STRUCT_ON_STACK bool diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 24e5f237244d..ef084a2fc6d5 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -188,6 +188,12 @@ void arch_cpu_idle_enter(void); void arch_cpu_idle_exit(void); void arch_cpu_idle_dead(void); +#ifdef CONFIG_ARCH_HAS_CPU_FINALIZE_INIT +void arch_cpu_finalize_init(void); +#else +static inline void arch_cpu_finalize_init(void) { } +#endif + int cpu_report_state(int cpu); int cpu_check_up_prepare(int cpu); void cpu_set_state_online(int cpu); diff --git a/init/main.c b/init/main.c index d8bfe61b5a88..18abf3a5d789 100644 --- a/init/main.c +++ b/init/main.c @@ -1044,7 +1044,12 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void) delayacct_init(); poking_init(); + + arch_cpu_finalize_init(); + /* Temporary conditional until everything has been converted */ +#ifndef CONFIG_ARCH_HAS_CPU_FINALIZE_INIT check_bugs(); +#endif acpi_subsystem_init(); arch_post_acpi_subsys_init(); From e5eb18e164d08986543f8259d0cc10e120fb8746 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 14 Jun 2023 01:39:24 +0200 Subject: [PATCH 02/40] x86/cpu: Switch to arch_cpu_finalize_init() commit 7c7077a72674402654f3291354720cd73cdf649e upstream check_bugs() is a dumping ground for finalizing the CPU bringup. Only parts of it has to do with actual CPU bugs. Split it apart into arch_cpu_finalize_init() and cpu_select_mitigations(). Fixup the bogus 32bit comments while at it. No functional change. 
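In condensed form, the boot CPU finalization on x86 after this split looks roughly like the sketch below (simplified; the full function, including the 32-bit i486 check, the utsname fixup and the 64-bit set_memory_4k() fixup, is in the arch/x86/kernel/cpu/common.c hunk of this patch):

	void __init arch_cpu_finalize_init(void)
	{
		identify_boot_cpu();
		cpu_smt_check_topology();	/* SMT info was gathered by identify_boot_cpu() */
		cpu_select_mitigations();	/* the actual "bugs" half of the old check_bugs() */
		arch_smt_update();
		alternative_instructions();	/* patch alternatives after mitigations are selected */
	}
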
Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230613224545.019583869@linutronix.de Signed-off-by: Daniel Sneddon Signed-off-by: Greg Kroah-Hartman --- arch/x86/Kconfig | 1 + arch/x86/include/asm/bugs.h | 2 -- arch/x86/kernel/cpu/bugs.c | 51 +--------------------------------- arch/x86/kernel/cpu/common.c | 53 ++++++++++++++++++++++++++++++++++++ arch/x86/kernel/cpu/cpu.h | 1 + 5 files changed, 56 insertions(+), 52 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 2284666e8c90..d1c734289bdd 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -59,6 +59,7 @@ config X86 select ARCH_32BIT_OFF_T if X86_32 select ARCH_CLOCKSOURCE_INIT select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI + select ARCH_HAS_CPU_FINALIZE_INIT select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEBUG_VM_PGTABLE if !X86_PAE select ARCH_HAS_DEVMEM_IS_ALLOWED diff --git a/arch/x86/include/asm/bugs.h b/arch/x86/include/asm/bugs.h index 92ae28389940..f25ca2d709d4 100644 --- a/arch/x86/include/asm/bugs.h +++ b/arch/x86/include/asm/bugs.h @@ -4,8 +4,6 @@ #include -extern void check_bugs(void); - #if defined(CONFIG_CPU_SUP_INTEL) && defined(CONFIG_X86_32) int ppro_with_ram_bug(void); #else diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index c81b8b029b68..1ac943f1c99e 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -9,7 +9,6 @@ * - Andrew D. Balsa (code cleanup). */ #include -#include #include #include #include @@ -27,8 +26,6 @@ #include #include #include -#include -#include #include #include #include @@ -116,21 +113,8 @@ EXPORT_SYMBOL_GPL(mds_idle_clear); DEFINE_STATIC_KEY_FALSE(mmio_stale_data_clear); EXPORT_SYMBOL_GPL(mmio_stale_data_clear); -void __init check_bugs(void) +void __init cpu_select_mitigations(void) { - identify_boot_cpu(); - - /* - * identify_boot_cpu() initialized SMT support information, let the - * core code know. - */ - cpu_smt_check_topology(); - - if (!IS_ENABLED(CONFIG_SMP)) { - pr_info("CPU: "); - print_cpu_info(&boot_cpu_data); - } - /* * Read the SPEC_CTRL MSR to account for reserved bits which may * have unknown values. AMD64_LS_CFG MSR is cached in the early AMD @@ -166,39 +150,6 @@ void __init check_bugs(void) l1tf_select_mitigation(); md_clear_select_mitigation(); srbds_select_mitigation(); - - arch_smt_update(); - -#ifdef CONFIG_X86_32 - /* - * Check whether we are able to run this kernel safely on SMP. - * - * - i386 is no longer supported. - * - In order to run on anything without a TSC, we need to be - * compiled for a i486. - */ - if (boot_cpu_data.x86 < 4) - panic("Kernel requires i486+ for 'invlpg' and other features"); - - init_utsname()->machine[1] = - '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86); - alternative_instructions(); - - fpu__init_check_bugs(); -#else /* CONFIG_X86_64 */ - alternative_instructions(); - - /* - * Make sure the first 2MB area is not mapped by huge pages - * There are typically fixed size MTRRs in there and overlapping - * MTRRs into large pages causes slow downs. - * - * Right now we don't do that with gbpages because there seems - * very little benefit for that case. 
- */ - if (!direct_gbpages) - set_memory_4k((unsigned long)__va(0), 1); -#endif } /* diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f41781d06a5f..3e7bd3d2ce00 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -19,10 +19,13 @@ #include #include #include +#include #include #include #include +#include +#include #include #include #include @@ -58,6 +61,7 @@ #include #include #include +#include #include "cpu.h" @@ -2188,3 +2192,52 @@ void arch_smt_update(void) /* Check whether IPI broadcasting can be enabled */ apic_smt_update(); } + +void __init arch_cpu_finalize_init(void) +{ + identify_boot_cpu(); + + /* + * identify_boot_cpu() initialized SMT support information, let the + * core code know. + */ + cpu_smt_check_topology(); + + if (!IS_ENABLED(CONFIG_SMP)) { + pr_info("CPU: "); + print_cpu_info(&boot_cpu_data); + } + + cpu_select_mitigations(); + + arch_smt_update(); + + if (IS_ENABLED(CONFIG_X86_32)) { + /* + * Check whether this is a real i386 which is not longer + * supported and fixup the utsname. + */ + if (boot_cpu_data.x86 < 4) + panic("Kernel requires i486+ for 'invlpg' and other features"); + + init_utsname()->machine[1] = + '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86); + } + + alternative_instructions(); + + if (IS_ENABLED(CONFIG_X86_64)) { + /* + * Make sure the first 2MB area is not mapped by huge pages + * There are typically fixed size MTRRs in there and overlapping + * MTRRs into large pages causes slow downs. + * + * Right now we don't do that with gbpages because there seems + * very little benefit for that case. + */ + if (!direct_gbpages) + set_memory_4k((unsigned long)__va(0), 1); + } else { + fpu__init_check_bugs(); + } +} diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 91df90abc1d9..215416e8a8de 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -78,6 +78,7 @@ extern void detect_ht(struct cpuinfo_x86 *c); extern void check_null_seg_clears_base(struct cpuinfo_x86 *c); unsigned int aperfmperf_get_khz(int cpu); +void cpu_select_mitigations(void); extern void x86_spec_ctrl_setup_ap(void); extern void update_srbds_msr(void); From c0fff20d4efa3bdb3ef203a8ae6e703e0c010199 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 14 Jun 2023 01:39:25 +0200 Subject: [PATCH 03/40] ARM: cpu: Switch to arch_cpu_finalize_init() commit ee31bb0524a2e7c99b03f50249a411cc1eaa411f upstream check_bugs() is about to be phased out. Switch over to the new arch_cpu_finalize_init() implementation. No functional change. 
Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20230613224545.078124882@linutronix.de Signed-off-by: Daniel Sneddon Signed-off-by: Greg Kroah-Hartman --- arch/arm/Kconfig | 1 + arch/arm/include/asm/bugs.h | 4 ---- arch/arm/kernel/bugs.c | 3 ++- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 985ab0b091a6..335308aff6ce 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -4,6 +4,7 @@ config ARM default y select ARCH_32BIT_OFF_T select ARCH_HAS_BINFMT_FLAT + select ARCH_HAS_CPU_FINALIZE_INIT if MMU select ARCH_HAS_DEBUG_VIRTUAL if MMU select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_DMA_WRITE_COMBINE if !ARM_DMA_MEM_BUFFERABLE diff --git a/arch/arm/include/asm/bugs.h b/arch/arm/include/asm/bugs.h index 97a312ba0840..fe385551edec 100644 --- a/arch/arm/include/asm/bugs.h +++ b/arch/arm/include/asm/bugs.h @@ -1,7 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * arch/arm/include/asm/bugs.h - * * Copyright (C) 1995-2003 Russell King */ #ifndef __ASM_BUGS_H @@ -10,10 +8,8 @@ extern void check_writebuffer_bugs(void); #ifdef CONFIG_MMU -extern void check_bugs(void); extern void check_other_bugs(void); #else -#define check_bugs() do { } while (0) #define check_other_bugs() do { } while (0) #endif diff --git a/arch/arm/kernel/bugs.c b/arch/arm/kernel/bugs.c index 14c8dbbb7d2d..087bce6ec8e9 100644 --- a/arch/arm/kernel/bugs.c +++ b/arch/arm/kernel/bugs.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include #include #include @@ -11,7 +12,7 @@ void check_other_bugs(void) #endif } -void __init check_bugs(void) +void __init arch_cpu_finalize_init(void) { check_writebuffer_bugs(); check_other_bugs(); From 12d93c6c98d5478128d90ad4fbdf705753a0197e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 14 Jun 2023 01:39:27 +0200 Subject: [PATCH 04/40] ia64/cpu: Switch to arch_cpu_finalize_init() commit 6c38e3005621800263f117fb00d6787a76e16de7 upstream check_bugs() is about to be phased out. Switch over to the new arch_cpu_finalize_init() implementation. No functional change. Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20230613224545.137045745@linutronix.de Signed-off-by: Daniel Sneddon Signed-off-by: Greg Kroah-Hartman --- arch/ia64/Kconfig | 1 + arch/ia64/include/asm/bugs.h | 20 -------------------- arch/ia64/kernel/setup.c | 3 +-- 3 files changed, 2 insertions(+), 22 deletions(-) delete mode 100644 arch/ia64/include/asm/bugs.h diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 1d0579bc9d65..975b00b6034b 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -8,6 +8,7 @@ menu "Processor type and features" config IA64 bool + select ARCH_HAS_CPU_FINALIZE_INIT select ARCH_HAS_DMA_MARK_CLEAN select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO diff --git a/arch/ia64/include/asm/bugs.h b/arch/ia64/include/asm/bugs.h deleted file mode 100644 index 0d6b9bded56c..000000000000 --- a/arch/ia64/include/asm/bugs.h +++ /dev/null @@ -1,20 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * This is included by init/main.c to check for architecture-dependent bugs. - * - * Needs: - * void check_bugs(void); - * - * Based on . - * - * Modified 1998, 1999, 2003 - * David Mosberger-Tang , Hewlett-Packard Co. 
- */ -#ifndef _ASM_IA64_BUGS_H -#define _ASM_IA64_BUGS_H - -#include - -extern void check_bugs (void); - -#endif /* _ASM_IA64_BUGS_H */ diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index dd595fbd8006..e7dd6756d5cb 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -1071,8 +1071,7 @@ cpu_init (void) } } -void __init -check_bugs (void) +void __init arch_cpu_finalize_init(void) { ia64_patch_mckinley_e9((unsigned long) __start___mckinley_e9_bundles, (unsigned long) __end___mckinley_e9_bundles); From 1cd3fc18eb169e2f81a34eeaf8147f9395ee8a11 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 14 Jun 2023 01:39:30 +0200 Subject: [PATCH 05/40] m68k/cpu: Switch to arch_cpu_finalize_init() commit 9ceecc2589b9d7cef6b321339ed8de484eac4b20 upstream check_bugs() is about to be phased out. Switch over to the new arch_cpu_finalize_init() implementation. No functional change. Signed-off-by: Thomas Gleixner Acked-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20230613224545.254342916@linutronix.de Signed-off-by: Daniel Sneddon Signed-off-by: Greg Kroah-Hartman --- arch/m68k/Kconfig | 1 + arch/m68k/include/asm/bugs.h | 21 --------------------- arch/m68k/kernel/setup_mm.c | 3 ++- 3 files changed, 3 insertions(+), 22 deletions(-) delete mode 100644 arch/m68k/include/asm/bugs.h diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index 372e4e69c43a..ad7a4ffd65c9 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -4,6 +4,7 @@ config M68K default y select ARCH_32BIT_OFF_T select ARCH_HAS_BINFMT_FLAT + select ARCH_HAS_CPU_FINALIZE_INIT if MMU select ARCH_HAS_DMA_PREP_COHERENT if HAS_DMA && MMU && !COLDFIRE select ARCH_HAS_SYNC_DMA_FOR_DEVICE if HAS_DMA select ARCH_HAVE_NMI_SAFE_CMPXCHG if RMW_INSNS diff --git a/arch/m68k/include/asm/bugs.h b/arch/m68k/include/asm/bugs.h deleted file mode 100644 index 745530651e0b..000000000000 --- a/arch/m68k/include/asm/bugs.h +++ /dev/null @@ -1,21 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * include/asm-m68k/bugs.h - * - * Copyright (C) 1994 Linus Torvalds - */ - -/* - * This is included by init/main.c to check for architecture-dependent bugs. - * - * Needs: - * void check_bugs(void); - */ - -#ifdef CONFIG_MMU -extern void check_bugs(void); /* in arch/m68k/kernel/setup.c */ -#else -static void check_bugs(void) -{ -} -#endif diff --git a/arch/m68k/kernel/setup_mm.c b/arch/m68k/kernel/setup_mm.c index ab8aa7be260f..f8116660417f 100644 --- a/arch/m68k/kernel/setup_mm.c +++ b/arch/m68k/kernel/setup_mm.c @@ -10,6 +10,7 @@ */ #include +#include #include #include #include @@ -523,7 +524,7 @@ static int __init proc_hardware_init(void) module_init(proc_hardware_init); #endif -void check_bugs(void) +void __init arch_cpu_finalize_init(void) { #if defined(CONFIG_FPU) && !defined(CONFIG_M68KFPU_EMU) if (m68k_fputype == 0) { From 75bb54c951e92714a50cdc063f9953d11e8d36a2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 14 Jun 2023 01:39:32 +0200 Subject: [PATCH 06/40] mips/cpu: Switch to arch_cpu_finalize_init() commit 7f066a22fe353a827a402ee2835e81f045b1574d upstream check_bugs() is about to be phased out. Switch over to the new arch_cpu_finalize_init() implementation. No functional change. 
Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20230613224545.312438573@linutronix.de Signed-off-by: Daniel Sneddon Signed-off-by: Greg Kroah-Hartman --- arch/mips/Kconfig | 1 + arch/mips/include/asm/bugs.h | 17 ----------------- arch/mips/kernel/setup.c | 13 +++++++++++++ 3 files changed, 14 insertions(+), 17 deletions(-) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 3442bdd4314c..57839f63074f 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -4,6 +4,7 @@ config MIPS default y select ARCH_32BIT_OFF_T if !64BIT select ARCH_BINFMT_ELF_STATE if MIPS_FP_SUPPORT + select ARCH_HAS_CPU_FINALIZE_INIT select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_KCOV select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE if !EVA diff --git a/arch/mips/include/asm/bugs.h b/arch/mips/include/asm/bugs.h index d72dc6e1cf3c..8d4cf29861b8 100644 --- a/arch/mips/include/asm/bugs.h +++ b/arch/mips/include/asm/bugs.h @@ -1,17 +1,11 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * This is included by init/main.c to check for architecture-dependent bugs. - * * Copyright (C) 2007 Maciej W. Rozycki - * - * Needs: - * void check_bugs(void); */ #ifndef _ASM_BUGS_H #define _ASM_BUGS_H #include -#include #include #include @@ -30,17 +24,6 @@ static inline void check_bugs_early(void) check_bugs64_early(); } -static inline void check_bugs(void) -{ - unsigned int cpu = smp_processor_id(); - - cpu_data[cpu].udelay_val = loops_per_jiffy; - check_bugs32(); - - if (IS_ENABLED(CONFIG_CPU_R4X00_BUGS64)) - check_bugs64(); -} - static inline int r4k_daddiu_bug(void) { if (!IS_ENABLED(CONFIG_CPU_R4X00_BUGS64)) diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index b49ad569e469..b7eb7dd96e17 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -11,6 +11,8 @@ * Copyright (C) 2000, 2001, 2002, 2007 Maciej W. Rozycki */ #include +#include +#include #include #include #include @@ -829,3 +831,14 @@ static int __init setnocoherentio(char *str) } early_param("nocoherentio", setnocoherentio); #endif + +void __init arch_cpu_finalize_init(void) +{ + unsigned int cpu = smp_processor_id(); + + cpu_data[cpu].udelay_val = loops_per_jiffy; + check_bugs32(); + + if (IS_ENABLED(CONFIG_CPU_R4X00_BUGS64)) + check_bugs64(); +} From 3c45134b38b417d17103f1f0b9a8b32f98ac358c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 14 Jun 2023 01:39:33 +0200 Subject: [PATCH 07/40] sh/cpu: Switch to arch_cpu_finalize_init() commit 01eb454e9bfe593f320ecbc9aaec60bf87cd453d upstream check_bugs() is about to be phased out. Switch over to the new arch_cpu_finalize_init() implementation. No functional change. 
Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20230613224545.371697797@linutronix.de Signed-off-by: Daniel Sneddon Signed-off-by: Greg Kroah-Hartman --- arch/sh/Kconfig | 1 + arch/sh/include/asm/bugs.h | 74 --------------------------------- arch/sh/include/asm/processor.h | 2 + arch/sh/kernel/idle.c | 1 + arch/sh/kernel/setup.c | 55 ++++++++++++++++++++++++ 5 files changed, 59 insertions(+), 74 deletions(-) delete mode 100644 arch/sh/include/asm/bugs.h diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index b6f3d49991d3..44dffe7ce50a 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -5,6 +5,7 @@ config SUPERH select ARCH_HAVE_CUSTOM_GPIO_H select ARCH_HAVE_NMI_SAFE_CMPXCHG if (GUSA_RB || CPU_SH4A) select ARCH_HAS_BINFMT_FLAT if !MMU + select ARCH_HAS_CPU_FINALIZE_INIT select ARCH_HAS_GIGANTIC_PAGE select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_PTE_SPECIAL diff --git a/arch/sh/include/asm/bugs.h b/arch/sh/include/asm/bugs.h deleted file mode 100644 index fe52abb69cea..000000000000 --- a/arch/sh/include/asm/bugs.h +++ /dev/null @@ -1,74 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASM_SH_BUGS_H -#define __ASM_SH_BUGS_H - -/* - * This is included by init/main.c to check for architecture-dependent bugs. - * - * Needs: - * void check_bugs(void); - */ - -/* - * I don't know of any Super-H bugs yet. - */ - -#include - -extern void select_idle_routine(void); - -static void __init check_bugs(void) -{ - extern unsigned long loops_per_jiffy; - char *p = &init_utsname()->machine[2]; /* "sh" */ - - select_idle_routine(); - - current_cpu_data.loops_per_jiffy = loops_per_jiffy; - - switch (current_cpu_data.family) { - case CPU_FAMILY_SH2: - *p++ = '2'; - break; - case CPU_FAMILY_SH2A: - *p++ = '2'; - *p++ = 'a'; - break; - case CPU_FAMILY_SH3: - *p++ = '3'; - break; - case CPU_FAMILY_SH4: - *p++ = '4'; - break; - case CPU_FAMILY_SH4A: - *p++ = '4'; - *p++ = 'a'; - break; - case CPU_FAMILY_SH4AL_DSP: - *p++ = '4'; - *p++ = 'a'; - *p++ = 'l'; - *p++ = '-'; - *p++ = 'd'; - *p++ = 's'; - *p++ = 'p'; - break; - case CPU_FAMILY_UNKNOWN: - /* - * Specifically use CPU_FAMILY_UNKNOWN rather than - * default:, so we're able to have the compiler whine - * about unhandled enumerations. 
- */ - break; - } - - printk("CPU: %s\n", get_cpu_subtype(¤t_cpu_data)); - -#ifndef __LITTLE_ENDIAN__ - /* 'eb' means 'Endian Big' */ - *p++ = 'e'; - *p++ = 'b'; -#endif - *p = '\0'; -} -#endif /* __ASM_SH_BUGS_H */ diff --git a/arch/sh/include/asm/processor.h b/arch/sh/include/asm/processor.h index 3820d698846e..97af2d9b0269 100644 --- a/arch/sh/include/asm/processor.h +++ b/arch/sh/include/asm/processor.h @@ -167,6 +167,8 @@ extern unsigned int instruction_size(unsigned int insn); #define instruction_size(insn) (2) #endif +void select_idle_routine(void); + #endif /* __ASSEMBLY__ */ #include diff --git a/arch/sh/kernel/idle.c b/arch/sh/kernel/idle.c index f59814983bd5..a80b2a5b25c7 100644 --- a/arch/sh/kernel/idle.c +++ b/arch/sh/kernel/idle.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c index 556e463a43d2..805c02eef5ac 100644 --- a/arch/sh/kernel/setup.c +++ b/arch/sh/kernel/setup.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include @@ -357,3 +358,57 @@ int test_mode_pin(int pin) { return sh_mv.mv_mode_pins() & pin; } + +void __init arch_cpu_finalize_init(void) +{ + char *p = &init_utsname()->machine[2]; /* "sh" */ + + select_idle_routine(); + + current_cpu_data.loops_per_jiffy = loops_per_jiffy; + + switch (current_cpu_data.family) { + case CPU_FAMILY_SH2: + *p++ = '2'; + break; + case CPU_FAMILY_SH2A: + *p++ = '2'; + *p++ = 'a'; + break; + case CPU_FAMILY_SH3: + *p++ = '3'; + break; + case CPU_FAMILY_SH4: + *p++ = '4'; + break; + case CPU_FAMILY_SH4A: + *p++ = '4'; + *p++ = 'a'; + break; + case CPU_FAMILY_SH4AL_DSP: + *p++ = '4'; + *p++ = 'a'; + *p++ = 'l'; + *p++ = '-'; + *p++ = 'd'; + *p++ = 's'; + *p++ = 'p'; + break; + case CPU_FAMILY_UNKNOWN: + /* + * Specifically use CPU_FAMILY_UNKNOWN rather than + * default:, so we're able to have the compiler whine + * about unhandled enumerations. + */ + break; + } + + pr_info("CPU: %s\n", get_cpu_subtype(¤t_cpu_data)); + +#ifndef __LITTLE_ENDIAN__ + /* 'eb' means 'Endian Big' */ + *p++ = 'e'; + *p++ = 'b'; +#endif + *p = '\0'; +} From 2edb3b39ca793bf13a123ea6a25da640be36e7a5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 14 Jun 2023 01:39:35 +0200 Subject: [PATCH 08/40] sparc/cpu: Switch to arch_cpu_finalize_init() commit 44ade508e3bfac45ae97864587de29eb1a881ec0 upstream check_bugs() is about to be phased out. Switch over to the new arch_cpu_finalize_init() implementation. No functional change. 
Signed-off-by: Thomas Gleixner Reviewed-by: Sam Ravnborg Link: https://lore.kernel.org/r/20230613224545.431995857@linutronix.de Signed-off-by: Daniel Sneddon Signed-off-by: Greg Kroah-Hartman --- arch/sparc/Kconfig | 1 + arch/sparc/include/asm/bugs.h | 18 ------------------ arch/sparc/kernel/setup_32.c | 7 +++++++ 3 files changed, 8 insertions(+), 18 deletions(-) delete mode 100644 arch/sparc/include/asm/bugs.h diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index b5ed89342059..7e2ce4c8d657 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -56,6 +56,7 @@ config SPARC config SPARC32 def_bool !64BIT select ARCH_32BIT_OFF_T + select ARCH_HAS_CPU_FINALIZE_INIT if !SMP select ARCH_HAS_SYNC_DMA_FOR_CPU select GENERIC_ATOMIC64 select CLZ_TAB diff --git a/arch/sparc/include/asm/bugs.h b/arch/sparc/include/asm/bugs.h deleted file mode 100644 index 02fa369b9c21..000000000000 --- a/arch/sparc/include/asm/bugs.h +++ /dev/null @@ -1,18 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* include/asm/bugs.h: Sparc probes for various bugs. - * - * Copyright (C) 1996, 2007 David S. Miller (davem@davemloft.net) - */ - -#ifdef CONFIG_SPARC32 -#include -#endif - -extern unsigned long loops_per_jiffy; - -static void __init check_bugs(void) -{ -#if defined(CONFIG_SPARC32) && !defined(CONFIG_SMP) - cpu_data(0).udelay_val = loops_per_jiffy; -#endif -} diff --git a/arch/sparc/kernel/setup_32.c b/arch/sparc/kernel/setup_32.c index eea43a1aef1b..8e2c7c1aad83 100644 --- a/arch/sparc/kernel/setup_32.c +++ b/arch/sparc/kernel/setup_32.c @@ -415,3 +415,10 @@ static int __init topology_init(void) } subsys_initcall(topology_init); + +#if defined(CONFIG_SPARC32) && !defined(CONFIG_SMP) +void __init arch_cpu_finalize_init(void) +{ + cpu_data(0).udelay_val = loops_per_jiffy; +} +#endif From b05031c2bca790afed717bc59cde2dac722efb94 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 14 Jun 2023 01:39:36 +0200 Subject: [PATCH 09/40] um/cpu: Switch to arch_cpu_finalize_init() commit 9349b5cd0908f8afe95529fc7a8cbb1417df9b0c upstream check_bugs() is about to be phased out. Switch over to the new arch_cpu_finalize_init() implementation. No functional change. 
Signed-off-by: Thomas Gleixner Acked-by: Richard Weinberger Link: https://lore.kernel.org/r/20230613224545.493148694@linutronix.de Signed-off-by: Daniel Sneddon Signed-off-by: Greg Kroah-Hartman --- arch/um/Kconfig | 1 + arch/um/include/asm/bugs.h | 7 ------- arch/um/kernel/um_arch.c | 3 ++- 3 files changed, 3 insertions(+), 8 deletions(-) delete mode 100644 arch/um/include/asm/bugs.h diff --git a/arch/um/Kconfig b/arch/um/Kconfig index 1c57599b82fa..eb1c6880bde4 100644 --- a/arch/um/Kconfig +++ b/arch/um/Kconfig @@ -5,6 +5,7 @@ menu "UML-specific options" config UML bool default y + select ARCH_HAS_CPU_FINALIZE_INIT select ARCH_HAS_KCOV select ARCH_NO_PREEMPT select HAVE_ARCH_AUDITSYSCALL diff --git a/arch/um/include/asm/bugs.h b/arch/um/include/asm/bugs.h deleted file mode 100644 index 4473942a0839..000000000000 --- a/arch/um/include/asm/bugs.h +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __UM_BUGS_H -#define __UM_BUGS_H - -void check_bugs(void); - -#endif diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index 55930ca498e9..ca5383a8a988 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -3,6 +3,7 @@ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) */ +#include #include #include #include @@ -353,7 +354,7 @@ void __init setup_arch(char **cmdline_p) setup_hostinfo(host_info, sizeof host_info); } -void __init check_bugs(void) +void __init arch_cpu_finalize_init(void) { arch_check_bugs(); os_check_bugs(); From bf2fa3a9d0e65326917273d17a8e9c6880d7b97c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 14 Jun 2023 01:39:38 +0200 Subject: [PATCH 10/40] init: Remove check_bugs() leftovers commit 61235b24b9cb37c13fcad5b9596d59a1afdcec30 upstream Everything is converted over to arch_cpu_finalize_init(). Remove the check_bugs() leftovers including the empty stubs in asm-generic, alpha, parisc, powerpc and xtensa. Signed-off-by: Thomas Gleixner Reviewed-by: Richard Henderson Link: https://lore.kernel.org/r/20230613224545.553215951@linutronix.de Signed-off-by: Daniel Sneddon Signed-off-by: Greg Kroah-Hartman --- arch/alpha/include/asm/bugs.h | 20 -------------------- arch/parisc/include/asm/bugs.h | 20 -------------------- arch/powerpc/include/asm/bugs.h | 15 --------------- arch/xtensa/include/asm/bugs.h | 18 ------------------ include/asm-generic/bugs.h | 11 ----------- init/main.c | 5 ----- 6 files changed, 89 deletions(-) delete mode 100644 arch/alpha/include/asm/bugs.h delete mode 100644 arch/parisc/include/asm/bugs.h delete mode 100644 arch/powerpc/include/asm/bugs.h delete mode 100644 arch/xtensa/include/asm/bugs.h delete mode 100644 include/asm-generic/bugs.h diff --git a/arch/alpha/include/asm/bugs.h b/arch/alpha/include/asm/bugs.h deleted file mode 100644 index 78030d1c7e7e..000000000000 --- a/arch/alpha/include/asm/bugs.h +++ /dev/null @@ -1,20 +0,0 @@ -/* - * include/asm-alpha/bugs.h - * - * Copyright (C) 1994 Linus Torvalds - */ - -/* - * This is included by init/main.c to check for architecture-dependent bugs. - * - * Needs: - * void check_bugs(void); - */ - -/* - * I don't know of any alpha bugs yet.. 
Nice chip - */ - -static void check_bugs(void) -{ -} diff --git a/arch/parisc/include/asm/bugs.h b/arch/parisc/include/asm/bugs.h deleted file mode 100644 index 0a7f9db6bd1c..000000000000 --- a/arch/parisc/include/asm/bugs.h +++ /dev/null @@ -1,20 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * include/asm-parisc/bugs.h - * - * Copyright (C) 1999 Mike Shaver - */ - -/* - * This is included by init/main.c to check for architecture-dependent bugs. - * - * Needs: - * void check_bugs(void); - */ - -#include - -static inline void check_bugs(void) -{ -// identify_cpu(&boot_cpu_data); -} diff --git a/arch/powerpc/include/asm/bugs.h b/arch/powerpc/include/asm/bugs.h deleted file mode 100644 index 01b8f6ca4dbb..000000000000 --- a/arch/powerpc/include/asm/bugs.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef _ASM_POWERPC_BUGS_H -#define _ASM_POWERPC_BUGS_H - -/* - */ - -/* - * This file is included by 'init/main.c' to check for - * architecture-dependent bugs. - */ - -static inline void check_bugs(void) { } - -#endif /* _ASM_POWERPC_BUGS_H */ diff --git a/arch/xtensa/include/asm/bugs.h b/arch/xtensa/include/asm/bugs.h deleted file mode 100644 index 69b29d198249..000000000000 --- a/arch/xtensa/include/asm/bugs.h +++ /dev/null @@ -1,18 +0,0 @@ -/* - * include/asm-xtensa/bugs.h - * - * This is included by init/main.c to check for architecture-dependent bugs. - * - * Xtensa processors don't have any bugs. :) - * - * This file is subject to the terms and conditions of the GNU General - * Public License. See the file "COPYING" in the main directory of - * this archive for more details. - */ - -#ifndef _XTENSA_BUGS_H -#define _XTENSA_BUGS_H - -static void check_bugs(void) { } - -#endif /* _XTENSA_BUGS_H */ diff --git a/include/asm-generic/bugs.h b/include/asm-generic/bugs.h deleted file mode 100644 index 69021830f078..000000000000 --- a/include/asm-generic/bugs.h +++ /dev/null @@ -1,11 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASM_GENERIC_BUGS_H -#define __ASM_GENERIC_BUGS_H -/* - * This file is included by 'init/main.c' to check for - * architecture-dependent bugs. - */ - -static inline void check_bugs(void) { } - -#endif /* __ASM_GENERIC_BUGS_H */ diff --git a/init/main.c b/init/main.c index 18abf3a5d789..3999d860f364 100644 --- a/init/main.c +++ b/init/main.c @@ -100,7 +100,6 @@ #include #include -#include #include #include #include @@ -1046,10 +1045,6 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void) poking_init(); arch_cpu_finalize_init(); - /* Temporary conditional until everything has been converted */ -#ifndef CONFIG_ARCH_HAS_CPU_FINALIZE_INIT - check_bugs(); -#endif acpi_subsystem_init(); arch_post_acpi_subsys_init(); From 09658b81d158c15112a56323d8db8fed83e8cd4a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 14 Jun 2023 01:39:39 +0200 Subject: [PATCH 11/40] init: Invoke arch_cpu_finalize_init() earlier commit 9df9d2f0471b4c4702670380b8d8a45b40b23a7d upstream X86 is reworking the boot process so that initializations which are not required during early boot can be moved into the late boot process and out of the fragile and restricted initial boot phase. arch_cpu_finalize_init() is the obvious place to do such initializations, but arch_cpu_finalize_init() is invoked too late in start_kernel() e.g. for initializing the FPU completely. fork_init() requires that the FPU is initialized as the size of task_struct on X86 depends on the size of the required FPU register buffer. 
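As an illustration of that dependency (a simplified sketch based on arch/x86/kernel/fpu/init.c, not part of this patch): the FPU init code replaces the static register-state placeholder embedded in task_struct with the XSAVE buffer size detected on the boot CPU, and fork_init() sizes the task_struct slab cache from the resulting arch_task_struct_size:

	static void __init fpu__init_task_struct_size(void)
	{
		int task_size = sizeof(struct task_struct);

		/* Drop the static fpregs_state placeholder embedded in task_struct... */
		task_size -= sizeof(((struct task_struct *)0)->thread.fpu.state);
		/* ...and add back the size needed for the XSAVE features this CPU supports. */
		task_size += fpu_kernel_xstate_size;

		arch_task_struct_size = task_size;
	}
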
Fortunately none of the init calls between calibrate_delay() and arch_cpu_finalize_init() is relevant for the functionality of arch_cpu_finalize_init(). Invoke it right after calibrate_delay() where everything which is relevant for arch_cpu_finalize_init() has been set up already. No functional change intended. Signed-off-by: Thomas Gleixner Reviewed-by: Rick Edgecombe Link: https://lore.kernel.org/r/20230613224545.612182854@linutronix.de Signed-off-by: Daniel Sneddon Signed-off-by: Greg Kroah-Hartman --- init/main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/init/main.c b/init/main.c index 3999d860f364..845c563fa8e7 100644 --- a/init/main.c +++ b/init/main.c @@ -1016,6 +1016,9 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void) late_time_init(); sched_clock_init(); calibrate_delay(); + + arch_cpu_finalize_init(); + pid_idr_init(); anon_vma_init(); #ifdef CONFIG_X86 @@ -1044,8 +1047,6 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void) poking_init(); - arch_cpu_finalize_init(); - acpi_subsystem_init(); arch_post_acpi_subsys_init(); sfi_init_late(); From 18fcd72da1ed6166f1cbb03f713bed50c839fc22 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 14 Jun 2023 01:39:41 +0200 Subject: [PATCH 12/40] init, x86: Move mem_encrypt_init() into arch_cpu_finalize_init() commit 439e17576eb47f26b78c5bbc72e344d4206d2327 upstream Invoke the X86ism mem_encrypt_init() from X86 arch_cpu_finalize_init() and remove the weak fallback from the core code. No functional change. Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20230613224545.670360645@linutronix.de Signed-off-by: Daniel Sneddon Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/mem_encrypt.h | 7 ++++--- arch/x86/kernel/cpu/common.c | 11 +++++++++++ init/main.c | 11 ----------- 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h index 8f3fa55d43ce..4406d58cf7cb 100644 --- a/arch/x86/include/asm/mem_encrypt.h +++ b/arch/x86/include/asm/mem_encrypt.h @@ -47,14 +47,13 @@ int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size); void __init mem_encrypt_free_decrypted_mem(void); -/* Architecture __weak replacement functions */ -void __init mem_encrypt_init(void); - void __init sev_es_init_vc_handling(void); bool sme_active(void); bool sev_active(void); bool sev_es_active(void); +void __init mem_encrypt_init(void); + #define __bss_decrypted __section(".bss..decrypted") #else /* !CONFIG_AMD_MEM_ENCRYPT */ @@ -86,6 +85,8 @@ early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; static inline void mem_encrypt_free_decrypted_mem(void) { } +static inline void mem_encrypt_init(void) { } + #define __bss_decrypted #endif /* CONFIG_AMD_MEM_ENCRYPT */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 3e7bd3d2ce00..a27ceb622d58 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -2240,4 +2241,14 @@ void __init arch_cpu_finalize_init(void) } else { fpu__init_check_bugs(); } + + /* + * This needs to be called before any devices perform DMA + * operations that might use the SWIOTLB bounce buffers. It will + * mark the bounce buffers as decrypted so that their usage will + * not cause "plain-text" data to be decrypted when accessed. 
It + * must be called after late_time_init() so that Hyper-V x86/x64 + * hypercalls work when the SWIOTLB bounce buffers are decrypted. + */ + mem_encrypt_init(); } diff --git a/init/main.c b/init/main.c index 845c563fa8e7..1a3d0bea5735 100644 --- a/init/main.c +++ b/init/main.c @@ -95,7 +95,6 @@ #include #include #include -#include #include #include @@ -773,8 +772,6 @@ void __init __weak thread_stack_cache_init(void) } #endif -void __init __weak mem_encrypt_init(void) { } - void __init __weak poking_init(void) { } void __init __weak pgtable_cache_init(void) { } @@ -992,14 +989,6 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void) */ locking_selftest(); - /* - * This needs to be called before any devices perform DMA - * operations that might use the SWIOTLB bounce buffers. It will - * mark the bounce buffers as decrypted so that their usage will - * not cause "plain-text" data to be decrypted when accessed. - */ - mem_encrypt_init(); - #ifdef CONFIG_BLK_DEV_INITRD if (initrd_start && !initrd_below_start_ok && page_to_pfn(virt_to_page((void *)initrd_start)) < min_low_pfn) { From 7a2f42bce9ab23fb9e59fe6de45bfedb5d611eee Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 14 Jun 2023 01:39:43 +0200 Subject: [PATCH 13/40] x86/fpu: Remove cpuinfo argument from init functions commit 1f34bb2a24643e0087652d81078e4f616562738d upstream Nothing in the call chain requires it Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20230613224545.783704297@linutronix.de Signed-off-by: Daniel Sneddon Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/fpu/internal.h | 2 +- arch/x86/kernel/cpu/common.c | 2 +- arch/x86/kernel/fpu/init.c | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 70b9bc5403c5..94c07151a9e5 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -42,7 +42,7 @@ extern int fpu__exception_code(struct fpu *fpu, int trap_nr); extern void fpu__init_cpu(void); extern void fpu__init_system_xstate(void); extern void fpu__init_cpu_xstate(void); -extern void fpu__init_system(struct cpuinfo_x86 *c); +extern void fpu__init_system(void); extern void fpu__init_check_bugs(void); extern void fpu__resume_cpu(void); extern u64 fpu__get_supported_xfeatures_mask(void); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index a27ceb622d58..b302b3fff891 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1418,7 +1418,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) cpu_set_core_cap_bits(c); - fpu__init_system(c); + fpu__init_system(); #ifdef CONFIG_X86_32 /* diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 3c0a621b9792..fc76816f184f 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -67,7 +67,7 @@ static bool fpu__probe_without_cpuid(void) return fsw == 0 && (fcw & 0x103f) == 0x003f; } -static void fpu__init_system_early_generic(struct cpuinfo_x86 *c) +static void fpu__init_system_early_generic(void) { if (!boot_cpu_has(X86_FEATURE_CPUID) && !test_bit(X86_FEATURE_FPU, (unsigned long *)cpu_caps_cleared)) { @@ -237,9 +237,9 @@ static void __init fpu__init_system_ctx_switch(void) * Called on the boot CPU once per system bootup, to set up the initial * FPU state that is later cloned into all processes: */ -void __init fpu__init_system(struct cpuinfo_x86 *c) +void __init fpu__init_system(void) { - 
fpu__init_system_early_generic(c); + fpu__init_system_early_generic(); /* * The FPU has to be operational for some of the From 2462bc3ef0611646d94658ff250bb16669347361 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 14 Jun 2023 01:39:45 +0200 Subject: [PATCH 14/40] x86/fpu: Mark init functions __init commit 1703db2b90c91b2eb2d699519fc505fe431dde0e upstream No point in keeping them around. Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20230613224545.841685728@linutronix.de Signed-off-by: Daniel Sneddon Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/fpu/init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index fc76816f184f..ed73f9b40706 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -49,7 +49,7 @@ void fpu__init_cpu(void) fpu__init_cpu_xstate(); } -static bool fpu__probe_without_cpuid(void) +static bool __init fpu__probe_without_cpuid(void) { unsigned long cr0; u16 fsw, fcw; @@ -67,7 +67,7 @@ static bool fpu__probe_without_cpuid(void) return fsw == 0 && (fcw & 0x103f) == 0x003f; } -static void fpu__init_system_early_generic(void) +static void __init fpu__init_system_early_generic(void) { if (!boot_cpu_has(X86_FEATURE_CPUID) && !test_bit(X86_FEATURE_FPU, (unsigned long *)cpu_caps_cleared)) { From 4ae1cbb730bd574d57d3996d4c20974972d47009 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 14 Jun 2023 01:39:46 +0200 Subject: [PATCH 15/40] x86/fpu: Move FPU initialization into arch_cpu_finalize_init() commit b81fac906a8f9e682e513ddd95697ec7a20878d4 upstream Initializing the FPU during the early boot process is a pointless exercise. Early boot is convoluted and fragile enough. Nothing requires that the FPU is set up early. It has to be initialized before fork_init() because the task_struct size depends on the FPU register buffer size. Move the initialization to arch_cpu_finalize_init() which is the perfect place to do so. No functional change. This allows to remove quite some of the custom early command line parsing, but that's subject to the next installment. Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20230613224545.902376621@linutronix.de Signed-off-by: Daniel Sneddon Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/common.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index b302b3fff891..977edd149e40 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1418,8 +1418,6 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) cpu_set_core_cap_bits(c); - fpu__init_system(); - #ifdef CONFIG_X86_32 /* * Regardless of whether PCID is enumerated, the SDM says @@ -2114,8 +2112,6 @@ void cpu_init(void) doublefault_init_cpu_tss(); - fpu__init_cpu(); - if (is_uv_system()) uv_cpu_init(); @@ -2131,6 +2127,7 @@ void cpu_init_secondary(void) */ cpu_init_exception_handling(); cpu_init(); + fpu__init_cpu(); } #endif @@ -2225,6 +2222,13 @@ void __init arch_cpu_finalize_init(void) '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86); } + /* + * Must be before alternatives because it might set or clear + * feature bits. 
+ */ + fpu__init_system(); + fpu__init_cpu(); + alternative_instructions(); if (IS_ENABLED(CONFIG_X86_64)) { From 288a2f6bc1ce03ddb3f05fd8c79b00d5d7160b4a Mon Sep 17 00:00:00 2001 From: Daniel Sneddon Date: Wed, 12 Jul 2023 19:43:11 -0700 Subject: [PATCH 16/40] x86/speculation: Add Gather Data Sampling mitigation commit 8974eb588283b7d44a7c91fa09fcbaf380339f3a upstream Gather Data Sampling (GDS) is a hardware vulnerability which allows unprivileged speculative access to data which was previously stored in vector registers. Intel processors that support AVX2 and AVX512 have gather instructions that fetch non-contiguous data elements from memory. On vulnerable hardware, when a gather instruction is transiently executed and encounters a fault, stale data from architectural or internal vector registers may get transiently stored to the destination vector register allowing an attacker to infer the stale data using typical side channel techniques like cache timing attacks. This mitigation is different from many earlier ones for two reasons. First, it is enabled by default and a bit must be set to *DISABLE* it. This is the opposite of normal mitigation polarity. This means GDS can be mitigated simply by updating microcode and leaving the new control bit alone. Second, GDS has a "lock" bit. This lock bit is there because the mitigation affects the hardware security features KeyLocker and SGX. It needs to be enabled and *STAY* enabled for these features to be mitigated against GDS. The mitigation is enabled in the microcode by default. Disable it by setting gather_data_sampling=off or by disabling all mitigations with mitigations=off. The mitigation status can be checked by reading: /sys/devices/system/cpu/vulnerabilities/gather_data_sampling Signed-off-by: Daniel Sneddon Signed-off-by: Dave Hansen Acked-by: Josh Poimboeuf Signed-off-by: Daniel Sneddon Signed-off-by: Greg Kroah-Hartman --- .../ABI/testing/sysfs-devices-system-cpu | 15 +- .../hw-vuln/gather_data_sampling.rst | 99 ++++++++++++++ Documentation/admin-guide/hw-vuln/index.rst | 1 + .../admin-guide/kernel-parameters.txt | 35 +++-- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/msr-index.h | 11 ++ arch/x86/kernel/cpu/bugs.c | 129 ++++++++++++++++++ arch/x86/kernel/cpu/common.c | 34 +++-- arch/x86/kernel/cpu/cpu.h | 1 + drivers/base/cpu.c | 8 ++ 10 files changed, 308 insertions(+), 26 deletions(-) create mode 100644 Documentation/admin-guide/hw-vuln/gather_data_sampling.rst diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 500d5d8937cb..bfb4f4fada33 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -501,17 +501,18 @@ Description: information about CPUs heterogeneity. cpu_capacity: capacity of cpu#. 
What: /sys/devices/system/cpu/vulnerabilities - /sys/devices/system/cpu/vulnerabilities/meltdown - /sys/devices/system/cpu/vulnerabilities/spectre_v1 - /sys/devices/system/cpu/vulnerabilities/spectre_v2 - /sys/devices/system/cpu/vulnerabilities/spec_store_bypass + /sys/devices/system/cpu/vulnerabilities/gather_data_sampling + /sys/devices/system/cpu/vulnerabilities/itlb_multihit /sys/devices/system/cpu/vulnerabilities/l1tf /sys/devices/system/cpu/vulnerabilities/mds - /sys/devices/system/cpu/vulnerabilities/srbds - /sys/devices/system/cpu/vulnerabilities/tsx_async_abort - /sys/devices/system/cpu/vulnerabilities/itlb_multihit + /sys/devices/system/cpu/vulnerabilities/meltdown /sys/devices/system/cpu/vulnerabilities/mmio_stale_data /sys/devices/system/cpu/vulnerabilities/retbleed + /sys/devices/system/cpu/vulnerabilities/spec_store_bypass + /sys/devices/system/cpu/vulnerabilities/spectre_v1 + /sys/devices/system/cpu/vulnerabilities/spectre_v2 + /sys/devices/system/cpu/vulnerabilities/srbds + /sys/devices/system/cpu/vulnerabilities/tsx_async_abort Date: January 2018 Contact: Linux kernel mailing list Description: Information about CPU vulnerabilities diff --git a/Documentation/admin-guide/hw-vuln/gather_data_sampling.rst b/Documentation/admin-guide/hw-vuln/gather_data_sampling.rst new file mode 100644 index 000000000000..74dab6af7fe1 --- /dev/null +++ b/Documentation/admin-guide/hw-vuln/gather_data_sampling.rst @@ -0,0 +1,99 @@ +.. SPDX-License-Identifier: GPL-2.0 + +GDS - Gather Data Sampling +========================== + +Gather Data Sampling is a hardware vulnerability which allows unprivileged +speculative access to data which was previously stored in vector registers. + +Problem +------- +When a gather instruction performs loads from memory, different data elements +are merged into the destination vector register. However, when a gather +instruction that is transiently executed encounters a fault, stale data from +architectural or internal vector registers may get transiently forwarded to the +destination vector register instead. This will allow a malicious attacker to +infer stale data using typical side channel techniques like cache timing +attacks. GDS is a purely sampling-based attack. + +The attacker uses gather instructions to infer the stale vector register data. +The victim does not need to do anything special other than use the vector +registers. The victim does not need to use gather instructions to be +vulnerable. + +Because the buffers are shared between Hyper-Threads cross Hyper-Thread attacks +are possible. + +Attack scenarios +---------------- +Without mitigation, GDS can infer stale data across virtually all +permission boundaries: + + Non-enclaves can infer SGX enclave data + Userspace can infer kernel data + Guests can infer data from hosts + Guest can infer guest from other guests + Users can infer data from other users + +Because of this, it is important to ensure that the mitigation stays enabled in +lower-privilege contexts like guests and when running outside SGX enclaves. + +The hardware enforces the mitigation for SGX. Likewise, VMMs should ensure +that guests are not allowed to disable the GDS mitigation. If a host erred and +allowed this, a guest could theoretically disable GDS mitigation, mount an +attack, and re-enable it. + +Mitigation mechanism +-------------------- +This issue is mitigated in microcode. 
The microcode defines the following new +bits: + + ================================ === ============================ + IA32_ARCH_CAPABILITIES[GDS_CTRL] R/O Enumerates GDS vulnerability + and mitigation support. + IA32_ARCH_CAPABILITIES[GDS_NO] R/O Processor is not vulnerable. + IA32_MCU_OPT_CTRL[GDS_MITG_DIS] R/W Disables the mitigation + 0 by default. + IA32_MCU_OPT_CTRL[GDS_MITG_LOCK] R/W Locks GDS_MITG_DIS=0. Writes + to GDS_MITG_DIS are ignored + Can't be cleared once set. + ================================ === ============================ + +GDS can also be mitigated on systems that don't have updated microcode by +disabling AVX. This can be done by setting "clearcpuid=avx" on the kernel +command-line. + +Mitigation control on the kernel command line +--------------------------------------------- +The mitigation can be disabled by setting "gather_data_sampling=off" or +"mitigations=off" on the kernel command line. Not specifying either will +default to the mitigation being enabled. + +GDS System Information +------------------------ +The kernel provides vulnerability status information through sysfs. For +GDS this can be accessed by the following sysfs file: + +/sys/devices/system/cpu/vulnerabilities/gather_data_sampling + +The possible values contained in this file are: + + ============================== ============================================= + Not affected Processor not vulnerable. + Vulnerable Processor vulnerable and mitigation disabled. + Vulnerable: No microcode Processor vulnerable and microcode is missing + mitigation. + Mitigation: Microcode Processor is vulnerable and mitigation is in + effect. + Mitigation: Microcode (locked) Processor is vulnerable and mitigation is in + effect and cannot be disabled. + Unknown: Dependent on + hypervisor status Running on a virtual guest processor that is + affected but with no way to know if host + processor is mitigated or vulnerable. + ============================== ============================================= + +GDS Default mitigation +---------------------- +The updated microcode will enable the mitigation by default. The kernel's +default action is to leave the mitigation enabled. diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst index 2adec1e6520a..245468b0f2be 100644 --- a/Documentation/admin-guide/hw-vuln/index.rst +++ b/Documentation/admin-guide/hw-vuln/index.rst @@ -16,3 +16,4 @@ are configurable at compile, boot or run time. multihit.rst special-register-buffer-data-sampling.rst processor_mmio_stale_data.rst + gather_data_sampling.rst diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 1f4a0523dc1a..2de3ba280a92 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1447,6 +1447,20 @@ Format: off | on default: on + gather_data_sampling= + [X86,INTEL] Control the Gather Data Sampling (GDS) + mitigation. + + Gather Data Sampling is a hardware vulnerability which + allows unprivileged speculative access to data which was + previously stored in vector registers. + + This issue is mitigated by default in updated microcode. + The mitigation may have a performance impact but can be + disabled. + + off: Disable GDS mitigation. + gcov_persist= [GCOV] When non-zero (default), profiling data for kernel modules is saved and remains accessible via debugfs, even when the module is unloaded/reloaded. 
@@ -2887,22 +2901,23 @@ Disable all optional CPU mitigations. This improves system performance, but it may also expose users to several CPU vulnerabilities. - Equivalent to: nopti [X86,PPC] + Equivalent to: gather_data_sampling=off [X86] kpti=0 [ARM64] - nospectre_v1 [X86,PPC] - nobp=0 [S390] - nospectre_v2 [X86,PPC,S390,ARM64] - spectre_v2_user=off [X86] - spec_store_bypass_disable=off [X86,PPC] - ssbd=force-off [ARM64] + kvm.nx_huge_pages=off [X86] l1tf=off [X86] mds=off [X86] - tsx_async_abort=off [X86] - kvm.nx_huge_pages=off [X86] + mmio_stale_data=off [X86] no_entry_flush [PPC] no_uaccess_flush [PPC] - mmio_stale_data=off [X86] + nobp=0 [S390] + nopti [X86,PPC] + nospectre_v1 [X86,PPC] + nospectre_v2 [X86,PPC,S390,ARM64] retbleed=off [X86] + spec_store_bypass_disable=off [X86,PPC] + spectre_v2_user=off [X86] + ssbd=force-off [ARM64] + tsx_async_abort=off [X86] Exceptions: This does not have any effect on diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 1fcda8263554..f68182300799 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -433,5 +433,6 @@ #define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* CPU is too old and its MMIO Stale Data status is unknown */ #define X86_BUG_RETBLEED X86_BUG(27) /* CPU is affected by RETBleed */ #define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */ +#define X86_BUG_GDS X86_BUG(29) /* CPU is affected by Gather Data Sampling */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 3fab152809ab..83849e37cd20 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -156,6 +156,15 @@ * Not susceptible to Post-Barrier * Return Stack Buffer Predictions. */ +#define ARCH_CAP_GDS_CTRL BIT(25) /* + * CPU is vulnerable to Gather + * Data Sampling (GDS) and + * has controls for mitigation. + */ +#define ARCH_CAP_GDS_NO BIT(26) /* + * CPU is not vulnerable to Gather + * Data Sampling (GDS). 
+ */ #define MSR_IA32_FLUSH_CMD 0x0000010b #define L1D_FLUSH BIT(0) /* @@ -174,6 +183,8 @@ #define MSR_IA32_MCU_OPT_CTRL 0x00000123 #define RNGDS_MITG_DIS BIT(0) #define FB_CLEAR_DIS BIT(3) /* CPU Fill buffer clear disable */ +#define GDS_MITG_DIS BIT(4) /* Disable GDS mitigation */ +#define GDS_MITG_LOCKED BIT(5) /* GDS mitigation locked */ #define MSR_IA32_SYSENTER_CS 0x00000174 #define MSR_IA32_SYSENTER_ESP 0x00000175 diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 1ac943f1c99e..f114d4e51a86 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -45,6 +45,7 @@ static void __init md_clear_select_mitigation(void); static void __init taa_select_mitigation(void); static void __init mmio_select_mitigation(void); static void __init srbds_select_mitigation(void); +static void __init gds_select_mitigation(void); /* The base value of the SPEC_CTRL MSR without task-specific bits set */ u64 x86_spec_ctrl_base; @@ -150,6 +151,7 @@ void __init cpu_select_mitigations(void) l1tf_select_mitigation(); md_clear_select_mitigation(); srbds_select_mitigation(); + gds_select_mitigation(); } /* @@ -607,6 +609,120 @@ static int __init srbds_parse_cmdline(char *str) } early_param("srbds", srbds_parse_cmdline); +#undef pr_fmt +#define pr_fmt(fmt) "GDS: " fmt + +enum gds_mitigations { + GDS_MITIGATION_OFF, + GDS_MITIGATION_UCODE_NEEDED, + GDS_MITIGATION_FULL, + GDS_MITIGATION_FULL_LOCKED, + GDS_MITIGATION_HYPERVISOR, +}; + +static enum gds_mitigations gds_mitigation __ro_after_init = GDS_MITIGATION_FULL; + +static const char * const gds_strings[] = { + [GDS_MITIGATION_OFF] = "Vulnerable", + [GDS_MITIGATION_UCODE_NEEDED] = "Vulnerable: No microcode", + [GDS_MITIGATION_FULL] = "Mitigation: Microcode", + [GDS_MITIGATION_FULL_LOCKED] = "Mitigation: Microcode (locked)", + [GDS_MITIGATION_HYPERVISOR] = "Unknown: Dependent on hypervisor status", +}; + +void update_gds_msr(void) +{ + u64 mcu_ctrl_after; + u64 mcu_ctrl; + + switch (gds_mitigation) { + case GDS_MITIGATION_OFF: + rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl); + mcu_ctrl |= GDS_MITG_DIS; + break; + case GDS_MITIGATION_FULL_LOCKED: + /* + * The LOCKED state comes from the boot CPU. APs might not have + * the same state. Make sure the mitigation is enabled on all + * CPUs. + */ + case GDS_MITIGATION_FULL: + rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl); + mcu_ctrl &= ~GDS_MITG_DIS; + break; + case GDS_MITIGATION_UCODE_NEEDED: + case GDS_MITIGATION_HYPERVISOR: + return; + }; + + wrmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl); + + /* + * Check to make sure that the WRMSR value was not ignored. Writes to + * GDS_MITG_DIS will be ignored if this processor is locked but the boot + * processor was not. + */ + rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl_after); + WARN_ON_ONCE(mcu_ctrl != mcu_ctrl_after); +} + +static void __init gds_select_mitigation(void) +{ + u64 mcu_ctrl; + + if (!boot_cpu_has_bug(X86_BUG_GDS)) + return; + + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) { + gds_mitigation = GDS_MITIGATION_HYPERVISOR; + goto out; + } + + if (cpu_mitigations_off()) + gds_mitigation = GDS_MITIGATION_OFF; + /* Will verify below that mitigation _can_ be disabled */ + + /* No microcode */ + if (!(x86_read_arch_cap_msr() & ARCH_CAP_GDS_CTRL)) { + gds_mitigation = GDS_MITIGATION_UCODE_NEEDED; + goto out; + } + + rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl); + if (mcu_ctrl & GDS_MITG_LOCKED) { + if (gds_mitigation == GDS_MITIGATION_OFF) + pr_warn("Mitigation locked. Disable failed.\n"); + + /* + * The mitigation is selected from the boot CPU. 
All other CPUs + * _should_ have the same state. If the boot CPU isn't locked + * but others are then update_gds_msr() will WARN() of the state + * mismatch. If the boot CPU is locked update_gds_msr() will + * ensure the other CPUs have the mitigation enabled. + */ + gds_mitigation = GDS_MITIGATION_FULL_LOCKED; + } + + update_gds_msr(); +out: + pr_info("%s\n", gds_strings[gds_mitigation]); +} + +static int __init gds_parse_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + if (!boot_cpu_has_bug(X86_BUG_GDS)) + return 0; + + if (!strcmp(str, "off")) + gds_mitigation = GDS_MITIGATION_OFF; + + return 0; +} +early_param("gather_data_sampling", gds_parse_cmdline); + #undef pr_fmt #define pr_fmt(fmt) "Spectre V1 : " fmt @@ -2286,6 +2402,11 @@ static ssize_t retbleed_show_state(char *buf) return sprintf(buf, "%s\n", retbleed_strings[retbleed_mitigation]); } +static ssize_t gds_show_state(char *buf) +{ + return sysfs_emit(buf, "%s\n", gds_strings[gds_mitigation]); +} + static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, char *buf, unsigned int bug) { @@ -2335,6 +2456,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr case X86_BUG_RETBLEED: return retbleed_show_state(buf); + case X86_BUG_GDS: + return gds_show_state(buf); + default: break; } @@ -2399,4 +2523,9 @@ ssize_t cpu_show_retbleed(struct device *dev, struct device_attribute *attr, cha { return cpu_show_common(dev, attr, buf, X86_BUG_RETBLEED); } + +ssize_t cpu_show_gds(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_GDS); +} #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 977edd149e40..d31fd6a0fbad 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1127,6 +1127,8 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { #define MMIO_SBDS BIT(2) /* CPU is affected by RETbleed, speculating where you would not expect it */ #define RETBLEED BIT(3) +/* CPU is affected by GDS */ +#define GDS BIT(4) static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS), @@ -1139,19 +1141,21 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPPINGS(BROADWELL_X, X86_STEPPING_ANY, MMIO), VULNBL_INTEL_STEPPINGS(BROADWELL, X86_STEPPING_ANY, SRBDS), VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), - VULNBL_INTEL_STEPPINGS(SKYLAKE_X, X86_STEPPING_ANY, MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(SKYLAKE_X, X86_STEPPING_ANY, MMIO | RETBLEED | GDS), VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), - VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), - VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED | GDS), + VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED | GDS), VULNBL_INTEL_STEPPINGS(CANNONLAKE_L, X86_STEPPING_ANY, RETBLEED), - VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), - VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPING_ANY, MMIO), - VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPING_ANY, MMIO), - VULNBL_INTEL_STEPPINGS(COMETLAKE, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), + VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS), + VULNBL_INTEL_STEPPINGS(ICELAKE_D, 
X86_STEPPING_ANY, MMIO | GDS), + VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPING_ANY, MMIO | GDS), + VULNBL_INTEL_STEPPINGS(COMETLAKE, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS), VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO | RETBLEED), - VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), + VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS), + VULNBL_INTEL_STEPPINGS(TIGERLAKE_L, X86_STEPPING_ANY, GDS), + VULNBL_INTEL_STEPPINGS(TIGERLAKE, X86_STEPPING_ANY, GDS), VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), - VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS), VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPING_ANY, MMIO | MMIO_SBDS), VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO), VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS), @@ -1277,6 +1281,16 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) !(ia32_cap & ARCH_CAP_PBRSB_NO)) setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB); + /* + * Check if CPU is vulnerable to GDS. If running in a virtual machine on + * an affected processor, the VMM may have disabled the use of GATHER by + * disabling AVX2. The only way to do this in HW is to clear XCR0[2], + * which means that AVX will be disabled. + */ + if (cpu_matches(cpu_vuln_blacklist, GDS) && !(ia32_cap & ARCH_CAP_GDS_NO) && + boot_cpu_has(X86_FEATURE_AVX)) + setup_force_cpu_bug(X86_BUG_GDS); + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; @@ -1795,6 +1809,8 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c) validate_apic_and_package_id(c); x86_spec_ctrl_setup_ap(); update_srbds_msr(); + if (boot_cpu_has_bug(X86_BUG_GDS)) + update_gds_msr(); } static __init int setup_noclflush(char *arg) diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 215416e8a8de..66e19380a899 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -82,6 +82,7 @@ void cpu_select_mitigations(void); extern void x86_spec_ctrl_setup_ap(void); extern void update_srbds_msr(void); +extern void update_gds_msr(void); extern u64 x86_read_arch_cap_msr(void); diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 33e0526907eb..0cb8ec80ddfd 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -579,6 +579,12 @@ ssize_t __weak cpu_show_retbleed(struct device *dev, return sysfs_emit(buf, "Not affected\n"); } +ssize_t __weak cpu_show_gds(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "Not affected\n"); +} + static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); @@ -590,6 +596,7 @@ static DEVICE_ATTR(itlb_multihit, 0444, cpu_show_itlb_multihit, NULL); static DEVICE_ATTR(srbds, 0444, cpu_show_srbds, NULL); static DEVICE_ATTR(mmio_stale_data, 0444, cpu_show_mmio_stale_data, NULL); static DEVICE_ATTR(retbleed, 0444, cpu_show_retbleed, NULL); +static DEVICE_ATTR(gather_data_sampling, 0444, cpu_show_gds, NULL); static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_meltdown.attr, @@ -603,6 +610,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_srbds.attr, &dev_attr_mmio_stale_data.attr, &dev_attr_retbleed.attr, + &dev_attr_gather_data_sampling.attr, NULL }; From 
363c98f9cfa8124cc49b2dfc5d48666b138f7e2e Mon Sep 17 00:00:00 2001 From: Daniel Sneddon Date: Wed, 12 Jul 2023 19:43:12 -0700 Subject: [PATCH 17/40] x86/speculation: Add force option to GDS mitigation commit 553a5c03e90a6087e88f8ff878335ef0621536fb upstream The Gather Data Sampling (GDS) vulnerability allows malicious software to infer stale data previously stored in vector registers. This may include sensitive data such as cryptographic keys. GDS is mitigated in microcode, and systems with up-to-date microcode are protected by default. However, any affected system that is running with older microcode will still be vulnerable to GDS attacks. Since the gather instructions used by the attacker are part of the AVX2 and AVX512 extensions, disabling these extensions prevents gather instructions from being executed, thereby mitigating the system from GDS. Disabling AVX2 is sufficient, but we don't have the granularity to do this. The XCR0[2] disables AVX, with no option to just disable AVX2. Add a kernel parameter gather_data_sampling=force that will enable the microcode mitigation if available, otherwise it will disable AVX on affected systems. This option will be ignored if cmdline mitigations=off. This is a *big* hammer. It is known to break buggy userspace that uses incomplete, buggy AVX enumeration. Unfortunately, such userspace does exist in the wild: https://www.mail-archive.com/bug-coreutils@gnu.org/msg33046.html [ dhansen: add some more ominous warnings about disabling AVX ] Signed-off-by: Daniel Sneddon Signed-off-by: Dave Hansen Acked-by: Josh Poimboeuf Signed-off-by: Daniel Sneddon Signed-off-by: Greg Kroah-Hartman --- .../hw-vuln/gather_data_sampling.rst | 18 +++++++++++++---- .../admin-guide/kernel-parameters.txt | 8 +++++++- arch/x86/kernel/cpu/bugs.c | 20 ++++++++++++++++++- 3 files changed, 40 insertions(+), 6 deletions(-) diff --git a/Documentation/admin-guide/hw-vuln/gather_data_sampling.rst b/Documentation/admin-guide/hw-vuln/gather_data_sampling.rst index 74dab6af7fe1..40b7a6260010 100644 --- a/Documentation/admin-guide/hw-vuln/gather_data_sampling.rst +++ b/Documentation/admin-guide/hw-vuln/gather_data_sampling.rst @@ -60,14 +60,21 @@ bits: ================================ === ============================ GDS can also be mitigated on systems that don't have updated microcode by -disabling AVX. This can be done by setting "clearcpuid=avx" on the kernel -command-line. +disabling AVX. This can be done by setting gather_data_sampling="force" or +"clearcpuid=avx" on the kernel command-line. + +If used, these options will disable AVX use by turning on XSAVE YMM support. +However, the processor will still enumerate AVX support. Userspace that +does not follow proper AVX enumeration to check both AVX *and* XSAVE YMM +support will break. Mitigation control on the kernel command line --------------------------------------------- The mitigation can be disabled by setting "gather_data_sampling=off" or -"mitigations=off" on the kernel command line. Not specifying either will -default to the mitigation being enabled. +"mitigations=off" on the kernel command line. Not specifying either will default +to the mitigation being enabled. Specifying "gather_data_sampling=force" will +use the microcode mitigation when available or disable AVX on affected systems +where the microcode hasn't been updated to include the mitigation. GDS System Information ------------------------ @@ -83,6 +90,9 @@ The possible values contained in this file are: Vulnerable Processor vulnerable and mitigation disabled. 
Vulnerable: No microcode Processor vulnerable and microcode is missing mitigation. + Mitigation: AVX disabled, + no microcode Processor is vulnerable and microcode is missing + mitigation. AVX disabled as mitigation. Mitigation: Microcode Processor is vulnerable and mitigation is in effect. Mitigation: Microcode (locked) Processor is vulnerable and mitigation is in diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 2de3ba280a92..074e39deef04 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1457,7 +1457,13 @@ This issue is mitigated by default in updated microcode. The mitigation may have a performance impact but can be - disabled. + disabled. On systems without the microcode mitigation + disabling AVX serves as a mitigation. + + force: Disable AVX to mitigate systems without + microcode mitigation. No effect if the microcode + mitigation is present. Known to cause crashes in + userspace with buggy AVX enumeration. off: Disable GDS mitigation. diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index f114d4e51a86..010d487f627c 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -615,6 +615,7 @@ early_param("srbds", srbds_parse_cmdline); enum gds_mitigations { GDS_MITIGATION_OFF, GDS_MITIGATION_UCODE_NEEDED, + GDS_MITIGATION_FORCE, GDS_MITIGATION_FULL, GDS_MITIGATION_FULL_LOCKED, GDS_MITIGATION_HYPERVISOR, @@ -625,6 +626,7 @@ static enum gds_mitigations gds_mitigation __ro_after_init = GDS_MITIGATION_FULL static const char * const gds_strings[] = { [GDS_MITIGATION_OFF] = "Vulnerable", [GDS_MITIGATION_UCODE_NEEDED] = "Vulnerable: No microcode", + [GDS_MITIGATION_FORCE] = "Mitigation: AVX disabled, no microcode", [GDS_MITIGATION_FULL] = "Mitigation: Microcode", [GDS_MITIGATION_FULL_LOCKED] = "Mitigation: Microcode (locked)", [GDS_MITIGATION_HYPERVISOR] = "Unknown: Dependent on hypervisor status", @@ -650,6 +652,7 @@ void update_gds_msr(void) rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl); mcu_ctrl &= ~GDS_MITG_DIS; break; + case GDS_MITIGATION_FORCE: case GDS_MITIGATION_UCODE_NEEDED: case GDS_MITIGATION_HYPERVISOR: return; @@ -684,10 +687,23 @@ static void __init gds_select_mitigation(void) /* No microcode */ if (!(x86_read_arch_cap_msr() & ARCH_CAP_GDS_CTRL)) { - gds_mitigation = GDS_MITIGATION_UCODE_NEEDED; + if (gds_mitigation == GDS_MITIGATION_FORCE) { + /* + * This only needs to be done on the boot CPU so do it + * here rather than in update_gds_msr() + */ + setup_clear_cpu_cap(X86_FEATURE_AVX); + pr_warn("Microcode update needed! Disabling AVX as mitigation.\n"); + } else { + gds_mitigation = GDS_MITIGATION_UCODE_NEEDED; + } goto out; } + /* Microcode has mitigation, use it */ + if (gds_mitigation == GDS_MITIGATION_FORCE) + gds_mitigation = GDS_MITIGATION_FULL; + rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl); if (mcu_ctrl & GDS_MITG_LOCKED) { if (gds_mitigation == GDS_MITIGATION_OFF) @@ -718,6 +734,8 @@ static int __init gds_parse_cmdline(char *str) if (!strcmp(str, "off")) gds_mitigation = GDS_MITIGATION_OFF; + else if (!strcmp(str, "force")) + gds_mitigation = GDS_MITIGATION_FORCE; return 0; } From 7db4ddcb8d8e356387a773728b2479d390488b1e Mon Sep 17 00:00:00 2001 From: Daniel Sneddon Date: Wed, 12 Jul 2023 19:43:13 -0700 Subject: [PATCH 18/40] x86/speculation: Add Kconfig option for GDS commit 53cf5797f114ba2bd86d23a862302119848eff19 upstream Gather Data Sampling (GDS) is mitigated in microcode. 
However, on systems that haven't received the updated microcode, disabling AVX can act as a mitigation. Add a Kconfig option that uses the microcode mitigation if available and disables AVX otherwise. Setting this option has no effect on systems not affected by GDS. This is the equivalent of setting gather_data_sampling=force. Signed-off-by: Daniel Sneddon Signed-off-by: Dave Hansen Acked-by: Josh Poimboeuf Signed-off-by: Daniel Sneddon Signed-off-by: Greg Kroah-Hartman --- arch/x86/Kconfig | 19 +++++++++++++++++++ arch/x86/kernel/cpu/bugs.c | 4 ++++ 2 files changed, 23 insertions(+) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d1c734289bdd..25fda0590e5d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2485,6 +2485,25 @@ config SLS against straight line speculation. The kernel image might be slightly larger. +config GDS_FORCE_MITIGATION + bool "Force GDS Mitigation" + depends on CPU_SUP_INTEL + default n + help + Gather Data Sampling (GDS) is a hardware vulnerability which allows + unprivileged speculative access to data which was previously stored in + vector registers. + + This option is equivalent to setting gather_data_sampling=force on the + command line. The microcode mitigation is used if present, otherwise + AVX is disabled as a mitigation. On affected systems that are missing + the microcode any userspace code that unconditionally uses AVX will + break with this option set. + + Setting this option on systems not vulnerable to GDS has no effect. + + If in doubt, say N. + endif config ARCH_HAS_ADD_PAGES diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 010d487f627c..ed263807f62c 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -621,7 +621,11 @@ enum gds_mitigations { GDS_MITIGATION_HYPERVISOR, }; +#if IS_ENABLED(CONFIG_GDS_FORCE_MITIGATION) +static enum gds_mitigations gds_mitigation __ro_after_init = GDS_MITIGATION_FORCE; +#else static enum gds_mitigations gds_mitigation __ro_after_init = GDS_MITIGATION_FULL; +#endif static const char * const gds_strings[] = { [GDS_MITIGATION_OFF] = "Vulnerable", From eb13cce488745176db654b20ea438f4b5b91ab9c Mon Sep 17 00:00:00 2001 From: Daniel Sneddon Date: Wed, 12 Jul 2023 19:43:14 -0700 Subject: [PATCH 19/40] KVM: Add GDS_NO support to KVM commit 81ac7e5d741742d650b4ed6186c4826c1a0631a7 upstream Gather Data Sampling (GDS) is a transient execution attack using gather instructions from the AVX2 and AVX512 extensions. This attack allows malicious code to infer data that was previously stored in vector registers. Systems that are not vulnerable to GDS will set the GDS_NO bit of the IA32_ARCH_CAPABILITIES MSR. This is useful for VM guests that may think they are on vulnerable systems that are, in fact, not affected. Guests that are running on affected hosts where the mitigation is enabled are protected as if they were running on an unaffected system. On all hosts that are not affected or that are mitigated, set the GDS_NO bit. 
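For a quick sanity check from inside a guest (or on the host), the reported GDS state can be read back through the sysfs attribute added earlier in this series. The small userspace sketch below is illustrative only and is not part of this patch; it assumes nothing beyond the gather_data_sampling attribute name introduced in drivers/base/cpu.c.

    #include <stdio.h>

    int main(void)
    {
            /* Attribute name from the gather_data_sampling DEVICE_ATTR. */
            const char *path =
                    "/sys/devices/system/cpu/vulnerabilities/gather_data_sampling";
            char line[128];
            FILE *f = fopen(path, "r");

            if (!f) {
                    perror(path);   /* older kernels do not expose this file */
                    return 1;
            }
            if (fgets(line, sizeof(line), f))
                    printf("GDS state: %s", line);  /* e.g. "Mitigation: Microcode" */
            fclose(f);
            return 0;
    }

A guest whose host advertises GDS_NO as described above should normally report "Not affected" here.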
Signed-off-by: Daniel Sneddon Signed-off-by: Dave Hansen Acked-by: Josh Poimboeuf Signed-off-by: Daniel Sneddon Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 7 +++++++ arch/x86/kvm/x86.c | 7 ++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index ed263807f62c..21f4bdbcfb4b 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -636,6 +636,13 @@ static const char * const gds_strings[] = { [GDS_MITIGATION_HYPERVISOR] = "Unknown: Dependent on hypervisor status", }; +bool gds_ucode_mitigated(void) +{ + return (gds_mitigation == GDS_MITIGATION_FULL || + gds_mitigation == GDS_MITIGATION_FULL_LOCKED); +} +EXPORT_SYMBOL_GPL(gds_ucode_mitigated); + void update_gds_msr(void) { u64 mcu_ctrl_after; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 34670943f543..cf4739200566 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -255,6 +255,8 @@ static struct kmem_cache *x86_fpu_cache; static struct kmem_cache *x86_emulator_cache; +extern bool gds_ucode_mitigated(void); + /* * When called, it means the previous get/set msr reached an invalid msr. * Return true if we want to ignore/silent this failed msr access. @@ -1389,7 +1391,7 @@ static unsigned int num_msr_based_features; ARCH_CAP_SKIP_VMENTRY_L1DFLUSH | ARCH_CAP_SSB_NO | ARCH_CAP_MDS_NO | \ ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \ ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \ - ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO) + ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | ARCH_CAP_GDS_NO) static u64 kvm_get_arch_capabilities(void) { @@ -1446,6 +1448,9 @@ static u64 kvm_get_arch_capabilities(void) */ } + if (!boot_cpu_has_bug(X86_BUG_GDS) || gds_ucode_mitigated()) + data |= ARCH_CAP_GDS_NO; + return data; } From 583016037a092e4189c86bad7946c6d88669b4ca Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 3 Jul 2023 15:00:32 +0200 Subject: [PATCH 20/40] x86/xen: Fix secondary processors' FPU initialization commit fe3e0a13e597c1c8617814bf9b42ab732db5c26e upstream. Moving the call of fpu__init_cpu() from cpu_init() to start_secondary() broke Xen PV guests, as those don't call start_secondary() for APs. Call fpu__init_cpu() in Xen's cpu_bringup(), which is the Xen PV replacement of start_secondary(). Fixes: b81fac906a8f ("x86/fpu: Move FPU initialization into arch_cpu_finalize_init()") Signed-off-by: Juergen Gross Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Boris Ostrovsky Acked-by: Thomas Gleixner Link: https://lore.kernel.org/r/20230703130032.22916-1-jgross@suse.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/xen/smp_pv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 64873937cd1d..755e939db3ed 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -63,6 +64,7 @@ static void cpu_bringup(void) cr4_init(); cpu_init(); + fpu__init_cpu(); touch_softlockup_watchdog(); preempt_disable(); From f076d081787803b972a9939e477c6456f0c8fd70 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 9 Jan 2023 16:09:22 +0100 Subject: [PATCH 21/40] x86/mm: fix poking_init() for Xen PV guests commit 26ce6ec364f18d2915923bc05784084e54a5c4cc upstream. Commit 3f4c8211d982 ("x86/mm: Use mm_alloc() in poking_init()") broke the kernel for running as Xen PV guest. 
It seems as if the new address space is never activated before being used, resulting in Xen rejecting to accept the new CR3 value (the PGD isn't pinned). Fix that by adding the now missing call of paravirt_arch_dup_mmap() to poking_init(). That call was previously done by dup_mm()->dup_mmap() and it is a NOP for all cases but for Xen PV, where it is just doing the pinning of the PGD. Fixes: 3f4c8211d982 ("x86/mm: Use mm_alloc() in poking_init()") Signed-off-by: Juergen Gross Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20230109150922.10578-1-jgross@suse.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/init.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index ff3b0d8fe048..2234d1c3f412 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -27,6 +27,7 @@ #include #include #include +#include /* * We need to define the tracepoints somewhere, and tlb.c @@ -807,6 +808,9 @@ void __init poking_init(void) poking_mm = copy_init_mm(); BUG_ON(!poking_mm); + /* Xen PV guests need the PGD to be pinned. */ + paravirt_arch_dup_mmap(NULL, poking_mm); + /* * Randomize the poking address, but make sure that the following page * will be mapped at the same PMD. We need 2 pages, so find space for 3, From 6ee042fd240fb669f4637f8cd89899b15911e5df Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 25 Oct 2022 21:38:21 +0200 Subject: [PATCH 22/40] x86/mm: Use mm_alloc() in poking_init() commit 3f4c8211d982099be693be9aa7d6fc4607dff290 upstream. Instead of duplicating init_mm, allocate a fresh mm. The advantage is that mm_alloc() has much simpler dependencies. Additionally it makes more conceptual sense, init_mm has no (and must not have) user state to duplicate. Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20221025201057.816175235@infradead.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/init.c | 3 ++- include/linux/sched/task.h | 1 - kernel/fork.c | 5 ----- 3 files changed, 2 insertions(+), 7 deletions(-) diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 2234d1c3f412..dd15fdee4536 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -805,7 +806,7 @@ void __init poking_init(void) spinlock_t *ptl; pte_t *ptep; - poking_mm = copy_init_mm(); + poking_mm = mm_alloc(); BUG_ON(!poking_mm); /* Xen PV guests need the PGD to be pinned. */ diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 2832cc6be062..90c04195f939 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -89,7 +89,6 @@ extern void exit_itimers(struct task_struct *); extern pid_t kernel_clone(struct kernel_clone_args *kargs); struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node); struct task_struct *fork_idle(int); -struct mm_struct *copy_init_mm(void); extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); extern long kernel_wait4(pid_t, int __user *, int, struct rusage *); int kernel_wait(pid_t pid, int *stat); diff --git a/kernel/fork.c b/kernel/fork.c index c6a289317e89..6386ab077d8a 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2424,11 +2424,6 @@ struct task_struct * __init fork_idle(int cpu) return task; } -struct mm_struct *copy_init_mm(void) -{ - return dup_mm(NULL, &init_mm); -} - /* * This is like kernel_clone(), but shaved down and tailored to just * creating io_uring workers. It returns a created task, or an error pointer. 
From 1ff14defdfc9180bfcfd76a70463a5feb188a5db Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 25 Oct 2022 21:38:18 +0200 Subject: [PATCH 23/40] mm: Move mm_cachep initialization to mm_init() commit af80602799681c78f14fbe20b6185a56020dedee upstream. In order to allow using mm_alloc() much earlier, move initializing mm_cachep into mm_init(). Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20221025201057.751153381@infradead.org Signed-off-by: Greg Kroah-Hartman --- include/linux/sched/task.h | 1 + init/main.c | 1 + kernel/fork.c | 32 ++++++++++++++++++-------------- 3 files changed, 20 insertions(+), 14 deletions(-) diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 90c04195f939..e8304e929e28 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -63,6 +63,7 @@ extern void sched_dead(struct task_struct *p); void __noreturn do_task_dead(void); void __noreturn make_task_dead(int signr); +extern void mm_cache_init(void); extern void proc_caches_init(void); extern void fork_init(void); diff --git a/init/main.c b/init/main.c index 1a3d0bea5735..402d290f0fc4 100644 --- a/init/main.c +++ b/init/main.c @@ -835,6 +835,7 @@ static void __init mm_init(void) init_espfix_bsp(); /* Should be run after espfix64 is set up. */ pti_init(); + mm_cache_init(); } void __init __weak arch_call_rest_init(void) diff --git a/kernel/fork.c b/kernel/fork.c index 6386ab077d8a..31455f5ab015 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2818,10 +2818,27 @@ static void sighand_ctor(void *data) init_waitqueue_head(&sighand->signalfd_wqh); } -void __init proc_caches_init(void) +void __init mm_cache_init(void) { unsigned int mm_size; + /* + * The mm_cpumask is located at the end of mm_struct, and is + * dynamically sized based on the maximum CPU number this system + * can have, taking hotplug into account (nr_cpu_ids). + */ + mm_size = sizeof(struct mm_struct) + cpumask_size(); + + mm_cachep = kmem_cache_create_usercopy("mm_struct", + mm_size, ARCH_MIN_MMSTRUCT_ALIGN, + SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, + offsetof(struct mm_struct, saved_auxv), + sizeof_field(struct mm_struct, saved_auxv), + NULL); +} + +void __init proc_caches_init(void) +{ sighand_cachep = kmem_cache_create("sighand_cache", sizeof(struct sighand_struct), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU| @@ -2839,19 +2856,6 @@ void __init proc_caches_init(void) SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, NULL); - /* - * The mm_cpumask is located at the end of mm_struct, and is - * dynamically sized based on the maximum CPU number this system - * can have, taking hotplug into account (nr_cpu_ids). - */ - mm_size = sizeof(struct mm_struct) + cpumask_size(); - - mm_cachep = kmem_cache_create_usercopy("mm_struct", - mm_size, ARCH_MIN_MMSTRUCT_ALIGN, - SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, - offsetof(struct mm_struct, saved_auxv), - sizeof_field(struct mm_struct, saved_auxv), - NULL); vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC|SLAB_ACCOUNT); mmap_init(); nsproxy_cache_init(); From 79972c2b95eca5e7d3d237d728339b21e9075629 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 25 Oct 2022 21:38:25 +0200 Subject: [PATCH 24/40] x86/mm: Initialize text poking earlier commit 5b93a83649c7cba3a15eb7e8959b250841acb1b1 upstream. Move poking_init() up a bunch; specifically move it right after mm_init() which is right before ftrace_init(). This will allow simplifying ftrace text poking which currently has a bunch of exceptions for early boot. 
Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20221025201057.881703081@infradead.org Signed-off-by: Greg Kroah-Hartman --- init/main.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/init/main.c b/init/main.c index 402d290f0fc4..298989b0d4e8 100644 --- a/init/main.c +++ b/init/main.c @@ -900,7 +900,7 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void) sort_main_extable(); trap_init(); mm_init(); - + poking_init(); ftrace_init(); /* trace_printk can be enabled here */ @@ -1035,8 +1035,6 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void) taskstats_init_early(); delayacct_init(); - poking_init(); - acpi_subsystem_init(); arch_post_acpi_subsys_init(); sfi_init_late(); From 6750468784314bc8a336f80493cd82cde2afa655 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Tue, 1 Aug 2023 07:31:07 -0700 Subject: [PATCH 25/40] Documentation/x86: Fix backwards on/off logic about YMM support commit 1b0fc0345f2852ffe54fb9ae0e12e2ee69ad6a20 upstream These options clearly turn *off* XSAVE YMM support. Correct the typo. Reported-by: Ben Hutchings Fixes: 553a5c03e90a ("x86/speculation: Add force option to GDS mitigation") Signed-off-by: Dave Hansen Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/hw-vuln/gather_data_sampling.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/admin-guide/hw-vuln/gather_data_sampling.rst b/Documentation/admin-guide/hw-vuln/gather_data_sampling.rst index 40b7a6260010..264bfa937f7d 100644 --- a/Documentation/admin-guide/hw-vuln/gather_data_sampling.rst +++ b/Documentation/admin-guide/hw-vuln/gather_data_sampling.rst @@ -63,7 +63,7 @@ GDS can also be mitigated on systems that don't have updated microcode by disabling AVX. This can be done by setting gather_data_sampling="force" or "clearcpuid=avx" on the kernel command-line. -If used, these options will disable AVX use by turning on XSAVE YMM support. +If used, these options will disable AVX use by turning off XSAVE YMM support. However, the processor will still enumerate AVX support. Userspace that does not follow proper AVX enumeration to check both AVX *and* XSAVE YMM support will break. From baf6d6c39e2390ef91bec12d057294dd507d1115 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 Dec 2020 11:09:36 -0600 Subject: [PATCH 26/40] x86/cpu: Add VM page flush MSR availablility as a CPUID feature commit 69372cf01290b9587d2cee8fbe161d75d55c3adc upstream. On systems that do not have hardware enforced cache coherency between encrypted and unencrypted mappings of the same physical page, the hypervisor can use the VM page flush MSR (0xc001011e) to flush the cache contents of an SEV guest page. When a small number of pages are being flushed, this can be used in place of issuing a WBINVD across all CPUs. CPUID 0x8000001f_eax[2] is used to determine if the VM page flush MSR is available. Add a CPUID feature to indicate it is supported and define the MSR. 
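The enumeration described above can be probed from userspace before anything tries to use the MSR. The following standalone sketch is illustrative only and assumes a GCC/Clang toolchain that provides <cpuid.h>; it checks CPUID 0x8000001f EAX bit 2 exactly as the scattered.c entry does.

    #include <stdio.h>
    #include <cpuid.h>

    int main(void)
    {
            unsigned int eax, ebx, ecx, edx;

            /* Make sure the 0x8000001f extended leaf exists at all. */
            if (!__get_cpuid(0x80000000, &eax, &ebx, &ecx, &edx) ||
                eax < 0x8000001f) {
                    puts("CPUID leaf 0x8000001f not available");
                    return 0;
            }

            __cpuid(0x8000001f, eax, ebx, ecx, edx);

            /* EAX bit 2 enumerates the VM page flush MSR (0xc001011e). */
            printf("VM_PAGE_FLUSH MSR supported: %s\n",
                   (eax & (1u << 2)) ? "yes" : "no");
            return 0;
    }

The MSR itself is only meaningful to the hypervisor; the check above merely mirrors the CPUID bit this patch starts reporting.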
Signed-off-by: Tom Lendacky Message-Id: Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/msr-index.h | 1 + arch/x86/kernel/cpu/scattered.c | 1 + 3 files changed, 3 insertions(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index f68182300799..fe8c66cb0789 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -237,6 +237,7 @@ #define X86_FEATURE_VMCALL ( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */ #define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */ #define X86_FEATURE_SEV_ES ( 8*32+20) /* AMD Secure Encrypted Virtualization - Encrypted State */ +#define X86_FEATURE_VM_PAGE_FLUSH ( 8*32+21) /* "" VM Page Flush MSR is supported */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */ #define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 83849e37cd20..2bc4fa9ad3e5 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -530,6 +530,7 @@ #define MSR_AMD64_ICIBSEXTDCTL 0xc001103c #define MSR_AMD64_IBSOPDATA4 0xc001103d #define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */ +#define MSR_AMD64_VM_PAGE_FLUSH 0xc001011e #define MSR_AMD64_SEV_ES_GHCB 0xc0010130 #define MSR_AMD64_SEV 0xc0010131 #define MSR_AMD64_SEV_ENABLED_BIT 0 diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 82fe492121bb..9bca00693005 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -45,6 +45,7 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_SEV, CPUID_EAX, 1, 0x8000001f, 0 }, { X86_FEATURE_SEV_ES, CPUID_EAX, 3, 0x8000001f, 0 }, { X86_FEATURE_SME_COHERENT, CPUID_EAX, 10, 0x8000001f, 0 }, + { X86_FEATURE_VM_PAGE_FLUSH, CPUID_EAX, 2, 0x8000001f, 0 }, { 0, 0, 0, 0, 0 } }; From 437fa179f2136d349fda78331fd28696e40def9d Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 22 Jan 2021 12:40:46 -0800 Subject: [PATCH 27/40] x86/cpufeatures: Assign dedicated feature word for CPUID_0x8000001F[EAX] commit fb35d30fe5b06cc24444f0405da8fbe0be5330d1 upstream. Collect the scattered SME/SEV related feature flags into a dedicated word. There are now five recognized features in CPUID.0x8000001F.EAX, with at least one more on the horizon (SEV-SNP). Using a dedicated word allows KVM to use its automagic CPUID adjustment logic when reporting the set of supported features to userspace. No functional change intended. 
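For readers unfamiliar with the feature-word encoding, the new word can be pictured with a tiny standalone sketch. The array below stands in for cpuinfo_x86::x86_capability and its contents are made up for illustration; only the word/bit arithmetic matches what cpu_has() does in the kernel.

    #include <stdio.h>

    #define NCAPINTS                20              /* after this patch */
    #define X86_FEATURE_SEV         (19*32 + 1)
    #define X86_FEATURE_SEV_ES      (19*32 + 3)

    /* Stand-in for cpuinfo_x86::x86_capability, filled from CPUID at boot. */
    static unsigned int x86_capability[NCAPINTS];

    static int has_feature(unsigned int feature)
    {
            /* Same word/bit split that cpu_has()/test_bit() perform. */
            return (x86_capability[feature / 32] >> (feature % 32)) & 1;
    }

    int main(void)
    {
            /* Pretend CPUID.0x8000001f:EAX reported SEV and SEV-ES. */
            x86_capability[19] = (1u << 1) | (1u << 3);

            printf("SEV:    %d\n", has_feature(X86_FEATURE_SEV));
            printf("SEV-ES: %d\n", has_feature(X86_FEATURE_SEV_ES));
            return 0;
    }

Because get_cpu_cap() now copies the whole EAX register into word 19, KVM's automatic CPUID adjustment logic can pick these bits up without per-bit scattering, as noted above.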
Signed-off-by: Sean Christopherson Signed-off-by: Borislav Petkov Reviewed-by: Brijesh Singh Link: https://lkml.kernel.org/r/20210122204047.2860075-2-seanjc@google.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeature.h | 7 +++++-- arch/x86/include/asm/cpufeatures.h | 17 +++++++++++------ arch/x86/include/asm/disabled-features.h | 3 ++- arch/x86/include/asm/required-features.h | 3 ++- arch/x86/kernel/cpu/common.c | 3 +++ arch/x86/kernel/cpu/scattered.c | 5 ----- tools/arch/x86/include/asm/disabled-features.h | 3 ++- tools/arch/x86/include/asm/required-features.h | 3 ++- 8 files changed, 27 insertions(+), 17 deletions(-) diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index f4cbc01c0bc4..b0f206681fde 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -31,6 +31,7 @@ enum cpuid_leafs CPUID_7_ECX, CPUID_8000_0007_EBX, CPUID_7_EDX, + CPUID_8000_001F_EAX, }; #ifdef CONFIG_X86_FEATURE_NAMES @@ -89,8 +90,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 16, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 17, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 19, feature_bit) || \ REQUIRED_MASK_CHECK || \ - BUILD_BUG_ON_ZERO(NCAPINTS != 19)) + BUILD_BUG_ON_ZERO(NCAPINTS != 20)) #define DISABLED_MASK_BIT_SET(feature_bit) \ ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \ @@ -112,8 +114,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 16, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 17, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 19, feature_bit) || \ DISABLED_MASK_CHECK || \ - BUILD_BUG_ON_ZERO(NCAPINTS != 19)) + BUILD_BUG_ON_ZERO(NCAPINTS != 20)) #define cpu_has(c, bit) \ (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index fe8c66cb0789..c1058ab69d5c 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -13,7 +13,7 @@ /* * Defines x86 CPU feature bits */ -#define NCAPINTS 19 /* N 32-bit words worth of info */ +#define NCAPINTS 20 /* N 32-bit words worth of info */ #define NBUGINTS 1 /* N 32-bit bug flags */ /* @@ -96,7 +96,7 @@ #define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */ #define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */ #define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */ -#define X86_FEATURE_SME_COHERENT ( 3*32+17) /* "" AMD hardware-enforced cache coherency */ +/* FREE! ( 3*32+17) */ #define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */ #define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */ #define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ @@ -201,7 +201,7 @@ #define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ -#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ +/* FREE! 
( 7*32+10) */ #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ #define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* "" Set/clear IBRS on kernel entry/exit */ #define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* "" Fill RSB on VM-Exit */ @@ -211,7 +211,7 @@ #define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */ #define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ #define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */ -#define X86_FEATURE_SEV ( 7*32+20) /* AMD Secure Encrypted Virtualization */ +/* FREE! ( 7*32+20) */ #define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ #define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ #define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */ @@ -236,8 +236,6 @@ #define X86_FEATURE_EPT_AD ( 8*32+17) /* Intel Extended Page Table access-dirty bit */ #define X86_FEATURE_VMCALL ( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */ #define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */ -#define X86_FEATURE_SEV_ES ( 8*32+20) /* AMD Secure Encrypted Virtualization - Encrypted State */ -#define X86_FEATURE_VM_PAGE_FLUSH ( 8*32+21) /* "" VM Page Flush MSR is supported */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */ #define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ @@ -394,6 +392,13 @@ #define X86_FEATURE_CORE_CAPABILITIES (18*32+30) /* "" IA32_CORE_CAPABILITIES MSR */ #define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */ +/* AMD-defined memory encryption features, CPUID level 0x8000001f (EAX), word 19 */ +#define X86_FEATURE_SME (19*32+ 0) /* AMD Secure Memory Encryption */ +#define X86_FEATURE_SEV (19*32+ 1) /* AMD Secure Encrypted Virtualization */ +#define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* "" VM Page Flush MSR is supported */ +#define X86_FEATURE_SEV_ES (19*32+ 3) /* AMD Secure Encrypted Virtualization - Encrypted State */ +#define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */ + /* * BUG word(s) */ diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index 16c24915210d..da21405ea11c 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -101,6 +101,7 @@ DISABLE_ENQCMD) #define DISABLED_MASK17 0 #define DISABLED_MASK18 0 -#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) +#define DISABLED_MASK19 0 +#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 20) #endif /* _ASM_X86_DISABLED_FEATURES_H */ diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h index 3ff0d48469f2..b2d504f11937 100644 --- a/arch/x86/include/asm/required-features.h +++ b/arch/x86/include/asm/required-features.h @@ -101,6 +101,7 @@ #define REQUIRED_MASK16 0 #define REQUIRED_MASK17 0 #define REQUIRED_MASK18 0 -#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) +#define REQUIRED_MASK19 0 +#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 20) #endif /* _ASM_X86_REQUIRED_FEATURES_H */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index d31fd6a0fbad..28e0147d7915 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -966,6 +966,9 @@ void get_cpu_cap(struct cpuinfo_x86 *c) if 
(c->extended_cpuid_level >= 0x8000000a) c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a); + if (c->extended_cpuid_level >= 0x8000001f) + c->x86_capability[CPUID_8000_001F_EAX] = cpuid_eax(0x8000001f); + init_scattered_cpuid_features(c); init_speculation_control(c); diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 9bca00693005..f1cd1b6fb99e 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -41,11 +41,6 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 }, { X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 }, { X86_FEATURE_MBA, CPUID_EBX, 6, 0x80000008, 0 }, - { X86_FEATURE_SME, CPUID_EAX, 0, 0x8000001f, 0 }, - { X86_FEATURE_SEV, CPUID_EAX, 1, 0x8000001f, 0 }, - { X86_FEATURE_SEV_ES, CPUID_EAX, 3, 0x8000001f, 0 }, - { X86_FEATURE_SME_COHERENT, CPUID_EAX, 10, 0x8000001f, 0 }, - { X86_FEATURE_VM_PAGE_FLUSH, CPUID_EAX, 2, 0x8000001f, 0 }, { 0, 0, 0, 0, 0 } }; diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h index d109c5ec967c..a50f075244b4 100644 --- a/tools/arch/x86/include/asm/disabled-features.h +++ b/tools/arch/x86/include/asm/disabled-features.h @@ -104,6 +104,7 @@ DISABLE_ENQCMD) #define DISABLED_MASK17 0 #define DISABLED_MASK18 0 -#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) +#define DISABLED_MASK19 0 +#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 20) #endif /* _ASM_X86_DISABLED_FEATURES_H */ diff --git a/tools/arch/x86/include/asm/required-features.h b/tools/arch/x86/include/asm/required-features.h index 3ff0d48469f2..b2d504f11937 100644 --- a/tools/arch/x86/include/asm/required-features.h +++ b/tools/arch/x86/include/asm/required-features.h @@ -101,6 +101,7 @@ #define REQUIRED_MASK16 0 #define REQUIRED_MASK17 0 #define REQUIRED_MASK18 0 -#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) +#define REQUIRED_MASK19 0 +#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 20) #endif /* _ASM_X86_REQUIRED_FEATURES_H */ From 9b7fe7c6fbc007564f97805ff45882e79f0c70d0 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 2 Mar 2021 17:16:17 -0300 Subject: [PATCH 28/40] tools headers cpufeatures: Sync with the kernel sources commit 1a9bcadd0058a3e81c1beca48e5e08dee9446a01 upstream. 
To pick the changes from: 3b9c723ed7cfa4e1 ("KVM: SVM: Add support for SVM instruction address check change") b85a0425d8056f3b ("Enumerate AVX Vector Neural Network instructions") fb35d30fe5b06cc2 ("x86/cpufeatures: Assign dedicated feature word for CPUID_0x8000001F[EAX]") This only causes these perf files to be rebuilt: CC /tmp/build/perf/bench/mem-memcpy-x86-64-asm.o CC /tmp/build/perf/bench/mem-memset-x86-64-asm.o And addresses this perf build warning: Warning: Kernel ABI header at 'tools/arch/x86/include/asm/cpufeatures.h' differs from latest version at 'arch/x86/include/asm/cpufeatures.h' diff -u tools/arch/x86/include/asm/cpufeatures.h arch/x86/include/asm/cpufeatures.h Cc: Borislav Petkov Cc: Kyung Min Park Cc: Paolo Bonzini Cc: Sean Christopherson Cc: Wei Huang Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/arch/x86/include/asm/cpufeatures.h | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index ec53f52a06a5..d06ed44c4827 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -13,7 +13,7 @@ /* * Defines x86 CPU feature bits */ -#define NCAPINTS 19 /* N 32-bit words worth of info */ +#define NCAPINTS 20 /* N 32-bit words worth of info */ #define NBUGINTS 1 /* N 32-bit bug flags */ /* @@ -96,7 +96,7 @@ #define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */ #define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */ #define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */ -#define X86_FEATURE_SME_COHERENT ( 3*32+17) /* "" AMD hardware-enforced cache coherency */ +/* FREE! ( 3*32+17) */ #define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */ #define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */ #define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ @@ -201,7 +201,7 @@ #define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ -#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ +/* FREE! ( 7*32+10) */ #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ #define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* "" Set/clear IBRS on kernel entry/exit */ #define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* "" Fill RSB on VM-Exit */ @@ -211,7 +211,7 @@ #define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */ #define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ #define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */ -#define X86_FEATURE_SEV ( 7*32+20) /* AMD Secure Encrypted Virtualization */ +/* FREE! ( 7*32+20) */ #define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ #define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ #define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. 
*/ @@ -236,7 +236,6 @@ #define X86_FEATURE_EPT_AD ( 8*32+17) /* Intel Extended Page Table access-dirty bit */ #define X86_FEATURE_VMCALL ( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */ #define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */ -#define X86_FEATURE_SEV_ES ( 8*32+20) /* AMD Secure Encrypted Virtualization - Encrypted State */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */ #define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ @@ -299,6 +298,7 @@ #define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM-Exit when EIBRS is enabled */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ +#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ @@ -343,6 +343,7 @@ #define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */ #define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */ #define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */ +#define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* "" SVME addr check */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */ #define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/ @@ -389,6 +390,13 @@ #define X86_FEATURE_CORE_CAPABILITIES (18*32+30) /* "" IA32_CORE_CAPABILITIES MSR */ #define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */ +/* AMD-defined memory encryption features, CPUID level 0x8000001f (EAX), word 19 */ +#define X86_FEATURE_SME (19*32+ 0) /* AMD Secure Memory Encryption */ +#define X86_FEATURE_SEV (19*32+ 1) /* AMD Secure Encrypted Virtualization */ +#define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* "" VM Page Flush MSR is supported */ +#define X86_FEATURE_SEV_ES (19*32+ 3) /* AMD Secure Encrypted Virtualization - Encrypted State */ +#define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */ + /* * BUG word(s) */ From 073a28a9b50662991e7d6956c2cf2fc5d54f28cd Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Sat, 8 Jul 2023 10:21:35 +0200 Subject: [PATCH 29/40] x86/bugs: Increase the x86 bugs vector size to two u32s Upstream commit: 0e52740ffd10c6c316837c6c128f460f1aaba1ea There was never a doubt in my mind that they would not fit into a single u32 eventually. 
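The practical effect is easiest to see with the bug-bit arithmetic spelled out. The sketch below is illustrative only; it reuses the X86_BUG(x) = NCAPINTS*32 + (x) encoding from cpufeatures.h (with NCAPINTS as it stands at this point in the series) and the SRSO define introduced later in the series to show where the new bits land.

    #include <stdio.h>

    #define NCAPINTS        20
    #define NBUGINTS        2               /* raised from 1 by this patch */
    #define X86_BUG(x)      (NCAPINTS*32 + (x))

    #define X86_BUG_GDS     X86_BUG(29)             /* still fits in bug word 0 */
    #define X86_BUG_SRSO    X86_BUG(1*32 + 0)       /* first bit of bug word 1 */

    int main(void)
    {
            /* cpuinfo_x86::x86_capability[] holds NCAPINTS + NBUGINTS words. */
            printf("capability words incl. bugs: %d\n", NCAPINTS + NBUGINTS);
            printf("GDS  -> word %2d, bit %2d\n", X86_BUG_GDS / 32,  X86_BUG_GDS % 32);
            printf("SRSO -> word %2d, bit %2d\n", X86_BUG_SRSO / 32, X86_BUG_SRSO % 32);
            return 0;
    }

With NBUGINTS still at 1, the SRSO bit would index past the end of the capability array, which is the overflow this change heads off.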
Signed-off-by: Borislav Petkov (AMD) Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 2 +- tools/arch/x86/include/asm/cpufeatures.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index c1058ab69d5c..29c05123b40a 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -14,7 +14,7 @@ * Defines x86 CPU feature bits */ #define NCAPINTS 20 /* N 32-bit words worth of info */ -#define NBUGINTS 1 /* N 32-bit bug flags */ +#define NBUGINTS 2 /* N 32-bit bug flags */ /* * Note: If the comment begins with a quoted string, that string is used diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index d06ed44c4827..2ae4d74ee73b 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -14,7 +14,7 @@ * Defines x86 CPU feature bits */ #define NCAPINTS 20 /* N 32-bit words worth of info */ -#define NBUGINTS 1 /* N 32-bit bug flags */ +#define NBUGINTS 2 /* N 32-bit bug flags */ /* * Note: If the comment begins with a quoted string, that string is used From 34f23ba8a399ecd38b45c84da257b91d278e88aa Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Tue, 10 Jan 2023 16:46:37 -0600 Subject: [PATCH 30/40] x86/cpu, kvm: Add support for CPUID_80000021_EAX commit 8415a74852d7c24795007ee9862d25feb519007c upstream. Add support for CPUID leaf 80000021, EAX. The majority of the features will be used in the kernel and thus a separate leaf is appropriate. Include KVM's reverse_cpuid entry because features are used by VM guests, too. [ bp: Massage commit message. ] Signed-off-by: Kim Phillips Signed-off-by: Borislav Petkov (AMD) Acked-by: Sean Christopherson Link: https://lore.kernel.org/r/20230124163319.2277355-2-kim.phillips@amd.com [bwh: Backported to 6.1: adjust context] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeature.h | 7 +++++-- arch/x86/include/asm/cpufeatures.h | 2 +- arch/x86/include/asm/disabled-features.h | 3 ++- arch/x86/include/asm/required-features.h | 3 ++- arch/x86/kernel/cpu/common.c | 3 +++ arch/x86/kvm/cpuid.h | 1 + 6 files changed, 14 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index b0f206681fde..cc3f62f5d551 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -32,6 +32,7 @@ enum cpuid_leafs CPUID_8000_0007_EBX, CPUID_7_EDX, CPUID_8000_001F_EAX, + CPUID_8000_0021_EAX, }; #ifdef CONFIG_X86_FEATURE_NAMES @@ -91,8 +92,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 17, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 19, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 20, feature_bit) || \ REQUIRED_MASK_CHECK || \ - BUILD_BUG_ON_ZERO(NCAPINTS != 20)) + BUILD_BUG_ON_ZERO(NCAPINTS != 21)) #define DISABLED_MASK_BIT_SET(feature_bit) \ ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \ @@ -115,8 +117,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 17, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 19, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 20, feature_bit) || \ DISABLED_MASK_CHECK || \ - BUILD_BUG_ON_ZERO(NCAPINTS != 20)) + 
BUILD_BUG_ON_ZERO(NCAPINTS != 21)) #define cpu_has(c, bit) \ (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 29c05123b40a..074860357896 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -13,7 +13,7 @@ /* * Defines x86 CPU feature bits */ -#define NCAPINTS 20 /* N 32-bit words worth of info */ +#define NCAPINTS 21 /* N 32-bit words worth of info */ #define NBUGINTS 2 /* N 32-bit bug flags */ /* diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index da21405ea11c..3c24378e67be 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -102,6 +102,7 @@ #define DISABLED_MASK17 0 #define DISABLED_MASK18 0 #define DISABLED_MASK19 0 -#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 20) +#define DISABLED_MASK20 0 +#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21) #endif /* _ASM_X86_DISABLED_FEATURES_H */ diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h index b2d504f11937..9bf60a8b9e9c 100644 --- a/arch/x86/include/asm/required-features.h +++ b/arch/x86/include/asm/required-features.h @@ -102,6 +102,7 @@ #define REQUIRED_MASK17 0 #define REQUIRED_MASK18 0 #define REQUIRED_MASK19 0 -#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 20) +#define REQUIRED_MASK20 0 +#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21) #endif /* _ASM_X86_REQUIRED_FEATURES_H */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 28e0147d7915..7f92145913e5 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -969,6 +969,9 @@ void get_cpu_cap(struct cpuinfo_x86 *c) if (c->extended_cpuid_level >= 0x8000001f) c->x86_capability[CPUID_8000_001F_EAX] = cpuid_eax(0x8000001f); + if (c->extended_cpuid_level >= 0x80000021) + c->x86_capability[CPUID_8000_0021_EAX] = cpuid_eax(0x80000021); + init_scattered_cpuid_features(c); init_speculation_control(c); diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index dc921d76e42e..1ba9313d26b9 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -63,6 +63,7 @@ static const struct cpuid_reg reverse_cpuid[] = { [CPUID_8000_0007_EBX] = {0x80000007, 0, CPUID_EBX}, [CPUID_7_EDX] = { 7, 0, CPUID_EDX}, [CPUID_7_1_EAX] = { 7, 1, CPUID_EAX}, + [CPUID_8000_0021_EAX] = {0x80000021, 0, CPUID_EAX}, }; /* From 3f9b7101bea1dcb63410c016ceb266f6e9f733c9 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Wed, 28 Jun 2023 11:02:39 +0200 Subject: [PATCH 31/40] x86/srso: Add a Speculative RAS Overflow mitigation Upstream commit: fb3bd914b3ec28f5fb697ac55c4846ac2d542855 Add a mitigation for the speculative return address stack overflow vulnerability found on AMD processors. The mitigation works by ensuring all RET instructions speculate to a controlled location, similar to how speculation is controlled in the retpoline sequence. To accomplish this, the __x86_return_thunk forces the CPU to mispredict every function return using a 'safe return' sequence. To ensure the safety of this mitigation, the kernel must ensure that the safe return sequence is itself free from attacker interference. 
In Zen3 and Zen4, this is accomplished by creating a BTB alias between the untraining function srso_untrain_ret_alias() and the safe return function srso_safe_ret_alias() which results in evicting a potentially poisoned BTB entry and using that safe one for all function returns. In older Zen1 and Zen2, this is accomplished using a reinterpretation technique similar to Retbleed one: srso_untrain_ret() and srso_safe_ret(). Signed-off-by: Borislav Petkov (AMD) Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/hw-vuln/index.rst | 1 + Documentation/admin-guide/hw-vuln/srso.rst | 133 ++++++++++++++++++ .../admin-guide/kernel-parameters.txt | 11 ++ arch/x86/Kconfig | 7 + arch/x86/include/asm/cpufeatures.h | 5 + arch/x86/include/asm/nospec-branch.h | 9 +- arch/x86/include/asm/processor.h | 2 + arch/x86/kernel/alternative.c | 4 +- arch/x86/kernel/cpu/amd.c | 14 ++ arch/x86/kernel/cpu/bugs.c | 106 ++++++++++++++ arch/x86/kernel/cpu/common.c | 8 +- arch/x86/kernel/vmlinux.lds.S | 32 ++++- arch/x86/lib/retpoline.S | 81 ++++++++++- drivers/base/cpu.c | 8 ++ include/linux/cpu.h | 2 + tools/objtool/arch/x86/decode.c | 5 +- 16 files changed, 419 insertions(+), 9 deletions(-) create mode 100644 Documentation/admin-guide/hw-vuln/srso.rst diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst index 245468b0f2be..84742be223ff 100644 --- a/Documentation/admin-guide/hw-vuln/index.rst +++ b/Documentation/admin-guide/hw-vuln/index.rst @@ -17,3 +17,4 @@ are configurable at compile, boot or run time. special-register-buffer-data-sampling.rst processor_mmio_stale_data.rst gather_data_sampling.rst + srso diff --git a/Documentation/admin-guide/hw-vuln/srso.rst b/Documentation/admin-guide/hw-vuln/srso.rst new file mode 100644 index 000000000000..2f923c805802 --- /dev/null +++ b/Documentation/admin-guide/hw-vuln/srso.rst @@ -0,0 +1,133 @@ +.. SPDX-License-Identifier: GPL-2.0 + +Speculative Return Stack Overflow (SRSO) +======================================== + +This is a mitigation for the speculative return stack overflow (SRSO) +vulnerability found on AMD processors. The mechanism is by now the well +known scenario of poisoning CPU functional units - the Branch Target +Buffer (BTB) and Return Address Predictor (RAP) in this case - and then +tricking the elevated privilege domain (the kernel) into leaking +sensitive data. + +AMD CPUs predict RET instructions using a Return Address Predictor (aka +Return Address Stack/Return Stack Buffer). In some cases, a non-architectural +CALL instruction (i.e., an instruction predicted to be a CALL but is +not actually a CALL) can create an entry in the RAP which may be used +to predict the target of a subsequent RET instruction. + +The specific circumstances that lead to this varies by microarchitecture +but the concern is that an attacker can mis-train the CPU BTB to predict +non-architectural CALL instructions in kernel space and use this to +control the speculative target of a subsequent kernel RET, potentially +leading to information disclosure via a speculative side-channel. + +The issue is tracked under CVE-2023-20569. + +Affected processors +------------------- + +AMD Zen, generations 1-4. That is, all families 0x17 and 0x19. Older +processors have not been investigated. + +System information and options +------------------------------ + +First of all, it is required that the latest microcode be loaded for +mitigations to be effective. 
+ +The sysfs file showing SRSO mitigation status is: + + /sys/devices/system/cpu/vulnerabilities/spec_rstack_overflow + +The possible values in this file are: + + - 'Not affected' The processor is not vulnerable + + - 'Vulnerable: no microcode' The processor is vulnerable, no + microcode extending IBPB functionality + to address the vulnerability has been + applied. + + - 'Mitigation: microcode' Extended IBPB functionality microcode + patch has been applied. It does not + address User->Kernel and Guest->Host + transitions protection but it does + address User->User and VM->VM attack + vectors. + + (spec_rstack_overflow=microcode) + + - 'Mitigation: safe RET' Software-only mitigation. It complements + the extended IBPB microcode patch + functionality by addressing User->Kernel + and Guest->Host transitions protection. + + Selected by default or by + spec_rstack_overflow=safe-ret + + - 'Mitigation: IBPB' Similar protection as "safe RET" above + but employs an IBPB barrier on privilege + domain crossings (User->Kernel, + Guest->Host). + + (spec_rstack_overflow=ibpb) + + - 'Mitigation: IBPB on VMEXIT' Mitigation addressing the cloud provider + scenario - the Guest->Host transitions + only. + + (spec_rstack_overflow=ibpb-vmexit) + +In order to exploit vulnerability, an attacker needs to: + + - gain local access on the machine + + - break kASLR + + - find gadgets in the running kernel in order to use them in the exploit + + - potentially create and pin an additional workload on the sibling + thread, depending on the microarchitecture (not necessary on fam 0x19) + + - run the exploit + +Considering the performance implications of each mitigation type, the +default one is 'Mitigation: safe RET' which should take care of most +attack vectors, including the local User->Kernel one. + +As always, the user is advised to keep her/his system up-to-date by +applying software updates regularly. + +The default setting will be reevaluated when needed and especially when +new attack vectors appear. + +As one can surmise, 'Mitigation: safe RET' does come at the cost of some +performance depending on the workload. If one trusts her/his userspace +and does not want to suffer the performance impact, one can always +disable the mitigation with spec_rstack_overflow=off. + +Similarly, 'Mitigation: IBPB' is another full mitigation type employing +an indrect branch prediction barrier after having applied the required +microcode patch for one's system. This mitigation comes also at +a performance cost. + +Mitigation: safe RET +-------------------- + +The mitigation works by ensuring all RET instructions speculate to +a controlled location, similar to how speculation is controlled in the +retpoline sequence. To accomplish this, the __x86_return_thunk forces +the CPU to mispredict every function return using a 'safe return' +sequence. + +To ensure the safety of this mitigation, the kernel must ensure that the +safe return sequence is itself free from attacker interference. In Zen3 +and Zen4, this is accomplished by creating a BTB alias between the +untraining function srso_untrain_ret_alias() and the safe return +function srso_safe_ret_alias() which results in evicting a potentially +poisoned BTB entry and using that safe one for all function returns. + +In older Zen1 and Zen2, this is accomplished using a reinterpretation +technique similar to Retbleed one: srso_untrain_ret() and +srso_safe_ret(). 
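The mitigation state documented above is exported through the usual vulnerabilities directory. A minimal reader, not part of the patch, assuming only the sysfs path given in the documentation:

#include <stdio.h>

int main(void)
{
	const char *path =
		"/sys/devices/system/cpu/vulnerabilities/spec_rstack_overflow";
	FILE *f = fopen(path, "r");
	char line[128];

	if (!f) {
		/* Kernels without this series do not expose the file. */
		perror(path);
		return 1;
	}

	/* Prints one of the strings listed above, e.g. "Mitigation: safe RET". */
	if (fgets(line, sizeof(line), f))
		fputs(line, stdout);

	fclose(f);
	return 0;
}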
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 074e39deef04..f1f7c068cf65 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5141,6 +5141,17 @@ Not specifying this option is equivalent to spectre_v2_user=auto. + spec_rstack_overflow= + [X86] Control RAS overflow mitigation on AMD Zen CPUs + + off - Disable mitigation + microcode - Enable microcode mitigation only + safe-ret - Enable sw-only safe RET mitigation (default) + ibpb - Enable mitigation by issuing IBPB on + kernel entry + ibpb-vmexit - Issue IBPB only on VMEXIT + (cloud-specific mitigation) + spec_store_bypass_disable= [HW] Control Speculative Store Bypass (SSB) Disable mitigation (Speculative Store Bypass vulnerability) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 25fda0590e5d..6dc670e36393 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2476,6 +2476,13 @@ config CPU_IBRS_ENTRY This mitigates both spectre_v2 and retbleed at great cost to performance. +config CPU_SRSO + bool "Mitigate speculative RAS overflow on AMD" + depends on CPU_SUP_AMD && X86_64 && RETHUNK + default y + help + Enable the SRSO mitigation needed on AMD Zen1-4 machines. + config SLS bool "Mitigate Straight-Line-Speculation" depends on CC_HAS_SLS && X86_64 diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 074860357896..f54207b90cad 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -301,6 +301,9 @@ #define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */ #define X86_FEATURE_MSR_TSX_CTRL (11*32+18) /* "" MSR IA32_TSX_CTRL (Intel) implemented */ +#define X86_FEATURE_SRSO (11*32+24) /* "" AMD BTB untrain RETs */ +#define X86_FEATURE_SRSO_ALIAS (11*32+25) /* "" AMD BTB untrain RETs through aliasing */ + /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ @@ -441,4 +444,6 @@ #define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */ #define X86_BUG_GDS X86_BUG(29) /* CPU is affected by Gather Data Sampling */ +/* BUG word 2 */ +#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index f14cdf951249..e882fea99e13 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -112,7 +112,7 @@ * eventually turn into it's own annotation. 
*/ .macro ANNOTATE_UNRET_END -#ifdef CONFIG_DEBUG_ENTRY +#if (defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_SRSO)) ANNOTATE_RETPOLINE_SAFE nop #endif @@ -179,6 +179,11 @@ CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET, \ "call entry_ibpb", X86_FEATURE_ENTRY_IBPB #endif + +#ifdef CONFIG_CPU_SRSO + ALTERNATIVE_2 "", "call srso_untrain_ret", X86_FEATURE_SRSO, \ + "call srso_untrain_ret_alias", X86_FEATURE_SRSO_ALIAS +#endif .endm #else /* __ASSEMBLY__ */ @@ -191,6 +196,8 @@ extern void __x86_return_thunk(void); extern void zen_untrain_ret(void); +extern void srso_untrain_ret(void); +extern void srso_untrain_ret_alias(void); extern void entry_ibpb(void); #ifdef CONFIG_RETPOLINE diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 60514502ead6..6a9de5b1fe45 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -808,9 +808,11 @@ DECLARE_PER_CPU(u64, msr_misc_features_shadow); #ifdef CONFIG_CPU_SUP_AMD extern u16 amd_get_nb_id(int cpu); extern u32 amd_get_nodes_per_socket(void); +extern bool cpu_has_ibpb_brtype_microcode(void); #else static inline u16 amd_get_nb_id(int cpu) { return 0; } static inline u32 amd_get_nodes_per_socket(void) { return 0; } +static inline bool cpu_has_ibpb_brtype_microcode(void) { return false; } #endif static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index e6e63a9d27cb..50b4047991ae 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -678,7 +678,9 @@ static int patch_return(void *addr, struct insn *insn, u8 *bytes) { int i = 0; - if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) + if (cpu_feature_enabled(X86_FEATURE_RETHUNK) || + cpu_feature_enabled(X86_FEATURE_SRSO) || + cpu_feature_enabled(X86_FEATURE_SRSO_ALIAS)) return -1; bytes[i++] = RET_INSN_OPCODE; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 3d99a823ffac..a5239ecbc2c6 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -1282,6 +1282,20 @@ void set_dr_addr_mask(unsigned long mask, int dr) } } +bool cpu_has_ibpb_brtype_microcode(void) +{ + u8 fam = boot_cpu_data.x86; + + if (fam == 0x17) { + /* Zen1/2 IBPB flushes branch type predictions too. 
*/ + return boot_cpu_has(X86_FEATURE_AMD_IBPB); + } else if (fam == 0x19) { + return false; + } + + return false; +} + static void zenbleed_check_cpu(void *unused) { struct cpuinfo_x86 *c = &cpu_data(smp_processor_id()); diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 21f4bdbcfb4b..e41b271abbda 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -46,6 +46,7 @@ static void __init taa_select_mitigation(void); static void __init mmio_select_mitigation(void); static void __init srbds_select_mitigation(void); static void __init gds_select_mitigation(void); +static void __init srso_select_mitigation(void); /* The base value of the SPEC_CTRL MSR without task-specific bits set */ u64 x86_spec_ctrl_base; @@ -152,6 +153,7 @@ void __init cpu_select_mitigations(void) md_clear_select_mitigation(); srbds_select_mitigation(); gds_select_mitigation(); + srso_select_mitigation(); } /* @@ -2233,6 +2235,95 @@ static int __init l1tf_cmdline(char *str) } early_param("l1tf", l1tf_cmdline); +#undef pr_fmt +#define pr_fmt(fmt) "Speculative Return Stack Overflow: " fmt + +enum srso_mitigation { + SRSO_MITIGATION_NONE, + SRSO_MITIGATION_MICROCODE, + SRSO_MITIGATION_SAFE_RET, +}; + +enum srso_mitigation_cmd { + SRSO_CMD_OFF, + SRSO_CMD_MICROCODE, + SRSO_CMD_SAFE_RET, +}; + +static const char * const srso_strings[] = { + [SRSO_MITIGATION_NONE] = "Vulnerable", + [SRSO_MITIGATION_MICROCODE] = "Mitigation: microcode", + [SRSO_MITIGATION_SAFE_RET] = "Mitigation: safe RET", +}; + +static enum srso_mitigation srso_mitigation __ro_after_init = SRSO_MITIGATION_NONE; +static enum srso_mitigation_cmd srso_cmd __ro_after_init = SRSO_CMD_SAFE_RET; + +static int __init srso_parse_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + if (!strcmp(str, "off")) + srso_cmd = SRSO_CMD_OFF; + else if (!strcmp(str, "microcode")) + srso_cmd = SRSO_CMD_MICROCODE; + else if (!strcmp(str, "safe-ret")) + srso_cmd = SRSO_CMD_SAFE_RET; + else + pr_err("Ignoring unknown SRSO option (%s).", str); + + return 0; +} +early_param("spec_rstack_overflow", srso_parse_cmdline); + +#define SRSO_NOTICE "WARNING: See https://kernel.org/doc/html/latest/admin-guide/hw-vuln/srso.html for mitigation options." + +static void __init srso_select_mitigation(void) +{ + bool has_microcode; + + if (!boot_cpu_has_bug(X86_BUG_SRSO) || cpu_mitigations_off()) + return; + + has_microcode = cpu_has_ibpb_brtype_microcode(); + if (!has_microcode) { + pr_warn("IBPB-extending microcode not applied!\n"); + pr_warn(SRSO_NOTICE); + } + + switch (srso_cmd) { + case SRSO_CMD_OFF: + return; + + case SRSO_CMD_MICROCODE: + if (has_microcode) { + srso_mitigation = SRSO_MITIGATION_MICROCODE; + pr_warn(SRSO_NOTICE); + } + break; + + case SRSO_CMD_SAFE_RET: + if (IS_ENABLED(CONFIG_CPU_SRSO)) { + if (boot_cpu_data.x86 == 0x19) + setup_force_cpu_cap(X86_FEATURE_SRSO_ALIAS); + else + setup_force_cpu_cap(X86_FEATURE_SRSO); + srso_mitigation = SRSO_MITIGATION_SAFE_RET; + } else { + pr_err("WARNING: kernel not compiled with CPU_SRSO.\n"); + return; + } + break; + + default: + break; + + } + + pr_info("%s%s\n", srso_strings[srso_mitigation], (has_microcode ? "" : ", no microcode")); +} + #undef pr_fmt #define pr_fmt(fmt) fmt @@ -2436,6 +2527,13 @@ static ssize_t gds_show_state(char *buf) return sysfs_emit(buf, "%s\n", gds_strings[gds_mitigation]); } +static ssize_t srso_show_state(char *buf) +{ + return sysfs_emit(buf, "%s%s\n", + srso_strings[srso_mitigation], + (cpu_has_ibpb_brtype_microcode() ? 
"" : ", no microcode")); +} + static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, char *buf, unsigned int bug) { @@ -2488,6 +2586,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr case X86_BUG_GDS: return gds_show_state(buf); + case X86_BUG_SRSO: + return srso_show_state(buf); + default: break; } @@ -2557,4 +2658,9 @@ ssize_t cpu_show_gds(struct device *dev, struct device_attribute *attr, char *bu { return cpu_show_common(dev, attr, buf, X86_BUG_GDS); } + +ssize_t cpu_show_spec_rstack_overflow(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_SRSO); +} #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 7f92145913e5..a924608ccf04 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1135,6 +1135,8 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { #define RETBLEED BIT(3) /* CPU is affected by GDS */ #define GDS BIT(4) +/* CPU is affected by SRSO */ +#define SRSO BIT(5) static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS), @@ -1168,8 +1170,9 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_AMD(0x15, RETBLEED), VULNBL_AMD(0x16, RETBLEED), - VULNBL_AMD(0x17, RETBLEED), + VULNBL_AMD(0x17, RETBLEED | SRSO), VULNBL_HYGON(0x18, RETBLEED), + VULNBL_AMD(0x19, SRSO), {} }; @@ -1297,6 +1300,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) boot_cpu_has(X86_FEATURE_AVX)) setup_force_cpu_bug(X86_BUG_GDS); + if (cpu_matches(cpu_vuln_blacklist, SRSO)) + setup_force_cpu_bug(X86_BUG_SRSO); + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index a21cd2381fa8..4955cb5cc001 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -133,7 +133,20 @@ SECTIONS LOCK_TEXT KPROBES_TEXT ALIGN_ENTRY_TEXT_BEGIN +#ifdef CONFIG_CPU_SRSO + *(.text.__x86.rethunk_untrain) +#endif + ENTRY_TEXT + +#ifdef CONFIG_CPU_SRSO + /* + * See the comment above srso_untrain_ret_alias()'s + * definition. + */ + . = srso_untrain_ret_alias | (1 << 2) | (1 << 8) | (1 << 14) | (1 << 20); + *(.text.__x86.rethunk_safe) +#endif ALIGN_ENTRY_TEXT_END SOFTIRQENTRY_TEXT STATIC_CALL_TEXT @@ -142,13 +155,15 @@ SECTIONS #ifdef CONFIG_RETPOLINE __indirect_thunk_start = .; - *(.text.__x86.*) + *(.text.__x86.indirect_thunk) + *(.text.__x86.return_thunk) __indirect_thunk_end = .; #endif } :text =0xcccc /* End of text section, which should occupy whole number of pages */ _etext = .; + . = ALIGN(PAGE_SIZE); X86_ALIGN_RODATA_BEGIN @@ -502,6 +517,21 @@ INIT_PER_CPU(irq_stack_backing_store); "fixed_percpu_data is not at start of per-cpu area"); #endif +#ifdef CONFIG_RETHUNK +. = ASSERT((__ret & 0x3f) == 0, "__ret not cacheline-aligned"); +. = ASSERT((srso_safe_ret & 0x3f) == 0, "srso_safe_ret not cacheline-aligned"); +#endif + +#ifdef CONFIG_CPU_SRSO +/* + * GNU ld cannot do XOR so do: (A | B) - (A & B) in order to compute the XOR + * of the two function addresses: + */ +. 
= ASSERT(((srso_untrain_ret_alias | srso_safe_ret_alias) - + (srso_untrain_ret_alias & srso_safe_ret_alias)) == ((1 << 2) | (1 << 8) | (1 << 14) | (1 << 20)), + "SRSO function pair won't alias"); +#endif + #endif /* CONFIG_X86_32 */ #ifdef CONFIG_KEXEC_CORE diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index 1221bb099afb..5f7eed97487e 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -9,6 +9,7 @@ #include #include #include +#include .section .text.__x86.indirect_thunk @@ -73,6 +74,45 @@ SYM_CODE_END(__x86_indirect_thunk_array) */ #ifdef CONFIG_RETHUNK +/* + * srso_untrain_ret_alias() and srso_safe_ret_alias() are placed at + * special addresses: + * + * - srso_untrain_ret_alias() is 2M aligned + * - srso_safe_ret_alias() is also in the same 2M page but bits 2, 8, 14 + * and 20 in its virtual address are set (while those bits in the + * srso_untrain_ret_alias() function are cleared). + * + * This guarantees that those two addresses will alias in the branch + * target buffer of Zen3/4 generations, leading to any potential + * poisoned entries at that BTB slot to get evicted. + * + * As a result, srso_safe_ret_alias() becomes a safe return. + */ +#ifdef CONFIG_CPU_SRSO + .section .text.__x86.rethunk_untrain + +SYM_START(srso_untrain_ret_alias, SYM_L_GLOBAL, SYM_A_NONE) + ASM_NOP2 + lfence + jmp __x86_return_thunk +SYM_FUNC_END(srso_untrain_ret_alias) +__EXPORT_THUNK(srso_untrain_ret_alias) + + .section .text.__x86.rethunk_safe +#endif + +/* Needs a definition for the __x86_return_thunk alternative below. */ +SYM_START(srso_safe_ret_alias, SYM_L_GLOBAL, SYM_A_NONE) +#ifdef CONFIG_CPU_SRSO + add $8, %_ASM_SP + UNWIND_HINT_FUNC +#endif + ANNOTATE_UNRET_SAFE + ret + int3 +SYM_FUNC_END(srso_safe_ret_alias) + .section .text.__x86.return_thunk /* @@ -85,7 +125,7 @@ SYM_CODE_END(__x86_indirect_thunk_array) * from re-poisioning the BTB prediction. */ .align 64 - .skip 63, 0xcc + .skip 64 - (__ret - zen_untrain_ret), 0xcc SYM_FUNC_START_NOALIGN(zen_untrain_ret); /* @@ -117,10 +157,10 @@ SYM_FUNC_START_NOALIGN(zen_untrain_ret); * evicted, __x86_return_thunk will suffer Straight Line Speculation * which will be contained safely by the INT3. */ -SYM_INNER_LABEL(__x86_return_thunk, SYM_L_GLOBAL) +SYM_INNER_LABEL(__ret, SYM_L_GLOBAL) ret int3 -SYM_CODE_END(__x86_return_thunk) +SYM_CODE_END(__ret) /* * Ensure the TEST decoding / BTB invalidation is complete. @@ -131,11 +171,44 @@ SYM_CODE_END(__x86_return_thunk) * Jump back and execute the RET in the middle of the TEST instruction. * INT3 is for SLS protection. */ - jmp __x86_return_thunk + jmp __ret int3 SYM_FUNC_END(zen_untrain_ret) __EXPORT_THUNK(zen_untrain_ret) +/* + * SRSO untraining sequence for Zen1/2, similar to zen_untrain_ret() + * above. On kernel entry, srso_untrain_ret() is executed which is a + * + * movabs $0xccccccc308c48348,%rax + * + * and when the return thunk executes the inner label srso_safe_ret() + * later, it is a stack manipulation and a RET which is mispredicted and + * thus a "safe" one to use. 
+ */ + .align 64 + .skip 64 - (srso_safe_ret - srso_untrain_ret), 0xcc +SYM_START(srso_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE) + .byte 0x48, 0xb8 + +SYM_INNER_LABEL(srso_safe_ret, SYM_L_GLOBAL) + add $8, %_ASM_SP + ret + int3 + int3 + int3 + lfence + call srso_safe_ret + int3 +SYM_CODE_END(srso_safe_ret) +SYM_FUNC_END(srso_untrain_ret) +__EXPORT_THUNK(srso_untrain_ret) + +SYM_FUNC_START(__x86_return_thunk) + ALTERNATIVE_2 "jmp __ret", "call srso_safe_ret", X86_FEATURE_SRSO, \ + "call srso_safe_ret_alias", X86_FEATURE_SRSO_ALIAS + int3 +SYM_CODE_END(__x86_return_thunk) EXPORT_SYMBOL(__x86_return_thunk) #endif /* CONFIG_RETHUNK */ diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 0cb8ec80ddfd..2db1e0e8c1a7 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -585,6 +585,12 @@ ssize_t __weak cpu_show_gds(struct device *dev, return sysfs_emit(buf, "Not affected\n"); } +ssize_t __weak cpu_show_spec_rstack_overflow(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "Not affected\n"); +} + static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); @@ -597,6 +603,7 @@ static DEVICE_ATTR(srbds, 0444, cpu_show_srbds, NULL); static DEVICE_ATTR(mmio_stale_data, 0444, cpu_show_mmio_stale_data, NULL); static DEVICE_ATTR(retbleed, 0444, cpu_show_retbleed, NULL); static DEVICE_ATTR(gather_data_sampling, 0444, cpu_show_gds, NULL); +static DEVICE_ATTR(spec_rstack_overflow, 0444, cpu_show_spec_rstack_overflow, NULL); static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_meltdown.attr, @@ -611,6 +618,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_mmio_stale_data.attr, &dev_attr_retbleed.attr, &dev_attr_gather_data_sampling.attr, + &dev_attr_spec_rstack_overflow.attr, NULL }; diff --git a/include/linux/cpu.h b/include/linux/cpu.h index ef084a2fc6d5..358905a85993 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -70,6 +70,8 @@ extern ssize_t cpu_show_mmio_stale_data(struct device *dev, char *buf); extern ssize_t cpu_show_retbleed(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_spec_rstack_overflow(struct device *dev, + struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index d8f47704fd85..5b915ebb6116 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -652,5 +652,8 @@ bool arch_is_retpoline(struct symbol *sym) bool arch_is_rethunk(struct symbol *sym) { - return !strcmp(sym->name, "__x86_return_thunk"); + return !strcmp(sym->name, "__x86_return_thunk") || + !strcmp(sym->name, "srso_untrain_ret") || + !strcmp(sym->name, "srso_safe_ret") || + !strcmp(sym->name, "__ret"); } From df76a59feba549825f426cb1586bfa86b49c08fa Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Tue, 18 Jul 2023 11:13:40 +0200 Subject: [PATCH 32/40] x86/srso: Add IBPB_BRTYPE support Upstream commit: 79113e4060aba744787a81edb9014f2865193854 Add support for the synthetic CPUID flag which "if this bit is 1, it indicates that MSR 49h (PRED_CMD) bit 0 (IBPB) flushes all branch type predictions from the CPU branch predictor." 
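For readers unfamiliar with the Zen1/2 reinterpretation trick in the retpoline.S hunk above: the two opcode bytes 0x48, 0xb8 turn the first ten bytes of srso_untrain_ret() into a single "movabs $imm64, %rax" when entered from the top, while entering at the inner srso_safe_ret() label decodes the very same bytes as "add $8, %rsp; ret; int3". A standalone sketch, not part of the patch, that reassembles the immediate from the raw bytes (the opcode values are the standard x86-64 encodings):

#include <stdio.h>

int main(void)
{
	/* Byte stream as laid out by srso_untrain_ret()/srso_safe_ret(). */
	const unsigned char seq[] = {
		0x48, 0xb8,		/* srso_untrain_ret: movabs imm64, %rax */
		0x48, 0x83, 0xc4, 0x08,	/* srso_safe_ret:    add $8, %rsp       */
		0xc3,			/*                   ret                 */
		0xcc, 0xcc, 0xcc,	/*                   int3; int3; int3    */
	};
	unsigned long long imm = 0;
	int i;

	/* The eight bytes after the opcode, read little endian, are the imm64. */
	for (i = 0; i < 8; i++)
		imm |= (unsigned long long)seq[2 + i] << (8 * i);

	/* Prints 0xccccccc308c48348, the constant quoted in the comment above. */
	printf("movabs immediate = 0x%016llx\n", imm);
	return 0;
}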
This flag is there so that this capability in guests can be detected easily (otherwise one would have to track microcode revisions which is impossible for guests). It is also needed only for Zen3 and -4. The other two (Zen1 and -2) always flush branch type predictions by default. Signed-off-by: Borislav Petkov (AMD) Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 2 ++ arch/x86/kernel/cpu/bugs.c | 12 +++++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index f54207b90cad..2c43af9e836c 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -402,6 +402,8 @@ #define X86_FEATURE_SEV_ES (19*32+ 3) /* AMD Secure Encrypted Virtualization - Encrypted State */ #define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */ +#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* "" MSR_PRED_CMD[IBPB] flushes all branch type predictions */ + /* * BUG word(s) */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index e41b271abbda..90a91ef0fc64 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -2286,10 +2286,20 @@ static void __init srso_select_mitigation(void) if (!boot_cpu_has_bug(X86_BUG_SRSO) || cpu_mitigations_off()) return; - has_microcode = cpu_has_ibpb_brtype_microcode(); + /* + * The first check is for the kernel running as a guest in order + * for guests to verify whether IBPB is a viable mitigation. + */ + has_microcode = boot_cpu_has(X86_FEATURE_IBPB_BRTYPE) || cpu_has_ibpb_brtype_microcode(); if (!has_microcode) { pr_warn("IBPB-extending microcode not applied!\n"); pr_warn(SRSO_NOTICE); + } else { + /* + * Enable the synthetic (even if in a real CPUID leaf) + * flag for guests. + */ + setup_force_cpu_cap(X86_FEATURE_IBPB_BRTYPE); } switch (srso_cmd) { From e47af0c255aed7da91202f26250558a8e34e1c26 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Thu, 29 Jun 2023 17:43:40 +0200 Subject: [PATCH 33/40] x86/srso: Add SRSO_NO support Upstream commit: 1b5277c0ea0b247393a9c426769fde18cff5e2f6 Add support for the CPUID flag which denotes that the CPU is not affected by SRSO. 
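Both the IBPB_BRTYPE flag above and the SRSO_NO flag this patch adds live in CPUID leaf 0x80000021 EAX: capability word 20 maps to that register, so (20*32+28) and (20*32+29) are simply EAX bits 28 and 29. A guest-side detection sketch, not part of the series, assuming a GCC/Clang toolchain that provides <cpuid.h>:

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax = 0, ebx, ecx, edx;

	if (!__get_cpuid(0x80000021, &eax, &ebx, &ecx, &edx)) {
		puts("CPUID leaf 0x80000021 not enumerated");
		return 0;
	}

	/* Bit 28: IBPB flushes all branch type predictions (IBPB_BRTYPE). */
	printf("IBPB_BRTYPE: %s\n", (eax & (1u << 28)) ? "yes" : "no");
	/* Bit 29: CPU is not affected by SRSO (SRSO_NO). */
	printf("SRSO_NO:     %s\n", (eax & (1u << 29)) ? "yes" : "no");
	return 0;
}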
Signed-off-by: Borislav Petkov (AMD) Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 2 ++ arch/x86/include/asm/msr-index.h | 1 + arch/x86/include/asm/nospec-branch.h | 6 +++--- arch/x86/kernel/cpu/amd.c | 12 ++++++------ arch/x86/kernel/cpu/bugs.c | 24 ++++++++++++++++++++---- arch/x86/kernel/cpu/common.c | 6 ++++-- arch/x86/kvm/cpuid.c | 3 +++ 7 files changed, 39 insertions(+), 15 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 2c43af9e836c..b9f921a86db3 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -402,7 +402,9 @@ #define X86_FEATURE_SEV_ES (19*32+ 3) /* AMD Secure Encrypted Virtualization - Encrypted State */ #define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */ +#define X86_FEATURE_SBPB (20*32+27) /* "" Selective Branch Prediction Barrier */ #define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* "" MSR_PRED_CMD[IBPB] flushes all branch type predictions */ +#define X86_FEATURE_SRSO_NO (20*32+29) /* "" CPU is not affected by SRSO */ /* * BUG word(s) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 2bc4fa9ad3e5..202a52e42a36 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -60,6 +60,7 @@ #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ #define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */ +#define PRED_CMD_SBPB BIT(7) /* Selective Branch Prediction Barrier */ #define MSR_PPIN_CTL 0x0000004e #define MSR_PPIN 0x0000004f diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index e882fea99e13..84e944a23bab 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -307,11 +307,11 @@ void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature) : "memory"); } +extern u64 x86_pred_cmd; + static inline void indirect_branch_prediction_barrier(void) { - u64 val = PRED_CMD_IBPB; - - alternative_msr_write(MSR_IA32_PRED_CMD, val, X86_FEATURE_USE_IBPB); + alternative_msr_write(MSR_IA32_PRED_CMD, x86_pred_cmd, X86_FEATURE_USE_IBPB); } /* The Intel SPEC CTRL MSR base value cache */ diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index a5239ecbc2c6..8c257db23d90 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -1286,14 +1286,14 @@ bool cpu_has_ibpb_brtype_microcode(void) { u8 fam = boot_cpu_data.x86; - if (fam == 0x17) { - /* Zen1/2 IBPB flushes branch type predictions too. */ + /* Zen1/2 IBPB flushes branch type predictions too. */ + if (fam == 0x17) return boot_cpu_has(X86_FEATURE_AMD_IBPB); - } else if (fam == 0x19) { + /* Poke the MSR bit on Zen3/4 to check its presence. 
*/ + else if (fam == 0x19) + return !wrmsrl_safe(MSR_IA32_PRED_CMD, PRED_CMD_SBPB); + else return false; - } - - return false; } static void zenbleed_check_cpu(void *unused) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 90a91ef0fc64..7bb6750fbd75 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -56,6 +56,9 @@ EXPORT_SYMBOL_GPL(x86_spec_ctrl_base); DEFINE_PER_CPU(u64, x86_spec_ctrl_current); EXPORT_SYMBOL_GPL(x86_spec_ctrl_current); +u64 x86_pred_cmd __ro_after_init = PRED_CMD_IBPB; +EXPORT_SYMBOL_GPL(x86_pred_cmd); + static DEFINE_MUTEX(spec_ctrl_mutex); /* Update SPEC_CTRL MSR and its cached copy unconditionally */ @@ -2284,7 +2287,7 @@ static void __init srso_select_mitigation(void) bool has_microcode; if (!boot_cpu_has_bug(X86_BUG_SRSO) || cpu_mitigations_off()) - return; + goto pred_cmd; /* * The first check is for the kernel running as a guest in order @@ -2297,9 +2300,18 @@ static void __init srso_select_mitigation(void) } else { /* * Enable the synthetic (even if in a real CPUID leaf) - * flag for guests. + * flags for guests. */ setup_force_cpu_cap(X86_FEATURE_IBPB_BRTYPE); + setup_force_cpu_cap(X86_FEATURE_SBPB); + + /* + * Zen1/2 with SMT off aren't vulnerable after the right + * IBPB microcode has been applied. + */ + if ((boot_cpu_data.x86 < 0x19) && + (cpu_smt_control == CPU_SMT_DISABLED)) + setup_force_cpu_cap(X86_FEATURE_SRSO_NO); } switch (srso_cmd) { @@ -2322,16 +2334,20 @@ static void __init srso_select_mitigation(void) srso_mitigation = SRSO_MITIGATION_SAFE_RET; } else { pr_err("WARNING: kernel not compiled with CPU_SRSO.\n"); - return; + goto pred_cmd; } break; default: break; - } pr_info("%s%s\n", srso_strings[srso_mitigation], (has_microcode ? "" : ", no microcode")); + +pred_cmd: + if (boot_cpu_has(X86_FEATURE_SRSO_NO) || + srso_cmd == SRSO_CMD_OFF) + x86_pred_cmd = PRED_CMD_SBPB; } #undef pr_fmt diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index a924608ccf04..f437efbb7e76 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1300,8 +1300,10 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) boot_cpu_has(X86_FEATURE_AVX)) setup_force_cpu_bug(X86_BUG_GDS); - if (cpu_matches(cpu_vuln_blacklist, SRSO)) - setup_force_cpu_bug(X86_BUG_SRSO); + if (!cpu_has(c, X86_FEATURE_SRSO_NO)) { + if (cpu_matches(cpu_vuln_blacklist, SRSO)) + setup_force_cpu_bug(X86_BUG_SRSO); + } if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index de4b171cb76b..8b07e48612d7 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -491,6 +491,9 @@ void kvm_set_cpu_caps(void) !boot_cpu_has(X86_FEATURE_AMD_SSBD)) kvm_cpu_cap_set(X86_FEATURE_VIRT_SSBD); + if (cpu_feature_enabled(X86_FEATURE_SRSO_NO)) + kvm_cpu_cap_set(X86_FEATURE_SRSO_NO); + /* * Hide all SVM features by default, SVM will set the cap bits for * features it emulates and/or exposes for L1. From 4acaea47e3bcb7cd55cc56c7fd4e5fb60eebdada Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Thu, 6 Jul 2023 15:04:35 +0200 Subject: [PATCH 34/40] x86/srso: Add IBPB Upstream commit: 233d6f68b98d480a7c42ebe78c38f79d44741ca9 Add the option to mitigate using IBPB on a kernel entry. Pull in the Retbleed alternative so that the IBPB call from there can be used. Also, if Retbleed mitigation is done using IBPB, the same mitigation can and must be used here. 
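The preceding patch introduced PRED_CMD_SBPB (bit 7) alongside PRED_CMD_IBPB (bit 0) and lets x86_pred_cmd fall back to the lighter selective barrier when the CPU is not affected or the mitigation is off. A sketch of that selection, not part of the series (the later "Tie SBPB bit setting to microcode patch detection" patch additionally requires X86_FEATURE_SBPB to be present):

#include <stdbool.h>
#include <stdio.h>

#define PRED_CMD_IBPB	(1u << 0)	/* full Indirect Branch Prediction Barrier */
#define PRED_CMD_SBPB	(1u << 7)	/* Selective Branch Prediction Barrier */

/*
 * Sketch of the tail of srso_select_mitigation(): the command written to
 * MSR_IA32_PRED_CMD stays the full IBPB unless the CPU is not affected
 * (SRSO_NO) or the mitigation was switched off, and SBPB is available.
 */
static unsigned int pick_pred_cmd(bool srso_no, bool srso_off, bool has_sbpb)
{
	if ((srso_no || srso_off) && has_sbpb)
		return PRED_CMD_SBPB;

	return PRED_CMD_IBPB;
}

int main(void)
{
	printf("affected, mitigated: 0x%x\n", pick_pred_cmd(false, false, true));
	printf("SRSO_NO CPU:         0x%x\n", pick_pred_cmd(true, false, true));
	return 0;
}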
Signed-off-by: Borislav Petkov (AMD) Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 3 ++- arch/x86/kernel/cpu/bugs.c | 23 +++++++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 84e944a23bab..99fbce2c1c7c 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -173,7 +173,8 @@ * where we have a stack but before any RET instruction. */ .macro UNTRAIN_RET -#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) +#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \ + defined(CONFIG_CPU_SRSO) ANNOTATE_UNRET_END ALTERNATIVE_2 "", \ CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET, \ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 7bb6750fbd75..3a996197dc46 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -2245,18 +2245,21 @@ enum srso_mitigation { SRSO_MITIGATION_NONE, SRSO_MITIGATION_MICROCODE, SRSO_MITIGATION_SAFE_RET, + SRSO_MITIGATION_IBPB, }; enum srso_mitigation_cmd { SRSO_CMD_OFF, SRSO_CMD_MICROCODE, SRSO_CMD_SAFE_RET, + SRSO_CMD_IBPB, }; static const char * const srso_strings[] = { [SRSO_MITIGATION_NONE] = "Vulnerable", [SRSO_MITIGATION_MICROCODE] = "Mitigation: microcode", [SRSO_MITIGATION_SAFE_RET] = "Mitigation: safe RET", + [SRSO_MITIGATION_IBPB] = "Mitigation: IBPB", }; static enum srso_mitigation srso_mitigation __ro_after_init = SRSO_MITIGATION_NONE; @@ -2273,6 +2276,8 @@ static int __init srso_parse_cmdline(char *str) srso_cmd = SRSO_CMD_MICROCODE; else if (!strcmp(str, "safe-ret")) srso_cmd = SRSO_CMD_SAFE_RET; + else if (!strcmp(str, "ibpb")) + srso_cmd = SRSO_CMD_IBPB; else pr_err("Ignoring unknown SRSO option (%s).", str); @@ -2314,6 +2319,14 @@ static void __init srso_select_mitigation(void) setup_force_cpu_cap(X86_FEATURE_SRSO_NO); } + if (retbleed_mitigation == RETBLEED_MITIGATION_IBPB) { + if (has_microcode) { + pr_err("Retbleed IBPB mitigation enabled, using same for SRSO\n"); + srso_mitigation = SRSO_MITIGATION_IBPB; + goto pred_cmd; + } + } + switch (srso_cmd) { case SRSO_CMD_OFF: return; @@ -2338,6 +2351,16 @@ static void __init srso_select_mitigation(void) } break; + case SRSO_CMD_IBPB: + if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) { + if (has_microcode) { + setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB); + srso_mitigation = SRSO_MITIGATION_IBPB; + } + } else { + pr_err("WARNING: kernel not compiled with CPU_IBPB_ENTRY.\n"); + goto pred_cmd; + } default: break; } From 384d41bea948a18288aff668b7bdf3b522b7bf73 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Fri, 7 Jul 2023 13:53:41 +0200 Subject: [PATCH 35/40] x86/srso: Add IBPB on VMEXIT Upstream commit: d893832d0e1ef41c72cdae444268c1d64a2be8ad Add the option to flush IBPB only on VMEXIT in order to protect from malicious guests but one otherwise trusts the software that runs on the hypervisor. 
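With this patch the spec_rstack_overflow= option gains its fifth value; the parsing itself is a plain strcmp chain in srso_parse_cmdline(). A standalone sketch of the same mapping over the documented option strings, not part of the series:

#include <stdio.h>
#include <string.h>

enum srso_cmd { SRSO_OFF, SRSO_MICROCODE, SRSO_SAFE_RET, SRSO_IBPB, SRSO_IBPB_ON_VMEXIT };

/*
 * Mirrors srso_parse_cmdline(): map the documented spec_rstack_overflow=
 * strings to a command; unknown input is reported and the default
 * (safe-ret) is kept.
 */
static enum srso_cmd parse_srso(const char *str)
{
	if (!strcmp(str, "off"))
		return SRSO_OFF;
	if (!strcmp(str, "microcode"))
		return SRSO_MICROCODE;
	if (!strcmp(str, "safe-ret"))
		return SRSO_SAFE_RET;
	if (!strcmp(str, "ibpb"))
		return SRSO_IBPB;
	if (!strcmp(str, "ibpb-vmexit"))
		return SRSO_IBPB_ON_VMEXIT;

	fprintf(stderr, "Ignoring unknown SRSO option (%s).\n", str);
	return SRSO_SAFE_RET;
}

int main(void)
{
	printf("ibpb-vmexit -> command %d\n", parse_srso("ibpb-vmexit"));
	return 0;
}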
Signed-off-by: Borislav Petkov (AMD) Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/kernel/cpu/bugs.c | 19 +++++++++++++++++++ arch/x86/kvm/svm/svm.c | 4 +++- arch/x86/kvm/svm/vmenter.S | 3 +++ 4 files changed, 26 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index b9f921a86db3..2c0c495b9cb6 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -303,6 +303,7 @@ #define X86_FEATURE_SRSO (11*32+24) /* "" AMD BTB untrain RETs */ #define X86_FEATURE_SRSO_ALIAS (11*32+25) /* "" AMD BTB untrain RETs through aliasing */ +#define X86_FEATURE_IBPB_ON_VMEXIT (11*32+26) /* "" Issue an IBPB only on VMEXIT */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 3a996197dc46..fea905305394 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -2246,6 +2246,7 @@ enum srso_mitigation { SRSO_MITIGATION_MICROCODE, SRSO_MITIGATION_SAFE_RET, SRSO_MITIGATION_IBPB, + SRSO_MITIGATION_IBPB_ON_VMEXIT, }; enum srso_mitigation_cmd { @@ -2253,6 +2254,7 @@ enum srso_mitigation_cmd { SRSO_CMD_MICROCODE, SRSO_CMD_SAFE_RET, SRSO_CMD_IBPB, + SRSO_CMD_IBPB_ON_VMEXIT, }; static const char * const srso_strings[] = { @@ -2260,6 +2262,7 @@ static const char * const srso_strings[] = { [SRSO_MITIGATION_MICROCODE] = "Mitigation: microcode", [SRSO_MITIGATION_SAFE_RET] = "Mitigation: safe RET", [SRSO_MITIGATION_IBPB] = "Mitigation: IBPB", + [SRSO_MITIGATION_IBPB_ON_VMEXIT] = "Mitigation: IBPB on VMEXIT only" }; static enum srso_mitigation srso_mitigation __ro_after_init = SRSO_MITIGATION_NONE; @@ -2278,6 +2281,8 @@ static int __init srso_parse_cmdline(char *str) srso_cmd = SRSO_CMD_SAFE_RET; else if (!strcmp(str, "ibpb")) srso_cmd = SRSO_CMD_IBPB; + else if (!strcmp(str, "ibpb-vmexit")) + srso_cmd = SRSO_CMD_IBPB_ON_VMEXIT; else pr_err("Ignoring unknown SRSO option (%s).", str); @@ -2361,6 +2366,20 @@ static void __init srso_select_mitigation(void) pr_err("WARNING: kernel not compiled with CPU_IBPB_ENTRY.\n"); goto pred_cmd; } + break; + + case SRSO_CMD_IBPB_ON_VMEXIT: + if (IS_ENABLED(CONFIG_CPU_SRSO)) { + if (!boot_cpu_has(X86_FEATURE_ENTRY_IBPB) && has_microcode) { + setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT); + srso_mitigation = SRSO_MITIGATION_IBPB_ON_VMEXIT; + } + } else { + pr_err("WARNING: kernel not compiled with CPU_SRSO.\n"); + goto pred_cmd; + } + break; + default: break; } diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 7b2b61309d8a..8544bca6b335 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1392,7 +1392,9 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (sd->current_vmcb != svm->vmcb) { sd->current_vmcb = svm->vmcb; - indirect_branch_prediction_barrier(); + + if (!cpu_feature_enabled(X86_FEATURE_IBPB_ON_VMEXIT)) + indirect_branch_prediction_barrier(); } avic_vcpu_load(vcpu, cpu); } diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S index c18d812d00cd..a8859c173258 100644 --- a/arch/x86/kvm/svm/vmenter.S +++ b/arch/x86/kvm/svm/vmenter.S @@ -137,6 +137,9 @@ SYM_FUNC_START(__svm_vcpu_run) */ UNTRAIN_RET + /* SRSO */ + ALTERNATIVE "", "call entry_ibpb", X86_FEATURE_IBPB_ON_VMEXIT + /* * Clear all general purpose registers except RSP and RAX to prevent * speculative use of the guest's values, even those that 
are reloaded From 4873939c0e1cec2fd04a38ddf2c03a05e4eeb7ef Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 28 Jul 2023 17:28:43 -0500 Subject: [PATCH 36/40] x86/srso: Fix return thunks in generated code Upstream commit: 238ec850b95a02dcdff3edc86781aa913549282f Set X86_FEATURE_RETHUNK when enabling the SRSO mitigation so that generated code (e.g., ftrace, static call, eBPF) generates "jmp __x86_return_thunk" instead of RET. [ bp: Add a comment. ] Fixes: fb3bd914b3ec ("x86/srso: Add a Speculative RAS Overflow mitigation") Signed-off-by: Josh Poimboeuf Signed-off-by: Borislav Petkov (AMD) Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/alternative.c | 4 +--- arch/x86/kernel/cpu/bugs.c | 6 ++++++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 50b4047991ae..e6e63a9d27cb 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -678,9 +678,7 @@ static int patch_return(void *addr, struct insn *insn, u8 *bytes) { int i = 0; - if (cpu_feature_enabled(X86_FEATURE_RETHUNK) || - cpu_feature_enabled(X86_FEATURE_SRSO) || - cpu_feature_enabled(X86_FEATURE_SRSO_ALIAS)) + if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) return -1; bytes[i++] = RET_INSN_OPCODE; diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index fea905305394..99bca21a3e62 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -2345,6 +2345,12 @@ static void __init srso_select_mitigation(void) case SRSO_CMD_SAFE_RET: if (IS_ENABLED(CONFIG_CPU_SRSO)) { + /* + * Enable the return thunk for generated code + * like ftrace, static_call, etc. + */ + setup_force_cpu_cap(X86_FEATURE_RETHUNK); + if (boot_cpu_data.x86 == 0x19) setup_force_cpu_cap(X86_FEATURE_SRSO_ALIAS); else From 8457fb5740b14311a8941044ff4eb5a3945de9b2 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Mon, 7 Aug 2023 10:46:04 +0200 Subject: [PATCH 37/40] x86/srso: Tie SBPB bit setting to microcode patch detection commit 5a15d8348881e9371afdf9f5357a135489496955 upstream. The SBPB bit in MSR_IA32_PRED_CMD is supported only after a microcode patch has been applied so set X86_FEATURE_SBPB only then. Otherwise, guests would attempt to set that bit and #GP on the MSR write. While at it, make SMT detection more robust as some guests - depending on how and what CPUID leafs their report - lead to cpu_smt_control getting set to CPU_SMT_NOT_SUPPORTED but SRSO_NO should be set for any guest incarnation where one simply cannot do SMT, for whatever reason. Fixes: fb3bd914b3ec ("x86/srso: Add a Speculative RAS Overflow mitigation") Reported-by: Konrad Rzeszutek Wilk Reported-by: Salvatore Bonaccorso Signed-off-by: Borislav Petkov (AMD) Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/amd.c | 19 ++++++++++++------- arch/x86/kernel/cpu/bugs.c | 7 +++---- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 8c257db23d90..ffecf4e9444e 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -1284,16 +1284,21 @@ void set_dr_addr_mask(unsigned long mask, int dr) bool cpu_has_ibpb_brtype_microcode(void) { - u8 fam = boot_cpu_data.x86; - + switch (boot_cpu_data.x86) { /* Zen1/2 IBPB flushes branch type predictions too. */ - if (fam == 0x17) + case 0x17: return boot_cpu_has(X86_FEATURE_AMD_IBPB); - /* Poke the MSR bit on Zen3/4 to check its presence. 
*/ - else if (fam == 0x19) - return !wrmsrl_safe(MSR_IA32_PRED_CMD, PRED_CMD_SBPB); - else + case 0x19: + /* Poke the MSR bit on Zen3/4 to check its presence. */ + if (!wrmsrl_safe(MSR_IA32_PRED_CMD, PRED_CMD_SBPB)) { + setup_force_cpu_cap(X86_FEATURE_SBPB); + return true; + } else { + return false; + } + default: return false; + } } static void zenbleed_check_cpu(void *unused) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 99bca21a3e62..d31639e3ce28 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -2313,14 +2313,13 @@ static void __init srso_select_mitigation(void) * flags for guests. */ setup_force_cpu_cap(X86_FEATURE_IBPB_BRTYPE); - setup_force_cpu_cap(X86_FEATURE_SBPB); /* * Zen1/2 with SMT off aren't vulnerable after the right * IBPB microcode has been applied. */ if ((boot_cpu_data.x86 < 0x19) && - (cpu_smt_control == CPU_SMT_DISABLED)) + (!cpu_smt_possible() || (cpu_smt_control == CPU_SMT_DISABLED))) setup_force_cpu_cap(X86_FEATURE_SRSO_NO); } @@ -2393,8 +2392,8 @@ static void __init srso_select_mitigation(void) pr_info("%s%s\n", srso_strings[srso_mitigation], (has_microcode ? "" : ", no microcode")); pred_cmd: - if (boot_cpu_has(X86_FEATURE_SRSO_NO) || - srso_cmd == SRSO_CMD_OFF) + if ((boot_cpu_has(X86_FEATURE_SRSO_NO) || srso_cmd == SRSO_CMD_OFF) && + boot_cpu_has(X86_FEATURE_SBPB)) x86_pred_cmd = PRED_CMD_SBPB; } From f9167a2d6b943f30743de6ff8163d1981c34f9a9 Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Thu, 3 Aug 2023 08:41:22 +0200 Subject: [PATCH 38/40] xen/netback: Fix buffer overrun triggered by unusual packet commit 534fc31d09b706a16d83533e16b5dc855caf7576 upstream. It is possible that a guest can send a packet that contains a head + 18 slots and yet has a len <= XEN_NETBACK_TX_COPY_LEN. This causes nr_slots to underflow in xenvif_get_requests() which then causes the subsequent loop's termination condition to be wrong, causing a buffer overrun of queue->tx_map_ops. Rework the code to account for the extra frag_overflow slots. This is CVE-2023-34319 / XSA-432. 
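The failure mode here is the classic unsigned wrap: when the copy path consumes more slots than the initial nr_slots accounting (which did not include frag_overflow) covered, the counter wraps around and the follow-on mapping loop can run far past queue->tx_map_ops. A self-contained illustration with made-up numbers, not the actual netback code:

#include <stdio.h>

int main(void)
{
	/* Made-up figures for illustration only. */
	unsigned int nr_frags = 1;		/* slots counted up front          */
	unsigned int copy_slots = 3;		/* slots consumed as copy ops      */
	unsigned int nr_slots = nr_frags + 1;	/* old code: frag_overflow missing */

	/*
	 * Subtracting more than the counter holds wraps the unsigned value,
	 * so a later "< nr_slots" loop bound becomes effectively unbounded.
	 */
	nr_slots -= copy_slots;
	printf("nr_slots after underflow: %u\n", nr_slots);
	return 0;
}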
Fixes: ad7f402ae4f4 ("xen/netback: Ensure protocol headers don't fall in the non-linear area") Signed-off-by: Ross Lagerwall Reviewed-by: Paul Durrant Reviewed-by: Wei Liu Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman --- drivers/net/xen-netback/netback.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 379ac9ca60b7..1c366ddf62bc 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -396,7 +396,7 @@ static void xenvif_get_requests(struct xenvif_queue *queue, struct gnttab_map_grant_ref *gop = queue->tx_map_ops + *map_ops; struct xen_netif_tx_request *txp = first; - nr_slots = shinfo->nr_frags + 1; + nr_slots = shinfo->nr_frags + frag_overflow + 1; copy_count(skb) = 0; XENVIF_TX_CB(skb)->split_mask = 0; @@ -462,8 +462,8 @@ static void xenvif_get_requests(struct xenvif_queue *queue, } } - for (shinfo->nr_frags = 0; shinfo->nr_frags < nr_slots; - shinfo->nr_frags++, gop++) { + for (shinfo->nr_frags = 0; nr_slots > 0 && shinfo->nr_frags < MAX_SKB_FRAGS; + shinfo->nr_frags++, gop++, nr_slots--) { index = pending_index(queue->pending_cons++); pending_idx = queue->pending_ring[index]; xenvif_tx_create_map_op(queue, pending_idx, txp, @@ -476,12 +476,12 @@ static void xenvif_get_requests(struct xenvif_queue *queue, txp++; } - if (frag_overflow) { + if (nr_slots > 0) { shinfo = skb_shinfo(nskb); frags = shinfo->frags; - for (shinfo->nr_frags = 0; shinfo->nr_frags < frag_overflow; + for (shinfo->nr_frags = 0; shinfo->nr_frags < nr_slots; shinfo->nr_frags++, txp++, gop++) { index = pending_index(queue->pending_cons++); pending_idx = queue->pending_ring[index]; @@ -492,6 +492,11 @@ static void xenvif_get_requests(struct xenvif_queue *queue, } skb_shinfo(skb)->frag_list = nskb; + } else if (nskb) { + /* A frag_list skb was allocated but it is no longer needed + * because enough slots were converted to copy ops above. + */ + kfree_skb(nskb); } (*copy_ops) = cop - queue->tx_copy_ops; From 2ae9a73819a79c68cf9d411d164d3e417a118e44 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 8 Aug 2023 19:20:48 +0200 Subject: [PATCH 39/40] x86: fix backwards merge of GDS/SRSO bit Stable-tree-only change. Due to the way the GDS and SRSO patches flowed into the stable tree, it was a 50% chance that the merge of the which value GDS and SRSO should be. Of course, I lost that bet, and chose the opposite of what Linus chose in commit 64094e7e3118 ("Merge tag 'gds-for-linus-2023-08-01' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip") Fix this up by switching the values to match what is now in Linus's tree as that is the correct value to mirror. 
Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/common.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f437efbb7e76..c1f236030912 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1133,10 +1133,12 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { #define MMIO_SBDS BIT(2) /* CPU is affected by RETbleed, speculating where you would not expect it */ #define RETBLEED BIT(3) -/* CPU is affected by GDS */ -#define GDS BIT(4) +/* CPU is affected by SMT (cross-thread) return predictions */ +#define SMT_RSB BIT(4) /* CPU is affected by SRSO */ #define SRSO BIT(5) +/* CPU is affected by GDS */ +#define GDS BIT(6) static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS), From de5f63612d1631c89e72ecffc089f948392cf24a Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 8 Aug 2023 19:57:41 +0200 Subject: [PATCH 40/40] Linux 5.10.189 Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 7f9e1667d87d..36047436fae3 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 10 -SUBLEVEL = 188 +SUBLEVEL = 189 EXTRAVERSION = NAME = Dare mighty things