Merge 99ca0edb41 ("Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux") into android-mainline

Steps on the way to 5.12-rc1

Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
Change-Id: I159994a2fce809f28732e1867240971937b3df31
This commit is contained in:
Greg Kroah-Hartman 2021-03-03 15:31:09 +01:00
commit 3bbd4974bd
116 changed files with 2506 additions and 1464 deletions

View File

@ -373,6 +373,12 @@
arcrimi= [HW,NET] ARCnet - "RIM I" (entirely mem-mapped) cards
Format: <io>,<irq>,<nodeID>
arm64.nobti [ARM64] Unconditionally disable Branch Target
Identification support
arm64.nopauth [ARM64] Unconditionally disable Pointer Authentication
support
ataflop= [HW,M68k]
atarimouse= [HW,MOUSE] Atari Mouse
@ -945,12 +951,6 @@
causing system reset or hang due to sending
INIT from AP to BSP.
perf_v4_pmi= [X86,INTEL]
Format: <bool>
Disable Intel PMU counter freezing feature.
The feature only exists starting from
Arch Perfmon v4 (Skylake and newer).
disable_ddw [PPC/PSERIES]
Disable Dynamic DMA Window support. Use this
to workaround buggy firmware.
@ -2262,6 +2262,9 @@
kvm-arm.mode=
[KVM,ARM] Select one of KVM/arm64's modes of operation.
nvhe: Standard nVHE-based mode, without support for
protected guests.
protected: nVHE-based mode with support for guests whose
state is kept private from the host.
Not valid if the kernel is running in EL2.

View File

@ -17,7 +17,7 @@ PMU events
----------
The PMU driver registers a single PMU device for the whole interconnect,
see /sys/bus/event_source/devices/arm_cmn. Multi-chip systems may link
see /sys/bus/event_source/devices/arm_cmn_0. Multi-chip systems may link
more than one CMN together via external CCIX links - in this situation,
each mesh counts its own events entirely independently, and additional
PMU devices will be named arm_cmn_{1..n}.

View File

@ -43,6 +43,7 @@ properties:
- arm,cortex-a75-pmu
- arm,cortex-a76-pmu
- arm,cortex-a77-pmu
- arm,cortex-a78-pmu
- arm,neoverse-e1-pmu
- arm,neoverse-n1-pmu
- brcm,vulcan-pmu

View File

@ -58,6 +58,15 @@ these functions (see arch/arm{,64}/include/asm/virt.h):
into place (arm64 only), and jump to the restart address while at HYP/EL2.
This hypercall is not expected to return to its caller.
* ::
x0 = HVC_VHE_RESTART (arm64 only)
Attempt to upgrade the kernel's exception level from EL1 to EL2 by enabling
the VHE mode. This is conditioned by the CPU supporting VHE, the EL2 MMU
being off, and VHE not being disabled by any other means (command line
option, for example).
Any other value of r0/x0 triggers a hypervisor-specific handling,
which is not documented here.

View File

@ -0,0 +1,10 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_ARCHRANDOM_H
#define _ASM_ARCHRANDOM_H
static inline bool __init smccc_probe_trng(void)
{
return false;
}
#endif /* _ASM_ARCHRANDOM_H */

View File

@ -95,8 +95,10 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
for (i = 0; i < count; i++) {
if (map_ops[i].status)
continue;
set_phys_to_machine(map_ops[i].host_addr >> XEN_PAGE_SHIFT,
map_ops[i].dev_bus_addr >> XEN_PAGE_SHIFT);
if (unlikely(!set_phys_to_machine(map_ops[i].host_addr >> XEN_PAGE_SHIFT,
map_ops[i].dev_bus_addr >> XEN_PAGE_SHIFT))) {
return -ENOMEM;
}
}
return 0;

View File

@ -522,7 +522,7 @@ config ARM64_ERRATUM_1024718
help
This option adds a workaround for ARM Cortex-A55 Erratum 1024718.
Affected Cortex-A55 cores (r0p0, r0p1, r1p0) could cause incorrect
Affected Cortex-A55 cores (all revisions) could cause incorrect
update of the hardware dirty bit when the DBM/AP bits are updated
without a break-before-make. The workaround is to disable the usage
of hardware DBM locally on the affected cores. CPUs not affected by
@ -952,8 +952,9 @@ choice
that is selected here.
config CPU_BIG_ENDIAN
bool "Build big-endian kernel"
help
bool "Build big-endian kernel"
depends on !LD_IS_LLD || LLD_VERSION >= 130000
help
Say Y if you plan on running a kernel with a big-endian userspace.
config CPU_LITTLE_ENDIAN
@ -1132,6 +1133,10 @@ config CRASH_DUMP
For more details see Documentation/admin-guide/kdump/kdump.rst
config TRANS_TABLE
def_bool y
depends on HIBERNATION
config XEN_DOM0
def_bool y
depends on XEN

View File

@ -188,10 +188,12 @@ ifeq ($(KBUILD_EXTMOD),)
# this hack.
prepare: vdso_prepare
vdso_prepare: prepare0
$(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso include/generated/vdso-offsets.h
$(if $(CONFIG_COMPAT_VDSO),$(Q)$(MAKE) \
$(build)=arch/arm64/kernel/vdso32 \
include/generated/vdso32-offsets.h)
$(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso \
include/generated/vdso-offsets.h arch/arm64/kernel/vdso/vdso.so
ifdef CONFIG_COMPAT_VDSO
$(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso32 \
include/generated/vdso32-offsets.h arch/arm64/kernel/vdso32/vdso.so
endif
endif
define archhelp

View File

@ -4,10 +4,26 @@
#ifdef CONFIG_ARCH_RANDOM
#include <linux/arm-smccc.h>
#include <linux/bug.h>
#include <linux/kernel.h>
#include <asm/cpufeature.h>
#define ARM_SMCCC_TRNG_MIN_VERSION 0x10000UL
extern bool smccc_trng_available;
static inline bool __init smccc_probe_trng(void)
{
struct arm_smccc_res res;
arm_smccc_1_1_invoke(ARM_SMCCC_TRNG_VERSION, &res);
if ((s32)res.a0 < 0)
return false;
return res.a0 >= ARM_SMCCC_TRNG_MIN_VERSION;
}
static inline bool __arm64_rndr(unsigned long *v)
{
bool ok;
@ -38,26 +54,55 @@ static inline bool __must_check arch_get_random_int(unsigned int *v)
static inline bool __must_check arch_get_random_seed_long(unsigned long *v)
{
struct arm_smccc_res res;
/*
* We prefer the SMCCC call, since its semantics (return actual
* hardware backed entropy) is closer to the idea behind this
* function here than what even the RNDRSS register provides
* (the output of a pseudo RNG freshly seeded by a TRNG).
*/
if (smccc_trng_available) {
arm_smccc_1_1_invoke(ARM_SMCCC_TRNG_RND64, 64, &res);
if ((int)res.a0 >= 0) {
*v = res.a3;
return true;
}
}
/*
* Only support the generic interface after we have detected
* the system wide capability, avoiding complexity with the
* cpufeature code and with potential scheduling between CPUs
* with and without the feature.
*/
if (!cpus_have_const_cap(ARM64_HAS_RNG))
return false;
if (cpus_have_const_cap(ARM64_HAS_RNG) && __arm64_rndr(v))
return true;
return __arm64_rndr(v);
return false;
}
static inline bool __must_check arch_get_random_seed_int(unsigned int *v)
{
struct arm_smccc_res res;
unsigned long val;
bool ok = arch_get_random_seed_long(&val);
*v = val;
return ok;
if (smccc_trng_available) {
arm_smccc_1_1_invoke(ARM_SMCCC_TRNG_RND64, 32, &res);
if ((int)res.a0 >= 0) {
*v = res.a3 & GENMASK(31, 0);
return true;
}
}
if (cpus_have_const_cap(ARM64_HAS_RNG)) {
if (__arm64_rndr(&val)) {
*v = val;
return true;
}
}
return false;
}
static inline bool __init __early_cpu_has_rndr(void)
@ -72,12 +117,29 @@ arch_get_random_seed_long_early(unsigned long *v)
{
WARN_ON(system_state != SYSTEM_BOOTING);
if (!__early_cpu_has_rndr())
return false;
if (smccc_trng_available) {
struct arm_smccc_res res;
return __arm64_rndr(v);
arm_smccc_1_1_invoke(ARM_SMCCC_TRNG_RND64, 64, &res);
if ((int)res.a0 >= 0) {
*v = res.a3;
return true;
}
}
if (__early_cpu_has_rndr() && __arm64_rndr(v))
return true;
return false;
}
#define arch_get_random_seed_long_early arch_get_random_seed_long_early
#else /* !CONFIG_ARCH_RANDOM */
static inline bool __init smccc_probe_trng(void)
{
return false;
}
#endif /* CONFIG_ARCH_RANDOM */
#endif /* _ASM_ARCHRANDOM_H */

View File

@ -15,10 +15,10 @@
.macro __uaccess_ttbr0_disable, tmp1
mrs \tmp1, ttbr1_el1 // swapper_pg_dir
bic \tmp1, \tmp1, #TTBR_ASID_MASK
sub \tmp1, \tmp1, #PAGE_SIZE // reserved_pg_dir just before swapper_pg_dir
sub \tmp1, \tmp1, #RESERVED_SWAPPER_OFFSET // reserved_pg_dir
msr ttbr0_el1, \tmp1 // set reserved TTBR0_EL1
isb
add \tmp1, \tmp1, #PAGE_SIZE
add \tmp1, \tmp1, #RESERVED_SWAPPER_OFFSET
msr ttbr1_el1, \tmp1 // set reserved ASID
isb
.endm

View File

@ -675,6 +675,23 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU
.endif
.endm
/*
* Set SCTLR_EL1 to the passed value, and invalidate the local icache
* in the process. This is called when setting the MMU on.
*/
.macro set_sctlr_el1, reg
msr sctlr_el1, \reg
isb
/*
* Invalidate the local I-cache so that any instructions fetched
* speculatively from the PoC are discarded, since they may have
* been dynamically patched at the PoU.
*/
ic iallu
dsb nsh
isb
.endm
/*
* Check whether to yield to another runnable task from kernel mode NEON code
* (which runs with preemption disabled).
@ -745,6 +762,22 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU
.Lyield_out_\@ :
.endm
/*
* Check whether preempt-disabled code should yield as soon as it
* is able. This is the case if re-enabling preemption a single
* time results in a preempt count of zero, and the TIF_NEED_RESCHED
* flag is set. (Note that the latter is stored negated in the
* top word of the thread_info::preempt_count field)
*/
.macro cond_yield, lbl:req, tmp:req
#ifdef CONFIG_PREEMPTION
get_current_task \tmp
ldr \tmp, [\tmp, #TSK_TI_PREEMPT]
sub \tmp, \tmp, #PREEMPT_DISABLE_OFFSET
cbz \tmp, \lbl
#endif
.endm
/*
* This macro emits a program property note section identifying
* architecture features which require special handling, mainly for

View File

@ -30,11 +30,6 @@
* the implementation assumes non-aliasing VIPT D-cache and (aliasing)
* VIPT I-cache.
*
* flush_cache_mm(mm)
*
* Clean and invalidate all user space cache entries
* before a change of page tables.
*
* flush_icache_range(start, end)
*
* Ensure coherency between the I-cache and the D-cache in the

View File

@ -63,6 +63,11 @@ struct arm64_ftr_bits {
s64 safe_val; /* safe value for FTR_EXACT features */
};
struct arm64_ftr_override {
u64 val;
u64 mask;
};
/*
* @arm64_ftr_reg - Feature register
* @strict_mask Bits which should match across all CPUs for sanity.
@ -74,6 +79,7 @@ struct arm64_ftr_reg {
u64 user_mask;
u64 sys_val;
u64 user_val;
struct arm64_ftr_override *override;
const struct arm64_ftr_bits *ftr_bits;
};
@ -600,6 +606,7 @@ void __init setup_cpu_features(void);
void check_local_cpu_capabilities(void);
u64 read_sanitised_ftr_reg(u32 id);
u64 __read_sysreg_by_encoding(u32 sys_id);
static inline bool cpu_supports_mixed_endian_el0(void)
{
@ -811,6 +818,10 @@ static inline unsigned int get_vmid_bits(u64 mmfr1)
return 8;
}
extern struct arm64_ftr_override id_aa64mmfr1_override;
extern struct arm64_ftr_override id_aa64pfr1_override;
extern struct arm64_ftr_override id_aa64isar1_override;
u32 get_kvm_ipa_limit(void);
void dump_cpu_features(void);

View File

@ -32,46 +32,39 @@
* to transparently mess with the EL0 bits via CNTKCTL_EL1 access in
* EL2.
*/
.macro __init_el2_timers mode
.ifeqs "\mode", "nvhe"
.macro __init_el2_timers
mrs x0, cnthctl_el2
orr x0, x0, #3 // Enable EL1 physical timers
msr cnthctl_el2, x0
.endif
msr cntvoff_el2, xzr // Clear virtual offset
.endm
.macro __init_el2_debug mode
.macro __init_el2_debug
mrs x1, id_aa64dfr0_el1
sbfx x0, x1, #ID_AA64DFR0_PMUVER_SHIFT, #4
cmp x0, #1
b.lt 1f // Skip if no PMU present
b.lt .Lskip_pmu_\@ // Skip if no PMU present
mrs x0, pmcr_el0 // Disable debug access traps
ubfx x0, x0, #11, #5 // to EL2 and allow access to
1:
.Lskip_pmu_\@:
csel x2, xzr, x0, lt // all PMU counters from EL1
/* Statistical profiling */
ubfx x0, x1, #ID_AA64DFR0_PMSVER_SHIFT, #4
cbz x0, 3f // Skip if SPE not present
cbz x0, .Lskip_spe_\@ // Skip if SPE not present
.ifeqs "\mode", "nvhe"
mrs_s x0, SYS_PMBIDR_EL1 // If SPE available at EL2,
and x0, x0, #(1 << SYS_PMBIDR_EL1_P_SHIFT)
cbnz x0, 2f // then permit sampling of physical
cbnz x0, .Lskip_spe_el2_\@ // then permit sampling of physical
mov x0, #(1 << SYS_PMSCR_EL2_PCT_SHIFT | \
1 << SYS_PMSCR_EL2_PA_SHIFT)
msr_s SYS_PMSCR_EL2, x0 // addresses and physical counter
2:
.Lskip_spe_el2_\@:
mov x0, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT)
orr x2, x2, x0 // If we don't have VHE, then
// use EL1&0 translation.
.else
orr x2, x2, #MDCR_EL2_TPMS // For VHE, use EL2 translation
// and disable access from EL1
.endif
3:
.Lskip_spe_\@:
msr mdcr_el2, x2 // Configure debug traps
.endm
@ -79,9 +72,9 @@
.macro __init_el2_lor
mrs x1, id_aa64mmfr1_el1
ubfx x0, x1, #ID_AA64MMFR1_LOR_SHIFT, 4
cbz x0, 1f
cbz x0, .Lskip_lor_\@
msr_s SYS_LORC_EL1, xzr
1:
.Lskip_lor_\@:
.endm
/* Stage-2 translation */
@ -93,7 +86,7 @@
.macro __init_el2_gicv3
mrs x0, id_aa64pfr0_el1
ubfx x0, x0, #ID_AA64PFR0_GIC_SHIFT, #4
cbz x0, 1f
cbz x0, .Lskip_gicv3_\@
mrs_s x0, SYS_ICC_SRE_EL2
orr x0, x0, #ICC_SRE_EL2_SRE // Set ICC_SRE_EL2.SRE==1
@ -103,7 +96,7 @@
mrs_s x0, SYS_ICC_SRE_EL2 // Read SRE back,
tbz x0, #0, 1f // and check that it sticks
msr_s SYS_ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to defaults
1:
.Lskip_gicv3_\@:
.endm
.macro __init_el2_hstr
@ -128,14 +121,14 @@
.macro __init_el2_nvhe_sve
mrs x1, id_aa64pfr0_el1
ubfx x1, x1, #ID_AA64PFR0_SVE_SHIFT, #4
cbz x1, 1f
cbz x1, .Lskip_sve_\@
bic x0, x0, #CPTR_EL2_TZ // Also disable SVE traps
msr cptr_el2, x0 // Disable copro. traps to EL2
isb
mov x1, #ZCR_ELx_LEN_MASK // SVE: Enable full vector
msr_s SYS_ZCR_EL2, x1 // length for EL1.
1:
.Lskip_sve_\@:
.endm
.macro __init_el2_nvhe_prepare_eret
@ -145,37 +138,24 @@
/**
* Initialize EL2 registers to sane values. This should be called early on all
* cores that were booted in EL2.
* cores that were booted in EL2. Note that everything gets initialised as
* if VHE was not evailable. The kernel context will be upgraded to VHE
* if possible later on in the boot process
*
* Regs: x0, x1 and x2 are clobbered.
*/
.macro init_el2_state mode
.ifnes "\mode", "vhe"
.ifnes "\mode", "nvhe"
.error "Invalid 'mode' argument"
.endif
.endif
.macro init_el2_state
__init_el2_sctlr
__init_el2_timers \mode
__init_el2_debug \mode
__init_el2_timers
__init_el2_debug
__init_el2_lor
__init_el2_stage2
__init_el2_gicv3
__init_el2_hstr
/*
* When VHE is not in use, early init of EL2 needs to be done here.
* When VHE _is_ in use, EL1 will not be used in the host and
* requires no configuration, and all non-hyp-specific EL2 setup
* will be done via the _EL1 system register aliases in __cpu_setup.
*/
.ifeqs "\mode", "nvhe"
__init_el2_nvhe_idregs
__init_el2_nvhe_cptr
__init_el2_nvhe_sve
__init_el2_nvhe_prepare_eret
.endif
.endm
#endif /* __ARM_KVM_INIT_H__ */

View File

@ -90,18 +90,19 @@ static inline void crash_prepare_suspend(void) {}
static inline void crash_post_resume(void) {}
#endif
#ifdef CONFIG_KEXEC_FILE
#define ARCH_HAS_KIMAGE_ARCH
struct kimage_arch {
void *dtb;
unsigned long dtb_mem;
phys_addr_t dtb_mem;
phys_addr_t kern_reloc;
/* Core ELF header buffer */
void *elf_headers;
unsigned long elf_headers_mem;
unsigned long elf_headers_sz;
};
#ifdef CONFIG_KEXEC_FILE
extern const struct kexec_file_ops kexec_image_ops;
struct kimage;

View File

@ -199,12 +199,6 @@ extern void __vgic_v3_init_lrs(void);
extern u32 __kvm_get_mdcr_el2(void);
#if defined(GCC_VERSION) && GCC_VERSION < 50000
#define SYM_CONSTRAINT "i"
#else
#define SYM_CONSTRAINT "S"
#endif
/*
* Obtain the PC-relative address of a kernel symbol
* s: symbol
@ -221,7 +215,7 @@ extern u32 __kvm_get_mdcr_el2(void);
typeof(s) *addr; \
asm("adrp %0, %1\n" \
"add %0, %0, :lo12:%1\n" \
: "=r" (addr) : SYM_CONSTRAINT (&s)); \
: "=r" (addr) : "S" (&s)); \
addr; \
})

View File

@ -159,6 +159,18 @@
#define IOREMAP_MAX_ORDER (PMD_SHIFT)
#endif
/*
* Open-coded (swapper_pg_dir - reserved_pg_dir) as this cannot be calculated
* until link time.
*/
#define RESERVED_SWAPPER_OFFSET (PAGE_SIZE)
/*
* Open-coded (swapper_pg_dir - tramp_pg_dir) as this cannot be calculated
* until link time.
*/
#define TRAMP_SWAPPER_OFFSET (2 * PAGE_SIZE)
#ifndef __ASSEMBLY__
#include <linux/bitops.h>

View File

@ -81,16 +81,15 @@ static inline bool __cpu_uses_extended_idmap_level(void)
}
/*
* Set TCR.T0SZ to its default value (based on VA_BITS)
* Ensure TCR.T0SZ is set to the provided value.
*/
static inline void __cpu_set_tcr_t0sz(unsigned long t0sz)
{
unsigned long tcr;
unsigned long tcr = read_sysreg(tcr_el1);
if (!__cpu_uses_extended_idmap())
if ((tcr & TCR_T0SZ_MASK) >> TCR_T0SZ_OFFSET == t0sz)
return;
tcr = read_sysreg(tcr_el1);
tcr &= ~TCR_T0SZ_MASK;
tcr |= t0sz << TCR_T0SZ_OFFSET;
write_sysreg(tcr, tcr_el1);

View File

@ -980,7 +980,17 @@ static inline bool arch_faults_on_old_pte(void)
return !cpu_has_hw_af();
}
#define arch_faults_on_old_pte arch_faults_on_old_pte
#define arch_faults_on_old_pte arch_faults_on_old_pte
/*
* Experimentally, it's cheap to set the access flag in hardware and we
* benefit from prefaulting mappings as 'old' to start with.
*/
static inline bool arch_wants_old_prefaulted_pte(void)
{
return !arch_faults_on_old_pte();
}
#define arch_wants_old_prefaulted_pte arch_wants_old_prefaulted_pte
#endif /* !__ASSEMBLY__ */

View File

@ -76,6 +76,15 @@ static inline unsigned long ptrauth_strip_insn_pac(unsigned long ptr)
return ptrauth_clear_pac(ptr);
}
static __always_inline void ptrauth_enable(void)
{
if (!system_supports_address_auth())
return;
sysreg_clear_set(sctlr_el1, 0, (SCTLR_ELx_ENIA | SCTLR_ELx_ENIB |
SCTLR_ELx_ENDA | SCTLR_ELx_ENDB));
isb();
}
#define ptrauth_thread_init_user(tsk) \
ptrauth_keys_init_user(&(tsk)->thread.keys_user)
#define ptrauth_thread_init_kernel(tsk) \
@ -84,6 +93,7 @@ static inline unsigned long ptrauth_strip_insn_pac(unsigned long ptr)
ptrauth_keys_switch_kernel(&(tsk)->thread.keys_kernel)
#else /* CONFIG_ARM64_PTR_AUTH */
#define ptrauth_enable()
#define ptrauth_prctl_reset_keys(tsk, arg) (-EINVAL)
#define ptrauth_strip_insn_pac(lr) (lr)
#define ptrauth_thread_init_user(tsk)

View File

@ -0,0 +1,11 @@
// SPDX-License-Identifier: GPL-2.0
#ifndef __ARM64_ASM_SETUP_H
#define __ARM64_ASM_SETUP_H
#include <uapi/asm/setup.h>
void *get_early_fdt_ptr(void);
void early_fdt_map(u64 dt_phys);
#endif

View File

@ -7,7 +7,26 @@
#ifdef CONFIG_SPARSEMEM
#define MAX_PHYSMEM_BITS CONFIG_ARM64_PA_BITS
#define SECTION_SIZE_BITS 30
#endif
/*
* Section size must be at least 512MB for 64K base
* page size config. Otherwise it will be less than
* (MAX_ORDER - 1) and the build process will fail.
*/
#ifdef CONFIG_ARM64_64K_PAGES
#define SECTION_SIZE_BITS 29
#else
/*
* Section size must be at least 128MB for 4K base
* page size config. Otherwise PMD based huge page
* entries could not be created for vmemmap mappings.
* 16K follows 4K for simplicity.
*/
#define SECTION_SIZE_BITS 27
#endif /* CONFIG_ARM64_64K_PAGES */
#endif /* CONFIG_SPARSEMEM*/
#endif

View File

@ -41,6 +41,7 @@ static __always_inline void boot_init_stack_canary(void)
#endif
ptrauth_thread_init_kernel(current);
ptrauth_thread_switch_kernel(current);
ptrauth_enable();
}
#endif /* _ASM_STACKPROTECTOR_H */

View File

@ -291,7 +291,11 @@
#define SYS_PMSFCR_EL1_ST_SHIFT 18
#define SYS_PMSEVFR_EL1 sys_reg(3, 0, 9, 9, 5)
#define SYS_PMSEVFR_EL1_RES0 0x0000ffff00ff0f55UL
#define SYS_PMSEVFR_EL1_RES0_8_2 \
(GENMASK_ULL(47, 32) | GENMASK_ULL(23, 16) | GENMASK_ULL(11, 8) |\
BIT_ULL(6) | BIT_ULL(4) | BIT_ULL(2) | BIT_ULL(0))
#define SYS_PMSEVFR_EL1_RES0_8_3 \
(SYS_PMSEVFR_EL1_RES0_8_2 & ~(BIT_ULL(18) | BIT_ULL(17) | BIT_ULL(11)))
#define SYS_PMSLATFR_EL1 sys_reg(3, 0, 9, 9, 6)
#define SYS_PMSLATFR_EL1_MINLAT_SHIFT 0
@ -844,6 +848,9 @@
#define ID_AA64DFR0_PMUVER_8_5 0x6
#define ID_AA64DFR0_PMUVER_IMP_DEF 0xf
#define ID_AA64DFR0_PMSVER_8_2 0x1
#define ID_AA64DFR0_PMSVER_8_3 0x2
#define ID_DFR0_PERFMON_SHIFT 24
#define ID_DFR0_PERFMON_8_1 0x4

View File

@ -0,0 +1,39 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2020, Microsoft Corporation.
* Pavel Tatashin <pasha.tatashin@soleen.com>
*/
#ifndef _ASM_TRANS_TABLE_H
#define _ASM_TRANS_TABLE_H
#include <linux/bits.h>
#include <linux/types.h>
#include <asm/pgtable-types.h>
/*
* trans_alloc_page
* - Allocator that should return exactly one zeroed page, if this
* allocator fails, trans_pgd_create_copy() and trans_pgd_map_page()
* return -ENOMEM error.
*
* trans_alloc_arg
* - Passed to trans_alloc_page as an argument
*/
struct trans_pgd_info {
void * (*trans_alloc_page)(void *arg);
void *trans_alloc_arg;
};
int trans_pgd_create_copy(struct trans_pgd_info *info, pgd_t **trans_pgd,
unsigned long start, unsigned long end);
int trans_pgd_map_page(struct trans_pgd_info *info, pgd_t *trans_pgd,
void *page, unsigned long dst_addr, pgprot_t pgprot);
int trans_pgd_idmap_page(struct trans_pgd_info *info, phys_addr_t *trans_ttbr0,
unsigned long *t0sz, void *page);
#endif /* _ASM_TRANS_TABLE_H */

View File

@ -87,7 +87,7 @@ static inline void __uaccess_ttbr0_disable(void)
ttbr = read_sysreg(ttbr1_el1);
ttbr &= ~TTBR_ASID_MASK;
/* reserved_pg_dir placed before swapper_pg_dir */
write_sysreg(ttbr - PAGE_SIZE, ttbr0_el1);
write_sysreg(ttbr - RESERVED_SWAPPER_OFFSET, ttbr0_el1);
isb();
/* Set reserved ASID */
write_sysreg(ttbr, ttbr1_el1);

View File

@ -35,8 +35,13 @@
*/
#define HVC_RESET_VECTORS 2
/*
* HVC_VHE_RESTART - Upgrade the CPU from EL1 to EL2, if possible
*/
#define HVC_VHE_RESTART 3
/* Max number of HYP stub hypercalls */
#define HVC_STUB_HCALL_NR 3
#define HVC_STUB_HCALL_NR 4
/* Error returned when an invalid stub number is passed into x0 */
#define HVC_STUB_ERR 0xbadca11

View File

@ -17,7 +17,7 @@ obj-y := debug-monitors.o entry.o irq.o fpsimd.o \
return_address.o cpuinfo.o cpu_errata.o \
cpufeature.o alternative.o cacheinfo.o \
smp.o smp_spin_table.o topology.o smccc-call.o \
syscall.o proton-pack.o
syscall.o proton-pack.o idreg-override.o
targets += efi-entry.o
@ -59,9 +59,10 @@ obj-$(CONFIG_CRASH_CORE) += crash_core.o
obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o
obj-$(CONFIG_ARM64_PTR_AUTH) += pointer_auth.o
obj-$(CONFIG_ARM64_MTE) += mte.o
obj-y += vdso-wrap.o
obj-$(CONFIG_COMPAT_VDSO) += vdso32-wrap.o
obj-y += vdso/ probes/
obj-$(CONFIG_COMPAT_VDSO) += vdso32/
obj-y += probes/
head-y := head.o
extra-y += $(head-y) vmlinux.lds

View File

@ -17,7 +17,7 @@
#include <asm/sections.h>
#include <linux/stop_machine.h>
#define __ALT_PTR(a,f) ((void *)&(a)->f + (a)->f)
#define __ALT_PTR(a, f) ((void *)&(a)->f + (a)->f)
#define ALT_ORIG_PTR(a) __ALT_PTR(a, orig_offset)
#define ALT_REPL_PTR(a) __ALT_PTR(a, alt_offset)

View File

@ -99,6 +99,9 @@ int main(void)
DEFINE(CPU_BOOT_STACK, offsetof(struct secondary_data, stack));
DEFINE(CPU_BOOT_TASK, offsetof(struct secondary_data, task));
BLANK();
DEFINE(FTR_OVR_VAL_OFFSET, offsetof(struct arm64_ftr_override, val));
DEFINE(FTR_OVR_MASK_OFFSET, offsetof(struct arm64_ftr_override, mask));
BLANK();
#ifdef CONFIG_KVM
DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt));
DEFINE(VCPU_FAULT_DISR, offsetof(struct kvm_vcpu, arch.fault.disr_el1));

View File

@ -107,8 +107,6 @@ cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *cap)
}
#ifdef CONFIG_ARM64_ERRATUM_1463225
DEFINE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa);
static bool
has_cortex_a76_erratum_1463225(const struct arm64_cpu_capabilities *entry,
int scope)

View File

@ -352,9 +352,12 @@ static const struct arm64_ftr_bits ftr_ctr[] = {
ARM64_FTR_END,
};
static struct arm64_ftr_override __ro_after_init no_override = { };
struct arm64_ftr_reg arm64_ftr_reg_ctrel0 = {
.name = "SYS_CTR_EL0",
.ftr_bits = ftr_ctr
.ftr_bits = ftr_ctr,
.override = &no_override,
};
static const struct arm64_ftr_bits ftr_id_mmfr0[] = {
@ -544,13 +547,20 @@ static const struct arm64_ftr_bits ftr_raz[] = {
ARM64_FTR_END,
};
#define ARM64_FTR_REG(id, table) { \
.sys_id = id, \
.reg = &(struct arm64_ftr_reg){ \
.name = #id, \
.ftr_bits = &((table)[0]), \
#define ARM64_FTR_REG_OVERRIDE(id, table, ovr) { \
.sys_id = id, \
.reg = &(struct arm64_ftr_reg){ \
.name = #id, \
.override = (ovr), \
.ftr_bits = &((table)[0]), \
}}
#define ARM64_FTR_REG(id, table) ARM64_FTR_REG_OVERRIDE(id, table, &no_override)
struct arm64_ftr_override __ro_after_init id_aa64mmfr1_override;
struct arm64_ftr_override __ro_after_init id_aa64pfr1_override;
struct arm64_ftr_override __ro_after_init id_aa64isar1_override;
static const struct __ftr_reg_entry {
u32 sys_id;
struct arm64_ftr_reg *reg;
@ -585,7 +595,8 @@ static const struct __ftr_reg_entry {
/* Op1 = 0, CRn = 0, CRm = 4 */
ARM64_FTR_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0),
ARM64_FTR_REG(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1),
ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1,
&id_aa64pfr1_override),
ARM64_FTR_REG(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0),
/* Op1 = 0, CRn = 0, CRm = 5 */
@ -594,11 +605,13 @@ static const struct __ftr_reg_entry {
/* Op1 = 0, CRn = 0, CRm = 6 */
ARM64_FTR_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0),
ARM64_FTR_REG(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1),
ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1,
&id_aa64isar1_override),
/* Op1 = 0, CRn = 0, CRm = 7 */
ARM64_FTR_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0),
ARM64_FTR_REG(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1),
ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1,
&id_aa64mmfr1_override),
ARM64_FTR_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2),
/* Op1 = 0, CRn = 1, CRm = 2 */
@ -770,6 +783,33 @@ static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new)
for (ftrp = reg->ftr_bits; ftrp->width; ftrp++) {
u64 ftr_mask = arm64_ftr_mask(ftrp);
s64 ftr_new = arm64_ftr_value(ftrp, new);
s64 ftr_ovr = arm64_ftr_value(ftrp, reg->override->val);
if ((ftr_mask & reg->override->mask) == ftr_mask) {
s64 tmp = arm64_ftr_safe_value(ftrp, ftr_ovr, ftr_new);
char *str = NULL;
if (ftr_ovr != tmp) {
/* Unsafe, remove the override */
reg->override->mask &= ~ftr_mask;
reg->override->val &= ~ftr_mask;
tmp = ftr_ovr;
str = "ignoring override";
} else if (ftr_new != tmp) {
/* Override was valid */
ftr_new = tmp;
str = "forced";
} else if (ftr_ovr == tmp) {
/* Override was the safe value */
str = "already set";
}
if (str)
pr_warn("%s[%d:%d]: %s to %llx\n",
reg->name,
ftrp->shift + ftrp->width - 1,
ftrp->shift, str, tmp);
}
val = arm64_ftr_set_value(ftrp, val, ftr_new);
@ -1115,14 +1155,17 @@ u64 read_sanitised_ftr_reg(u32 id)
EXPORT_SYMBOL_GPL(read_sanitised_ftr_reg);
#define read_sysreg_case(r) \
case r: return read_sysreg_s(r)
case r: val = read_sysreg_s(r); break;
/*
* __read_sysreg_by_encoding() - Used by a STARTING cpu before cpuinfo is populated.
* Read the system register on the current CPU
*/
static u64 __read_sysreg_by_encoding(u32 sys_id)
u64 __read_sysreg_by_encoding(u32 sys_id)
{
struct arm64_ftr_reg *regp;
u64 val;
switch (sys_id) {
read_sysreg_case(SYS_ID_PFR0_EL1);
read_sysreg_case(SYS_ID_PFR1_EL1);
@ -1165,6 +1208,14 @@ static u64 __read_sysreg_by_encoding(u32 sys_id)
BUG();
return 0;
}
regp = get_arm64_ftr_reg(sys_id);
if (regp) {
val &= ~regp->override->mask;
val |= (regp->override->val & regp->override->mask);
}
return val;
}
#include <linux/irqchip/arm-gic-v3.h>
@ -1455,7 +1506,7 @@ static bool cpu_has_broken_dbm(void)
/* List of CPUs which have broken DBM support. */
static const struct midr_range cpus[] = {
#ifdef CONFIG_ARM64_ERRATUM_1024718
MIDR_RANGE(MIDR_CORTEX_A55, 0, 0, 1, 0), // A55 r0p0 -r1p0
MIDR_ALL_VERSIONS(MIDR_CORTEX_A55),
/* Kryo4xx Silver (rdpe => r1p0) */
MIDR_REV(MIDR_QCOM_KRYO_4XX_SILVER, 0xd, 0xe),
#endif

View File

@ -109,6 +109,55 @@ asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs)
exit_to_kernel_mode(regs);
}
#ifdef CONFIG_ARM64_ERRATUM_1463225
static DEFINE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa);
static void cortex_a76_erratum_1463225_svc_handler(void)
{
u32 reg, val;
if (!unlikely(test_thread_flag(TIF_SINGLESTEP)))
return;
if (!unlikely(this_cpu_has_cap(ARM64_WORKAROUND_1463225)))
return;
__this_cpu_write(__in_cortex_a76_erratum_1463225_wa, 1);
reg = read_sysreg(mdscr_el1);
val = reg | DBG_MDSCR_SS | DBG_MDSCR_KDE;
write_sysreg(val, mdscr_el1);
asm volatile("msr daifclr, #8");
isb();
/* We will have taken a single-step exception by this point */
write_sysreg(reg, mdscr_el1);
__this_cpu_write(__in_cortex_a76_erratum_1463225_wa, 0);
}
static bool cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
{
if (!__this_cpu_read(__in_cortex_a76_erratum_1463225_wa))
return false;
/*
* We've taken a dummy step exception from the kernel to ensure
* that interrupts are re-enabled on the syscall path. Return back
* to cortex_a76_erratum_1463225_svc_handler() with debug exceptions
* masked so that we can safely restore the mdscr and get on with
* handling the syscall.
*/
regs->pstate |= PSR_D_BIT;
return true;
}
#else /* CONFIG_ARM64_ERRATUM_1463225 */
static void cortex_a76_erratum_1463225_svc_handler(void) { }
static bool cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
{
return false;
}
#endif /* CONFIG_ARM64_ERRATUM_1463225 */
static void noinstr el1_abort(struct pt_regs *regs, unsigned long esr)
{
unsigned long far = read_sysreg(far_el1);
@ -186,7 +235,8 @@ static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr)
gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
arm64_enter_el1_dbg(regs);
do_debug_exception(far, esr, regs);
if (!cortex_a76_erratum_1463225_debug_handler(regs))
do_debug_exception(far, esr, regs);
arm64_exit_el1_dbg(regs);
}
@ -362,6 +412,7 @@ static void noinstr el0_svc(struct pt_regs *regs)
gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
enter_from_user_mode();
cortex_a76_erratum_1463225_svc_handler();
do_el0_svc(regs);
}
@ -439,6 +490,7 @@ static void noinstr el0_svc_compat(struct pt_regs *regs)
gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
enter_from_user_mode();
cortex_a76_erratum_1463225_svc_handler();
do_el0_svc_compat(regs);
}

View File

@ -261,16 +261,16 @@ alternative_else_nop_endif
stp lr, x21, [sp, #S_LR]
/*
* In order to be able to dump the contents of struct pt_regs at the
* time the exception was taken (in case we attempt to walk the call
* stack later), chain it together with the stack frames.
* For exceptions from EL0, terminate the callchain here.
* For exceptions from EL1, create a synthetic frame record so the
* interrupted code shows up in the backtrace.
*/
.if \el == 0
stp xzr, xzr, [sp, #S_STACKFRAME]
mov x29, xzr
.else
stp x29, x22, [sp, #S_STACKFRAME]
.endif
add x29, sp, #S_STACKFRAME
.endif
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
alternative_if_not ARM64_HAS_PAN
@ -805,7 +805,7 @@ SYM_CODE_END(ret_to_user)
// Move from tramp_pg_dir to swapper_pg_dir
.macro tramp_map_kernel, tmp
mrs \tmp, ttbr1_el1
add \tmp, \tmp, #(2 * PAGE_SIZE)
add \tmp, \tmp, #TRAMP_SWAPPER_OFFSET
bic \tmp, \tmp, #USER_ASID_FLAG
msr ttbr1_el1, \tmp
#ifdef CONFIG_QCOM_FALKOR_ERRATUM_1003
@ -825,7 +825,7 @@ alternative_else_nop_endif
// Move from swapper_pg_dir to tramp_pg_dir
.macro tramp_unmap_kernel, tmp
mrs \tmp, ttbr1_el1
sub \tmp, \tmp, #(2 * PAGE_SIZE)
sub \tmp, \tmp, #TRAMP_SWAPPER_OFFSET
orr \tmp, \tmp, #USER_ASID_FLAG
msr ttbr1_el1, \tmp
/*

View File

@ -404,10 +404,6 @@ SYM_FUNC_START_LOCAL(__primary_switched)
adr_l x5, init_task
msr sp_el0, x5 // Save thread_info
#ifdef CONFIG_ARM64_PTR_AUTH
__ptrauth_keys_init_cpu x5, x6, x7, x8
#endif
adr_l x8, vectors // load VBAR_EL1 with virtual
msr vbar_el1, x8 // vector table address
isb
@ -436,10 +432,12 @@ SYM_FUNC_START_LOCAL(__primary_switched)
#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
bl kasan_early_init
#endif
mov x0, x21 // pass FDT address in x0
bl early_fdt_map // Try mapping the FDT early
bl init_feature_override // Parse cpu feature overrides
#ifdef CONFIG_RANDOMIZE_BASE
tst x23, ~(MIN_KIMG_ALIGN - 1) // already running randomized?
b.ne 0f
mov x0, x21 // pass FDT address in x0
bl kaslr_early_init // parse FDT for KASLR options
cbz x0, 0f // KASLR disabled? just proceed
orr x23, x23, x0 // record KASLR offset
@ -447,6 +445,7 @@ SYM_FUNC_START_LOCAL(__primary_switched)
ret // to __primary_switch()
0:
#endif
bl switch_to_vhe // Prefer VHE if possible
add sp, sp, #16
mov x29, #0
mov x30, #0
@ -478,13 +477,14 @@ EXPORT_SYMBOL(kimage_vaddr)
* booted in EL1 or EL2 respectively.
*/
SYM_FUNC_START(init_kernel_el)
mov_q x0, INIT_SCTLR_EL1_MMU_OFF
msr sctlr_el1, x0
mrs x0, CurrentEL
cmp x0, #CurrentEL_EL2
b.eq init_el2
SYM_INNER_LABEL(init_el1, SYM_L_LOCAL)
mov_q x0, INIT_SCTLR_EL1_MMU_OFF
msr sctlr_el1, x0
isb
mov_q x0, INIT_PSTATE_EL1
msr spsr_el1, x0
@ -493,50 +493,11 @@ SYM_INNER_LABEL(init_el1, SYM_L_LOCAL)
eret
SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
#ifdef CONFIG_ARM64_VHE
/*
* Check for VHE being present. x2 being non-zero indicates that we
* do have VHE, and that the kernel is intended to run at EL2.
*/
mrs x2, id_aa64mmfr1_el1
ubfx x2, x2, #ID_AA64MMFR1_VHE_SHIFT, #4
#else
mov x2, xzr
#endif
cbz x2, init_el2_nvhe
/*
* When VHE _is_ in use, EL1 will not be used in the host and
* requires no configuration, and all non-hyp-specific EL2 setup
* will be done via the _EL1 system register aliases in __cpu_setup.
*/
mov_q x0, HCR_HOST_VHE_FLAGS
msr hcr_el2, x0
isb
init_el2_state vhe
isb
mov_q x0, INIT_PSTATE_EL2
msr spsr_el2, x0
msr elr_el2, lr
mov w0, #BOOT_CPU_MODE_EL2
eret
SYM_INNER_LABEL(init_el2_nvhe, SYM_L_LOCAL)
/*
* When VHE is not in use, early init of EL2 and EL1 needs to be
* done here.
*/
mov_q x0, INIT_SCTLR_EL1_MMU_OFF
msr sctlr_el1, x0
mov_q x0, HCR_HOST_NVHE_FLAGS
msr hcr_el2, x0
isb
init_el2_state nvhe
init_el2_state
/* Hypervisor stub */
adr_l x0, __hyp_stub_vectors
@ -623,6 +584,7 @@ SYM_FUNC_START_LOCAL(secondary_startup)
/*
* Common entry point for secondary CPUs.
*/
bl switch_to_vhe
bl __cpu_secondary_check52bitva
bl __cpu_setup // initialise processor
adrp x1, swapper_pg_dir
@ -703,16 +665,9 @@ SYM_FUNC_START(__enable_mmu)
offset_ttbr1 x1, x3
msr ttbr1_el1, x1 // load TTBR1
isb
msr sctlr_el1, x0
isb
/*
* Invalidate the local I-cache so that any instructions fetched
* speculatively from the PoC are discarded, since they may have
* been dynamically patched at the PoU.
*/
ic iallu
dsb nsh
isb
set_sctlr_el1 x0
ret
SYM_FUNC_END(__enable_mmu)
@ -883,11 +838,7 @@ SYM_FUNC_START_LOCAL(__primary_switch)
tlbi vmalle1 // Remove any stale TLB entries
dsb nsh
msr sctlr_el1, x19 // re-enable the MMU
isb
ic iallu // flush instructions fetched
dsb nsh // via old mapping
isb
set_sctlr_el1 x19 // re-enable the MMU
bl __relocate_kernel
#endif

View File

@ -16,7 +16,6 @@
#define pr_fmt(x) "hibernate: " x
#include <linux/cpu.h>
#include <linux/kvm_host.h>
#include <linux/mm.h>
#include <linux/pm.h>
#include <linux/sched.h>
#include <linux/suspend.h>
@ -31,13 +30,12 @@
#include <asm/memory.h>
#include <asm/mmu_context.h>
#include <asm/mte.h>
#include <asm/pgalloc.h>
#include <asm/pgtable-hwdef.h>
#include <asm/sections.h>
#include <asm/smp.h>
#include <asm/smp_plat.h>
#include <asm/suspend.h>
#include <asm/sysreg.h>
#include <asm/trans_pgd.h>
#include <asm/virt.h>
/*
@ -178,52 +176,9 @@ int arch_hibernation_header_restore(void *addr)
}
EXPORT_SYMBOL(arch_hibernation_header_restore);
static int trans_pgd_map_page(pgd_t *trans_pgd, void *page,
unsigned long dst_addr,
pgprot_t pgprot)
static void *hibernate_page_alloc(void *arg)
{
pgd_t *pgdp;
p4d_t *p4dp;
pud_t *pudp;
pmd_t *pmdp;
pte_t *ptep;
pgdp = pgd_offset_pgd(trans_pgd, dst_addr);
if (pgd_none(READ_ONCE(*pgdp))) {
pudp = (void *)get_safe_page(GFP_ATOMIC);
if (!pudp)
return -ENOMEM;
pgd_populate(&init_mm, pgdp, pudp);
}
p4dp = p4d_offset(pgdp, dst_addr);
if (p4d_none(READ_ONCE(*p4dp))) {
pudp = (void *)get_safe_page(GFP_ATOMIC);
if (!pudp)
return -ENOMEM;
p4d_populate(&init_mm, p4dp, pudp);
}
pudp = pud_offset(p4dp, dst_addr);
if (pud_none(READ_ONCE(*pudp))) {
pmdp = (void *)get_safe_page(GFP_ATOMIC);
if (!pmdp)
return -ENOMEM;
pud_populate(&init_mm, pudp, pmdp);
}
pmdp = pmd_offset(pudp, dst_addr);
if (pmd_none(READ_ONCE(*pmdp))) {
ptep = (void *)get_safe_page(GFP_ATOMIC);
if (!ptep)
return -ENOMEM;
pmd_populate_kernel(&init_mm, pmdp, ptep);
}
ptep = pte_offset_kernel(pmdp, dst_addr);
set_pte(ptep, pfn_pte(virt_to_pfn(page), PAGE_KERNEL_EXEC));
return 0;
return (void *)get_safe_page((__force gfp_t)(unsigned long)arg);
}
/*
@ -239,11 +194,16 @@ static int trans_pgd_map_page(pgd_t *trans_pgd, void *page,
* page system.
*/
static int create_safe_exec_page(void *src_start, size_t length,
unsigned long dst_addr,
phys_addr_t *phys_dst_addr)
{
struct trans_pgd_info trans_info = {
.trans_alloc_page = hibernate_page_alloc,
.trans_alloc_arg = (__force void *)GFP_ATOMIC,
};
void *page = (void *)get_safe_page(GFP_ATOMIC);
pgd_t *trans_pgd;
phys_addr_t trans_ttbr0;
unsigned long t0sz;
int rc;
if (!page)
@ -251,13 +211,7 @@ static int create_safe_exec_page(void *src_start, size_t length,
memcpy(page, src_start, length);
__flush_icache_range((unsigned long)page, (unsigned long)page + length);
trans_pgd = (void *)get_safe_page(GFP_ATOMIC);
if (!trans_pgd)
return -ENOMEM;
rc = trans_pgd_map_page(trans_pgd, page, dst_addr,
PAGE_KERNEL_EXEC);
rc = trans_pgd_idmap_page(&trans_info, &trans_ttbr0, &t0sz, page);
if (rc)
return rc;
@ -270,12 +224,15 @@ static int create_safe_exec_page(void *src_start, size_t length,
* page, but TLBs may contain stale ASID-tagged entries (e.g. for EFI
* runtime services), while for a userspace-driven test_resume cycle it
* points to userspace page tables (and we must point it at a zero page
* ourselves). Elsewhere we only (un)install the idmap with preemption
* disabled, so T0SZ should be as required regardless.
* ourselves).
*
* We change T0SZ as part of installing the idmap. This is undone by
* cpu_uninstall_idmap() in __cpu_suspend_exit().
*/
cpu_set_reserved_ttbr0();
local_flush_tlb_all();
write_sysreg(phys_to_ttbr(virt_to_phys(trans_pgd)), ttbr0_el1);
__cpu_set_tcr_t0sz(t0sz);
write_sysreg(trans_ttbr0, ttbr0_el1);
isb();
*phys_dst_addr = virt_to_phys(page);
@ -462,182 +419,6 @@ int swsusp_arch_suspend(void)
return ret;
}
static void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr)
{
pte_t pte = READ_ONCE(*src_ptep);
if (pte_valid(pte)) {
/*
* Resume will overwrite areas that may be marked
* read only (code, rodata). Clear the RDONLY bit from
* the temporary mappings we use during restore.
*/
set_pte(dst_ptep, pte_mkwrite(pte));
} else if (debug_pagealloc_enabled() && !pte_none(pte)) {
/*
* debug_pagealloc will removed the PTE_VALID bit if
* the page isn't in use by the resume kernel. It may have
* been in use by the original kernel, in which case we need
* to put it back in our copy to do the restore.
*
* Before marking this entry valid, check the pfn should
* be mapped.
*/
BUG_ON(!pfn_valid(pte_pfn(pte)));
set_pte(dst_ptep, pte_mkpresent(pte_mkwrite(pte)));
}
}
static int copy_pte(pmd_t *dst_pmdp, pmd_t *src_pmdp, unsigned long start,
unsigned long end)
{
pte_t *src_ptep;
pte_t *dst_ptep;
unsigned long addr = start;
dst_ptep = (pte_t *)get_safe_page(GFP_ATOMIC);
if (!dst_ptep)
return -ENOMEM;
pmd_populate_kernel(&init_mm, dst_pmdp, dst_ptep);
dst_ptep = pte_offset_kernel(dst_pmdp, start);
src_ptep = pte_offset_kernel(src_pmdp, start);
do {
_copy_pte(dst_ptep, src_ptep, addr);
} while (dst_ptep++, src_ptep++, addr += PAGE_SIZE, addr != end);
return 0;
}
static int copy_pmd(pud_t *dst_pudp, pud_t *src_pudp, unsigned long start,
unsigned long end)
{
pmd_t *src_pmdp;
pmd_t *dst_pmdp;
unsigned long next;
unsigned long addr = start;
if (pud_none(READ_ONCE(*dst_pudp))) {
dst_pmdp = (pmd_t *)get_safe_page(GFP_ATOMIC);
if (!dst_pmdp)
return -ENOMEM;
pud_populate(&init_mm, dst_pudp, dst_pmdp);
}
dst_pmdp = pmd_offset(dst_pudp, start);
src_pmdp = pmd_offset(src_pudp, start);
do {
pmd_t pmd = READ_ONCE(*src_pmdp);
next = pmd_addr_end(addr, end);
if (pmd_none(pmd))
continue;
if (pmd_table(pmd)) {
if (copy_pte(dst_pmdp, src_pmdp, addr, next))
return -ENOMEM;
} else {
set_pmd(dst_pmdp,
__pmd(pmd_val(pmd) & ~PMD_SECT_RDONLY));
}
} while (dst_pmdp++, src_pmdp++, addr = next, addr != end);
return 0;
}
static int copy_pud(p4d_t *dst_p4dp, p4d_t *src_p4dp, unsigned long start,
unsigned long end)
{
pud_t *dst_pudp;
pud_t *src_pudp;
unsigned long next;
unsigned long addr = start;
if (p4d_none(READ_ONCE(*dst_p4dp))) {
dst_pudp = (pud_t *)get_safe_page(GFP_ATOMIC);
if (!dst_pudp)
return -ENOMEM;
p4d_populate(&init_mm, dst_p4dp, dst_pudp);
}
dst_pudp = pud_offset(dst_p4dp, start);
src_pudp = pud_offset(src_p4dp, start);
do {
pud_t pud = READ_ONCE(*src_pudp);
next = pud_addr_end(addr, end);
if (pud_none(pud))
continue;
if (pud_table(pud)) {
if (copy_pmd(dst_pudp, src_pudp, addr, next))
return -ENOMEM;
} else {
set_pud(dst_pudp,
__pud(pud_val(pud) & ~PUD_SECT_RDONLY));
}
} while (dst_pudp++, src_pudp++, addr = next, addr != end);
return 0;
}
static int copy_p4d(pgd_t *dst_pgdp, pgd_t *src_pgdp, unsigned long start,
unsigned long end)
{
p4d_t *dst_p4dp;
p4d_t *src_p4dp;
unsigned long next;
unsigned long addr = start;
dst_p4dp = p4d_offset(dst_pgdp, start);
src_p4dp = p4d_offset(src_pgdp, start);
do {
next = p4d_addr_end(addr, end);
if (p4d_none(READ_ONCE(*src_p4dp)))
continue;
if (copy_pud(dst_p4dp, src_p4dp, addr, next))
return -ENOMEM;
} while (dst_p4dp++, src_p4dp++, addr = next, addr != end);
return 0;
}
static int copy_page_tables(pgd_t *dst_pgdp, unsigned long start,
unsigned long end)
{
unsigned long next;
unsigned long addr = start;
pgd_t *src_pgdp = pgd_offset_k(start);
dst_pgdp = pgd_offset_pgd(dst_pgdp, start);
do {
next = pgd_addr_end(addr, end);
if (pgd_none(READ_ONCE(*src_pgdp)))
continue;
if (copy_p4d(dst_pgdp, src_pgdp, addr, next))
return -ENOMEM;
} while (dst_pgdp++, src_pgdp++, addr = next, addr != end);
return 0;
}
static int trans_pgd_create_copy(pgd_t **dst_pgdp, unsigned long start,
unsigned long end)
{
int rc;
pgd_t *trans_pgd = (pgd_t *)get_safe_page(GFP_ATOMIC);
if (!trans_pgd) {
pr_err("Failed to allocate memory for temporary page tables.\n");
return -ENOMEM;
}
rc = copy_page_tables(trans_pgd, start, end);
if (!rc)
*dst_pgdp = trans_pgd;
return rc;
}
/*
* Setup then Resume from the hibernate image using swsusp_arch_suspend_exit().
*
@ -650,16 +431,20 @@ int swsusp_arch_resume(void)
void *zero_page;
size_t exit_size;
pgd_t *tmp_pg_dir;
phys_addr_t phys_hibernate_exit;
void __noreturn (*hibernate_exit)(phys_addr_t, phys_addr_t, void *,
void *, phys_addr_t, phys_addr_t);
struct trans_pgd_info trans_info = {
.trans_alloc_page = hibernate_page_alloc,
.trans_alloc_arg = (void *)GFP_ATOMIC,
};
/*
* Restoring the memory image will overwrite the ttbr1 page tables.
* Create a second copy of just the linear map, and use this when
* restoring.
*/
rc = trans_pgd_create_copy(&tmp_pg_dir, PAGE_OFFSET, PAGE_END);
rc = trans_pgd_create_copy(&trans_info, &tmp_pg_dir, PAGE_OFFSET,
PAGE_END);
if (rc)
return rc;
@ -673,19 +458,13 @@ int swsusp_arch_resume(void)
return -ENOMEM;
}
/*
* Locate the exit code in the bottom-but-one page, so that *NULL
* still has disastrous affects.
*/
hibernate_exit = (void *)PAGE_SIZE;
exit_size = __hibernate_exit_text_end - __hibernate_exit_text_start;
/*
* Copy swsusp_arch_suspend_exit() to a safe page. This will generate
* a new set of ttbr0 page tables and load them.
*/
rc = create_safe_exec_page(__hibernate_exit_text_start, exit_size,
(unsigned long)hibernate_exit,
&phys_hibernate_exit);
(phys_addr_t *)&hibernate_exit);
if (rc) {
pr_err("Failed to create safe executable page for hibernate_exit code.\n");
return rc;
@ -704,7 +483,7 @@ int swsusp_arch_resume(void)
* We can skip this step if we booted at EL1, or are running with VHE.
*/
if (el2_reset_needed()) {
phys_addr_t el2_vectors = phys_hibernate_exit; /* base */
phys_addr_t el2_vectors = (phys_addr_t)hibernate_exit;
el2_vectors += hibernate_el2_vectors -
__hibernate_exit_text_start; /* offset */

View File

@ -8,9 +8,9 @@
#include <linux/init.h>
#include <linux/linkage.h>
#include <linux/irqchip/arm-gic-v3.h>
#include <asm/assembler.h>
#include <asm/el2_setup.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
#include <asm/ptrace.h>
@ -47,10 +47,13 @@ SYM_CODE_END(__hyp_stub_vectors)
SYM_CODE_START_LOCAL(el1_sync)
cmp x0, #HVC_SET_VECTORS
b.ne 2f
b.ne 1f
msr vbar_el2, x1
b 9f
1: cmp x0, #HVC_VHE_RESTART
b.eq mutate_to_vhe
2: cmp x0, #HVC_SOFT_RESTART
b.ne 3f
mov x0, x2
@ -70,6 +73,88 @@ SYM_CODE_START_LOCAL(el1_sync)
eret
SYM_CODE_END(el1_sync)
// nVHE? No way! Give me the real thing!
SYM_CODE_START_LOCAL(mutate_to_vhe)
// Be prepared to fail
mov_q x0, HVC_STUB_ERR
// Sanity check: MMU *must* be off
mrs x1, sctlr_el2
tbnz x1, #0, 1f
// Needs to be VHE capable, obviously
mrs x1, id_aa64mmfr1_el1
ubfx x1, x1, #ID_AA64MMFR1_VHE_SHIFT, #4
cbz x1, 1f
// Check whether VHE is disabled from the command line
adr_l x1, id_aa64mmfr1_override
ldr x2, [x1, FTR_OVR_VAL_OFFSET]
ldr x1, [x1, FTR_OVR_MASK_OFFSET]
ubfx x2, x2, #ID_AA64MMFR1_VHE_SHIFT, #4
ubfx x1, x1, #ID_AA64MMFR1_VHE_SHIFT, #4
cmp x1, xzr
and x2, x2, x1
csinv x2, x2, xzr, ne
cbz x2, 1f
// Engage the VHE magic!
mov_q x0, HCR_HOST_VHE_FLAGS
msr hcr_el2, x0
isb
// Use the EL1 allocated stack, per-cpu offset
mrs x0, sp_el1
mov sp, x0
mrs x0, tpidr_el1
msr tpidr_el2, x0
// FP configuration, vectors
mrs_s x0, SYS_CPACR_EL12
msr cpacr_el1, x0
mrs_s x0, SYS_VBAR_EL12
msr vbar_el1, x0
// Use EL2 translations for SPE and disable access from EL1
mrs x0, mdcr_el2
bic x0, x0, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT)
msr mdcr_el2, x0
// Transfer the MM state from EL1 to EL2
mrs_s x0, SYS_TCR_EL12
msr tcr_el1, x0
mrs_s x0, SYS_TTBR0_EL12
msr ttbr0_el1, x0
mrs_s x0, SYS_TTBR1_EL12
msr ttbr1_el1, x0
mrs_s x0, SYS_MAIR_EL12
msr mair_el1, x0
isb
// Invalidate TLBs before enabling the MMU
tlbi vmalle1
dsb nsh
// Enable the EL2 S1 MMU, as set up from EL1
mrs_s x0, SYS_SCTLR_EL12
set_sctlr_el1 x0
// Disable the EL1 S1 MMU for a good measure
mov_q x0, INIT_SCTLR_EL1_MMU_OFF
msr_s SYS_SCTLR_EL12, x0
// Hack the exception return to stay at EL2
mrs x0, spsr_el1
and x0, x0, #~PSR_MODE_MASK
mov x1, #PSR_MODE_EL2h
orr x0, x0, x1
msr spsr_el1, x0
mov x0, xzr
1: eret
SYM_CODE_END(mutate_to_vhe)
.macro invalid_vector label
SYM_CODE_START_LOCAL(\label)
b \label
@ -85,6 +170,8 @@ SYM_CODE_END(\label)
invalid_vector el1_fiq_invalid
invalid_vector el1_error_invalid
.popsection
/*
* __hyp_set_vectors: Call this after boot to set the initial hypervisor
* vectors as part of hypervisor installation. On an SMP system, this should
@ -118,3 +205,27 @@ SYM_FUNC_START(__hyp_reset_vectors)
hvc #0
ret
SYM_FUNC_END(__hyp_reset_vectors)
/*
* Entry point to switch to VHE if deemed capable
*/
SYM_FUNC_START(switch_to_vhe)
#ifdef CONFIG_ARM64_VHE
// Need to have booted at EL2
adr_l x1, __boot_cpu_mode
ldr w0, [x1]
cmp w0, #BOOT_CPU_MODE_EL2
b.ne 1f
// and still be at EL1
mrs x0, CurrentEL
cmp x0, #CurrentEL_EL1
b.ne 1f
// Turn the world upside down
mov x0, #HVC_VHE_RESTART
hvc #0
1:
#endif
ret
SYM_FUNC_END(switch_to_vhe)

View File

@ -0,0 +1,216 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Early cpufeature override framework
*
* Copyright (C) 2020 Google LLC
* Author: Marc Zyngier <maz@kernel.org>
*/
#include <linux/ctype.h>
#include <linux/kernel.h>
#include <linux/libfdt.h>
#include <asm/cacheflush.h>
#include <asm/cpufeature.h>
#include <asm/setup.h>
#define FTR_DESC_NAME_LEN 20
#define FTR_DESC_FIELD_LEN 10
#define FTR_ALIAS_NAME_LEN 30
#define FTR_ALIAS_OPTION_LEN 80
struct ftr_set_desc {
char name[FTR_DESC_NAME_LEN];
struct arm64_ftr_override *override;
struct {
char name[FTR_DESC_FIELD_LEN];
u8 shift;
} fields[];
};
static const struct ftr_set_desc mmfr1 __initconst = {
.name = "id_aa64mmfr1",
.override = &id_aa64mmfr1_override,
.fields = {
{ "vh", ID_AA64MMFR1_VHE_SHIFT },
{}
},
};
static const struct ftr_set_desc pfr1 __initconst = {
.name = "id_aa64pfr1",
.override = &id_aa64pfr1_override,
.fields = {
{ "bt", ID_AA64PFR1_BT_SHIFT },
{}
},
};
static const struct ftr_set_desc isar1 __initconst = {
.name = "id_aa64isar1",
.override = &id_aa64isar1_override,
.fields = {
{ "gpi", ID_AA64ISAR1_GPI_SHIFT },
{ "gpa", ID_AA64ISAR1_GPA_SHIFT },
{ "api", ID_AA64ISAR1_API_SHIFT },
{ "apa", ID_AA64ISAR1_APA_SHIFT },
{}
},
};
extern struct arm64_ftr_override kaslr_feature_override;
static const struct ftr_set_desc kaslr __initconst = {
.name = "kaslr",
#ifdef CONFIG_RANDOMIZE_BASE
.override = &kaslr_feature_override,
#endif
.fields = {
{ "disabled", 0 },
{}
},
};
static const struct ftr_set_desc * const regs[] __initconst = {
&mmfr1,
&pfr1,
&isar1,
&kaslr,
};
static const struct {
char alias[FTR_ALIAS_NAME_LEN];
char feature[FTR_ALIAS_OPTION_LEN];
} aliases[] __initconst = {
{ "kvm-arm.mode=nvhe", "id_aa64mmfr1.vh=0" },
{ "kvm-arm.mode=protected", "id_aa64mmfr1.vh=0" },
{ "arm64.nobti", "id_aa64pfr1.bt=0" },
{ "arm64.nopauth",
"id_aa64isar1.gpi=0 id_aa64isar1.gpa=0 "
"id_aa64isar1.api=0 id_aa64isar1.apa=0" },
{ "nokaslr", "kaslr.disabled=1" },
};
static int __init find_field(const char *cmdline,
const struct ftr_set_desc *reg, int f, u64 *v)
{
char opt[FTR_DESC_NAME_LEN + FTR_DESC_FIELD_LEN + 2];
int len;
len = snprintf(opt, ARRAY_SIZE(opt), "%s.%s=",
reg->name, reg->fields[f].name);
if (!parameqn(cmdline, opt, len))
return -1;
return kstrtou64(cmdline + len, 0, v);
}
static void __init match_options(const char *cmdline)
{
int i;
for (i = 0; i < ARRAY_SIZE(regs); i++) {
int f;
if (!regs[i]->override)
continue;
for (f = 0; strlen(regs[i]->fields[f].name); f++) {
u64 shift = regs[i]->fields[f].shift;
u64 mask = 0xfUL << shift;
u64 v;
if (find_field(cmdline, regs[i], f, &v))
continue;
regs[i]->override->val &= ~mask;
regs[i]->override->val |= (v << shift) & mask;
regs[i]->override->mask |= mask;
return;
}
}
}
static __init void __parse_cmdline(const char *cmdline, bool parse_aliases)
{
do {
char buf[256];
size_t len;
int i;
cmdline = skip_spaces(cmdline);
for (len = 0; cmdline[len] && !isspace(cmdline[len]); len++);
if (!len)
return;
len = min(len, ARRAY_SIZE(buf) - 1);
strncpy(buf, cmdline, len);
buf[len] = 0;
if (strcmp(buf, "--") == 0)
return;
cmdline += len;
match_options(buf);
for (i = 0; parse_aliases && i < ARRAY_SIZE(aliases); i++)
if (parameq(buf, aliases[i].alias))
__parse_cmdline(aliases[i].feature, false);
} while (1);
}
static __init void parse_cmdline(void)
{
if (!IS_ENABLED(CONFIG_CMDLINE_FORCE)) {
const u8 *prop;
void *fdt;
int node;
fdt = get_early_fdt_ptr();
if (!fdt)
goto out;
node = fdt_path_offset(fdt, "/chosen");
if (node < 0)
goto out;
prop = fdt_getprop(fdt, node, "bootargs", NULL);
if (!prop)
goto out;
__parse_cmdline(prop, true);
if (!IS_ENABLED(CONFIG_CMDLINE_EXTEND))
return;
}
out:
__parse_cmdline(CONFIG_CMDLINE, true);
}
/* Keep checkers quiet */
void init_feature_override(void);
asmlinkage void __init init_feature_override(void)
{
int i;
for (i = 0; i < ARRAY_SIZE(regs); i++) {
if (regs[i]->override) {
regs[i]->override->val = 0;
regs[i]->override->mask = 0;
}
}
parse_cmdline();
for (i = 0; i < ARRAY_SIZE(regs); i++) {
if (regs[i]->override)
__flush_dcache_area(regs[i]->override,
sizeof(*regs[i]->override));
}
}

View File

@ -19,6 +19,7 @@
#include <asm/memory.h>
#include <asm/mmu.h>
#include <asm/sections.h>
#include <asm/setup.h>
enum kaslr_status {
KASLR_ENABLED,
@ -50,39 +51,7 @@ static __init u64 get_kaslr_seed(void *fdt)
return ret;
}
static __init bool cmdline_contains_nokaslr(const u8 *cmdline)
{
const u8 *str;
str = strstr(cmdline, "nokaslr");
return str == cmdline || (str > cmdline && *(str - 1) == ' ');
}
static __init bool is_kaslr_disabled_cmdline(void *fdt)
{
if (!IS_ENABLED(CONFIG_CMDLINE_FORCE)) {
int node;
const u8 *prop;
node = fdt_path_offset(fdt, "/chosen");
if (node < 0)
goto out;
prop = fdt_getprop(fdt, node, "bootargs", NULL);
if (!prop)
goto out;
if (cmdline_contains_nokaslr(prop))
return true;
if (IS_ENABLED(CONFIG_CMDLINE_EXTEND))
goto out;
return false;
}
out:
return cmdline_contains_nokaslr(CONFIG_CMDLINE);
}
struct arm64_ftr_override kaslr_feature_override __initdata;
/*
* This routine will be executed with the kernel mapped at its default virtual
@ -92,12 +61,11 @@ static __init bool is_kaslr_disabled_cmdline(void *fdt)
* containing function pointers) to be reinitialized, and zero-initialized
* .bss variables will be reset to 0.
*/
u64 __init kaslr_early_init(u64 dt_phys)
u64 __init kaslr_early_init(void)
{
void *fdt;
u64 seed, offset, mask, module_range;
unsigned long raw;
int size;
/*
* Set a reasonable default for module_alloc_base in case
@ -111,8 +79,7 @@ u64 __init kaslr_early_init(u64 dt_phys)
* and proceed with KASLR disabled. We will make another
* attempt at mapping the FDT in setup_machine()
*/
early_fixmap_init();
fdt = fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL);
fdt = get_early_fdt_ptr();
if (!fdt) {
kaslr_status = KASLR_DISABLED_FDT_REMAP;
return 0;
@ -127,7 +94,7 @@ u64 __init kaslr_early_init(u64 dt_phys)
* Check if 'nokaslr' appears on the command line, and
* return 0 if that is the case.
*/
if (is_kaslr_disabled_cmdline(fdt)) {
if (kaslr_feature_override.val & kaslr_feature_override.mask & 0xf) {
kaslr_status = KASLR_DISABLED_CMDLINE;
return 0;
}

View File

@ -42,6 +42,7 @@ static void _kexec_image_info(const char *func, int line,
pr_debug(" start: %lx\n", kimage->start);
pr_debug(" head: %lx\n", kimage->head);
pr_debug(" nr_segments: %lu\n", kimage->nr_segments);
pr_debug(" kern_reloc: %pa\n", &kimage->arch.kern_reloc);
for (i = 0; i < kimage->nr_segments; i++) {
pr_debug(" segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n",
@ -58,6 +59,23 @@ void machine_kexec_cleanup(struct kimage *kimage)
/* Empty routine needed to avoid build errors. */
}
int machine_kexec_post_load(struct kimage *kimage)
{
void *reloc_code = page_to_virt(kimage->control_code_page);
memcpy(reloc_code, arm64_relocate_new_kernel,
arm64_relocate_new_kernel_size);
kimage->arch.kern_reloc = __pa(reloc_code);
kexec_image_info(kimage);
/* Flush the reloc_code in preparation for its execution. */
__flush_dcache_area(reloc_code, arm64_relocate_new_kernel_size);
flush_icache_range((uintptr_t)reloc_code, (uintptr_t)reloc_code +
arm64_relocate_new_kernel_size);
return 0;
}
/**
* machine_kexec_prepare - Prepare for a kexec reboot.
*
@ -67,8 +85,6 @@ void machine_kexec_cleanup(struct kimage *kimage)
*/
int machine_kexec_prepare(struct kimage *kimage)
{
kexec_image_info(kimage);
if (kimage->type != KEXEC_TYPE_CRASH && cpus_are_stuck_in_kernel()) {
pr_err("Can't kexec: CPUs are stuck in the kernel.\n");
return -EBUSY;
@ -143,8 +159,6 @@ static void kexec_segment_flush(const struct kimage *kimage)
*/
void machine_kexec(struct kimage *kimage)
{
phys_addr_t reboot_code_buffer_phys;
void *reboot_code_buffer;
bool in_kexec_crash = (kimage == kexec_crash_image);
bool stuck_cpus = cpus_are_stuck_in_kernel();
@ -155,31 +169,6 @@ void machine_kexec(struct kimage *kimage)
WARN(in_kexec_crash && (stuck_cpus || smp_crash_stop_failed()),
"Some CPUs may be stale, kdump will be unreliable.\n");
reboot_code_buffer_phys = page_to_phys(kimage->control_code_page);
reboot_code_buffer = phys_to_virt(reboot_code_buffer_phys);
kexec_image_info(kimage);
/*
* Copy arm64_relocate_new_kernel to the reboot_code_buffer for use
* after the kernel is shut down.
*/
memcpy(reboot_code_buffer, arm64_relocate_new_kernel,
arm64_relocate_new_kernel_size);
/* Flush the reboot_code_buffer in preparation for its execution. */
__flush_dcache_area(reboot_code_buffer, arm64_relocate_new_kernel_size);
/*
* Although we've killed off the secondary CPUs, we don't update
* the online mask if we're handling a crash kernel and consequently
* need to avoid flush_icache_range(), which will attempt to IPI
* the offline CPUs. Therefore, we must use the __* variant here.
*/
__flush_icache_range((uintptr_t)reboot_code_buffer,
(uintptr_t)reboot_code_buffer +
arm64_relocate_new_kernel_size);
/* Flush the kimage list and its buffers. */
kexec_list_flush(kimage);
@ -193,7 +182,7 @@ void machine_kexec(struct kimage *kimage)
/*
* cpu_soft_restart will shutdown the MMU, disable data caches, then
* transfer control to the reboot_code_buffer which contains a copy of
* transfer control to the kern_reloc which contains a copy of
* the arm64_relocate_new_kernel routine. arm64_relocate_new_kernel
* uses physical addressing to relocate the new image to its final
* position and transfers control to the image entry point when the
@ -203,12 +192,8 @@ void machine_kexec(struct kimage *kimage)
* userspace (kexec-tools).
* In kexec_file case, the kernel starts directly without purgatory.
*/
cpu_soft_restart(reboot_code_buffer_phys, kimage->head, kimage->start,
#ifdef CONFIG_KEXEC_FILE
kimage->arch.dtb_mem);
#else
0);
#endif
cpu_soft_restart(kimage->arch.kern_reloc, kimage->head, kimage->start,
kimage->arch.dtb_mem);
BUG(); /* Should never get here. */
}

View File

@ -131,7 +131,7 @@ u64 module_emit_veneer_for_adrp(struct module *mod, Elf64_Shdr *sechdrs,
}
#endif
#define cmp_3way(a,b) ((a) < (b) ? -1 : (a) > (b))
#define cmp_3way(a, b) ((a) < (b) ? -1 : (a) > (b))
static int cmp_rela(const void *a, const void *b)
{

View File

@ -280,7 +280,7 @@ armv8pmu_event_attr_is_visible(struct kobject *kobj,
return 0;
}
static struct attribute_group armv8_pmuv3_events_attr_group = {
static const struct attribute_group armv8_pmuv3_events_attr_group = {
.name = "events",
.attrs = armv8_pmuv3_event_attrs,
.is_visible = armv8pmu_event_attr_is_visible,
@ -300,7 +300,7 @@ static struct attribute *armv8_pmuv3_format_attrs[] = {
NULL,
};
static struct attribute_group armv8_pmuv3_format_attr_group = {
static const struct attribute_group armv8_pmuv3_format_attr_group = {
.name = "format",
.attrs = armv8_pmuv3_format_attrs,
};
@ -322,7 +322,7 @@ static struct attribute *armv8_pmuv3_caps_attrs[] = {
NULL,
};
static struct attribute_group armv8_pmuv3_caps_attr_group = {
static const struct attribute_group armv8_pmuv3_caps_attr_group = {
.name = "caps",
.attrs = armv8_pmuv3_caps_attrs,
};
@ -810,7 +810,7 @@ static int armv8pmu_get_single_idx(struct pmu_hw_events *cpuc,
{
int idx;
for (idx = ARMV8_IDX_COUNTER0; idx < cpu_pmu->num_events; idx ++) {
for (idx = ARMV8_IDX_COUNTER0; idx < cpu_pmu->num_events; idx++) {
if (!test_and_set_bit(idx, cpuc->used_mask))
return idx;
}
@ -1188,6 +1188,12 @@ static int armv8_a77_pmu_init(struct arm_pmu *cpu_pmu)
armv8_pmuv3_map_event);
}
static int armv8_a78_pmu_init(struct arm_pmu *cpu_pmu)
{
return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a78",
armv8_pmuv3_map_event);
}
static int armv8_e1_pmu_init(struct arm_pmu *cpu_pmu)
{
return armv8_pmu_init_nogroups(cpu_pmu, "armv8_neoverse_e1",
@ -1225,6 +1231,7 @@ static const struct of_device_id armv8_pmu_of_device_ids[] = {
{.compatible = "arm,cortex-a75-pmu", .data = armv8_a75_pmu_init},
{.compatible = "arm,cortex-a76-pmu", .data = armv8_a76_pmu_init},
{.compatible = "arm,cortex-a77-pmu", .data = armv8_a77_pmu_init},
{.compatible = "arm,cortex-a78-pmu", .data = armv8_a78_pmu_init},
{.compatible = "arm,neoverse-e1-pmu", .data = armv8_e1_pmu_init},
{.compatible = "arm,neoverse-n1-pmu", .data = armv8_n1_pmu_init},
{.compatible = "cavium,thunder-pmu", .data = armv8_thunder_pmu_init},

View File

@ -301,7 +301,7 @@ void __show_regs(struct pt_regs *regs)
}
}
void show_regs(struct pt_regs * regs)
void show_regs(struct pt_regs *regs)
{
__show_regs(regs);
dump_backtrace(regs, NULL, KERN_DEFAULT);
@ -592,7 +592,7 @@ unsigned long get_wchan(struct task_struct *p)
ret = frame.pc;
goto out;
}
} while (count ++ < 16);
} while (count++ < 16);
out:
put_task_stack(p);

View File

@ -194,6 +194,7 @@ static void ptrace_hbptriggered(struct perf_event *bp,
}
arm64_force_sig_ptrace_errno_trap(si_errno, bkpt->trigger,
desc);
return;
}
#endif
arm64_force_sig_fault(SIGTRAP, TRAP_HWBKPT, bkpt->trigger, desc);

View File

@ -17,28 +17,24 @@
/*
* arm64_relocate_new_kernel - Put a 2nd stage image in place and boot it.
*
* The memory that the old kernel occupies may be overwritten when coping the
* The memory that the old kernel occupies may be overwritten when copying the
* new image to its final location. To assure that the
* arm64_relocate_new_kernel routine which does that copy is not overwritten,
* all code and data needed by arm64_relocate_new_kernel must be between the
* symbols arm64_relocate_new_kernel and arm64_relocate_new_kernel_end. The
* machine_kexec() routine will copy arm64_relocate_new_kernel to the kexec
* control_code_page, a special page which has been set up to be preserved
* during the copy operation.
* safe memory that has been set up to be preserved during the copy operation.
*/
SYM_CODE_START(arm64_relocate_new_kernel)
/* Setup the list loop variables. */
mov x18, x2 /* x18 = dtb address */
mov x17, x1 /* x17 = kimage_start */
mov x16, x0 /* x16 = kimage_head */
raw_dcache_line_size x15, x0 /* x15 = dcache line size */
mov x14, xzr /* x14 = entry ptr */
mov x13, xzr /* x13 = copy dest */
/* Check if the new image needs relocation. */
tbnz x16, IND_DONE_BIT, .Ldone
raw_dcache_line_size x15, x1 /* x15 = dcache line size */
.Lloop:
and x12, x16, PAGE_MASK /* x12 = addr */
@ -47,44 +43,28 @@ SYM_CODE_START(arm64_relocate_new_kernel)
tbz x16, IND_SOURCE_BIT, .Ltest_indirection
/* Invalidate dest page to PoC. */
mov x0, x13
add x20, x0, #PAGE_SIZE
mov x2, x13
add x20, x2, #PAGE_SIZE
sub x1, x15, #1
bic x0, x0, x1
2: dc ivac, x0
add x0, x0, x15
cmp x0, x20
bic x2, x2, x1
2: dc ivac, x2
add x2, x2, x15
cmp x2, x20
b.lo 2b
dsb sy
mov x20, x13
mov x21, x12
copy_page x20, x21, x0, x1, x2, x3, x4, x5, x6, x7
/* dest += PAGE_SIZE */
add x13, x13, PAGE_SIZE
copy_page x13, x12, x1, x2, x3, x4, x5, x6, x7, x8
b .Lnext
.Ltest_indirection:
tbz x16, IND_INDIRECTION_BIT, .Ltest_destination
/* ptr = addr */
mov x14, x12
mov x14, x12 /* ptr = addr */
b .Lnext
.Ltest_destination:
tbz x16, IND_DESTINATION_BIT, .Lnext
/* dest = addr */
mov x13, x12
mov x13, x12 /* dest = addr */
.Lnext:
/* entry = *ptr++ */
ldr x16, [x14], #8
/* while (!(entry & DONE)) */
tbz x16, IND_DONE_BIT, .Lloop
ldr x16, [x14], #8 /* entry = *ptr++ */
tbz x16, IND_DONE_BIT, .Lloop /* while (!(entry & DONE)) */
.Ldone:
/* wait for writes from copy_page to finish */
dsb nsh

View File

@ -168,6 +168,21 @@ static void __init smp_build_mpidr_hash(void)
pr_warn("Large number of MPIDR hash buckets detected\n");
}
static void *early_fdt_ptr __initdata;
void __init *get_early_fdt_ptr(void)
{
return early_fdt_ptr;
}
asmlinkage void __init early_fdt_map(u64 dt_phys)
{
int fdt_size;
early_fixmap_init();
early_fdt_ptr = fixmap_remap_fdt(dt_phys, &fdt_size, PAGE_KERNEL);
}
static void __init setup_machine_fdt(phys_addr_t dt_phys)
{
int size;

View File

@ -100,6 +100,7 @@ SYM_FUNC_END(__cpu_suspend_enter)
.pushsection ".idmap.text", "awx"
SYM_CODE_START(cpu_resume)
bl init_kernel_el
bl switch_to_vhe
bl __cpu_setup
/* enable the MMU early - so we can access sleep_save_stash by va */
adrp x1, swapper_pg_dir

View File

@ -44,6 +44,10 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
unsigned long fp = frame->fp;
struct stack_info info;
/* Terminal record; nothing to unwind */
if (!fp)
return -EINVAL;
if (fp & 0xf)
return -EINVAL;
@ -104,15 +108,6 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
frame->pc = ptrauth_strip_insn_pac(frame->pc);
/*
* Frames created upon entry from EL0 have NULL FP and PC values, so
* don't bother reporting these. Frames created by __noreturn functions
* might have a valid FP even if PC is bogus, so only terminate where
* both are NULL.
*/
if (!frame->fp && !frame->pc)
return -EINVAL;
return 0;
}
NOKPROBE_SYMBOL(unwind_frame);

View File

@ -65,35 +65,6 @@ static inline bool has_syscall_work(unsigned long flags)
int syscall_trace_enter(struct pt_regs *regs);
void syscall_trace_exit(struct pt_regs *regs);
#ifdef CONFIG_ARM64_ERRATUM_1463225
DECLARE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa);
static void cortex_a76_erratum_1463225_svc_handler(void)
{
u32 reg, val;
if (!unlikely(test_thread_flag(TIF_SINGLESTEP)))
return;
if (!unlikely(this_cpu_has_cap(ARM64_WORKAROUND_1463225)))
return;
__this_cpu_write(__in_cortex_a76_erratum_1463225_wa, 1);
reg = read_sysreg(mdscr_el1);
val = reg | DBG_MDSCR_SS | DBG_MDSCR_KDE;
write_sysreg(val, mdscr_el1);
asm volatile("msr daifclr, #8");
isb();
/* We will have taken a single-step exception by this point */
write_sysreg(reg, mdscr_el1);
__this_cpu_write(__in_cortex_a76_erratum_1463225_wa, 0);
}
#else
static void cortex_a76_erratum_1463225_svc_handler(void) { }
#endif /* CONFIG_ARM64_ERRATUM_1463225 */
static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
const syscall_fn_t syscall_table[])
{
@ -120,7 +91,6 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
* (Similarly for HVC and SMC elsewhere.)
*/
cortex_a76_erratum_1463225_svc_handler();
local_daif_restore(DAIF_PROCCTX);
if (flags & _TIF_MTE_ASYNC_FAULT) {

View File

@ -199,76 +199,38 @@ static int freq_inv_set_max_ratio(int cpu, u64 max_rate, u64 ref_rate)
return 0;
}
static inline bool
enable_policy_freq_counters(int cpu, cpumask_var_t valid_cpus)
{
struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
if (!policy) {
pr_debug("CPU%d: No cpufreq policy found.\n", cpu);
return false;
}
if (cpumask_subset(policy->related_cpus, valid_cpus))
cpumask_or(amu_fie_cpus, policy->related_cpus,
amu_fie_cpus);
cpufreq_cpu_put(policy);
return true;
}
static DEFINE_STATIC_KEY_FALSE(amu_fie_key);
#define amu_freq_invariant() static_branch_unlikely(&amu_fie_key)
static int __init init_amu_fie(void)
static void amu_fie_setup(const struct cpumask *cpus)
{
bool invariance_status = topology_scale_freq_invariant();
cpumask_var_t valid_cpus;
bool have_policy = false;
int ret = 0;
bool invariant;
int cpu;
if (!zalloc_cpumask_var(&valid_cpus, GFP_KERNEL))
return -ENOMEM;
/* We are already set since the last insmod of cpufreq driver */
if (unlikely(cpumask_subset(cpus, amu_fie_cpus)))
return;
if (!zalloc_cpumask_var(&amu_fie_cpus, GFP_KERNEL)) {
ret = -ENOMEM;
goto free_valid_mask;
}
for_each_present_cpu(cpu) {
for_each_cpu(cpu, cpus) {
if (!freq_counters_valid(cpu) ||
freq_inv_set_max_ratio(cpu,
cpufreq_get_hw_max_freq(cpu) * 1000,
arch_timer_get_rate()))
continue;
cpumask_set_cpu(cpu, valid_cpus);
have_policy |= enable_policy_freq_counters(cpu, valid_cpus);
return;
}
/*
* If we are not restricted by cpufreq policies, we only enable
* the use of the AMU feature for FIE if all CPUs support AMU.
* Otherwise, enable_policy_freq_counters has already enabled
* policy cpus.
*/
if (!have_policy && cpumask_equal(valid_cpus, cpu_present_mask))
cpumask_or(amu_fie_cpus, amu_fie_cpus, valid_cpus);
cpumask_or(amu_fie_cpus, amu_fie_cpus, cpus);
if (!cpumask_empty(amu_fie_cpus)) {
pr_info("CPUs[%*pbl]: counters will be used for FIE.",
cpumask_pr_args(amu_fie_cpus));
static_branch_enable(&amu_fie_key);
}
invariant = topology_scale_freq_invariant();
/*
* If the system is not fully invariant after AMU init, disable
* partial use of counters for frequency invariance.
*/
if (!topology_scale_freq_invariant())
static_branch_disable(&amu_fie_key);
/* We aren't fully invariant yet */
if (!invariant && !cpumask_equal(amu_fie_cpus, cpu_present_mask))
return;
static_branch_enable(&amu_fie_key);
pr_debug("CPUs[%*pbl]: counters will be used for FIE.",
cpumask_pr_args(cpus));
/*
* Task scheduler behavior depends on frequency invariance support,
@ -276,15 +238,50 @@ static int __init init_amu_fie(void)
* a result of counter initialisation and use, retrigger the build of
* scheduling domains to ensure the information is propagated properly.
*/
if (invariance_status != topology_scale_freq_invariant())
if (!invariant)
rebuild_sched_domains_energy();
}
free_valid_mask:
free_cpumask_var(valid_cpus);
static int init_amu_fie_callback(struct notifier_block *nb, unsigned long val,
void *data)
{
struct cpufreq_policy *policy = data;
if (val == CPUFREQ_CREATE_POLICY)
amu_fie_setup(policy->related_cpus);
/*
* We don't need to handle CPUFREQ_REMOVE_POLICY event as the AMU
* counters don't have any dependency on cpufreq driver once we have
* initialized AMU support and enabled invariance. The AMU counters will
* keep on working just fine in the absence of the cpufreq driver, and
* for the CPUs for which there are no counters available, the last set
* value of freq_scale will remain valid as that is the frequency those
* CPUs are running at.
*/
return 0;
}
static struct notifier_block init_amu_fie_notifier = {
.notifier_call = init_amu_fie_callback,
};
static int __init init_amu_fie(void)
{
int ret;
if (!zalloc_cpumask_var(&amu_fie_cpus, GFP_KERNEL))
return -ENOMEM;
ret = cpufreq_register_notifier(&init_amu_fie_notifier,
CPUFREQ_POLICY_NOTIFIER);
if (ret)
free_cpumask_var(amu_fie_cpus);
return ret;
}
late_initcall_sync(init_amu_fie);
core_initcall(init_amu_fie);
bool arch_freq_counters_available(const struct cpumask *cpus)
{

View File

@ -45,7 +45,7 @@
#include <asm/system_misc.h>
#include <asm/sysreg.h>
static const char *handler[]= {
static const char *handler[] = {
"Synchronous Abort",
"IRQ",
"FIQ",

View File

@ -44,7 +44,6 @@ endif
# Disable gcov profiling for VDSO code
GCOV_PROFILE := n
obj-y += vdso.o
targets += vdso.lds
CPPFLAGS_vdso.lds += -P -C -U$(ARCH)

View File

@ -13,4 +13,4 @@
LC_ALL=C
sed -n -e 's/^00*/0/' -e \
's/^\([0-9a-fA-F]*\) . VDSO_\([a-zA-Z0-9_]*\)$/\#define vdso_offset_\2\t0x\1/p'
's/^\([0-9a-fA-F]*\) . VDSO_\([a-zA-Z0-9_]*\)$/\#define vdso_offset_\2 0x\1/p'

View File

@ -155,7 +155,6 @@ c-obj-vdso-gettimeofday := $(addprefix $(obj)/, $(c-obj-vdso-gettimeofday))
asm-obj-vdso := $(addprefix $(obj)/, $(asm-obj-vdso))
obj-vdso := $(c-obj-vdso) $(c-obj-vdso-gettimeofday) $(asm-obj-vdso)
obj-y += vdso.o
targets += vdso.lds
CPPFLAGS_vdso.lds += -P -C -U$(ARCH)

View File

@ -316,3 +316,11 @@ ASSERT((__entry_tramp_text_end - __entry_tramp_text_start) == PAGE_SIZE,
* If padding is applied before .head.text, virt<->phys conversions will fail.
*/
ASSERT(_text == KIMAGE_VADDR, "HEAD is misaligned")
ASSERT(swapper_pg_dir - reserved_pg_dir == RESERVED_SWAPPER_OFFSET,
"RESERVED_SWAPPER_OFFSET is wrong!")
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
ASSERT(swapper_pg_dir - tramp_pg_dir == TRAMP_SWAPPER_OFFSET,
"TRAMP_SWAPPER_OFFSET is wrong!")
#endif

View File

@ -1967,6 +1967,9 @@ static int __init early_kvm_mode_cfg(char *arg)
return 0;
}
if (strcmp(arg, "nvhe") == 0 && !WARN_ON(is_kernel_in_hyp_mode()))
return 0;
return -EINVAL;
}
early_param("kvm-arm.mode", early_kvm_mode_cfg);

View File

@ -191,7 +191,7 @@ SYM_CODE_START_LOCAL(__kvm_hyp_init_cpu)
2: msr SPsel, #1 // We want to use SP_EL{1,2}
/* Initialize EL2 CPU state to sane values. */
init_el2_state nvhe // Clobbers x0..x2
init_el2_state // Clobbers x0..x2
/* Enable MMU, set vectors and stack. */
mov x0, x28

View File

@ -6,6 +6,7 @@ obj-y := dma-mapping.o extable.o fault.o init.o \
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_PTDUMP_CORE) += ptdump.o
obj-$(CONFIG_PTDUMP_DEBUGFS) += ptdump_debugfs.o
obj-$(CONFIG_TRANS_TABLE) += trans_pgd.o
obj-$(CONFIG_NUMA) += numa.o
obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o
obj-$(CONFIG_ARM64_MTE) += mteswap.o

View File

@ -564,7 +564,7 @@ static int __kprobes do_page_fault(unsigned long far, unsigned int esr,
mmap_read_lock(mm);
} else {
/*
* The above down_read_trylock() might have succeeded in which
* The above mmap_read_trylock() might have succeeded in which
* case, we'll have missed the might_sleep() from down_read().
*/
might_sleep();
@ -875,44 +875,12 @@ static void debug_exception_exit(struct pt_regs *regs)
}
NOKPROBE_SYMBOL(debug_exception_exit);
#ifdef CONFIG_ARM64_ERRATUM_1463225
DECLARE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa);
static int cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
{
if (user_mode(regs))
return 0;
if (!__this_cpu_read(__in_cortex_a76_erratum_1463225_wa))
return 0;
/*
* We've taken a dummy step exception from the kernel to ensure
* that interrupts are re-enabled on the syscall path. Return back
* to cortex_a76_erratum_1463225_svc_handler() with debug exceptions
* masked so that we can safely restore the mdscr and get on with
* handling the syscall.
*/
regs->pstate |= PSR_D_BIT;
return 1;
}
#else
static int cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
{
return 0;
}
#endif /* CONFIG_ARM64_ERRATUM_1463225 */
NOKPROBE_SYMBOL(cortex_a76_erratum_1463225_debug_handler);
void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr,
struct pt_regs *regs)
{
const struct fault_info *inf = esr_to_debug_fault_info(esr);
unsigned long pc = instruction_pointer(regs);
if (cortex_a76_erratum_1463225_debug_handler(regs))
return;
debug_exception_enter(regs);
if (user_mode(regs) && !is_ttbr0_addr(pc))

View File

@ -5,20 +5,11 @@
* Copyright (C) 2012 ARM Ltd.
*/
#include <linux/elf.h>
#include <linux/fs.h>
#include <linux/memblock.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/export.h>
#include <linux/shm.h>
#include <linux/sched/signal.h>
#include <linux/sched/mm.h>
#include <linux/io.h>
#include <linux/personality.h>
#include <linux/random.h>
#include <linux/memblock.h>
#include <linux/types.h>
#include <asm/cputype.h>
#include <asm/page.h>
/*
* You really shouldn't be using read() or write() on /dev/mem. This might go

View File

@ -628,7 +628,7 @@ static bool arm64_early_this_cpu_has_bti(void)
if (!IS_ENABLED(CONFIG_ARM64_BTI_KERNEL))
return false;
pfr1 = read_sysreg_s(SYS_ID_AA64PFR1_EL1);
pfr1 = __read_sysreg_by_encoding(SYS_ID_AA64PFR1_EL1);
return cpuid_feature_extract_unsigned_field(pfr1,
ID_AA64PFR1_BT_SHIFT);
}
@ -1094,6 +1094,7 @@ static void free_empty_tables(unsigned long addr, unsigned long end,
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
struct vmem_altmap *altmap)
{
WARN_ON((start < VMEMMAP_START) || (end > VMEMMAP_END));
return vmemmap_populate_basepages(start, end, node, altmap);
}
#else /* !ARM64_SWAPPER_USES_SECTION_MAPS */
@ -1107,6 +1108,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
pud_t *pudp;
pmd_t *pmdp;
WARN_ON((start < VMEMMAP_START) || (end > VMEMMAP_END));
do {
next = pmd_addr_end(addr, end);

View File

@ -291,17 +291,7 @@ skip_pgd:
/* We're done: fire up the MMU again */
mrs x17, sctlr_el1
orr x17, x17, #SCTLR_ELx_M
msr sctlr_el1, x17
isb
/*
* Invalidate the local I-cache so that any instructions fetched
* speculatively from the PoC are discarded, since they may have
* been dynamically patched at the PoU.
*/
ic iallu
dsb nsh
isb
set_sctlr_el1 x17
/* Set the flag to zero to indicate that we're all done */
str wzr, [flag_ptr]
@ -464,8 +454,8 @@ SYM_FUNC_START(__cpu_setup)
#endif
msr mair_el1, x5
/*
* Set/prepare TCR and TTBR. We use 512GB (39-bit) address range for
* both user and kernel.
* Set/prepare TCR and TTBR. TCR_EL1.T1SZ gets further
* adjusted if the kernel is compiled with 52bit VA support.
*/
mov_q x10, TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \
TCR_TG_FLAGS | TCR_KASLR_FLAGS | TCR_ASID16 | \

View File

@ -324,6 +324,7 @@ void ptdump_walk(struct seq_file *s, struct ptdump_info *info)
st = (struct pg_state){
.seq = s,
.marker = info->markers,
.level = -1,
.ptdump = {
.note_page = note_page,
.range = (struct ptdump_range[]){

324
arch/arm64/mm/trans_pgd.c Normal file
View File

@ -0,0 +1,324 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Transitional page tables for kexec and hibernate
*
* This file derived from: arch/arm64/kernel/hibernate.c
*
* Copyright (c) 2020, Microsoft Corporation.
* Pavel Tatashin <pasha.tatashin@soleen.com>
*
*/
/*
* Transitional tables are used during system transferring from one world to
* another: such as during hibernate restore, and kexec reboots. During these
* phases one cannot rely on page table not being overwritten. This is because
* hibernate and kexec can overwrite the current page tables during transition.
*/
#include <asm/trans_pgd.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <linux/suspend.h>
#include <linux/bug.h>
#include <linux/mm.h>
#include <linux/mmzone.h>
static void *trans_alloc(struct trans_pgd_info *info)
{
return info->trans_alloc_page(info->trans_alloc_arg);
}
static void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr)
{
pte_t pte = READ_ONCE(*src_ptep);
if (pte_valid(pte)) {
/*
* Resume will overwrite areas that may be marked
* read only (code, rodata). Clear the RDONLY bit from
* the temporary mappings we use during restore.
*/
set_pte(dst_ptep, pte_mkwrite(pte));
} else if (debug_pagealloc_enabled() && !pte_none(pte)) {
/*
* debug_pagealloc will removed the PTE_VALID bit if
* the page isn't in use by the resume kernel. It may have
* been in use by the original kernel, in which case we need
* to put it back in our copy to do the restore.
*
* Before marking this entry valid, check the pfn should
* be mapped.
*/
BUG_ON(!pfn_valid(pte_pfn(pte)));
set_pte(dst_ptep, pte_mkpresent(pte_mkwrite(pte)));
}
}
static int copy_pte(struct trans_pgd_info *info, pmd_t *dst_pmdp,
pmd_t *src_pmdp, unsigned long start, unsigned long end)
{
pte_t *src_ptep;
pte_t *dst_ptep;
unsigned long addr = start;
dst_ptep = trans_alloc(info);
if (!dst_ptep)
return -ENOMEM;
pmd_populate_kernel(NULL, dst_pmdp, dst_ptep);
dst_ptep = pte_offset_kernel(dst_pmdp, start);
src_ptep = pte_offset_kernel(src_pmdp, start);
do {
_copy_pte(dst_ptep, src_ptep, addr);
} while (dst_ptep++, src_ptep++, addr += PAGE_SIZE, addr != end);
return 0;
}
static int copy_pmd(struct trans_pgd_info *info, pud_t *dst_pudp,
pud_t *src_pudp, unsigned long start, unsigned long end)
{
pmd_t *src_pmdp;
pmd_t *dst_pmdp;
unsigned long next;
unsigned long addr = start;
if (pud_none(READ_ONCE(*dst_pudp))) {
dst_pmdp = trans_alloc(info);
if (!dst_pmdp)
return -ENOMEM;
pud_populate(NULL, dst_pudp, dst_pmdp);
}
dst_pmdp = pmd_offset(dst_pudp, start);
src_pmdp = pmd_offset(src_pudp, start);
do {
pmd_t pmd = READ_ONCE(*src_pmdp);
next = pmd_addr_end(addr, end);
if (pmd_none(pmd))
continue;
if (pmd_table(pmd)) {
if (copy_pte(info, dst_pmdp, src_pmdp, addr, next))
return -ENOMEM;
} else {
set_pmd(dst_pmdp,
__pmd(pmd_val(pmd) & ~PMD_SECT_RDONLY));
}
} while (dst_pmdp++, src_pmdp++, addr = next, addr != end);
return 0;
}
static int copy_pud(struct trans_pgd_info *info, p4d_t *dst_p4dp,
p4d_t *src_p4dp, unsigned long start,
unsigned long end)
{
pud_t *dst_pudp;
pud_t *src_pudp;
unsigned long next;
unsigned long addr = start;
if (p4d_none(READ_ONCE(*dst_p4dp))) {
dst_pudp = trans_alloc(info);
if (!dst_pudp)
return -ENOMEM;
p4d_populate(NULL, dst_p4dp, dst_pudp);
}
dst_pudp = pud_offset(dst_p4dp, start);
src_pudp = pud_offset(src_p4dp, start);
do {
pud_t pud = READ_ONCE(*src_pudp);
next = pud_addr_end(addr, end);
if (pud_none(pud))
continue;
if (pud_table(pud)) {
if (copy_pmd(info, dst_pudp, src_pudp, addr, next))
return -ENOMEM;
} else {
set_pud(dst_pudp,
__pud(pud_val(pud) & ~PUD_SECT_RDONLY));
}
} while (dst_pudp++, src_pudp++, addr = next, addr != end);
return 0;
}
static int copy_p4d(struct trans_pgd_info *info, pgd_t *dst_pgdp,
pgd_t *src_pgdp, unsigned long start,
unsigned long end)
{
p4d_t *dst_p4dp;
p4d_t *src_p4dp;
unsigned long next;
unsigned long addr = start;
dst_p4dp = p4d_offset(dst_pgdp, start);
src_p4dp = p4d_offset(src_pgdp, start);
do {
next = p4d_addr_end(addr, end);
if (p4d_none(READ_ONCE(*src_p4dp)))
continue;
if (copy_pud(info, dst_p4dp, src_p4dp, addr, next))
return -ENOMEM;
} while (dst_p4dp++, src_p4dp++, addr = next, addr != end);
return 0;
}
static int copy_page_tables(struct trans_pgd_info *info, pgd_t *dst_pgdp,
unsigned long start, unsigned long end)
{
unsigned long next;
unsigned long addr = start;
pgd_t *src_pgdp = pgd_offset_k(start);
dst_pgdp = pgd_offset_pgd(dst_pgdp, start);
do {
next = pgd_addr_end(addr, end);
if (pgd_none(READ_ONCE(*src_pgdp)))
continue;
if (copy_p4d(info, dst_pgdp, src_pgdp, addr, next))
return -ENOMEM;
} while (dst_pgdp++, src_pgdp++, addr = next, addr != end);
return 0;
}
/*
* Create trans_pgd and copy linear map.
* info: contains allocator and its argument
* dst_pgdp: new page table that is created, and to which map is copied.
* start: Start of the interval (inclusive).
* end: End of the interval (exclusive).
*
* Returns 0 on success, and -ENOMEM on failure.
*/
int trans_pgd_create_copy(struct trans_pgd_info *info, pgd_t **dst_pgdp,
unsigned long start, unsigned long end)
{
int rc;
pgd_t *trans_pgd = trans_alloc(info);
if (!trans_pgd) {
pr_err("Failed to allocate memory for temporary page tables.\n");
return -ENOMEM;
}
rc = copy_page_tables(info, trans_pgd, start, end);
if (!rc)
*dst_pgdp = trans_pgd;
return rc;
}
/*
* Add map entry to trans_pgd for a base-size page at PTE level.
* info: contains allocator and its argument
* trans_pgd: page table in which new map is added.
* page: page to be mapped.
* dst_addr: new VA address for the page
* pgprot: protection for the page.
*
* Returns 0 on success, and -ENOMEM on failure.
*/
int trans_pgd_map_page(struct trans_pgd_info *info, pgd_t *trans_pgd,
void *page, unsigned long dst_addr, pgprot_t pgprot)
{
pgd_t *pgdp;
p4d_t *p4dp;
pud_t *pudp;
pmd_t *pmdp;
pte_t *ptep;
pgdp = pgd_offset_pgd(trans_pgd, dst_addr);
if (pgd_none(READ_ONCE(*pgdp))) {
p4dp = trans_alloc(info);
if (!pgdp)
return -ENOMEM;
pgd_populate(NULL, pgdp, p4dp);
}
p4dp = p4d_offset(pgdp, dst_addr);
if (p4d_none(READ_ONCE(*p4dp))) {
pudp = trans_alloc(info);
if (!pudp)
return -ENOMEM;
p4d_populate(NULL, p4dp, pudp);
}
pudp = pud_offset(p4dp, dst_addr);
if (pud_none(READ_ONCE(*pudp))) {
pmdp = trans_alloc(info);
if (!pmdp)
return -ENOMEM;
pud_populate(NULL, pudp, pmdp);
}
pmdp = pmd_offset(pudp, dst_addr);
if (pmd_none(READ_ONCE(*pmdp))) {
ptep = trans_alloc(info);
if (!ptep)
return -ENOMEM;
pmd_populate_kernel(NULL, pmdp, ptep);
}
ptep = pte_offset_kernel(pmdp, dst_addr);
set_pte(ptep, pfn_pte(virt_to_pfn(page), pgprot));
return 0;
}
/*
* The page we want to idmap may be outside the range covered by VA_BITS that
* can be built using the kernel's p?d_populate() helpers. As a one off, for a
* single page, we build these page tables bottom up and just assume that will
* need the maximum T0SZ.
*
* Returns 0 on success, and -ENOMEM on failure.
* On success trans_ttbr0 contains page table with idmapped page, t0sz is set to
* maximum T0SZ for this page.
*/
int trans_pgd_idmap_page(struct trans_pgd_info *info, phys_addr_t *trans_ttbr0,
unsigned long *t0sz, void *page)
{
phys_addr_t dst_addr = virt_to_phys(page);
unsigned long pfn = __phys_to_pfn(dst_addr);
int max_msb = (dst_addr & GENMASK(52, 48)) ? 51 : 47;
int bits_mapped = PAGE_SHIFT - 4;
unsigned long level_mask, prev_level_entry, *levels[4];
int this_level, index, level_lsb, level_msb;
dst_addr &= PAGE_MASK;
prev_level_entry = pte_val(pfn_pte(pfn, PAGE_KERNEL_EXEC));
for (this_level = 3; this_level >= 0; this_level--) {
levels[this_level] = trans_alloc(info);
if (!levels[this_level])
return -ENOMEM;
level_lsb = ARM64_HW_PGTABLE_LEVEL_SHIFT(this_level);
level_msb = min(level_lsb + bits_mapped, max_msb);
level_mask = GENMASK_ULL(level_msb, level_lsb);
index = (dst_addr & level_mask) >> level_lsb;
*(levels[this_level] + index) = prev_level_entry;
pfn = virt_to_pfn(levels[this_level]);
prev_level_entry = pte_val(pfn_pte(pfn,
__pgprot(PMD_TYPE_TABLE)));
if (level_msb == max_msb)
break;
}
*trans_ttbr0 = phys_to_ttbr(__pfn_to_phys(pfn));
*t0sz = TCR_T0SZ(max_msb + 1);
return 0;
}

View File

@ -2195,7 +2195,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
if (event->attr.sample_type & PERF_SAMPLE_WEIGHT &&
ppmu->get_mem_weight)
ppmu->get_mem_weight(&data.weight);
ppmu->get_mem_weight(&data.weight.full);
if (perf_event_overflow(event, &data, regs))
power_pmu_stop(event, 0);

View File

@ -81,6 +81,8 @@ DEFINE_STATIC_CALL_NULL(x86_pmu_swap_task_ctx, *x86_pmu.swap_task_ctx);
DEFINE_STATIC_CALL_NULL(x86_pmu_drain_pebs, *x86_pmu.drain_pebs);
DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_aliases, *x86_pmu.pebs_aliases);
DEFINE_STATIC_CALL_NULL(x86_pmu_guest_get_msrs, *x86_pmu.guest_get_msrs);
u64 __read_mostly hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
@ -253,6 +255,8 @@ static bool check_hw_exists(void)
if (ret)
goto msr_fail;
for (i = 0; i < x86_pmu.num_counters_fixed; i++) {
if (fixed_counter_disabled(i))
continue;
if (val & (0x03 << i*4)) {
bios_fail = 1;
val_fail = val;
@ -665,6 +669,12 @@ void x86_pmu_disable_all(void)
}
}
struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
{
return static_call(x86_pmu_guest_get_msrs)(nr);
}
EXPORT_SYMBOL_GPL(perf_guest_get_msrs);
/*
* There may be PMI landing after enabled=0. The PMI hitting could be before or
* after disable_all.
@ -1523,6 +1533,8 @@ void perf_event_print_debug(void)
cpu, idx, prev_left);
}
for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
if (fixed_counter_disabled(idx))
continue;
rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
@ -1923,6 +1935,8 @@ static void x86_pmu_static_call_update(void)
static_call_update(x86_pmu_drain_pebs, x86_pmu.drain_pebs);
static_call_update(x86_pmu_pebs_aliases, x86_pmu.pebs_aliases);
static_call_update(x86_pmu_guest_get_msrs, x86_pmu.guest_get_msrs);
}
static void _x86_pmu_read(struct perf_event *event)
@ -1930,6 +1944,13 @@ static void _x86_pmu_read(struct perf_event *event)
x86_perf_event_update(event);
}
static inline struct perf_guest_switch_msr *
perf_guest_get_msrs_nop(int *nr)
{
*nr = 0;
return NULL;
}
static int __init init_hw_perf_events(void)
{
struct x86_pmu_quirk *quirk;
@ -1995,12 +2016,17 @@ static int __init init_hw_perf_events(void)
pr_info("... generic registers: %d\n", x86_pmu.num_counters);
pr_info("... value mask: %016Lx\n", x86_pmu.cntval_mask);
pr_info("... max period: %016Lx\n", x86_pmu.max_period);
pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed);
pr_info("... fixed-purpose events: %lu\n",
hweight64((((1ULL << x86_pmu.num_counters_fixed) - 1)
<< INTEL_PMC_IDX_FIXED) & x86_pmu.intel_ctrl));
pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl);
if (!x86_pmu.read)
x86_pmu.read = _x86_pmu_read;
if (!x86_pmu.guest_get_msrs)
x86_pmu.guest_get_msrs = perf_guest_get_msrs_nop;
x86_pmu_static_call_update();
/*

View File

@ -275,6 +275,55 @@ static struct extra_reg intel_icl_extra_regs[] __read_mostly = {
EVENT_EXTRA_END
};
static struct extra_reg intel_spr_extra_regs[] __read_mostly = {
INTEL_UEVENT_EXTRA_REG(0x012a, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0),
INTEL_UEVENT_EXTRA_REG(0x012b, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1),
INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE),
EVENT_EXTRA_END
};
static struct event_constraint intel_spr_event_constraints[] = {
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x01c0, 0), /* INST_RETIRED.PREC_DIST */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
FIXED_EVENT_CONSTRAINT(0x0400, 3), /* SLOTS */
METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_RETIRING, 0),
METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BAD_SPEC, 1),
METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_FE_BOUND, 2),
METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BE_BOUND, 3),
METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_HEAVY_OPS, 4),
METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BR_MISPREDICT, 5),
METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_FETCH_LAT, 6),
METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_MEM_BOUND, 7),
INTEL_EVENT_CONSTRAINT(0x2e, 0xff),
INTEL_EVENT_CONSTRAINT(0x3c, 0xff),
/*
* Generally event codes < 0x90 are restricted to counters 0-3.
* The 0x2E and 0x3C are exception, which has no restriction.
*/
INTEL_EVENT_CONSTRAINT_RANGE(0x01, 0x8f, 0xf),
INTEL_UEVENT_CONSTRAINT(0x01a3, 0xf),
INTEL_UEVENT_CONSTRAINT(0x02a3, 0xf),
INTEL_UEVENT_CONSTRAINT(0x08a3, 0xf),
INTEL_UEVENT_CONSTRAINT(0x04a4, 0x1),
INTEL_UEVENT_CONSTRAINT(0x08a4, 0x1),
INTEL_UEVENT_CONSTRAINT(0x02cd, 0x1),
INTEL_EVENT_CONSTRAINT(0xce, 0x1),
INTEL_EVENT_CONSTRAINT_RANGE(0xd0, 0xdf, 0xf),
/*
* Generally event codes >= 0x90 are likely to have no restrictions.
* The exception are defined as above.
*/
INTEL_EVENT_CONSTRAINT_RANGE(0x90, 0xfe, 0xff),
EVENT_CONSTRAINT_END
};
EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3");
EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3");
EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2");
@ -314,11 +363,15 @@ EVENT_ATTR_STR_HT(topdown-recovery-bubbles, td_recovery_bubbles,
EVENT_ATTR_STR_HT(topdown-recovery-bubbles.scale, td_recovery_bubbles_scale,
"4", "2");
EVENT_ATTR_STR(slots, slots, "event=0x00,umask=0x4");
EVENT_ATTR_STR(topdown-retiring, td_retiring, "event=0x00,umask=0x80");
EVENT_ATTR_STR(topdown-bad-spec, td_bad_spec, "event=0x00,umask=0x81");
EVENT_ATTR_STR(topdown-fe-bound, td_fe_bound, "event=0x00,umask=0x82");
EVENT_ATTR_STR(topdown-be-bound, td_be_bound, "event=0x00,umask=0x83");
EVENT_ATTR_STR(slots, slots, "event=0x00,umask=0x4");
EVENT_ATTR_STR(topdown-retiring, td_retiring, "event=0x00,umask=0x80");
EVENT_ATTR_STR(topdown-bad-spec, td_bad_spec, "event=0x00,umask=0x81");
EVENT_ATTR_STR(topdown-fe-bound, td_fe_bound, "event=0x00,umask=0x82");
EVENT_ATTR_STR(topdown-be-bound, td_be_bound, "event=0x00,umask=0x83");
EVENT_ATTR_STR(topdown-heavy-ops, td_heavy_ops, "event=0x00,umask=0x84");
EVENT_ATTR_STR(topdown-br-mispredict, td_br_mispredict, "event=0x00,umask=0x85");
EVENT_ATTR_STR(topdown-fetch-lat, td_fetch_lat, "event=0x00,umask=0x86");
EVENT_ATTR_STR(topdown-mem-bound, td_mem_bound, "event=0x00,umask=0x87");
static struct attribute *snb_events_attrs[] = {
EVENT_PTR(td_slots_issued),
@ -384,6 +437,108 @@ static u64 intel_pmu_event_map(int hw_event)
return intel_perfmon_event_map[hw_event];
}
static __initconst const u64 spr_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
[ C(L1D ) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x81d0,
[ C(RESULT_MISS) ] = 0xe124,
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0x82d0,
},
},
[ C(L1I ) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_MISS) ] = 0xe424,
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
},
[ C(LL ) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x12a,
[ C(RESULT_MISS) ] = 0x12a,
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0x12a,
[ C(RESULT_MISS) ] = 0x12a,
},
},
[ C(DTLB) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x81d0,
[ C(RESULT_MISS) ] = 0xe12,
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0x82d0,
[ C(RESULT_MISS) ] = 0xe13,
},
},
[ C(ITLB) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = 0xe11,
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
},
[ C(BPU ) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x4c4,
[ C(RESULT_MISS) ] = 0x4c5,
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
},
[ C(NODE) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x12a,
[ C(RESULT_MISS) ] = 0x12a,
},
},
};
static __initconst const u64 spr_hw_cache_extra_regs
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
[ C(LL ) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x10001,
[ C(RESULT_MISS) ] = 0x3fbfc00001,
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0x3f3ffc0002,
[ C(RESULT_MISS) ] = 0x3f3fc00002,
},
},
[ C(NODE) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x10c000001,
[ C(RESULT_MISS) ] = 0x3fb3000001,
},
},
};
/*
* Notes on the events:
* - data reads do not include code reads (comparable to earlier tables)
@ -2134,18 +2289,6 @@ static void intel_tfa_pmu_enable_all(int added)
intel_pmu_enable_all(added);
}
static void enable_counter_freeze(void)
{
update_debugctlmsr(get_debugctlmsr() |
DEBUGCTLMSR_FREEZE_PERFMON_ON_PMI);
}
static void disable_counter_freeze(void)
{
update_debugctlmsr(get_debugctlmsr() &
~DEBUGCTLMSR_FREEZE_PERFMON_ON_PMI);
}
static inline u64 intel_pmu_get_status(void)
{
u64 status;
@ -2337,8 +2480,8 @@ static void __icl_update_topdown_event(struct perf_event *event,
}
}
static void update_saved_topdown_regs(struct perf_event *event,
u64 slots, u64 metrics)
static void update_saved_topdown_regs(struct perf_event *event, u64 slots,
u64 metrics, int metric_end)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct perf_event *other;
@ -2347,7 +2490,7 @@ static void update_saved_topdown_regs(struct perf_event *event,
event->hw.saved_slots = slots;
event->hw.saved_metric = metrics;
for_each_set_bit(idx, cpuc->active_mask, INTEL_PMC_IDX_TD_BE_BOUND + 1) {
for_each_set_bit(idx, cpuc->active_mask, metric_end + 1) {
if (!is_topdown_idx(idx))
continue;
other = cpuc->events[idx];
@ -2362,7 +2505,8 @@ static void update_saved_topdown_regs(struct perf_event *event,
* The PERF_METRICS and Fixed counter 3 are read separately. The values may be
* modify by a NMI. PMU has to be disabled before calling this function.
*/
static u64 icl_update_topdown_event(struct perf_event *event)
static u64 intel_update_topdown_event(struct perf_event *event, int metric_end)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct perf_event *other;
@ -2378,7 +2522,7 @@ static u64 icl_update_topdown_event(struct perf_event *event)
/* read PERF_METRICS */
rdpmcl(INTEL_PMC_FIXED_RDPMC_METRICS, metrics);
for_each_set_bit(idx, cpuc->active_mask, INTEL_PMC_IDX_TD_BE_BOUND + 1) {
for_each_set_bit(idx, cpuc->active_mask, metric_end + 1) {
if (!is_topdown_idx(idx))
continue;
other = cpuc->events[idx];
@ -2404,7 +2548,7 @@ static u64 icl_update_topdown_event(struct perf_event *event)
* Don't need to reset the PERF_METRICS and Fixed counter 3.
* Because the values will be restored in next schedule in.
*/
update_saved_topdown_regs(event, slots, metrics);
update_saved_topdown_regs(event, slots, metrics, metric_end);
reset = false;
}
@ -2413,12 +2557,18 @@ static u64 icl_update_topdown_event(struct perf_event *event)
wrmsrl(MSR_CORE_PERF_FIXED_CTR3, 0);
wrmsrl(MSR_PERF_METRICS, 0);
if (event)
update_saved_topdown_regs(event, 0, 0);
update_saved_topdown_regs(event, 0, 0, metric_end);
}
return slots;
}
static u64 icl_update_topdown_event(struct perf_event *event)
{
return intel_update_topdown_event(event, INTEL_PMC_IDX_METRIC_BASE +
x86_pmu.num_topdown_events - 1);
}
static void intel_pmu_read_topdown_event(struct perf_event *event)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
@ -2573,8 +2723,11 @@ static void intel_pmu_reset(void)
wrmsrl_safe(x86_pmu_config_addr(idx), 0ull);
wrmsrl_safe(x86_pmu_event_addr(idx), 0ull);
}
for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++)
for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
if (fixed_counter_disabled(idx))
continue;
wrmsrl_safe(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
}
if (ds)
ds->bts_index = ds->bts_buffer_base;
@ -2709,95 +2862,6 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
return handled;
}
static bool disable_counter_freezing = true;
static int __init intel_perf_counter_freezing_setup(char *s)
{
bool res;
if (kstrtobool(s, &res))
return -EINVAL;
disable_counter_freezing = !res;
return 1;
}
__setup("perf_v4_pmi=", intel_perf_counter_freezing_setup);
/*
* Simplified handler for Arch Perfmon v4:
* - We rely on counter freezing/unfreezing to enable/disable the PMU.
* This is done automatically on PMU ack.
* - Ack the PMU only after the APIC.
*/
static int intel_pmu_handle_irq_v4(struct pt_regs *regs)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int handled = 0;
bool bts = false;
u64 status;
int pmu_enabled = cpuc->enabled;
int loops = 0;
/* PMU has been disabled because of counter freezing */
cpuc->enabled = 0;
if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
bts = true;
intel_bts_disable_local();
handled = intel_pmu_drain_bts_buffer();
handled += intel_bts_interrupt();
}
status = intel_pmu_get_status();
if (!status)
goto done;
again:
intel_pmu_lbr_read();
if (++loops > 100) {
static bool warned;
if (!warned) {
WARN(1, "perfevents: irq loop stuck!\n");
perf_event_print_debug();
warned = true;
}
intel_pmu_reset();
goto done;
}
handled += handle_pmi_common(regs, status);
done:
/* Ack the PMI in the APIC */
apic_write(APIC_LVTPC, APIC_DM_NMI);
/*
* The counters start counting immediately while ack the status.
* Make it as close as possible to IRET. This avoids bogus
* freezing on Skylake CPUs.
*/
if (status) {
intel_pmu_ack_status(status);
} else {
/*
* CPU may issues two PMIs very close to each other.
* When the PMI handler services the first one, the
* GLOBAL_STATUS is already updated to reflect both.
* When it IRETs, the second PMI is immediately
* handled and it sees clear status. At the meantime,
* there may be a third PMI, because the freezing bit
* isn't set since the ack in first PMI handlers.
* Double check if there is more work to be done.
*/
status = intel_pmu_get_status();
if (status)
goto again;
}
if (bts)
intel_bts_enable_local();
cpuc->enabled = pmu_enabled;
return handled;
}
/*
* This handler is triggered by the local APIC, so the APIC IRQ handling
* rules apply:
@ -3563,6 +3627,26 @@ static int core_pmu_hw_config(struct perf_event *event)
return intel_pmu_bts_config(event);
}
#define INTEL_TD_METRIC_AVAILABLE_MAX (INTEL_TD_METRIC_RETIRING + \
((x86_pmu.num_topdown_events - 1) << 8))
static bool is_available_metric_event(struct perf_event *event)
{
return is_metric_event(event) &&
event->attr.config <= INTEL_TD_METRIC_AVAILABLE_MAX;
}
static inline bool is_mem_loads_event(struct perf_event *event)
{
return (event->attr.config & INTEL_ARCH_EVENT_MASK) == X86_CONFIG(.event=0xcd, .umask=0x01);
}
static inline bool is_mem_loads_aux_event(struct perf_event *event)
{
return (event->attr.config & INTEL_ARCH_EVENT_MASK) == X86_CONFIG(.event=0x03, .umask=0x82);
}
static int intel_pmu_hw_config(struct perf_event *event)
{
int ret = x86_pmu_hw_config(event);
@ -3636,7 +3720,7 @@ static int intel_pmu_hw_config(struct perf_event *event)
if (event->attr.config & X86_ALL_EVENT_FLAGS)
return -EINVAL;
if (is_metric_event(event)) {
if (is_available_metric_event(event)) {
struct perf_event *leader = event->group_leader;
/* The metric events don't support sampling. */
@ -3665,6 +3749,33 @@ static int intel_pmu_hw_config(struct perf_event *event)
}
}
/*
* The load latency event X86_CONFIG(.event=0xcd, .umask=0x01) on SPR
* doesn't function quite right. As a work-around it needs to always be
* co-scheduled with a auxiliary event X86_CONFIG(.event=0x03, .umask=0x82).
* The actual count of this second event is irrelevant it just needs
* to be active to make the first event function correctly.
*
* In a group, the auxiliary event must be in front of the load latency
* event. The rule is to simplify the implementation of the check.
* That's because perf cannot have a complete group at the moment.
*/
if (x86_pmu.flags & PMU_FL_MEM_LOADS_AUX &&
(event->attr.sample_type & PERF_SAMPLE_DATA_SRC) &&
is_mem_loads_event(event)) {
struct perf_event *leader = event->group_leader;
struct perf_event *sibling = NULL;
if (!is_mem_loads_aux_event(leader)) {
for_each_sibling_event(sibling, leader) {
if (is_mem_loads_aux_event(sibling))
break;
}
if (list_entry_is_head(sibling, &leader->sibling_list, sibling_list))
return -ENODATA;
}
}
if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY))
return 0;
@ -3680,26 +3791,6 @@ static int intel_pmu_hw_config(struct perf_event *event)
return 0;
}
#ifdef CONFIG_RETPOLINE
static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr);
static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr);
#endif
struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
{
#ifdef CONFIG_RETPOLINE
if (x86_pmu.guest_get_msrs == intel_guest_get_msrs)
return intel_guest_get_msrs(nr);
else if (x86_pmu.guest_get_msrs == core_guest_get_msrs)
return core_guest_get_msrs(nr);
#endif
if (x86_pmu.guest_get_msrs)
return x86_pmu.guest_get_msrs(nr);
*nr = 0;
return NULL;
}
EXPORT_SYMBOL_GPL(perf_guest_get_msrs);
static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
@ -3864,6 +3955,29 @@ icl_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
return hsw_get_event_constraints(cpuc, idx, event);
}
static struct event_constraint *
spr_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
struct perf_event *event)
{
struct event_constraint *c;
c = icl_get_event_constraints(cpuc, idx, event);
/*
* The :ppp indicates the Precise Distribution (PDist) facility, which
* is only supported on the GP counter 0. If a :ppp event which is not
* available on the GP counter 0, error out.
*/
if (event->attr.precise_ip == 3) {
if (c->idxmsk64 & BIT_ULL(0))
return &counter0_constraint;
return &emptyconstraint;
}
return c;
}
static struct event_constraint *
glp_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
struct perf_event *event)
@ -3953,6 +4067,14 @@ static u64 nhm_limit_period(struct perf_event *event, u64 left)
return max(left, 32ULL);
}
static u64 spr_limit_period(struct perf_event *event, u64 left)
{
if (event->attr.precise_ip == 3)
return max(left, 128ULL);
return left;
}
PMU_FORMAT_ATTR(event, "config:0-7" );
PMU_FORMAT_ATTR(umask, "config:8-15" );
PMU_FORMAT_ATTR(edge, "config:18" );
@ -4094,9 +4216,6 @@ static void intel_pmu_cpu_starting(int cpu)
if (x86_pmu.version > 1)
flip_smm_bit(&x86_pmu.attr_freeze_on_smi);
if (x86_pmu.counter_freezing)
enable_counter_freeze();
/* Disable perf metrics if any added CPU doesn't support it. */
if (x86_pmu.intel_cap.perf_metrics) {
union perf_capabilities perf_cap;
@ -4167,9 +4286,6 @@ static void free_excl_cntrs(struct cpu_hw_events *cpuc)
static void intel_pmu_cpu_dying(int cpu)
{
fini_debug_store_on_cpu(cpu);
if (x86_pmu.counter_freezing)
disable_counter_freeze();
}
void intel_cpuc_finish(struct cpu_hw_events *cpuc)
@ -4397,6 +4513,9 @@ static const struct x86_cpu_desc isolation_ucodes[] = {
INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_X, 2, 0x0b000014),
INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 3, 0x00000021),
INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 4, 0x00000000),
INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 5, 0x00000000),
INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 6, 0x00000000),
INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 7, 0x00000000),
INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_L, 3, 0x0000007c),
INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE, 3, 0x0000007c),
INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE, 9, 0x0000004e),
@ -4561,39 +4680,6 @@ static __init void intel_nehalem_quirk(void)
}
}
static const struct x86_cpu_desc counter_freezing_ucodes[] = {
INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT, 2, 0x0000000e),
INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT, 9, 0x0000002e),
INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT, 10, 0x00000008),
INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT_D, 1, 0x00000028),
INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT_PLUS, 1, 0x00000028),
INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT_PLUS, 8, 0x00000006),
{}
};
static bool intel_counter_freezing_broken(void)
{
return !x86_cpu_has_min_microcode_rev(counter_freezing_ucodes);
}
static __init void intel_counter_freezing_quirk(void)
{
/* Check if it's already disabled */
if (disable_counter_freezing)
return;
/*
* If the system starts with the wrong ucode, leave the
* counter-freezing feature permanently disabled.
*/
if (intel_counter_freezing_broken()) {
pr_info("PMU counter freezing disabled due to CPU errata,"
"please upgrade microcode\n");
x86_pmu.counter_freezing = false;
x86_pmu.handle_irq = intel_pmu_handle_irq;
}
}
/*
* enable software workaround for errata:
* SNB: BJ122
@ -4703,6 +4789,42 @@ static struct attribute *icl_tsx_events_attrs[] = {
NULL,
};
EVENT_ATTR_STR(mem-stores, mem_st_spr, "event=0xcd,umask=0x2");
EVENT_ATTR_STR(mem-loads-aux, mem_ld_aux, "event=0x03,umask=0x82");
static struct attribute *spr_events_attrs[] = {
EVENT_PTR(mem_ld_hsw),
EVENT_PTR(mem_st_spr),
EVENT_PTR(mem_ld_aux),
NULL,
};
static struct attribute *spr_td_events_attrs[] = {
EVENT_PTR(slots),
EVENT_PTR(td_retiring),
EVENT_PTR(td_bad_spec),
EVENT_PTR(td_fe_bound),
EVENT_PTR(td_be_bound),
EVENT_PTR(td_heavy_ops),
EVENT_PTR(td_br_mispredict),
EVENT_PTR(td_fetch_lat),
EVENT_PTR(td_mem_bound),
NULL,
};
static struct attribute *spr_tsx_events_attrs[] = {
EVENT_PTR(tx_start),
EVENT_PTR(tx_abort),
EVENT_PTR(tx_commit),
EVENT_PTR(tx_capacity_read),
EVENT_PTR(tx_capacity_write),
EVENT_PTR(tx_conflict),
EVENT_PTR(cycles_t),
EVENT_PTR(cycles_ct),
NULL,
};
static ssize_t freeze_on_smi_show(struct device *cdev,
struct device_attribute *attr,
char *buf)
@ -4926,7 +5048,7 @@ __init int intel_pmu_init(void)
union cpuid10_eax eax;
union cpuid10_ebx ebx;
struct event_constraint *c;
unsigned int unused;
unsigned int fixed_mask;
struct extra_reg *er;
bool pmem = false;
int version, i;
@ -4948,7 +5070,7 @@ __init int intel_pmu_init(void)
* Check whether the Architectural PerfMon supports
* Branch Misses Retired hw_event or not.
*/
cpuid(10, &eax.full, &ebx.full, &unused, &edx.full);
cpuid(10, &eax.full, &ebx.full, &fixed_mask, &edx.full);
if (eax.split.mask_length < ARCH_PERFMON_EVENTS_COUNT)
return -ENODEV;
@ -4972,15 +5094,15 @@ __init int intel_pmu_init(void)
* Quirk: v2 perfmon does not report fixed-purpose events, so
* assume at least 3 events, when not running in a hypervisor:
*/
if (version > 1) {
if (version > 1 && version < 5) {
int assume = 3 * !boot_cpu_has(X86_FEATURE_HYPERVISOR);
x86_pmu.num_counters_fixed =
max((int)edx.split.num_counters_fixed, assume);
}
if (version >= 4)
x86_pmu.counter_freezing = !disable_counter_freezing;
fixed_mask = (1L << x86_pmu.num_counters_fixed) - 1;
} else if (version >= 5)
x86_pmu.num_counters_fixed = fls(fixed_mask);
if (boot_cpu_has(X86_FEATURE_PDCM)) {
u64 capabilities;
@ -5109,7 +5231,6 @@ __init int intel_pmu_init(void)
case INTEL_FAM6_ATOM_GOLDMONT:
case INTEL_FAM6_ATOM_GOLDMONT_D:
x86_add_quirk(intel_counter_freezing_quirk);
memcpy(hw_cache_event_ids, glm_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs,
@ -5136,7 +5257,6 @@ __init int intel_pmu_init(void)
break;
case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
x86_add_quirk(intel_counter_freezing_quirk);
memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, glp_hw_cache_extra_regs,
@ -5483,12 +5603,50 @@ __init int intel_pmu_init(void)
x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xc9, .umask=0x04);
x86_pmu.lbr_pt_coexist = true;
intel_pmu_pebs_data_source_skl(pmem);
x86_pmu.num_topdown_events = 4;
x86_pmu.update_topdown_event = icl_update_topdown_event;
x86_pmu.set_topdown_event_period = icl_set_topdown_event_period;
pr_cont("Icelake events, ");
name = "icelake";
break;
case INTEL_FAM6_SAPPHIRERAPIDS_X:
pmem = true;
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, spr_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, spr_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
x86_pmu.event_constraints = intel_spr_event_constraints;
x86_pmu.pebs_constraints = intel_spr_pebs_event_constraints;
x86_pmu.extra_regs = intel_spr_extra_regs;
x86_pmu.limit_period = spr_limit_period;
x86_pmu.pebs_aliases = NULL;
x86_pmu.pebs_prec_dist = true;
x86_pmu.pebs_block = true;
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
x86_pmu.flags |= PMU_FL_PEBS_ALL;
x86_pmu.flags |= PMU_FL_INSTR_LATENCY;
x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX;
x86_pmu.hw_config = hsw_hw_config;
x86_pmu.get_event_constraints = spr_get_event_constraints;
extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
hsw_format_attr : nhm_format_attr;
extra_skl_attr = skl_format_attr;
mem_attr = spr_events_attrs;
td_attr = spr_td_events_attrs;
tsx_attr = spr_tsx_events_attrs;
x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xc9, .umask=0x04);
x86_pmu.lbr_pt_coexist = true;
intel_pmu_pebs_data_source_skl(pmem);
x86_pmu.num_topdown_events = 8;
x86_pmu.update_topdown_event = icl_update_topdown_event;
x86_pmu.set_topdown_event_period = icl_set_topdown_event_period;
pr_cont("Sapphire Rapids events, ");
name = "sapphire_rapids";
break;
default:
switch (x86_pmu.version) {
case 1:
@ -5531,8 +5689,7 @@ __init int intel_pmu_init(void)
x86_pmu.num_counters_fixed = INTEL_PMC_MAX_FIXED;
}
x86_pmu.intel_ctrl |=
((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED;
x86_pmu.intel_ctrl |= (u64)fixed_mask << INTEL_PMC_IDX_FIXED;
/* AnyThread may be deprecated on arch perfmon v5 or later */
if (x86_pmu.intel_cap.anythread_deprecated)
@ -5549,13 +5706,22 @@ __init int intel_pmu_init(void)
* events to the generic counters.
*/
if (c->idxmsk64 & INTEL_PMC_MSK_TOPDOWN) {
/*
* Disable topdown slots and metrics events,
* if slots event is not in CPUID.
*/
if (!(INTEL_PMC_MSK_FIXED_SLOTS & x86_pmu.intel_ctrl))
c->idxmsk64 = 0;
c->weight = hweight64(c->idxmsk64);
continue;
}
if (c->cmask == FIXED_EVENT_FLAGS
&& c->idxmsk64 != INTEL_PMC_MSK_FIXED_REF_CYCLES) {
c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
if (c->cmask == FIXED_EVENT_FLAGS) {
/* Disabled fixed counters which are not in CPUID */
c->idxmsk64 &= x86_pmu.intel_ctrl;
if (c->idxmsk64 != INTEL_PMC_MSK_FIXED_REF_CYCLES)
c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
}
c->idxmsk64 &=
~(~0ULL << (INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed));
@ -5601,13 +5767,6 @@ __init int intel_pmu_init(void)
pr_cont("full-width counters, ");
}
/*
* For arch perfmon 4 use counter freezing to avoid
* several MSR accesses in the PMI.
*/
if (x86_pmu.counter_freezing)
x86_pmu.handle_irq = intel_pmu_handle_irq_v4;
if (x86_pmu.intel_cap.perf_metrics)
x86_pmu.intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS;

View File

@ -36,7 +36,9 @@ union intel_x86_pebs_dse {
unsigned int ld_dse:4;
unsigned int ld_stlb_miss:1;
unsigned int ld_locked:1;
unsigned int ld_reserved:26;
unsigned int ld_data_blk:1;
unsigned int ld_addr_blk:1;
unsigned int ld_reserved:24;
};
struct {
unsigned int st_l1d_hit:1;
@ -45,6 +47,12 @@ union intel_x86_pebs_dse {
unsigned int st_locked:1;
unsigned int st_reserved2:26;
};
struct {
unsigned int st_lat_dse:4;
unsigned int st_lat_stlb_miss:1;
unsigned int st_lat_locked:1;
unsigned int ld_reserved3:26;
};
};
@ -198,6 +206,63 @@ static u64 load_latency_data(u64 status)
if (dse.ld_locked)
val |= P(LOCK, LOCKED);
/*
* Ice Lake and earlier models do not support block infos.
*/
if (!x86_pmu.pebs_block) {
val |= P(BLK, NA);
return val;
}
/*
* bit 6: load was blocked since its data could not be forwarded
* from a preceding store
*/
if (dse.ld_data_blk)
val |= P(BLK, DATA);
/*
* bit 7: load was blocked due to potential address conflict with
* a preceding store
*/
if (dse.ld_addr_blk)
val |= P(BLK, ADDR);
if (!dse.ld_data_blk && !dse.ld_addr_blk)
val |= P(BLK, NA);
return val;
}
static u64 store_latency_data(u64 status)
{
union intel_x86_pebs_dse dse;
u64 val;
dse.val = status;
/*
* use the mapping table for bit 0-3
*/
val = pebs_data_source[dse.st_lat_dse];
/*
* bit 4: TLB access
* 0 = did not miss 2nd level TLB
* 1 = missed 2nd level TLB
*/
if (dse.st_lat_stlb_miss)
val |= P(TLB, MISS) | P(TLB, L2);
else
val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
/*
* bit 5: locked prefix
*/
if (dse.st_lat_locked)
val |= P(LOCK, LOCKED);
val |= P(BLK, NA);
return val;
}
@ -870,6 +935,28 @@ struct event_constraint intel_icl_pebs_event_constraints[] = {
EVENT_CONSTRAINT_END
};
struct event_constraint intel_spr_pebs_event_constraints[] = {
INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x100000000ULL),
INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL),
INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xfe),
INTEL_PLD_CONSTRAINT(0x1cd, 0xfe),
INTEL_PSD_CONSTRAINT(0x2cd, 0x1),
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x1d0, 0xf),
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x2d0, 0xf),
INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf),
INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf),
/*
* Everything else is handled by PMU_FL_PEBS_ALL, because we
* need the full constraints from the main table.
*/
EVENT_CONSTRAINT_END
};
struct event_constraint *intel_pebs_constraints(struct perf_event *event)
{
struct event_constraint *c;
@ -960,7 +1047,8 @@ static void adaptive_pebs_record_size_update(void)
}
#define PERF_PEBS_MEMINFO_TYPE (PERF_SAMPLE_ADDR | PERF_SAMPLE_DATA_SRC | \
PERF_SAMPLE_PHYS_ADDR | PERF_SAMPLE_WEIGHT | \
PERF_SAMPLE_PHYS_ADDR | \
PERF_SAMPLE_WEIGHT_TYPE | \
PERF_SAMPLE_TRANSACTION | \
PERF_SAMPLE_DATA_PAGE_SIZE)
@ -987,7 +1075,7 @@ static u64 pebs_update_adaptive_cfg(struct perf_event *event)
gprs = (sample_type & PERF_SAMPLE_REGS_INTR) &&
(attr->sample_regs_intr & PEBS_GP_REGS);
tsx_weight = (sample_type & PERF_SAMPLE_WEIGHT) &&
tsx_weight = (sample_type & PERF_SAMPLE_WEIGHT_TYPE) &&
((attr->config & INTEL_ARCH_EVENT_MASK) ==
x86_pmu.rtm_abort_event);
@ -1331,6 +1419,8 @@ static u64 get_data_src(struct perf_event *event, u64 aux)
if (fl & PERF_X86_EVENT_PEBS_LDLAT)
val = load_latency_data(aux);
else if (fl & PERF_X86_EVENT_PEBS_STLAT)
val = store_latency_data(aux);
else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC))
val = precise_datala_hsw(event, aux);
else if (fst)
@ -1369,8 +1459,8 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
/*
* Use latency for weight (only avail with PEBS-LL)
*/
if (fll && (sample_type & PERF_SAMPLE_WEIGHT))
data->weight = pebs->lat;
if (fll && (sample_type & PERF_SAMPLE_WEIGHT_TYPE))
data->weight.full = pebs->lat;
/*
* data.data_src encodes the data source
@ -1462,8 +1552,8 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
if (x86_pmu.intel_cap.pebs_format >= 2) {
/* Only set the TSX weight when no memory weight. */
if ((sample_type & PERF_SAMPLE_WEIGHT) && !fll)
data->weight = intel_get_tsx_weight(pebs->tsx_tuning);
if ((sample_type & PERF_SAMPLE_WEIGHT_TYPE) && !fll)
data->weight.full = intel_get_tsx_weight(pebs->tsx_tuning);
if (sample_type & PERF_SAMPLE_TRANSACTION)
data->txn = intel_get_tsx_transaction(pebs->tsx_tuning,
@ -1507,6 +1597,9 @@ static void adaptive_pebs_save_regs(struct pt_regs *regs,
#endif
}
#define PEBS_LATENCY_MASK 0xffff
#define PEBS_CACHE_LATENCY_OFFSET 32
/*
* With adaptive PEBS the layout depends on what fields are configured.
*/
@ -1577,9 +1670,27 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
}
if (format_size & PEBS_DATACFG_MEMINFO) {
if (sample_type & PERF_SAMPLE_WEIGHT)
data->weight = meminfo->latency ?:
intel_get_tsx_weight(meminfo->tsx_tuning);
if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) {
u64 weight = meminfo->latency;
if (x86_pmu.flags & PMU_FL_INSTR_LATENCY) {
data->weight.var2_w = weight & PEBS_LATENCY_MASK;
weight >>= PEBS_CACHE_LATENCY_OFFSET;
}
/*
* Although meminfo::latency is defined as a u64,
* only the lower 32 bits include the valid data
* in practice on Ice Lake and earlier platforms.
*/
if (sample_type & PERF_SAMPLE_WEIGHT) {
data->weight.full = weight ?:
intel_get_tsx_weight(meminfo->tsx_tuning);
} else {
data->weight.var1_dw = (u32)(weight & PEBS_LATENCY_MASK) ?:
intel_get_tsx_weight(meminfo->tsx_tuning);
}
}
if (sample_type & PERF_SAMPLE_DATA_SRC)
data->data_src.val = get_data_src(event, meminfo->aux);

View File

@ -31,21 +31,21 @@ struct event_constraint uncore_constraint_empty =
MODULE_LICENSE("GPL");
int uncore_pcibus_to_physid(struct pci_bus *bus)
int uncore_pcibus_to_dieid(struct pci_bus *bus)
{
struct pci2phy_map *map;
int phys_id = -1;
int die_id = -1;
raw_spin_lock(&pci2phy_map_lock);
list_for_each_entry(map, &pci2phy_map_head, list) {
if (map->segment == pci_domain_nr(bus)) {
phys_id = map->pbus_to_physid[bus->number];
die_id = map->pbus_to_dieid[bus->number];
break;
}
}
raw_spin_unlock(&pci2phy_map_lock);
return phys_id;
return die_id;
}
static void uncore_free_pcibus_map(void)
@ -86,7 +86,7 @@ struct pci2phy_map *__find_pci2phy_map(int segment)
alloc = NULL;
map->segment = segment;
for (i = 0; i < 256; i++)
map->pbus_to_physid[i] = -1;
map->pbus_to_dieid[i] = -1;
list_add_tail(&map->list, &pci2phy_map_head);
end:
@ -332,7 +332,6 @@ static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type,
uncore_pmu_init_hrtimer(box);
box->cpu = -1;
box->pci_phys_id = -1;
box->dieid = -1;
/* set default hrtimer timeout */
@ -993,18 +992,11 @@ uncore_types_init(struct intel_uncore_type **types, bool setid)
/*
* Get the die information of a PCI device.
* @pdev: The PCI device.
* @phys_id: The physical socket id which the device maps to.
* @die: The die id which the device maps to.
*/
static int uncore_pci_get_dev_die_info(struct pci_dev *pdev,
int *phys_id, int *die)
static int uncore_pci_get_dev_die_info(struct pci_dev *pdev, int *die)
{
*phys_id = uncore_pcibus_to_physid(pdev->bus);
if (*phys_id < 0)
return -ENODEV;
*die = (topology_max_die_per_package() > 1) ? *phys_id :
topology_phys_to_logical_pkg(*phys_id);
*die = uncore_pcibus_to_dieid(pdev->bus);
if (*die < 0)
return -EINVAL;
@ -1046,13 +1038,12 @@ uncore_pci_find_dev_pmu(struct pci_dev *pdev, const struct pci_device_id *ids)
* @pdev: The PCI device.
* @type: The corresponding PMU type of the device.
* @pmu: The corresponding PMU of the device.
* @phys_id: The physical socket id which the device maps to.
* @die: The die id which the device maps to.
*/
static int uncore_pci_pmu_register(struct pci_dev *pdev,
struct intel_uncore_type *type,
struct intel_uncore_pmu *pmu,
int phys_id, int die)
int die)
{
struct intel_uncore_box *box;
int ret;
@ -1070,7 +1061,6 @@ static int uncore_pci_pmu_register(struct pci_dev *pdev,
WARN_ON_ONCE(pmu->func_id != pdev->devfn);
atomic_inc(&box->refcnt);
box->pci_phys_id = phys_id;
box->dieid = die;
box->pci_dev = pdev;
box->pmu = pmu;
@ -1097,9 +1087,9 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
{
struct intel_uncore_type *type;
struct intel_uncore_pmu *pmu = NULL;
int phys_id, die, ret;
int die, ret;
ret = uncore_pci_get_dev_die_info(pdev, &phys_id, &die);
ret = uncore_pci_get_dev_die_info(pdev, &die);
if (ret)
return ret;
@ -1132,7 +1122,7 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
}
ret = uncore_pci_pmu_register(pdev, type, pmu, phys_id, die);
ret = uncore_pci_pmu_register(pdev, type, pmu, die);
pci_set_drvdata(pdev, pmu->boxes[die]);
@ -1142,17 +1132,12 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
/*
* Unregister the PMU of a PCI device
* @pmu: The corresponding PMU is unregistered.
* @phys_id: The physical socket id which the device maps to.
* @die: The die id which the device maps to.
*/
static void uncore_pci_pmu_unregister(struct intel_uncore_pmu *pmu,
int phys_id, int die)
static void uncore_pci_pmu_unregister(struct intel_uncore_pmu *pmu, int die)
{
struct intel_uncore_box *box = pmu->boxes[die];
if (WARN_ON_ONCE(phys_id != box->pci_phys_id))
return;
pmu->boxes[die] = NULL;
if (atomic_dec_return(&pmu->activeboxes) == 0)
uncore_pmu_unregister(pmu);
@ -1164,9 +1149,9 @@ static void uncore_pci_remove(struct pci_dev *pdev)
{
struct intel_uncore_box *box;
struct intel_uncore_pmu *pmu;
int i, phys_id, die;
int i, die;
if (uncore_pci_get_dev_die_info(pdev, &phys_id, &die))
if (uncore_pci_get_dev_die_info(pdev, &die))
return;
box = pci_get_drvdata(pdev);
@ -1185,7 +1170,7 @@ static void uncore_pci_remove(struct pci_dev *pdev)
pci_set_drvdata(pdev, NULL);
uncore_pci_pmu_unregister(pmu, phys_id, die);
uncore_pci_pmu_unregister(pmu, die);
}
static int uncore_bus_notify(struct notifier_block *nb,
@ -1194,7 +1179,7 @@ static int uncore_bus_notify(struct notifier_block *nb,
struct device *dev = data;
struct pci_dev *pdev = to_pci_dev(dev);
struct intel_uncore_pmu *pmu;
int phys_id, die;
int die;
/* Unregister the PMU when the device is going to be deleted. */
if (action != BUS_NOTIFY_DEL_DEVICE)
@ -1204,10 +1189,10 @@ static int uncore_bus_notify(struct notifier_block *nb,
if (!pmu)
return NOTIFY_DONE;
if (uncore_pci_get_dev_die_info(pdev, &phys_id, &die))
if (uncore_pci_get_dev_die_info(pdev, &die))
return NOTIFY_DONE;
uncore_pci_pmu_unregister(pmu, phys_id, die);
uncore_pci_pmu_unregister(pmu, die);
return NOTIFY_OK;
}
@ -1224,7 +1209,7 @@ static void uncore_pci_sub_driver_init(void)
struct pci_dev *pci_sub_dev;
bool notify = false;
unsigned int devfn;
int phys_id, die;
int die;
while (ids && ids->vendor) {
pci_sub_dev = NULL;
@ -1244,12 +1229,11 @@ static void uncore_pci_sub_driver_init(void)
if (!pmu)
continue;
if (uncore_pci_get_dev_die_info(pci_sub_dev,
&phys_id, &die))
if (uncore_pci_get_dev_die_info(pci_sub_dev, &die))
continue;
if (!uncore_pci_pmu_register(pci_sub_dev, type, pmu,
phys_id, die))
die))
notify = true;
}
ids++;

View File

@ -124,7 +124,6 @@ struct intel_uncore_extra_reg {
};
struct intel_uncore_box {
int pci_phys_id;
int dieid; /* Logical die ID */
int n_active; /* number of active events */
int n_events;
@ -173,11 +172,11 @@ struct freerunning_counters {
struct pci2phy_map {
struct list_head list;
int segment;
int pbus_to_physid[256];
int pbus_to_dieid[256];
};
struct pci2phy_map *__find_pci2phy_map(int segment);
int uncore_pcibus_to_physid(struct pci_bus *bus);
int uncore_pcibus_to_dieid(struct pci_bus *bus);
ssize_t uncore_event_show(struct device *dev,
struct device_attribute *attr, char *buf);

View File

@ -657,7 +657,7 @@ int snb_pci2phy_map_init(int devid)
pci_dev_put(dev);
return -ENOMEM;
}
map->pbus_to_physid[bus] = 0;
map->pbus_to_dieid[bus] = 0;
raw_spin_unlock(&pci2phy_map_lock);
pci_dev_put(dev);

View File

@ -1359,7 +1359,7 @@ static struct pci_driver snbep_uncore_pci_driver = {
static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool reverse)
{
struct pci_dev *ubox_dev = NULL;
int i, bus, nodeid, segment;
int i, bus, nodeid, segment, die_id;
struct pci2phy_map *map;
int err = 0;
u32 config = 0;
@ -1370,36 +1370,77 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool
if (!ubox_dev)
break;
bus = ubox_dev->bus->number;
/* get the Node ID of the local register */
err = pci_read_config_dword(ubox_dev, nodeid_loc, &config);
if (err)
break;
nodeid = config & NODE_ID_MASK;
/* get the Node ID mapping */
err = pci_read_config_dword(ubox_dev, idmap_loc, &config);
if (err)
break;
segment = pci_domain_nr(ubox_dev->bus);
raw_spin_lock(&pci2phy_map_lock);
map = __find_pci2phy_map(segment);
if (!map) {
raw_spin_unlock(&pci2phy_map_lock);
err = -ENOMEM;
break;
}
/*
* every three bits in the Node ID mapping register maps
* to a particular node.
* The nodeid and idmap registers only contain enough
* information to handle 8 nodes. On systems with more
* than 8 nodes, we need to rely on NUMA information,
* filled in from BIOS supplied information, to determine
* the topology.
*/
for (i = 0; i < 8; i++) {
if (nodeid == ((config >> (3 * i)) & 0x7)) {
map->pbus_to_physid[bus] = i;
if (nr_node_ids <= 8) {
/* get the Node ID of the local register */
err = pci_read_config_dword(ubox_dev, nodeid_loc, &config);
if (err)
break;
nodeid = config & NODE_ID_MASK;
/* get the Node ID mapping */
err = pci_read_config_dword(ubox_dev, idmap_loc, &config);
if (err)
break;
segment = pci_domain_nr(ubox_dev->bus);
raw_spin_lock(&pci2phy_map_lock);
map = __find_pci2phy_map(segment);
if (!map) {
raw_spin_unlock(&pci2phy_map_lock);
err = -ENOMEM;
break;
}
/*
* every three bits in the Node ID mapping register maps
* to a particular node.
*/
for (i = 0; i < 8; i++) {
if (nodeid == ((config >> (3 * i)) & 0x7)) {
if (topology_max_die_per_package() > 1)
die_id = i;
else
die_id = topology_phys_to_logical_pkg(i);
map->pbus_to_dieid[bus] = die_id;
break;
}
}
raw_spin_unlock(&pci2phy_map_lock);
} else {
int node = pcibus_to_node(ubox_dev->bus);
int cpu;
segment = pci_domain_nr(ubox_dev->bus);
raw_spin_lock(&pci2phy_map_lock);
map = __find_pci2phy_map(segment);
if (!map) {
raw_spin_unlock(&pci2phy_map_lock);
err = -ENOMEM;
break;
}
die_id = -1;
for_each_cpu(cpu, cpumask_of_pcibus(ubox_dev->bus)) {
struct cpuinfo_x86 *c = &cpu_data(cpu);
if (c->initialized && cpu_to_node(cpu) == node) {
map->pbus_to_dieid[bus] = die_id = c->logical_die_id;
break;
}
}
raw_spin_unlock(&pci2phy_map_lock);
if (WARN_ON_ONCE(die_id == -1)) {
err = -EINVAL;
break;
}
}
raw_spin_unlock(&pci2phy_map_lock);
}
if (!err) {
@ -1412,17 +1453,17 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool
i = -1;
if (reverse) {
for (bus = 255; bus >= 0; bus--) {
if (map->pbus_to_physid[bus] >= 0)
i = map->pbus_to_physid[bus];
if (map->pbus_to_dieid[bus] >= 0)
i = map->pbus_to_dieid[bus];
else
map->pbus_to_physid[bus] = i;
map->pbus_to_dieid[bus] = i;
}
} else {
for (bus = 0; bus <= 255; bus++) {
if (map->pbus_to_physid[bus] >= 0)
i = map->pbus_to_physid[bus];
if (map->pbus_to_dieid[bus] >= 0)
i = map->pbus_to_dieid[bus];
else
map->pbus_to_physid[bus] = i;
map->pbus_to_dieid[bus] = i;
}
}
}
@ -4646,19 +4687,14 @@ int snr_uncore_pci_init(void)
static struct pci_dev *snr_uncore_get_mc_dev(int id)
{
struct pci_dev *mc_dev = NULL;
int phys_id, pkg;
int pkg;
while (1) {
mc_dev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3451, mc_dev);
if (!mc_dev)
break;
phys_id = uncore_pcibus_to_physid(mc_dev->bus);
if (phys_id < 0)
continue;
pkg = topology_phys_to_logical_pkg(phys_id);
if (pkg < 0)
continue;
else if (pkg == id)
pkg = uncore_pcibus_to_dieid(mc_dev->bus);
if (pkg == id)
break;
}
return mc_dev;

View File

@ -80,6 +80,7 @@ static inline bool constraint_match(struct event_constraint *c, u64 ecode)
#define PERF_X86_EVENT_PAIR 0x1000 /* Large Increment per Cycle */
#define PERF_X86_EVENT_LBR_SELECT 0x2000 /* Save/Restore MSR_LBR_SELECT */
#define PERF_X86_EVENT_TOPDOWN 0x4000 /* Count Topdown slots/metrics events */
#define PERF_X86_EVENT_PEBS_STLAT 0x8000 /* st+stlat data address sampling */
static inline bool is_topdown_count(struct perf_event *event)
{
@ -443,6 +444,10 @@ struct cpu_hw_events {
__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT)
#define INTEL_PSD_CONSTRAINT(c, n) \
__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_STLAT)
#define INTEL_PST_CONSTRAINT(c, n) \
__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)
@ -682,8 +687,7 @@ struct x86_pmu {
/* PMI handler bits */
unsigned int late_ack :1,
enabled_ack :1,
counter_freezing :1;
enabled_ack :1;
/*
* sysfs attrs
*/
@ -724,7 +728,8 @@ struct x86_pmu {
pebs_broken :1,
pebs_prec_dist :1,
pebs_no_tlb :1,
pebs_no_isolation :1;
pebs_no_isolation :1,
pebs_block :1;
int pebs_record_size;
int pebs_buffer_size;
int max_pebs_events;
@ -776,6 +781,7 @@ struct x86_pmu {
/*
* Intel perf metrics
*/
int num_topdown_events;
u64 (*update_topdown_event)(struct perf_event *event);
int (*set_topdown_event_period)(struct perf_event *event);
@ -871,6 +877,8 @@ do { \
#define PMU_FL_PEBS_ALL 0x10 /* all events are valid PEBS events */
#define PMU_FL_TFA 0x20 /* deal with TSX force abort */
#define PMU_FL_PAIR 0x40 /* merge counters for large incr. events */
#define PMU_FL_INSTR_LATENCY 0x80 /* Support Instruction Latency in PEBS Memory Info Record */
#define PMU_FL_MEM_LOADS_AUX 0x100 /* Require an auxiliary event for the complete memory info */
#define EVENT_VAR(_id) event_attr_##_id
#define EVENT_PTR(_id) &event_attr_##_id.attr.attr
@ -1060,6 +1068,11 @@ ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr,
char *page);
static inline bool fixed_counter_disabled(int i)
{
return !(x86_pmu.intel_ctrl >> (i + INTEL_PMC_IDX_FIXED));
}
#ifdef CONFIG_CPU_SUP_AMD
int amd_pmu_init(void);
@ -1157,6 +1170,8 @@ extern struct event_constraint intel_skl_pebs_event_constraints[];
extern struct event_constraint intel_icl_pebs_event_constraints[];
extern struct event_constraint intel_spr_pebs_event_constraints[];
struct event_constraint *intel_pebs_constraints(struct perf_event *event);
void intel_pmu_pebs_add(struct perf_event *event);

View File

@ -28,6 +28,7 @@ perf_msr_probe(struct perf_msr *msr, int cnt, bool zero, void *data)
for (bit = 0; bit < cnt; bit++) {
if (!msr[bit].no_check) {
struct attribute_group *grp = msr[bit].grp;
u64 mask;
/* skip entry with no group */
if (!grp)
@ -44,8 +45,12 @@ perf_msr_probe(struct perf_msr *msr, int cnt, bool zero, void *data)
/* Virt sucks; you cannot tell if a R/O MSR is present :/ */
if (rdmsrl_safe(msr[bit].msr, &val))
continue;
mask = msr[bit].mask;
if (!mask)
mask = ~0ULL;
/* Disable zero counters if requested. */
if (!zero && !val)
if (!zero && !(val & mask))
continue;
grp->is_visible = NULL;

View File

@ -4,10 +4,11 @@
#include <linux/sysfs.h>
struct perf_msr {
u64 msr;
struct attribute_group *grp;
u64 msr;
struct attribute_group *grp;
bool (*test)(int idx, void *data);
bool no_check;
bool no_check;
u64 mask;
};
unsigned long

View File

@ -454,16 +454,9 @@ static struct attribute *rapl_events_cores[] = {
NULL,
};
static umode_t
rapl_not_visible(struct kobject *kobj, struct attribute *attr, int i)
{
return 0;
}
static struct attribute_group rapl_events_cores_group = {
.name = "events",
.attrs = rapl_events_cores,
.is_visible = rapl_not_visible,
};
static struct attribute *rapl_events_pkg[] = {
@ -476,7 +469,6 @@ static struct attribute *rapl_events_pkg[] = {
static struct attribute_group rapl_events_pkg_group = {
.name = "events",
.attrs = rapl_events_pkg,
.is_visible = rapl_not_visible,
};
static struct attribute *rapl_events_ram[] = {
@ -489,7 +481,6 @@ static struct attribute *rapl_events_ram[] = {
static struct attribute_group rapl_events_ram_group = {
.name = "events",
.attrs = rapl_events_ram,
.is_visible = rapl_not_visible,
};
static struct attribute *rapl_events_gpu[] = {
@ -502,7 +493,6 @@ static struct attribute *rapl_events_gpu[] = {
static struct attribute_group rapl_events_gpu_group = {
.name = "events",
.attrs = rapl_events_gpu,
.is_visible = rapl_not_visible,
};
static struct attribute *rapl_events_psys[] = {
@ -515,7 +505,6 @@ static struct attribute *rapl_events_psys[] = {
static struct attribute_group rapl_events_psys_group = {
.name = "events",
.attrs = rapl_events_psys,
.is_visible = rapl_not_visible,
};
static bool test_msr(int idx, void *data)
@ -523,12 +512,23 @@ static bool test_msr(int idx, void *data)
return test_bit(idx, (unsigned long *) data);
}
/* Only lower 32bits of the MSR represents the energy counter */
#define RAPL_MSR_MASK 0xFFFFFFFF
static struct perf_msr intel_rapl_msrs[] = {
[PERF_RAPL_PP0] = { MSR_PP0_ENERGY_STATUS, &rapl_events_cores_group, test_msr },
[PERF_RAPL_PKG] = { MSR_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr },
[PERF_RAPL_RAM] = { MSR_DRAM_ENERGY_STATUS, &rapl_events_ram_group, test_msr },
[PERF_RAPL_PP1] = { MSR_PP1_ENERGY_STATUS, &rapl_events_gpu_group, test_msr },
[PERF_RAPL_PSYS] = { MSR_PLATFORM_ENERGY_STATUS, &rapl_events_psys_group, test_msr },
[PERF_RAPL_PP0] = { MSR_PP0_ENERGY_STATUS, &rapl_events_cores_group, test_msr, false, RAPL_MSR_MASK },
[PERF_RAPL_PKG] = { MSR_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr, false, RAPL_MSR_MASK },
[PERF_RAPL_RAM] = { MSR_DRAM_ENERGY_STATUS, &rapl_events_ram_group, test_msr, false, RAPL_MSR_MASK },
[PERF_RAPL_PP1] = { MSR_PP1_ENERGY_STATUS, &rapl_events_gpu_group, test_msr, false, RAPL_MSR_MASK },
[PERF_RAPL_PSYS] = { MSR_PLATFORM_ENERGY_STATUS, &rapl_events_psys_group, test_msr, false, RAPL_MSR_MASK },
};
static struct perf_msr intel_rapl_spr_msrs[] = {
[PERF_RAPL_PP0] = { MSR_PP0_ENERGY_STATUS, &rapl_events_cores_group, test_msr, false, RAPL_MSR_MASK },
[PERF_RAPL_PKG] = { MSR_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr, false, RAPL_MSR_MASK },
[PERF_RAPL_RAM] = { MSR_DRAM_ENERGY_STATUS, &rapl_events_ram_group, test_msr, false, RAPL_MSR_MASK },
[PERF_RAPL_PP1] = { MSR_PP1_ENERGY_STATUS, &rapl_events_gpu_group, test_msr, false, RAPL_MSR_MASK },
[PERF_RAPL_PSYS] = { MSR_PLATFORM_ENERGY_STATUS, &rapl_events_psys_group, test_msr, true, RAPL_MSR_MASK },
};
/*
@ -761,7 +761,7 @@ static struct rapl_model model_spr = {
BIT(PERF_RAPL_PSYS),
.unit_quirk = RAPL_UNIT_QUIRK_INTEL_SPR,
.msr_power_unit = MSR_RAPL_POWER_UNIT,
.rapl_msrs = intel_rapl_msrs,
.rapl_msrs = intel_rapl_spr_msrs,
};
static struct rapl_model model_amd_fam17h = {

View File

@ -58,14 +58,17 @@ struct arch_specific_insn {
/* copy of the original instruction */
kprobe_opcode_t *insn;
/*
* boostable = false: This instruction type is not boostable.
* boostable = true: This instruction has been boosted: we have
* boostable = 0: This instruction type is not boostable.
* boostable = 1: This instruction has been boosted: we have
* added a relative jump after the instruction copy in insn,
* so no single-step and fixup are needed (unless there's
* a post_handler).
*/
bool boostable;
bool if_modifier;
unsigned boostable:1;
unsigned if_modifier:1;
unsigned is_call:1;
unsigned is_pushf:1;
unsigned is_abs_ip:1;
/* Number of bytes of text poked */
int tp_len;
};

View File

@ -261,8 +261,12 @@ struct x86_pmu_capability {
#define INTEL_PMC_IDX_TD_BAD_SPEC (INTEL_PMC_IDX_METRIC_BASE + 1)
#define INTEL_PMC_IDX_TD_FE_BOUND (INTEL_PMC_IDX_METRIC_BASE + 2)
#define INTEL_PMC_IDX_TD_BE_BOUND (INTEL_PMC_IDX_METRIC_BASE + 3)
#define INTEL_PMC_IDX_METRIC_END INTEL_PMC_IDX_TD_BE_BOUND
#define INTEL_PMC_MSK_TOPDOWN ((0xfull << INTEL_PMC_IDX_METRIC_BASE) | \
#define INTEL_PMC_IDX_TD_HEAVY_OPS (INTEL_PMC_IDX_METRIC_BASE + 4)
#define INTEL_PMC_IDX_TD_BR_MISPREDICT (INTEL_PMC_IDX_METRIC_BASE + 5)
#define INTEL_PMC_IDX_TD_FETCH_LAT (INTEL_PMC_IDX_METRIC_BASE + 6)
#define INTEL_PMC_IDX_TD_MEM_BOUND (INTEL_PMC_IDX_METRIC_BASE + 7)
#define INTEL_PMC_IDX_METRIC_END INTEL_PMC_IDX_TD_MEM_BOUND
#define INTEL_PMC_MSK_TOPDOWN ((0xffull << INTEL_PMC_IDX_METRIC_BASE) | \
INTEL_PMC_MSK_FIXED_SLOTS)
/*
@ -280,8 +284,14 @@ struct x86_pmu_capability {
#define INTEL_TD_METRIC_BAD_SPEC 0x8100 /* Bad speculation metric */
#define INTEL_TD_METRIC_FE_BOUND 0x8200 /* FE bound metric */
#define INTEL_TD_METRIC_BE_BOUND 0x8300 /* BE bound metric */
#define INTEL_TD_METRIC_MAX INTEL_TD_METRIC_BE_BOUND
#define INTEL_TD_METRIC_NUM 4
/* Level 2 metrics */
#define INTEL_TD_METRIC_HEAVY_OPS 0x8400 /* Heavy Operations metric */
#define INTEL_TD_METRIC_BR_MISPREDICT 0x8500 /* Branch Mispredict metric */
#define INTEL_TD_METRIC_FETCH_LAT 0x8600 /* Fetch Latency metric */
#define INTEL_TD_METRIC_MEM_BOUND 0x8700 /* Memory bound metric */
#define INTEL_TD_METRIC_MAX INTEL_TD_METRIC_MEM_BOUND
#define INTEL_TD_METRIC_NUM 8
static inline bool is_metric_idx(int idx)
{
@ -483,11 +493,7 @@ static inline void perf_check_microcode(void) { }
extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr);
extern int x86_perf_get_lbr(struct x86_pmu_lbr *lbr);
#else
static inline struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
{
*nr = 0;
return NULL;
}
struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr);
static inline int x86_perf_get_lbr(struct x86_pmu_lbr *lbr)
{
return -1;

View File

@ -132,26 +132,6 @@ void synthesize_relcall(void *dest, void *from, void *to)
}
NOKPROBE_SYMBOL(synthesize_relcall);
/*
* Skip the prefixes of the instruction.
*/
static kprobe_opcode_t *skip_prefixes(kprobe_opcode_t *insn)
{
insn_attr_t attr;
attr = inat_get_opcode_attribute((insn_byte_t)*insn);
while (inat_is_legacy_prefix(attr)) {
insn++;
attr = inat_get_opcode_attribute((insn_byte_t)*insn);
}
#ifdef CONFIG_X86_64
if (inat_is_rex_prefix(attr))
insn++;
#endif
return insn;
}
NOKPROBE_SYMBOL(skip_prefixes);
/*
* Returns non-zero if INSN is boostable.
* RIP relative instructions are adjusted at copying time in 64 bits mode
@ -311,25 +291,6 @@ static int can_probe(unsigned long paddr)
return (addr == paddr);
}
/*
* Returns non-zero if opcode modifies the interrupt flag.
*/
static int is_IF_modifier(kprobe_opcode_t *insn)
{
/* Skip prefixes */
insn = skip_prefixes(insn);
switch (*insn) {
case 0xfa: /* cli */
case 0xfb: /* sti */
case 0xcf: /* iret/iretd */
case 0x9d: /* popf/popfd */
return 1;
}
return 0;
}
/*
* Copy an instruction with recovering modified instruction by kprobes
* and adjust the displacement if the instruction uses the %rip-relative
@ -411,9 +372,9 @@ static int prepare_boost(kprobe_opcode_t *buf, struct kprobe *p,
synthesize_reljump(buf + len, p->ainsn.insn + len,
p->addr + insn->length);
len += JMP32_INSN_SIZE;
p->ainsn.boostable = true;
p->ainsn.boostable = 1;
} else {
p->ainsn.boostable = false;
p->ainsn.boostable = 0;
}
return len;
@ -450,6 +411,67 @@ void free_insn_page(void *page)
module_memfree(page);
}
static void set_resume_flags(struct kprobe *p, struct insn *insn)
{
insn_byte_t opcode = insn->opcode.bytes[0];
switch (opcode) {
case 0xfa: /* cli */
case 0xfb: /* sti */
case 0x9d: /* popf/popfd */
/* Check whether the instruction modifies Interrupt Flag or not */
p->ainsn.if_modifier = 1;
break;
case 0x9c: /* pushfl */
p->ainsn.is_pushf = 1;
break;
case 0xcf: /* iret */
p->ainsn.if_modifier = 1;
fallthrough;
case 0xc2: /* ret/lret */
case 0xc3:
case 0xca:
case 0xcb:
case 0xea: /* jmp absolute -- ip is correct */
/* ip is already adjusted, no more changes required */
p->ainsn.is_abs_ip = 1;
/* Without resume jump, this is boostable */
p->ainsn.boostable = 1;
break;
case 0xe8: /* call relative - Fix return addr */
p->ainsn.is_call = 1;
break;
#ifdef CONFIG_X86_32
case 0x9a: /* call absolute -- same as call absolute, indirect */
p->ainsn.is_call = 1;
p->ainsn.is_abs_ip = 1;
break;
#endif
case 0xff:
opcode = insn->opcode.bytes[1];
if ((opcode & 0x30) == 0x10) {
/*
* call absolute, indirect
* Fix return addr; ip is correct.
* But this is not boostable
*/
p->ainsn.is_call = 1;
p->ainsn.is_abs_ip = 1;
break;
} else if (((opcode & 0x31) == 0x20) ||
((opcode & 0x31) == 0x21)) {
/*
* jmp near and far, absolute indirect
* ip is correct.
*/
p->ainsn.is_abs_ip = 1;
/* Without resume jump, this is boostable */
p->ainsn.boostable = 1;
}
break;
}
}
static int arch_copy_kprobe(struct kprobe *p)
{
struct insn insn;
@ -467,8 +489,8 @@ static int arch_copy_kprobe(struct kprobe *p)
*/
len = prepare_boost(buf, p, &insn);
/* Check whether the instruction modifies Interrupt Flag or not */
p->ainsn.if_modifier = is_IF_modifier(buf);
/* Analyze the opcode and set resume flags */
set_resume_flags(p, &insn);
/* Also, displacement change doesn't affect the first byte */
p->opcode = buf[0];
@ -491,6 +513,9 @@ int arch_prepare_kprobe(struct kprobe *p)
if (!can_probe((unsigned long)p->addr))
return -EILSEQ;
memset(&p->ainsn, 0, sizeof(p->ainsn));
/* insn: must be on special executable page on x86. */
p->ainsn.insn = get_insn_slot();
if (!p->ainsn.insn)
@ -806,11 +831,6 @@ NOKPROBE_SYMBOL(trampoline_handler);
* 2) If the single-stepped instruction was a call, the return address
* that is atop the stack is the address following the copied instruction.
* We need to make it the address following the original instruction.
*
* If this is the first time we've single-stepped the instruction at
* this probepoint, and the instruction is boostable, boost it: add a
* jump instruction after the copied instruction, that jumps to the next
* instruction after the probepoint.
*/
static void resume_execution(struct kprobe *p, struct pt_regs *regs,
struct kprobe_ctlblk *kcb)
@ -818,60 +838,20 @@ static void resume_execution(struct kprobe *p, struct pt_regs *regs,
unsigned long *tos = stack_addr(regs);
unsigned long copy_ip = (unsigned long)p->ainsn.insn;
unsigned long orig_ip = (unsigned long)p->addr;
kprobe_opcode_t *insn = p->ainsn.insn;
/* Skip prefixes */
insn = skip_prefixes(insn);
regs->flags &= ~X86_EFLAGS_TF;
switch (*insn) {
case 0x9c: /* pushfl */
/* Fixup the contents of top of stack */
if (p->ainsn.is_pushf) {
*tos &= ~(X86_EFLAGS_TF | X86_EFLAGS_IF);
*tos |= kcb->kprobe_old_flags;
break;
case 0xc2: /* iret/ret/lret */
case 0xc3:
case 0xca:
case 0xcb:
case 0xcf:
case 0xea: /* jmp absolute -- ip is correct */
/* ip is already adjusted, no more changes required */
p->ainsn.boostable = true;
goto no_change;
case 0xe8: /* call relative - Fix return addr */
} else if (p->ainsn.is_call) {
*tos = orig_ip + (*tos - copy_ip);
break;
#ifdef CONFIG_X86_32
case 0x9a: /* call absolute -- same as call absolute, indirect */
*tos = orig_ip + (*tos - copy_ip);
goto no_change;
#endif
case 0xff:
if ((insn[1] & 0x30) == 0x10) {
/*
* call absolute, indirect
* Fix return addr; ip is correct.
* But this is not boostable
*/
*tos = orig_ip + (*tos - copy_ip);
goto no_change;
} else if (((insn[1] & 0x31) == 0x20) ||
((insn[1] & 0x31) == 0x21)) {
/*
* jmp near and far, absolute indirect
* ip is correct. And this is boostable
*/
p->ainsn.boostable = true;
goto no_change;
}
break;
default:
break;
}
regs->ip += orig_ip - copy_ip;
if (!p->ainsn.is_abs_ip)
regs->ip += orig_ip - copy_ip;
no_change:
restore_btf();
}
NOKPROBE_SYMBOL(resume_execution);

View File

@ -712,7 +712,8 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
unsigned long mfn, pfn;
/* Do not add to override if the map failed. */
if (map_ops[i].status)
if (map_ops[i].status != GNTST_okay ||
(kmap_ops && kmap_ops[i].status != GNTST_okay))
continue;
if (map_ops[i].flags & GNTMAP_contains_pte) {
@ -750,17 +751,15 @@ int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
unsigned long mfn = __pfn_to_mfn(page_to_pfn(pages[i]));
unsigned long pfn = page_to_pfn(pages[i]);
if (mfn == INVALID_P2M_ENTRY || !(mfn & FOREIGN_FRAME_BIT)) {
if (mfn != INVALID_P2M_ENTRY && (mfn & FOREIGN_FRAME_BIT))
set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
else
ret = -EINVAL;
goto out;
}
set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
}
if (kunmap_ops)
ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
kunmap_ops, count);
out:
kunmap_ops, count) ?: ret;
return ret;
}
EXPORT_SYMBOL_GPL(clear_foreign_p2m_mapping);

View File

@ -794,8 +794,13 @@ static int xen_blkbk_map(struct xen_blkif_ring *ring,
pages[i]->persistent_gnt = persistent_gnt;
} else {
if (gnttab_page_cache_get(&ring->free_pages,
&pages[i]->page))
goto out_of_memory;
&pages[i]->page)) {
gnttab_page_cache_put(&ring->free_pages,
pages_to_gnt,
segs_to_map);
ret = -ENOMEM;
goto out;
}
addr = vaddr(pages[i]->page);
pages_to_gnt[segs_to_map] = pages[i]->page;
pages[i]->persistent_gnt = NULL;
@ -811,10 +816,8 @@ static int xen_blkbk_map(struct xen_blkif_ring *ring,
break;
}
if (segs_to_map) {
if (segs_to_map)
ret = gnttab_map_refs(map, NULL, pages_to_gnt, segs_to_map);
BUG_ON(ret);
}
/*
* Now swizzle the MFN in our domain with the MFN from the other domain
@ -830,7 +833,7 @@ static int xen_blkbk_map(struct xen_blkif_ring *ring,
gnttab_page_cache_put(&ring->free_pages,
&pages[seg_idx]->page, 1);
pages[seg_idx]->handle = BLKBACK_INVALID_HANDLE;
ret |= 1;
ret |= !ret;
goto next;
}
pages[seg_idx]->handle = map[new_map_idx].handle;
@ -882,17 +885,18 @@ static int xen_blkbk_map(struct xen_blkif_ring *ring,
}
segs_to_map = 0;
last_map = map_until;
if (map_until != num)
if (!ret && map_until != num)
goto again;
return ret;
out_of_memory:
pr_alert("%s: out of memory\n", __func__);
gnttab_page_cache_put(&ring->free_pages, pages_to_gnt, segs_to_map);
for (i = last_map; i < num; i++)
out:
for (i = last_map; i < num; i++) {
/* Don't zap current batch's valid persistent grants. */
if(i >= last_map + segs_to_map)
pages[i]->persistent_gnt = NULL;
pages[i]->handle = BLKBACK_INVALID_HANDLE;
return -ENOMEM;
}
return ret;
}
static int xen_blkbk_map_seg(struct pending_req *pending_req)

View File

@ -1261,8 +1261,6 @@ void add_interrupt_randomness(int irq, int irq_flags)
cycles_t cycles = random_get_entropy();
__u32 c_high, j_high;
__u64 ip;
unsigned long seed;
int credit = 0;
if (cycles == 0)
cycles = get_reg(fast_pool, regs);
@ -1298,23 +1296,12 @@ void add_interrupt_randomness(int irq, int irq_flags)
fast_pool->last = now;
__mix_pool_bytes(r, &fast_pool->pool, sizeof(fast_pool->pool));
/*
* If we have architectural seed generator, produce a seed and
* add it to the pool. For the sake of paranoia don't let the
* architectural seed generator dominate the input from the
* interrupt noise.
*/
if (arch_get_random_seed_long(&seed)) {
__mix_pool_bytes(r, &seed, sizeof(seed));
credit = 1;
}
spin_unlock(&r->lock);
fast_pool->count = 0;
/* award one bit for the contents of the fast pool */
credit_entropy_bits(r, credit + 1);
credit_entropy_bits(r, 1);
}
EXPORT_SYMBOL_GPL(add_interrupt_randomness);

View File

@ -5,16 +5,22 @@
#define pr_fmt(fmt) "smccc: " fmt
#include <linux/cache.h>
#include <linux/init.h>
#include <linux/arm-smccc.h>
#include <asm/archrandom.h>
static u32 smccc_version = ARM_SMCCC_VERSION_1_0;
static enum arm_smccc_conduit smccc_conduit = SMCCC_CONDUIT_NONE;
bool __ro_after_init smccc_trng_available = false;
void __init arm_smccc_version_init(u32 version, enum arm_smccc_conduit conduit)
{
smccc_version = version;
smccc_conduit = conduit;
smccc_trng_available = smccc_probe_trng();
}
enum arm_smccc_conduit arm_smccc_1_1_get_conduit(void)

View File

@ -1343,13 +1343,11 @@ int xenvif_tx_action(struct xenvif_queue *queue, int budget)
return 0;
gnttab_batch_copy(queue->tx_copy_ops, nr_cops);
if (nr_mops != 0) {
if (nr_mops != 0)
ret = gnttab_map_refs(queue->tx_map_ops,
NULL,
queue->pages_to_map,
nr_mops);
BUG_ON(ret);
}
work_done = xenvif_tx_submit(queue);

View File

@ -1026,12 +1026,11 @@ static void pmu_event_set_period(struct perf_event *event)
static irqreturn_t pmu_handle_irq(int irq_num, void *dev)
{
unsigned long flags;
struct cci_pmu *cci_pmu = dev;
struct cci_pmu_hw_events *events = &cci_pmu->hw_events;
int idx, handled = IRQ_NONE;
raw_spin_lock_irqsave(&events->pmu_lock, flags);
raw_spin_lock(&events->pmu_lock);
/* Disable the PMU while we walk through the counters */
__cci_pmu_disable(cci_pmu);
@ -1061,7 +1060,7 @@ static irqreturn_t pmu_handle_irq(int irq_num, void *dev)
/* Enable the PMU and sync possibly overflowed counters */
__cci_pmu_enable_sync(cci_pmu);
raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
raw_spin_unlock(&events->pmu_lock);
return IRQ_RETVAL(handled);
}
@ -1376,7 +1375,7 @@ static struct attribute *pmu_attrs[] = {
NULL,
};
static struct attribute_group pmu_attr_group = {
static const struct attribute_group pmu_attr_group = {
.attrs = pmu_attrs,
};

View File

@ -616,7 +616,7 @@ static struct attribute *arm_cmn_cpumask_attrs[] = {
NULL,
};
static struct attribute_group arm_cmn_cpumask_attr_group = {
static const struct attribute_group arm_cmn_cpumask_attr_group = {
.attrs = arm_cmn_cpumask_attrs,
};
@ -1150,7 +1150,7 @@ static int arm_cmn_commit_txn(struct pmu *pmu)
static int arm_cmn_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
{
struct arm_cmn *cmn;
unsigned int target;
unsigned int i, target;
cmn = hlist_entry_safe(node, struct arm_cmn, cpuhp_node);
if (cpu != cmn->cpu)
@ -1161,6 +1161,8 @@ static int arm_cmn_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
return 0;
perf_pmu_migrate_context(&cmn->pmu, cpu, target);
for (i = 0; i < cmn->num_dtcs; i++)
irq_set_affinity_hint(cmn->dtc[i].irq, cpumask_of(target));
cmn->cpu = target;
return 0;
}
@ -1502,7 +1504,7 @@ static int arm_cmn_probe(struct platform_device *pdev)
struct arm_cmn *cmn;
const char *name;
static atomic_t id;
int err, rootnode, this_id;
int err, rootnode;
cmn = devm_kzalloc(&pdev->dev, sizeof(*cmn), GFP_KERNEL);
if (!cmn)
@ -1549,14 +1551,9 @@ static int arm_cmn_probe(struct platform_device *pdev)
.cancel_txn = arm_cmn_end_txn,
};
this_id = atomic_fetch_inc(&id);
if (this_id == 0) {
name = "arm_cmn";
} else {
name = devm_kasprintf(cmn->dev, GFP_KERNEL, "arm_cmn_%d", this_id);
if (!name)
return -ENOMEM;
}
name = devm_kasprintf(cmn->dev, GFP_KERNEL, "arm_cmn_%d", atomic_fetch_inc(&id));
if (!name)
return -ENOMEM;
err = cpuhp_state_add_instance(arm_cmn_hp_state, &cmn->cpuhp_node);
if (err)

View File

@ -159,7 +159,7 @@ static struct attribute *dmc620_pmu_events_attrs[] = {
NULL,
};
static struct attribute_group dmc620_pmu_events_attr_group = {
static const struct attribute_group dmc620_pmu_events_attr_group = {
.name = "events",
.attrs = dmc620_pmu_events_attrs,
};
@ -222,7 +222,7 @@ static struct attribute *dmc620_pmu_formats_attrs[] = {
NULL,
};
static struct attribute_group dmc620_pmu_format_attr_group = {
static const struct attribute_group dmc620_pmu_format_attr_group = {
.name = "format",
.attrs = dmc620_pmu_formats_attrs,
};
@ -717,6 +717,7 @@ static struct platform_driver dmc620_pmu_driver = {
.driver = {
.name = DMC620_DRVNAME,
.acpi_match_table = dmc620_acpi_match,
.suppress_bind_attrs = true,
},
.probe = dmc620_pmu_device_probe,
.remove = dmc620_pmu_device_remove,

View File

@ -577,7 +577,7 @@ static struct attribute *armpmu_common_attrs[] = {
NULL,
};
static struct attribute_group armpmu_common_attr_group = {
static const struct attribute_group armpmu_common_attr_group = {
.attrs = armpmu_common_attrs,
};

View File

@ -493,7 +493,7 @@ static struct attribute *smmu_pmu_cpumask_attrs[] = {
NULL
};
static struct attribute_group smmu_pmu_cpumask_group = {
static const struct attribute_group smmu_pmu_cpumask_group = {
.attrs = smmu_pmu_cpumask_attrs,
};
@ -548,7 +548,7 @@ static umode_t smmu_pmu_event_is_visible(struct kobject *kobj,
return 0;
}
static struct attribute_group smmu_pmu_events_group = {
static const struct attribute_group smmu_pmu_events_group = {
.name = "events",
.attrs = smmu_pmu_events,
.is_visible = smmu_pmu_event_is_visible,
@ -583,7 +583,7 @@ static struct attribute *smmu_pmu_identifier_attrs[] = {
NULL
};
static struct attribute_group smmu_pmu_identifier_group = {
static const struct attribute_group smmu_pmu_identifier_group = {
.attrs = smmu_pmu_identifier_attrs,
.is_visible = smmu_pmu_identifier_attr_visible,
};
@ -602,7 +602,7 @@ static struct attribute *smmu_pmu_formats[] = {
NULL
};
static struct attribute_group smmu_pmu_format_group = {
static const struct attribute_group smmu_pmu_format_group = {
.name = "format",
.attrs = smmu_pmu_formats,
};

View File

@ -54,7 +54,7 @@ struct arm_spe_pmu {
struct hlist_node hotplug_node;
int irq; /* PPI */
u16 pmsver;
u16 min_period;
u16 counter_sz;
@ -146,7 +146,7 @@ static struct attribute *arm_spe_pmu_cap_attr[] = {
NULL,
};
static struct attribute_group arm_spe_pmu_cap_group = {
static const struct attribute_group arm_spe_pmu_cap_group = {
.name = "caps",
.attrs = arm_spe_pmu_cap_attr,
};
@ -227,7 +227,7 @@ static struct attribute *arm_spe_pmu_formats_attr[] = {
NULL,
};
static struct attribute_group arm_spe_pmu_format_group = {
static const struct attribute_group arm_spe_pmu_format_group = {
.name = "format",
.attrs = arm_spe_pmu_formats_attr,
};
@ -247,7 +247,7 @@ static struct attribute *arm_spe_pmu_attrs[] = {
NULL,
};
static struct attribute_group arm_spe_pmu_group = {
static const struct attribute_group arm_spe_pmu_group = {
.attrs = arm_spe_pmu_attrs,
};
@ -655,6 +655,18 @@ static irqreturn_t arm_spe_pmu_irq_handler(int irq, void *dev)
return IRQ_HANDLED;
}
static u64 arm_spe_pmsevfr_res0(u16 pmsver)
{
switch (pmsver) {
case ID_AA64DFR0_PMSVER_8_2:
return SYS_PMSEVFR_EL1_RES0_8_2;
case ID_AA64DFR0_PMSVER_8_3:
/* Return the highest version we support in default */
default:
return SYS_PMSEVFR_EL1_RES0_8_3;
}
}
/* Perf callbacks */
static int arm_spe_pmu_event_init(struct perf_event *event)
{
@ -670,7 +682,7 @@ static int arm_spe_pmu_event_init(struct perf_event *event)
!cpumask_test_cpu(event->cpu, &spe_pmu->supported_cpus))
return -ENOENT;
if (arm_spe_event_to_pmsevfr(event) & SYS_PMSEVFR_EL1_RES0)
if (arm_spe_event_to_pmsevfr(event) & arm_spe_pmsevfr_res0(spe_pmu->pmsver))
return -EOPNOTSUPP;
if (attr->exclude_idle)
@ -937,6 +949,7 @@ static void __arm_spe_pmu_dev_probe(void *info)
fld, smp_processor_id());
return;
}
spe_pmu->pmsver = (u16)fld;
/* Read PMBIDR first to determine whether or not we have access */
reg = read_sysreg_s(SYS_PMBIDR_EL1);

View File

@ -133,7 +133,7 @@ static struct attribute *ddr_perf_identifier_attrs[] = {
NULL,
};
static struct attribute_group ddr_perf_identifier_attr_group = {
static const struct attribute_group ddr_perf_identifier_attr_group = {
.attrs = ddr_perf_identifier_attrs,
.is_visible = ddr_perf_identifier_attr_visible,
};
@ -188,7 +188,7 @@ static struct attribute *ddr_perf_filter_cap_attr[] = {
NULL,
};
static struct attribute_group ddr_perf_filter_cap_attr_group = {
static const struct attribute_group ddr_perf_filter_cap_attr_group = {
.name = "caps",
.attrs = ddr_perf_filter_cap_attr,
};
@ -209,7 +209,7 @@ static struct attribute *ddr_perf_cpumask_attrs[] = {
NULL,
};
static struct attribute_group ddr_perf_cpumask_attr_group = {
static const struct attribute_group ddr_perf_cpumask_attr_group = {
.attrs = ddr_perf_cpumask_attrs,
};
@ -265,7 +265,7 @@ static struct attribute *ddr_perf_events_attrs[] = {
NULL,
};
static struct attribute_group ddr_perf_events_attr_group = {
static const struct attribute_group ddr_perf_events_attr_group = {
.name = "events",
.attrs = ddr_perf_events_attrs,
};
@ -281,7 +281,7 @@ static struct attribute *ddr_perf_format_attrs[] = {
NULL,
};
static struct attribute_group ddr_perf_format_attr_group = {
static const struct attribute_group ddr_perf_format_attr_group = {
.name = "format",
.attrs = ddr_perf_format_attrs,
};

View File

@ -319,7 +319,7 @@ static struct attribute *hisi_ddrc_pmu_identifier_attrs[] = {
NULL
};
static struct attribute_group hisi_ddrc_pmu_identifier_group = {
static const struct attribute_group hisi_ddrc_pmu_identifier_group = {
.attrs = hisi_ddrc_pmu_identifier_attrs,
};

View File

@ -331,7 +331,7 @@ static struct attribute *hisi_hha_pmu_identifier_attrs[] = {
NULL
};
static struct attribute_group hisi_hha_pmu_identifier_group = {
static const struct attribute_group hisi_hha_pmu_identifier_group = {
.attrs = hisi_hha_pmu_identifier_attrs,
};

View File

@ -321,7 +321,7 @@ static struct attribute *hisi_l3c_pmu_identifier_attrs[] = {
NULL
};
static struct attribute_group hisi_l3c_pmu_identifier_group = {
static const struct attribute_group hisi_l3c_pmu_identifier_group = {
.attrs = hisi_l3c_pmu_identifier_attrs,
};

View File

@ -649,7 +649,7 @@ static struct attribute *l2_cache_pmu_cpumask_attrs[] = {
NULL,
};
static struct attribute_group l2_cache_pmu_cpumask_group = {
static const struct attribute_group l2_cache_pmu_cpumask_group = {
.attrs = l2_cache_pmu_cpumask_attrs,
};
@ -665,7 +665,7 @@ static struct attribute *l2_cache_pmu_formats[] = {
NULL,
};
static struct attribute_group l2_cache_pmu_format_group = {
static const struct attribute_group l2_cache_pmu_format_group = {
.name = "format",
.attrs = l2_cache_pmu_formats,
};
@ -700,7 +700,7 @@ static struct attribute *l2_cache_pmu_events[] = {
NULL
};
static struct attribute_group l2_cache_pmu_events_group = {
static const struct attribute_group l2_cache_pmu_events_group = {
.name = "events",
.attrs = l2_cache_pmu_events,
};

View File

@ -630,7 +630,7 @@ static struct attribute *qcom_l3_cache_pmu_formats[] = {
NULL,
};
static struct attribute_group qcom_l3_cache_pmu_format_group = {
static const struct attribute_group qcom_l3_cache_pmu_format_group = {
.name = "format",
.attrs = qcom_l3_cache_pmu_formats,
};
@ -663,7 +663,7 @@ static struct attribute *qcom_l3_cache_pmu_events[] = {
NULL
};
static struct attribute_group qcom_l3_cache_pmu_events_group = {
static const struct attribute_group qcom_l3_cache_pmu_events_group = {
.name = "events",
.attrs = qcom_l3_cache_pmu_events,
};
@ -685,7 +685,7 @@ static struct attribute *qcom_l3_cache_pmu_cpumask_attrs[] = {
NULL,
};
static struct attribute_group qcom_l3_cache_pmu_cpumask_attr_group = {
static const struct attribute_group qcom_l3_cache_pmu_cpumask_attr_group = {
.attrs = qcom_l3_cache_pmu_cpumask_attrs,
};

View File

@ -1234,10 +1234,9 @@ static irqreturn_t xgene_pmu_isr(int irq, void *dev_id)
u32 intr_mcu, intr_mcb, intr_l3c, intr_iob;
struct xgene_pmu_dev_ctx *ctx;
struct xgene_pmu *xgene_pmu = dev_id;
unsigned long flags;
u32 val;
raw_spin_lock_irqsave(&xgene_pmu->lock, flags);
raw_spin_lock(&xgene_pmu->lock);
/* Get Interrupt PMU source */
val = readl(xgene_pmu->pcppmu_csr + PCPPMU_INTSTATUS_REG);
@ -1273,7 +1272,7 @@ static irqreturn_t xgene_pmu_isr(int irq, void *dev_id)
}
}
raw_spin_unlock_irqrestore(&xgene_pmu->lock, flags);
raw_spin_unlock(&xgene_pmu->lock);
return IRQ_HANDLED;
}

View File

@ -309,44 +309,47 @@ int gntdev_map_grant_pages(struct gntdev_grant_map *map)
* to the kernel linear addresses of the struct pages.
* These ptes are completely different from the user ptes dealt
* with find_grant_ptes.
* Note that GNTMAP_device_map isn't needed here: The
* dev_bus_addr output field gets consumed only from ->map_ops,
* and by not requesting it when mapping we also avoid needing
* to mirror dev_bus_addr into ->unmap_ops (and holding an extra
* reference to the page in the hypervisor).
*/
unsigned int flags = (map->flags & ~GNTMAP_device_map) |
GNTMAP_host_map;
for (i = 0; i < map->count; i++) {
unsigned long address = (unsigned long)
pfn_to_kaddr(page_to_pfn(map->pages[i]));
BUG_ON(PageHighMem(map->pages[i]));
gnttab_set_map_op(&map->kmap_ops[i], address,
map->flags | GNTMAP_host_map,
gnttab_set_map_op(&map->kmap_ops[i], address, flags,
map->grants[i].ref,
map->grants[i].domid);
gnttab_set_unmap_op(&map->kunmap_ops[i], address,
map->flags | GNTMAP_host_map, -1);
flags, -1);
}
}
pr_debug("map %d+%d\n", map->index, map->count);
err = gnttab_map_refs(map->map_ops, use_ptemod ? map->kmap_ops : NULL,
map->pages, map->count);
if (err)
return err;
for (i = 0; i < map->count; i++) {
if (map->map_ops[i].status) {
if (map->map_ops[i].status == GNTST_okay)
map->unmap_ops[i].handle = map->map_ops[i].handle;
else if (!err)
err = -EINVAL;
continue;
}
map->unmap_ops[i].handle = map->map_ops[i].handle;
if (use_ptemod)
map->kunmap_ops[i].handle = map->kmap_ops[i].handle;
#ifdef CONFIG_XEN_GRANT_DMA_ALLOC
else if (map->dma_vaddr) {
unsigned long bfn;
if (map->flags & GNTMAP_device_map)
map->unmap_ops[i].dev_bus_addr = map->map_ops[i].dev_bus_addr;
bfn = pfn_to_bfn(page_to_pfn(map->pages[i]));
map->unmap_ops[i].dev_bus_addr = __pfn_to_phys(bfn);
if (use_ptemod) {
if (map->kmap_ops[i].status == GNTST_okay)
map->kunmap_ops[i].handle = map->kmap_ops[i].handle;
else if (!err)
err = -EINVAL;
}
#endif
}
return err;
}

Some files were not shown because too many files have changed in this diff Show More