Merge ac2ab99072 ("Merge tag 'random-5.19-rc1-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/crng/random") into android-mainline

Steps on the way to 5.19-rc1

Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
Change-Id: Ibd73f1e28739664ac390c1bba3d6e9b873a4a484
This commit is contained in:
Greg Kroah-Hartman 2022-06-05 13:48:17 +02:00
commit 6d6e7dba9f
62 changed files with 1143 additions and 1748 deletions

View File

@ -398,6 +398,7 @@ Vasily Averin <vasily.averin@linux.dev> <vvs@virtuozzo.com>
Vasily Averin <vasily.averin@linux.dev> <vvs@openvz.org>
Vasily Averin <vasily.averin@linux.dev> <vvs@parallels.com>
Vasily Averin <vasily.averin@linux.dev> <vvs@sw.ru>
Valentin Schneider <vschneid@redhat.com> <valentin.schneider@arm.com>
Vinod Koul <vkoul@kernel.org> <vinod.koul@intel.com>
Vinod Koul <vkoul@kernel.org> <vinod.koul@linux.intel.com>
Vinod Koul <vkoul@kernel.org> <vkoul@infradead.org>

View File

@ -37,11 +37,7 @@ Pressure interface
Pressure information for each resource is exported through the
respective file in /proc/pressure/ -- cpu, memory, and io.
The format for CPU is as such::
some avg10=0.00 avg60=0.00 avg300=0.00 total=0
and for memory and IO::
The format is as such::
some avg10=0.00 avg60=0.00 avg300=0.00 total=0
full avg10=0.00 avg60=0.00 avg300=0.00 total=0
@ -58,6 +54,9 @@ situation from a state where some tasks are stalled but the CPU is
still doing productive work. As such, time spent in this subset of the
stall state is tracked separately and exported in the "full" averages.
CPU full is undefined at the system level, but has been reported
since 5.13, so it is set to zero for backward compatibility.
The ratios (in %) are tracked as recent trends over ten, sixty, and
three hundred second windows, which gives insight into short term events
as well as medium and long term trends. The total absolute stall time

View File

@ -994,6 +994,9 @@ This is a directory, with the following entries:
* ``boot_id``: a UUID generated the first time this is retrieved, and
unvarying after that;
* ``uuid``: a UUID generated every time this is retrieved (this can
thus be used to generate UUIDs at will);
* ``entropy_avail``: the pool's entropy count, in bits;
* ``poolsize``: the entropy pool size, in bits;
@ -1001,10 +1004,7 @@ This is a directory, with the following entries:
* ``urandom_min_reseed_secs``: obsolete (used to determine the minimum
number of seconds between urandom pool reseeding). This file is
writable for compatibility purposes, but writing to it has no effect
on any RNG behavior.
* ``uuid``: a UUID generated every time this is retrieved (this can
thus be used to generate UUIDs at will);
on any RNG behavior;
* ``write_wakeup_threshold``: when the entropy count drops below this
(as a number of bits), processes waiting to write to ``/dev/random``

View File

@ -17531,6 +17531,7 @@ R: Steven Rostedt <rostedt@goodmis.org> (SCHED_FIFO/SCHED_RR)
R: Ben Segall <bsegall@google.com> (CONFIG_CFS_BANDWIDTH)
R: Mel Gorman <mgorman@suse.de> (CONFIG_NUMA_BALANCING)
R: Daniel Bristot de Oliveira <bristot@redhat.com> (SCHED_DEADLINE)
R: Valentin Schneider <vschneid@redhat.com> (TOPOLOGY)
L: linux-kernel@vger.kernel.org
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git sched/core

View File

@ -28,5 +28,6 @@ static inline cycles_t get_cycles (void)
__asm__ __volatile__ ("rpcc %0" : "=r"(ret));
return ret;
}
#define get_cycles get_cycles
#endif

View File

@ -11,5 +11,6 @@
typedef unsigned long cycles_t;
#define get_cycles() ({ cycles_t c; read_current_timer(&c) ? 0 : c; })
#define random_get_entropy() (((unsigned long)get_cycles()) ?: random_get_entropy_fallback())
#endif

View File

@ -39,6 +39,7 @@ get_cycles (void)
ret = ia64_getreg(_IA64_REG_AR_ITC);
return ret;
}
#define get_cycles get_cycles
extern void ia64_cpu_local_tick (void);
extern unsigned long long ia64_native_sched_clock (void);

View File

@ -35,7 +35,7 @@ static inline unsigned long random_get_entropy(void)
{
if (mach_random_get_entropy)
return mach_random_get_entropy();
return 0;
return random_get_entropy_fallback();
}
#define random_get_entropy random_get_entropy

View File

@ -76,25 +76,24 @@ static inline cycles_t get_cycles(void)
else
return 0; /* no usable counter */
}
#define get_cycles get_cycles
/*
* Like get_cycles - but where c0_count is not available we desperately
* use c0_random in an attempt to get at least a little bit of entropy.
*
* R6000 and R6000A neither have a count register nor a random register.
* That leaves no entropy source in the CPU itself.
*/
static inline unsigned long random_get_entropy(void)
{
unsigned int prid = read_c0_prid();
unsigned int imp = prid & PRID_IMP_MASK;
unsigned int c0_random;
if (can_use_mips_counter(prid))
if (can_use_mips_counter(read_c0_prid()))
return read_c0_count();
else if (likely(imp != PRID_IMP_R6000 && imp != PRID_IMP_R6000A))
return read_c0_random();
if (cpu_has_3kex)
c0_random = (read_c0_random() >> 8) & 0x3f;
else
return 0; /* no usable register */
c0_random = read_c0_random() & 0x3f;
return (random_get_entropy_fallback() << 6) | (0x3f - c0_random);
}
#define random_get_entropy random_get_entropy

View File

@ -8,5 +8,8 @@
typedef unsigned long cycles_t;
extern cycles_t get_cycles(void);
#define get_cycles get_cycles
#define random_get_entropy() (((unsigned long)get_cycles()) ?: random_get_entropy_fallback())
#endif

View File

@ -23,6 +23,7 @@ static inline cycles_t get_cycles(void)
{
return mfspr(SPR_TTCR);
}
#define get_cycles get_cycles
/* This isn't really used any more */
#define CLOCK_TICK_RATE 1000

View File

@ -521,6 +521,15 @@ _start:
l.ori r3,r0,0x1
l.mtspr r0,r3,SPR_SR
/*
* Start the TTCR as early as possible, so that the RNG can make use of
* measurements of boot time from the earliest opportunity. Especially
* important is that the TTCR does not return zero by the time we reach
* random_init().
*/
l.movhi r3,hi(SPR_TTMR_CR)
l.mtspr r0,r3,SPR_TTMR
CLEAR_GPR(r1)
CLEAR_GPR(r2)
CLEAR_GPR(r3)

View File

@ -13,9 +13,10 @@
typedef unsigned long cycles_t;
static inline cycles_t get_cycles (void)
static inline cycles_t get_cycles(void)
{
return mfctl(16);
}
#define get_cycles get_cycles
#endif

View File

@ -19,6 +19,7 @@ static inline cycles_t get_cycles(void)
{
return mftb();
}
#define get_cycles get_cycles
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_TIMEX_H */

View File

@ -41,7 +41,7 @@ static inline u32 get_cycles_hi(void)
static inline unsigned long random_get_entropy(void)
{
if (unlikely(clint_time_val == NULL))
return 0;
return random_get_entropy_fallback();
return get_cycles();
}
#define random_get_entropy() random_get_entropy()

View File

@ -197,6 +197,7 @@ static inline cycles_t get_cycles(void)
{
return (cycles_t) get_tod_clock() >> 2;
}
#define get_cycles get_cycles
int get_phys_clock(unsigned long *clock);
void init_cpu_timer(void);

View File

@ -9,8 +9,6 @@
#define CLOCK_TICK_RATE 1193180 /* Underlying HZ */
/* XXX Maybe do something better at some point... -DaveM */
typedef unsigned long cycles_t;
#define get_cycles() (0)
#include <asm-generic/timex.h>
#endif

View File

@ -2,13 +2,8 @@
#ifndef __UM_TIMEX_H
#define __UM_TIMEX_H
typedef unsigned long cycles_t;
static inline cycles_t get_cycles (void)
{
return 0;
}
#define CLOCK_TICK_RATE (HZ)
#include <asm-generic/timex.h>
#endif

View File

@ -5,6 +5,15 @@
#include <asm/processor.h>
#include <asm/tsc.h>
/*
 * x86 entropy sample: use the TSC when it is either guaranteed by the
 * build (CONFIG_X86_TSC) or reported as an enabled CPU feature, and
 * defer to random_get_entropy_fallback() otherwise.
 */
static inline unsigned long random_get_entropy(void)
{
	if (IS_ENABLED(CONFIG_X86_TSC) ||
	    cpu_feature_enabled(X86_FEATURE_TSC))
		return rdtsc();

	return random_get_entropy_fallback();
}
/* Advertise the override so `#ifndef random_get_entropy` defaults are skipped. */
#define random_get_entropy random_get_entropy
/* Assume we use the PIT time source for the clock tick */
#define CLOCK_TICK_RATE PIT_TICK_RATE

View File

@ -20,13 +20,12 @@ extern void disable_TSC(void);
static inline cycles_t get_cycles(void)
{
#ifndef CONFIG_X86_TSC
if (!boot_cpu_has(X86_FEATURE_TSC))
if (!IS_ENABLED(CONFIG_X86_TSC) &&
!cpu_feature_enabled(X86_FEATURE_TSC))
return 0;
#endif
return rdtsc();
}
#define get_cycles get_cycles
extern struct system_counterval_t convert_art_to_tsc(u64 art);
extern struct system_counterval_t convert_art_ns_to_tsc(u64 art_ns);

View File

@ -29,10 +29,6 @@
extern unsigned long ccount_freq;
typedef unsigned long long cycles_t;
#define get_cycles() (0)
void local_timer_setup(unsigned cpu);
/*
@ -59,4 +55,6 @@ static inline void set_linux_timer (unsigned long ccompare)
xtensa_set_sr(ccompare, SREG_CCOMPARE + LINUX_TIMER);
}
#include <asm-generic/timex.h>
#endif /* _XTENSA_TIMEX_H */

File diff suppressed because it is too large Load Diff

View File

@ -126,13 +126,13 @@
*/
#define SCHED_DATA \
STRUCT_ALIGN(); \
__begin_sched_classes = .; \
*(__idle_sched_class) \
*(__fair_sched_class) \
*(__rt_sched_class) \
*(__dl_sched_class) \
__sched_class_highest = .; \
*(__stop_sched_class) \
__end_sched_classes = .;
*(__dl_sched_class) \
*(__rt_sched_class) \
*(__fair_sched_class) \
*(__idle_sched_class) \
__sched_class_lowest = .;
/* The actual configuration determines if the init/exit sections
* are handled as text/data or they can be discarded (which

View File

@ -589,6 +589,15 @@ struct softirq_action
asmlinkage void do_softirq(void);
asmlinkage void __do_softirq(void);
/*
 * do_softirq_post_smp_call_flush() - softirq handling hook with a
 * PREEMPT_RT-specific and a generic variant.
 */
#ifdef CONFIG_PREEMPT_RT
/*
 * PREEMPT_RT: implemented out of line. NOTE(review): judging by the
 * parameter name, @was_pending is a previously sampled pending-softirq
 * mask -- confirm against the definition.
 */
extern void do_softirq_post_smp_call_flush(unsigned int was_pending);
#else
/* !PREEMPT_RT: the argument is ignored; simply run do_softirq(). */
static inline void do_softirq_post_smp_call_flush(unsigned int unused)
{
do_softirq();
}
#endif
extern void open_softirq(int nr, void (*action)(struct softirq_action *));
extern void softirq_init(void);
extern void __raise_softirq_irqoff(unsigned int nr);

View File

@ -2677,6 +2677,7 @@ extern int install_special_mapping(struct mm_struct *mm,
unsigned long flags, struct page **pages);
unsigned long randomize_stack_top(unsigned long stack_top);
unsigned long randomize_page(unsigned long start, unsigned long range);
extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);

View File

@ -10,62 +10,16 @@
#include <linux/types.h>
#include <linux/percpu.h>
#include <linux/random.h>
u32 prandom_u32(void);
void prandom_bytes(void *buf, size_t nbytes);
void prandom_seed(u32 seed);
void prandom_reseed_late(void);
DECLARE_PER_CPU(unsigned long, net_rand_noise);
#define PRANDOM_ADD_NOISE(a, b, c, d) \
prandom_u32_add_noise((unsigned long)(a), (unsigned long)(b), \
(unsigned long)(c), (unsigned long)(d))
#if BITS_PER_LONG == 64
/*
* The core SipHash round function. Each line can be executed in
* parallel given enough CPU resources.
*/
#define PRND_SIPROUND(v0, v1, v2, v3) ( \
v0 += v1, v1 = rol64(v1, 13), v2 += v3, v3 = rol64(v3, 16), \
v1 ^= v0, v0 = rol64(v0, 32), v3 ^= v2, \
v0 += v3, v3 = rol64(v3, 21), v2 += v1, v1 = rol64(v1, 17), \
v3 ^= v0, v1 ^= v2, v2 = rol64(v2, 32) \
)
#define PRND_K0 (0x736f6d6570736575 ^ 0x6c7967656e657261)
#define PRND_K1 (0x646f72616e646f6d ^ 0x7465646279746573)
#elif BITS_PER_LONG == 32
/*
* On 32-bit machines, we use HSipHash, a reduced-width version of SipHash.
* This is weaker, but 32-bit machines are not used for high-traffic
* applications, so there is less output for an attacker to analyze.
*/
#define PRND_SIPROUND(v0, v1, v2, v3) ( \
v0 += v1, v1 = rol32(v1, 5), v2 += v3, v3 = rol32(v3, 8), \
v1 ^= v0, v0 = rol32(v0, 16), v3 ^= v2, \
v0 += v3, v3 = rol32(v3, 7), v2 += v1, v1 = rol32(v1, 13), \
v3 ^= v0, v1 ^= v2, v2 = rol32(v2, 16) \
)
#define PRND_K0 0x6c796765
#define PRND_K1 0x74656462
#else
#error Unsupported BITS_PER_LONG
#endif
static inline void prandom_u32_add_noise(unsigned long a, unsigned long b,
unsigned long c, unsigned long d)
static inline u32 prandom_u32(void)
{
/*
* This is not used cryptographically; it's just
* a convenient 4-word hash function. (3 xor, 2 add, 2 rol)
*/
a ^= raw_cpu_read(net_rand_noise);
PRND_SIPROUND(a, b, c, d);
raw_cpu_write(net_rand_noise, d);
return get_random_u32();
}
static inline void prandom_bytes(void *buf, size_t nbytes)
{
return get_random_bytes(buf, nbytes);
}
struct rnd_state {
@ -117,7 +71,6 @@ static inline void prandom_seed_state(struct rnd_state *state, u64 seed)
state->s2 = __seed(i, 8U);
state->s3 = __seed(i, 16U);
state->s4 = __seed(i, 128U);
PRANDOM_ADD_NOISE(state, i, 0, 0);
}
/* Pseudo random number generator from numerical recipes. */

View File

@ -12,45 +12,32 @@
struct notifier_block;
extern void add_device_randomness(const void *, size_t);
extern void add_bootloader_randomness(const void *, size_t);
void add_device_randomness(const void *buf, size_t len);
void add_bootloader_randomness(const void *buf, size_t len);
void add_input_randomness(unsigned int type, unsigned int code,
unsigned int value) __latent_entropy;
void add_interrupt_randomness(int irq) __latent_entropy;
void add_hwgenerator_randomness(const void *buf, size_t len, size_t entropy);
#if defined(LATENT_ENTROPY_PLUGIN) && !defined(__CHECKER__)
static inline void add_latent_entropy(void)
{
add_device_randomness((const void *)&latent_entropy,
sizeof(latent_entropy));
add_device_randomness((const void *)&latent_entropy, sizeof(latent_entropy));
}
#else
static inline void add_latent_entropy(void) {}
static inline void add_latent_entropy(void) { }
#endif
extern void add_input_randomness(unsigned int type, unsigned int code,
unsigned int value) __latent_entropy;
extern void add_interrupt_randomness(int irq) __latent_entropy;
extern void add_hwgenerator_randomness(const void *buffer, size_t count,
size_t entropy);
#if IS_ENABLED(CONFIG_VMGENID)
extern void add_vmfork_randomness(const void *unique_vm_id, size_t size);
extern int register_random_vmfork_notifier(struct notifier_block *nb);
extern int unregister_random_vmfork_notifier(struct notifier_block *nb);
void add_vmfork_randomness(const void *unique_vm_id, size_t len);
int register_random_vmfork_notifier(struct notifier_block *nb);
int unregister_random_vmfork_notifier(struct notifier_block *nb);
#else
static inline int register_random_vmfork_notifier(struct notifier_block *nb) { return 0; }
static inline int unregister_random_vmfork_notifier(struct notifier_block *nb) { return 0; }
#endif
extern void get_random_bytes(void *buf, size_t nbytes);
extern int wait_for_random_bytes(void);
extern int __init rand_initialize(void);
extern bool rng_is_initialized(void);
extern int register_random_ready_notifier(struct notifier_block *nb);
extern int unregister_random_ready_notifier(struct notifier_block *nb);
extern size_t __must_check get_random_bytes_arch(void *buf, size_t nbytes);
#ifndef MODULE
extern const struct file_operations random_fops, urandom_fops;
#endif
void get_random_bytes(void *buf, size_t len);
u32 get_random_u32(void);
u64 get_random_u64(void);
static inline unsigned int get_random_int(void)
@ -82,11 +69,14 @@ static inline unsigned long get_random_long(void)
static inline unsigned long get_random_canary(void)
{
unsigned long val = get_random_long();
return val & CANARY_MASK;
return get_random_long() & CANARY_MASK;
}
int __init random_init(const char *command_line);
bool rng_is_initialized(void);
bool rng_has_arch_random(void);
int wait_for_random_bytes(void);
/* Calls wait_for_random_bytes() and then calls get_random_bytes(buf, nbytes).
* Returns the result of the call to wait_for_random_bytes. */
static inline int get_random_bytes_wait(void *buf, size_t nbytes)
@ -96,22 +86,20 @@ static inline int get_random_bytes_wait(void *buf, size_t nbytes)
return ret;
}
#define declare_get_random_var_wait(var) \
static inline int get_random_ ## var ## _wait(var *out) { \
#define declare_get_random_var_wait(name, ret_type) \
static inline int get_random_ ## name ## _wait(ret_type *out) { \
int ret = wait_for_random_bytes(); \
if (unlikely(ret)) \
return ret; \
*out = get_random_ ## var(); \
*out = get_random_ ## name(); \
return 0; \
}
declare_get_random_var_wait(u32)
declare_get_random_var_wait(u64)
declare_get_random_var_wait(int)
declare_get_random_var_wait(long)
/*
 * Instantiate get_random_{u32,u64,int,long}_wait().  The second argument
 * is the type written through @out and must match the return type of the
 * corresponding get_random_<name>() so no bits are lost: in particular
 * get_random_u64() yields a u64, so its wrapper must store into a u64.
 */
declare_get_random_var_wait(u32, u32)
declare_get_random_var_wait(u64, u64)
declare_get_random_var_wait(int, unsigned int)
declare_get_random_var_wait(long, unsigned long)
/* Undefine the generator macro itself (its name ends in _wait). */
#undef declare_get_random_var_wait
unsigned long randomize_page(unsigned long start, unsigned long range);
/*
* This is designed to be standalone for just prandom
* users, but for now we include it from <linux/random.h>
@ -122,22 +110,10 @@ unsigned long randomize_page(unsigned long start, unsigned long range);
#ifdef CONFIG_ARCH_RANDOM
# include <asm/archrandom.h>
#else
static inline bool __must_check arch_get_random_long(unsigned long *v)
{
return false;
}
static inline bool __must_check arch_get_random_int(unsigned int *v)
{
return false;
}
static inline bool __must_check arch_get_random_seed_long(unsigned long *v)
{
return false;
}
static inline bool __must_check arch_get_random_seed_int(unsigned int *v)
{
return false;
}
static inline bool __must_check arch_get_random_long(unsigned long *v) { return false; }
static inline bool __must_check arch_get_random_int(unsigned int *v) { return false; }
static inline bool __must_check arch_get_random_seed_long(unsigned long *v) { return false; }
static inline bool __must_check arch_get_random_seed_int(unsigned int *v) { return false; }
#endif
/*
@ -161,8 +137,12 @@ static inline bool __init arch_get_random_long_early(unsigned long *v)
#endif
#ifdef CONFIG_SMP
extern int random_prepare_cpu(unsigned int cpu);
extern int random_online_cpu(unsigned int cpu);
int random_prepare_cpu(unsigned int cpu);
int random_online_cpu(unsigned int cpu);
#endif
#ifndef MODULE
extern const struct file_operations random_fops, urandom_fops;
#endif
#endif /* _LINUX_RANDOM_H */

View File

@ -2390,20 +2390,6 @@ static inline void rseq_syscall(struct pt_regs *regs)
#endif
const struct sched_avg *sched_trace_cfs_rq_avg(struct cfs_rq *cfs_rq);
char *sched_trace_cfs_rq_path(struct cfs_rq *cfs_rq, char *str, int len);
int sched_trace_cfs_rq_cpu(struct cfs_rq *cfs_rq);
const struct sched_avg *sched_trace_rq_avg_rt(struct rq *rq);
const struct sched_avg *sched_trace_rq_avg_dl(struct rq *rq);
const struct sched_avg *sched_trace_rq_avg_irq(struct rq *rq);
int sched_trace_rq_cpu(struct rq *rq);
int sched_trace_rq_cpu_capacity(struct rq *rq);
int sched_trace_rq_nr_running(struct rq *rq);
const struct cpumask *sched_trace_rd_span(struct root_domain *rd);
#ifdef CONFIG_SCHED_CORE
extern void sched_core_free(struct task_struct *tsk);
extern void sched_core_fork(struct task_struct *p);
@ -2414,4 +2400,6 @@ static inline void sched_core_free(struct task_struct *tsk) { }
static inline void sched_core_fork(struct task_struct *p) { }
#endif
extern void sched_set_stop_task(int cpu, struct task_struct *stop);
#endif

View File

@ -121,10 +121,12 @@ enum lockdown_reason {
LOCKDOWN_DEBUGFS,
LOCKDOWN_XMON_WR,
LOCKDOWN_BPF_WRITE_USER,
LOCKDOWN_DBG_WRITE_KERNEL,
LOCKDOWN_INTEGRITY_MAX,
LOCKDOWN_KCORE,
LOCKDOWN_KPROBES,
LOCKDOWN_BPF_READ_KERNEL,
LOCKDOWN_DBG_READ_KERNEL,
LOCKDOWN_PERF,
LOCKDOWN_TRACEFS,
LOCKDOWN_XMON_RW,

View File

@ -138,4 +138,32 @@ static inline u32 hsiphash(const void *data, size_t len,
return ___hsiphash_aligned(data, len, key);
}
/*
* These macros expose the raw SipHash and HalfSipHash permutations.
* Do not use them directly! If you think you have a use for them,
* be sure to CC the maintainer of this file explaining why.
*/
#define SIPHASH_PERMUTATION(a, b, c, d) ( \
(a) += (b), (b) = rol64((b), 13), (b) ^= (a), (a) = rol64((a), 32), \
(c) += (d), (d) = rol64((d), 16), (d) ^= (c), \
(a) += (d), (d) = rol64((d), 21), (d) ^= (a), \
(c) += (b), (b) = rol64((b), 17), (b) ^= (c), (c) = rol64((c), 32))
#define SIPHASH_CONST_0 0x736f6d6570736575ULL
#define SIPHASH_CONST_1 0x646f72616e646f6dULL
#define SIPHASH_CONST_2 0x6c7967656e657261ULL
#define SIPHASH_CONST_3 0x7465646279746573ULL
#define HSIPHASH_PERMUTATION(a, b, c, d) ( \
(a) += (b), (b) = rol32((b), 5), (b) ^= (a), (a) = rol32((a), 16), \
(c) += (d), (d) = rol32((d), 8), (d) ^= (c), \
(a) += (d), (d) = rol32((d), 7), (d) ^= (a), \
(c) += (b), (b) = rol32((b), 13), (b) ^= (c), (c) = rol32((c), 16))
#define HSIPHASH_CONST_0 0U
#define HSIPHASH_CONST_1 0U
#define HSIPHASH_CONST_2 0x6c796765U
#define HSIPHASH_CONST_3 0x74656462U
#endif /* _LINUX_SIPHASH_H */

View File

@ -62,6 +62,8 @@
#include <linux/types.h>
#include <linux/param.h>
unsigned long random_get_entropy_fallback(void);
#include <asm/timex.h>
#ifndef random_get_entropy
@ -74,8 +76,14 @@
*
* By default we use get_cycles() for this purpose, but individual
* architectures may override this in their asm/timex.h header file.
* If a given arch does not have get_cycles(), then we fall back to
* using random_get_entropy_fallback().
*/
#ifdef get_cycles
#define random_get_entropy() ((unsigned long)get_cycles())
#else
#define random_get_entropy() random_get_entropy_fallback()
#endif
#endif
/*

View File

@ -240,13 +240,6 @@ static inline const struct cpumask *cpu_smt_mask(int cpu)
}
#endif
#if defined(CONFIG_SCHED_CLUSTER) && !defined(cpu_cluster_mask)
static inline const struct cpumask *cpu_cluster_mask(int cpu)
{
return topology_cluster_cpumask(cpu);
}
#endif
static inline const struct cpumask *cpu_cpu_mask(int cpu)
{
return cpumask_of_node(cpu_to_node(cpu));

View File

@ -1033,21 +1033,18 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void)
softirq_init();
timekeeping_init();
kfence_init();
time_init();
/*
* For best initial stack canary entropy, prepare it after:
* - setup_arch() for any UEFI RNG entropy and boot cmdline access
* - timekeeping_init() for ktime entropy used in rand_initialize()
* - rand_initialize() to get any arch-specific entropy like RDRAND
* - add_latent_entropy() to get any latent entropy
* - adding command line entropy
* - timekeeping_init() for ktime entropy used in random_init()
* - time_init() for making random_get_entropy() work on some platforms
* - random_init() to initialize the RNG from early entropy sources
*/
rand_initialize();
add_latent_entropy();
add_device_randomness(command_line, strlen(command_line));
random_init(command_line);
boot_init_stack_canary();
time_init();
perf_event_init();
profile_init();
call_function_init();

View File

@ -53,6 +53,7 @@
#include <linux/vmacache.h>
#include <linux/rcupdate.h>
#include <linux/irq.h>
#include <linux/security.h>
#include <asm/cacheflush.h>
#include <asm/byteorder.h>
@ -752,6 +753,29 @@ static int kgdb_cpu_enter(struct kgdb_state *ks, struct pt_regs *regs,
continue;
kgdb_connected = 0;
} else {
/*
* This is a brutal way to interfere with the debugger
* and prevent gdb being used to poke at kernel memory.
* This could cause trouble if lockdown is applied when
* there is already an active gdb session. For now the
* answer is simply "don't do that". Typically lockdown
* *will* be applied before the debug core gets started
* so only developers using kgdb for fairly advanced
* early kernel debug can be bitten by this. Hopefully
* they are sophisticated enough to take care of
* themselves, especially with help from the lockdown
* message printed on the console!
*/
if (security_locked_down(LOCKDOWN_DBG_WRITE_KERNEL)) {
if (IS_ENABLED(CONFIG_KGDB_KDB)) {
/* Switch back to kdb if possible... */
dbg_kdb_mode = 1;
continue;
} else {
/* ... otherwise just bail */
break;
}
}
error = gdb_serial_stub(ks);
}

View File

@ -45,6 +45,7 @@
#include <linux/proc_fs.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/security.h>
#include "kdb_private.h"
#undef MODULE_PARAM_PREFIX
@ -166,10 +167,62 @@ struct task_struct *kdb_curr_task(int cpu)
}
/*
* Check whether the flags of the current command and the permissions
* of the kdb console has allow a command to be run.
* Update the permissions flags (kdb_cmd_enabled) to match the
* current lockdown state.
*
* Within this function the calls to security_locked_down() are "lazy". We
* avoid calling them if the current value of kdb_cmd_enabled already excludes
* flags that might be subject to lockdown. Additionally we deliberately check
* the lockdown flags independently (even though read lockdown implies write
* lockdown) since that results in both simpler code and clearer messages to
* the user on first-time debugger entry.
*
* The permission mask during a read+write lockdown permits the following
* flags: INSPECT, SIGNAL, REBOOT (and ALWAYS_SAFE).
*
* The INSPECT commands are not blocked during lockdown because they are
* not arbitrary memory reads. INSPECT covers the backtrace family (sometimes
* forcing them to have no arguments) and lsmod. These commands do expose
* some kernel state but do not allow the developer seated at the console to
* choose what state is reported. SIGNAL and REBOOT should not be controversial,
* given these are allowed for root during lockdown already.
*/
static inline bool kdb_check_flags(kdb_cmdflags_t flags, int permissions,
static void kdb_check_for_lockdown(void)
{
	/* Command classes that modify kernel state or control flow. */
	const int write_flags = KDB_ENABLE_MEM_WRITE |
				KDB_ENABLE_REG_WRITE |
				KDB_ENABLE_FLOW_CTRL;
	/* Command classes that read memory or registers. */
	const int read_flags = KDB_ENABLE_MEM_READ |
			       KDB_ENABLE_REG_READ;
	bool lock_write = false;
	bool lock_read = false;

	/*
	 * Lazy checks: security_locked_down() is only consulted while the
	 * matching command classes are still enabled.  The write check is
	 * issued before the read check.
	 */
	if (kdb_cmd_enabled & (KDB_ENABLE_ALL | write_flags))
		lock_write = security_locked_down(LOCKDOWN_DBG_WRITE_KERNEL);

	if (kdb_cmd_enabled & (KDB_ENABLE_ALL | read_flags))
		lock_read = security_locked_down(LOCKDOWN_DBG_READ_KERNEL);

	/* Nothing is locked down: leave the permission mask untouched. */
	if (!lock_write && !lock_read)
		return;

	/* De-compose KDB_ENABLE_ALL so individual flags can be cleared. */
	if (kdb_cmd_enabled & KDB_ENABLE_ALL)
		kdb_cmd_enabled = KDB_ENABLE_MASK & ~KDB_ENABLE_ALL;

	if (lock_write)
		kdb_cmd_enabled &= ~write_flags;

	if (lock_read)
		kdb_cmd_enabled &= ~read_flags;
}
/*
* Check whether the flags of the current command, the permissions of the kdb
* console and the lockdown state allow a command to be run.
*/
static bool kdb_check_flags(kdb_cmdflags_t flags, int permissions,
bool no_args)
{
/* permissions comes from userspace so needs massaging slightly */
@ -1180,6 +1233,9 @@ static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs,
kdb_curr_task(raw_smp_processor_id());
KDB_DEBUG_STATE("kdb_local 1", reason);
kdb_check_for_lockdown();
kdb_go_count = 0;
if (reason == KDB_REASON_DEBUG) {
/* special case below */

View File

@ -1380,13 +1380,14 @@ static const void *nthreads_gen_params(const void *prev, char *desc)
else
nthreads *= 2;
if (!IS_ENABLED(CONFIG_PREEMPT) || !IS_ENABLED(CONFIG_KCSAN_INTERRUPT_WATCHER)) {
if (!preempt_model_preemptible() ||
!IS_ENABLED(CONFIG_KCSAN_INTERRUPT_WATCHER)) {
/*
* Without any preemption, keep 2 CPUs free for other tasks, one
* of which is the main test case function checking for
* completion or failure.
*/
const long min_unused_cpus = IS_ENABLED(CONFIG_PREEMPT_NONE) ? 2 : 0;
const long min_unused_cpus = preempt_model_none() ? 2 : 0;
const long min_required_cpus = 2 + min_unused_cpus;
if (num_online_cpus() < min_required_cpus) {

View File

@ -15,6 +15,7 @@
/* Headers: */
#include <linux/sched/clock.h>
#include <linux/sched/cputime.h>
#include <linux/sched/hotplug.h>
#include <linux/sched/posix-timers.h>
#include <linux/sched/rt.h>
@ -31,6 +32,7 @@
#include <uapi/linux/sched/types.h>
#include "sched.h"
#include "smp.h"
#include "autogroup.h"
#include "stats.h"

View File

@ -14,6 +14,7 @@
#include <linux/sched/debug.h>
#include <linux/sched/isolation.h>
#include <linux/sched/loadavg.h>
#include <linux/sched/nohz.h>
#include <linux/sched/mm.h>
#include <linux/sched/rseq_api.h>
#include <linux/sched/task_stack.h>

View File

@ -26,7 +26,10 @@
#include <linux/topology.h>
#include <linux/sched/clock.h>
#include <linux/sched/cond_resched.h>
#include <linux/sched/cputime.h>
#include <linux/sched/debug.h>
#include <linux/sched/hotplug.h>
#include <linux/sched/init.h>
#include <linux/sched/isolation.h>
#include <linux/sched/loadavg.h>
#include <linux/sched/mm.h>
@ -617,10 +620,10 @@ void double_rq_lock(struct rq *rq1, struct rq *rq2)
swap(rq1, rq2);
raw_spin_rq_lock(rq1);
if (__rq_lockp(rq1) == __rq_lockp(rq2))
return;
if (__rq_lockp(rq1) != __rq_lockp(rq2))
raw_spin_rq_lock_nested(rq2, SINGLE_DEPTH_NESTING);
raw_spin_rq_lock_nested(rq2, SINGLE_DEPTH_NESTING);
double_rq_clock_clear_update(rq1, rq2);
}
EXPORT_SYMBOL_GPL(double_rq_lock);
#endif
@ -2221,7 +2224,7 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
{
if (p->sched_class == rq->curr->sched_class)
rq->curr->sched_class->check_preempt_curr(rq, p, flags);
else if (p->sched_class > rq->curr->sched_class)
else if (sched_class_above(p->sched_class, rq->curr->sched_class))
resched_curr(rq);
/*
@ -2452,7 +2455,7 @@ static int migration_cpu_stop(void *data)
* __migrate_task() such that we will not miss enforcing cpus_ptr
* during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test.
*/
flush_smp_call_function_from_idle();
flush_smp_call_function_queue();
raw_spin_lock(&p->pi_lock);
rq_lock(rq, &rf);
@ -5764,7 +5767,7 @@ __pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
* higher scheduling class, because otherwise those lose the
* opportunity to pull in more work from other CPUs.
*/
if (likely(prev->sched_class <= &fair_sched_class &&
if (likely(!sched_class_above(prev->sched_class, &fair_sched_class) &&
rq->nr_running == rq->cfs.h_nr_running)) {
p = pick_next_task_fair(rq, prev, rf);
@ -9544,11 +9547,11 @@ void __init sched_init(void)
int i;
/* Make sure the linker didn't screw up */
BUG_ON(&idle_sched_class + 1 != &fair_sched_class ||
&fair_sched_class + 1 != &rt_sched_class ||
&rt_sched_class + 1 != &dl_sched_class);
BUG_ON(&idle_sched_class != &fair_sched_class + 1 ||
&fair_sched_class != &rt_sched_class + 1 ||
&rt_sched_class != &dl_sched_class + 1);
#ifdef CONFIG_SMP
BUG_ON(&dl_sched_class + 1 != &stop_sched_class);
BUG_ON(&dl_sched_class != &stop_sched_class + 1);
#endif
wait_bit_init();

View File

@ -1220,8 +1220,6 @@ int dl_runtime_exceeded(struct sched_dl_entity *dl_se)
return (dl_se->runtime <= 0);
}
extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq);
/*
* This function implements the GRUB accounting rule:
* according to the GRUB reclaiming algorithm, the runtime is
@ -1832,6 +1830,7 @@ select_task_rq_dl(struct task_struct *p, int cpu, int flags)
static void migrate_task_rq_dl(struct task_struct *p, int new_cpu __maybe_unused)
{
struct rq_flags rf;
struct rq *rq;
if (READ_ONCE(p->__state) != TASK_WAKING)
@ -1843,7 +1842,7 @@ static void migrate_task_rq_dl(struct task_struct *p, int new_cpu __maybe_unused
* from try_to_wake_up(). Hence, p->pi_lock is locked, but
* rq->lock is not... So, lock it
*/
raw_spin_rq_lock(rq);
rq_lock(rq, &rf);
if (p->dl.dl_non_contending) {
update_rq_clock(rq);
sub_running_bw(&p->dl, &rq->dl);
@ -1859,7 +1858,7 @@ static void migrate_task_rq_dl(struct task_struct *p, int new_cpu __maybe_unused
put_task_struct(p);
}
sub_rq_bw(&p->dl, &rq->dl);
raw_spin_rq_unlock(rq);
rq_unlock(rq, &rf);
}
static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
@ -2319,13 +2318,7 @@ static int push_dl_task(struct rq *rq)
deactivate_task(rq, next_task, 0);
set_task_cpu(next_task, later_rq->cpu);
/*
* Update the later_rq clock here, because the clock is used
* by the cpufreq_update_util() inside __add_running_bw().
*/
update_rq_clock(later_rq);
activate_task(later_rq, next_task, ENQUEUE_NOCLOCK);
activate_task(later_rq, next_task, 0);
ret = 1;
resched_curr(later_rq);

View File

@ -36,6 +36,7 @@
#include <linux/sched/cond_resched.h>
#include <linux/sched/cputime.h>
#include <linux/sched/isolation.h>
#include <linux/sched/nohz.h>
#include <linux/cpuidle.h>
#include <linux/interrupt.h>
@ -316,19 +317,6 @@ const struct sched_class fair_sched_class;
#define for_each_sched_entity(se) \
for (; se; se = se->parent)
static inline void cfs_rq_tg_path(struct cfs_rq *cfs_rq, char *path, int len)
{
if (!path)
return;
if (cfs_rq && task_group_is_autogroup(cfs_rq->tg))
autogroup_path(cfs_rq->tg, path, len);
else if (cfs_rq && cfs_rq->tg->css.cgroup)
cgroup_path(cfs_rq->tg->css.cgroup, path, len);
else
strlcpy(path, "(null)", len);
}
static inline bool list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
{
struct rq *rq = rq_of(cfs_rq);
@ -496,12 +484,6 @@ static int se_is_idle(struct sched_entity *se)
#define for_each_sched_entity(se) \
for (; se; se = NULL)
/*
 * Variant that never consults @cfs_rq: the path is always reported as
 * "(null)".  A NULL @path is left untouched.
 */
static inline void cfs_rq_tg_path(struct cfs_rq *cfs_rq, char *path, int len)
{
	if (!path)
		return;

	strlcpy(path, "(null)", len);
}
static inline bool list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
{
return true;
@ -4869,11 +4851,11 @@ static int tg_unthrottle_up(struct task_group *tg, void *data)
cfs_rq->throttle_count--;
if (!cfs_rq->throttle_count) {
cfs_rq->throttled_clock_task_time += rq_clock_task(rq) -
cfs_rq->throttled_clock_task;
cfs_rq->throttled_clock_pelt_time += rq_clock_pelt(rq) -
cfs_rq->throttled_clock_pelt;
/* Add cfs_rq with load or one or more already running entities to the list */
if (!cfs_rq_is_decayed(cfs_rq) || cfs_rq->nr_running)
if (!cfs_rq_is_decayed(cfs_rq))
list_add_leaf_cfs_rq(cfs_rq);
}
@ -4887,7 +4869,7 @@ static int tg_throttle_down(struct task_group *tg, void *data)
/* group is entering throttled state, stop time */
if (!cfs_rq->throttle_count) {
cfs_rq->throttled_clock_task = rq_clock_task(rq);
cfs_rq->throttled_clock_pelt = rq_clock_pelt(rq);
list_del_leaf_cfs_rq(cfs_rq);
}
cfs_rq->throttle_count++;
@ -5331,7 +5313,7 @@ static void sync_throttle(struct task_group *tg, int cpu)
pcfs_rq = tg->parent->cfs_rq[cpu];
cfs_rq->throttle_count = pcfs_rq->throttle_count;
cfs_rq->throttled_clock_task = rq_clock_task(cpu_rq(cpu));
cfs_rq->throttled_clock_pelt = rq_clock_pelt(cpu_rq(cpu));
}
/* conditionally throttle active cfs_rq's from put_prev_entity() */
@ -6574,6 +6556,68 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
return target;
}
/*
 * Estimate what cpu_util(@cpu) would report once @p has either been taken
 * off @cpu (@dst_cpu == -1) or been migrated to @dst_cpu.
 */
static unsigned long cpu_util_next(int cpu, struct task_struct *p, int dst_cpu)
{
	struct cfs_rq *cfs_rq = &cpu_rq(cpu)->cfs;
	unsigned long util = READ_ONCE(cfs_rq->avg.util_avg);
	unsigned long util_est;

	/*
	 * util_avg of @cpu only changes in two situations:
	 *  - @p currently sits on @cpu and ends up elsewhere (including
	 *    @dst_cpu == -1): drop its contribution;
	 *  - @p currently sits elsewhere and lands on @cpu: add it.
	 * In every other combination @cpu is untouched by the move.
	 */
	if (dst_cpu != cpu) {
		if (task_cpu(p) == cpu)
			lsub_positive(&util, task_util(p));
	} else if (task_cpu(p) != cpu) {
		util += task_util(p);
	}

	if (!sched_feat(UTIL_EST))
		return min(util, capacity_orig_of(cpu));

	util_est = READ_ONCE(cfs_rq->avg.util_est.enqueued);

	/*
	 * Wake-up: @p is not enqueued anywhere yet, so no CPU's
	 * util_est.enqueued accounts for it.  When @dst_cpu == @cpu, add it
	 * to mimic cpu_util after the enqueue.
	 *
	 * Exec (@dst_cpu == -1): @p is enqueued and is part of
	 * util_est.enqueued, so take it out to mimic cpu_util without @p.
	 *
	 * task_on_rq_queued(@p) alone still leaves a small window in which
	 * an exec-time select_task_rq_fair() can race with the load
	 * balancer's detach_task():
	 *
	 *   detach_task()
	 *     deactivate_task()
	 *       p->on_rq = TASK_ON_RQ_MIGRATING;
	 *       -------------------------------- A
	 *       dequeue_task()                    \
	 *         dequeue_task_fair()              + Race Time
	 *           util_est_dequeue()            /
	 *       -------------------------------- B
	 *
	 * Also testing "current == p" shrinks that window further.
	 */
	if (dst_cpu == cpu)
		util_est += _task_util_est(p);
	else if (unlikely(task_on_rq_queued(p) || current == p))
		lsub_positive(&util_est, _task_util_est(p));

	util = max(util, util_est);

	return min(util, capacity_orig_of(cpu));
}
/*
* cpu_util_without: compute cpu utilization without any contributions from *p
* @cpu: the CPU which utilization is requested
@ -6589,116 +6633,11 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
*/
static unsigned long cpu_util_without(int cpu, struct task_struct *p)
{
struct cfs_rq *cfs_rq;
unsigned int util;
/* Task has no contribution or is new */
if (cpu != task_cpu(p) || !READ_ONCE(p->se.avg.last_update_time))
return cpu_util_cfs(cpu);
cfs_rq = &cpu_rq(cpu)->cfs;
util = READ_ONCE(cfs_rq->avg.util_avg);
/* Discount task's util from CPU's util */
lsub_positive(&util, task_util(p));
/*
* Covered cases:
*
* a) if *p is the only task sleeping on this CPU, then:
* cpu_util (== task_util) > util_est (== 0)
* and thus we return:
* cpu_util_without = (cpu_util - task_util) = 0
*
* b) if other tasks are SLEEPING on this CPU, which is now exiting
* IDLE, then:
* cpu_util >= task_util
* cpu_util > util_est (== 0)
* and thus we discount *p's blocked utilization to return:
* cpu_util_without = (cpu_util - task_util) >= 0
*
* c) if other tasks are RUNNABLE on that CPU and
* util_est > cpu_util
* then we use util_est since it returns a more restrictive
* estimation of the spare capacity on that CPU, by just
* considering the expected utilization of tasks already
* runnable on that CPU.
*
* Cases a) and b) are covered by the above code, while case c) is
* covered by the following code when estimated utilization is
* enabled.
*/
if (sched_feat(UTIL_EST)) {
unsigned int estimated =
READ_ONCE(cfs_rq->avg.util_est.enqueued);
/*
* Despite the following checks we still have a small window
* for a possible race, when an execl's select_task_rq_fair()
* races with LB's detach_task():
*
* detach_task()
* p->on_rq = TASK_ON_RQ_MIGRATING;
* ---------------------------------- A
* deactivate_task() \
* dequeue_task() + RaceTime
* util_est_dequeue() /
* ---------------------------------- B
*
* The additional check on "current == p" it's required to
* properly fix the execl regression and it helps in further
* reducing the chances for the above race.
*/
if (unlikely(task_on_rq_queued(p) || current == p))
lsub_positive(&estimated, _task_util_est(p));
util = max(util, estimated);
}
/*
* Utilization (estimated) can exceed the CPU capacity, thus let's
* clamp to the maximum CPU capacity to ensure consistency with
* cpu_util.
*/
return min_t(unsigned long, util, capacity_orig_of(cpu));
}
/*
* Predicts what cpu_util(@cpu) would return if @p was migrated (and enqueued)
* to @dst_cpu.
*/
static unsigned long cpu_util_next(int cpu, struct task_struct *p, int dst_cpu)
{
struct cfs_rq *cfs_rq = &cpu_rq(cpu)->cfs;
unsigned long util_est, util = READ_ONCE(cfs_rq->avg.util_avg);
/*
* If @p migrates from @cpu to another, remove its contribution. Or,
* if @p migrates from another CPU to @cpu, add its contribution. In
* the other cases, @cpu is not impacted by the migration, so the
* util_avg should already be correct.
*/
if (task_cpu(p) == cpu && dst_cpu != cpu)
lsub_positive(&util, task_util(p));
else if (task_cpu(p) != cpu && dst_cpu == cpu)
util += task_util(p);
if (sched_feat(UTIL_EST)) {
util_est = READ_ONCE(cfs_rq->avg.util_est.enqueued);
/*
* During wake-up, the task isn't enqueued yet and doesn't
* appear in the cfs_rq->avg.util_est.enqueued of any rq,
* so just add it (if needed) to "simulate" what will be
* cpu_util after the task has been enqueued.
*/
if (dst_cpu == cpu)
util_est += _task_util_est(p);
util = max(util, util_est);
}
return min(util, capacity_orig_of(cpu));
return cpu_util_next(cpu, p, -1);
}
/*
@ -9554,8 +9493,6 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
local->avg_load = (local->group_load * SCHED_CAPACITY_SCALE) /
local->group_capacity;
sds->avg_load = (sds->total_load * SCHED_CAPACITY_SCALE) /
sds->total_capacity;
/*
* If the local group is more loaded than the selected
* busiest group don't try to pull any tasks.
@ -9564,6 +9501,9 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
env->imbalance = 0;
return;
}
sds->avg_load = (sds->total_load * SCHED_CAPACITY_SCALE) /
sds->total_capacity;
}
/*
@ -9589,7 +9529,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
* busiest \ local has_spare fully_busy misfit asym imbalanced overloaded
* has_spare nr_idle balanced N/A N/A balanced balanced
* fully_busy nr_idle nr_idle N/A N/A balanced balanced
* misfit_task force N/A N/A N/A force force
* misfit_task force N/A N/A N/A N/A N/A
* asym_packing force force N/A N/A force force
* imbalanced force force N/A N/A force force
* overloaded force force N/A N/A force avg_load
@ -12000,101 +11940,3 @@ __init void init_sched_fair_class(void)
#endif /* SMP */
}
/*
* Helper functions to facilitate extracting info from tracepoints.
*/
/* Tracepoint helper: PELT averages of @cfs_rq, or NULL (no rq / !SMP). */
const struct sched_avg *sched_trace_cfs_rq_avg(struct cfs_rq *cfs_rq)
{
#ifndef CONFIG_SMP
	return NULL;
#else
	if (!cfs_rq)
		return NULL;

	return &cfs_rq->avg;
#endif
}
EXPORT_SYMBOL_GPL(sched_trace_cfs_rq_avg);
/*
 * Tracepoint helper: fill @str (at most @len bytes) with the task-group
 * path of @cfs_rq and return @str.  Returns NULL when both @cfs_rq and
 * @str are NULL.
 */
char *sched_trace_cfs_rq_path(struct cfs_rq *cfs_rq, char *str, int len)
{
	if (!cfs_rq) {
		/* Nothing to describe and nowhere to write it. */
		if (!str)
			return NULL;

		strlcpy(str, "(null)", len);
	}

	cfs_rq_tg_path(cfs_rq, str, len);

	return str;
}
EXPORT_SYMBOL_GPL(sched_trace_cfs_rq_path);
/* Tracepoint helper: CPU number of the rq owning @cfs_rq, -1 if NULL. */
int sched_trace_cfs_rq_cpu(struct cfs_rq *cfs_rq)
{
	if (!cfs_rq)
		return -1;

	return cpu_of(rq_of(cfs_rq));
}
EXPORT_SYMBOL_GPL(sched_trace_cfs_rq_cpu);
/* Tracepoint helper: rq->avg_rt of @rq, or NULL (no rq / !SMP). */
const struct sched_avg *sched_trace_rq_avg_rt(struct rq *rq)
{
#ifndef CONFIG_SMP
	return NULL;
#else
	if (!rq)
		return NULL;

	return &rq->avg_rt;
#endif
}
EXPORT_SYMBOL_GPL(sched_trace_rq_avg_rt);
/* Tracepoint helper: rq->avg_dl of @rq, or NULL (no rq / !SMP). */
const struct sched_avg *sched_trace_rq_avg_dl(struct rq *rq)
{
#ifndef CONFIG_SMP
	return NULL;
#else
	if (!rq)
		return NULL;

	return &rq->avg_dl;
#endif
}
EXPORT_SYMBOL_GPL(sched_trace_rq_avg_dl);
/*
 * Tracepoint helper: rq->avg_irq of @rq.  NULL when @rq is NULL or when the
 * kernel is built without both CONFIG_SMP and CONFIG_HAVE_SCHED_AVG_IRQ.
 */
const struct sched_avg *sched_trace_rq_avg_irq(struct rq *rq)
{
#if defined(CONFIG_SMP) && defined(CONFIG_HAVE_SCHED_AVG_IRQ)
	if (!rq)
		return NULL;

	return &rq->avg_irq;
#else
	return NULL;
#endif
}
EXPORT_SYMBOL_GPL(sched_trace_rq_avg_irq);
/* Tracepoint helper: CPU number of @rq, -1 if @rq is NULL. */
int sched_trace_rq_cpu(struct rq *rq)
{
	if (!rq)
		return -1;

	return cpu_of(rq);
}
EXPORT_SYMBOL_GPL(sched_trace_rq_cpu);
/*
 * Tracepoint helper: capacity of @rq's CPU.  Without CONFIG_SMP the
 * constant SCHED_CAPACITY_SCALE is reported; a NULL @rq yields -1.
 */
int sched_trace_rq_cpu_capacity(struct rq *rq)
{
	if (!rq)
		return -1;

#ifdef CONFIG_SMP
	return rq->cpu_capacity;
#else
	return SCHED_CAPACITY_SCALE;
#endif
}
EXPORT_SYMBOL_GPL(sched_trace_rq_cpu_capacity);
/* Tracepoint helper: span mask of @rd, or NULL (no rd / !SMP). */
const struct cpumask *sched_trace_rd_span(struct root_domain *rd)
{
#ifndef CONFIG_SMP
	return NULL;
#else
	if (!rd)
		return NULL;

	return rd->span;
#endif
}
EXPORT_SYMBOL_GPL(sched_trace_rd_span);
/* Tracepoint helper: rq->nr_running of @rq, -1 if @rq is NULL. */
int sched_trace_rq_nr_running(struct rq *rq)
{
	if (!rq)
		return -1;

	return rq->nr_running;
}
EXPORT_SYMBOL_GPL(sched_trace_rq_nr_running);

View File

@ -327,7 +327,7 @@ static void do_idle(void)
* RCU relies on this call to be done outside of an RCU read-side
* critical section.
*/
flush_smp_call_function_from_idle();
flush_smp_call_function_queue();
schedule_idle();
if (unlikely(klp_patch_pending(current)))

View File

@ -145,9 +145,9 @@ static inline u64 rq_clock_pelt(struct rq *rq)
static inline u64 cfs_rq_clock_pelt(struct cfs_rq *cfs_rq)
{
if (unlikely(cfs_rq->throttle_count))
return cfs_rq->throttled_clock_task - cfs_rq->throttled_clock_task_time;
return cfs_rq->throttled_clock_pelt - cfs_rq->throttled_clock_pelt_time;
return rq_clock_pelt(rq_of(cfs_rq)) - cfs_rq->throttled_clock_task_time;
return rq_clock_pelt(rq_of(cfs_rq)) - cfs_rq->throttled_clock_pelt_time;
}
#else
static inline u64 cfs_rq_clock_pelt(struct cfs_rq *cfs_rq)

View File

@ -1060,14 +1060,17 @@ int psi_show(struct seq_file *m, struct psi_group *group, enum psi_res res)
mutex_unlock(&group->avgs_lock);
for (full = 0; full < 2; full++) {
unsigned long avg[3];
u64 total;
unsigned long avg[3] = { 0, };
u64 total = 0;
int w;
for (w = 0; w < 3; w++)
avg[w] = group->avg[res * 2 + full][w];
total = div_u64(group->total[PSI_AVGS][res * 2 + full],
NSEC_PER_USEC);
/* CPU FULL is undefined at the system level */
if (!(group == &psi_system && res == PSI_CPU && full)) {
for (w = 0; w < 3; w++)
avg[w] = group->avg[res * 2 + full][w];
total = div_u64(group->total[PSI_AVGS][res * 2 + full],
NSEC_PER_USEC);
}
seq_printf(m, "%s avg10=%lu.%02lu avg60=%lu.%02lu avg300=%lu.%02lu total=%llu\n",
full ? "full" : "some",
@ -1117,7 +1120,8 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
t->state = state;
t->threshold = threshold_us * NSEC_PER_USEC;
t->win.size = window_us * NSEC_PER_USEC;
window_reset(&t->win, 0, 0, 0);
window_reset(&t->win, sched_clock(),
group->total[PSI_POLL][t->state], 0);
t->event = 0;
t->last_event_time = 0;

View File

@ -873,6 +873,7 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
int enqueue = 0;
struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
struct rq *rq = rq_of_rt_rq(rt_rq);
struct rq_flags rf;
int skip;
/*
@ -887,7 +888,7 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
if (skip)
continue;
raw_spin_rq_lock(rq);
rq_lock(rq, &rf);
update_rq_clock(rq);
if (rt_rq->rt_time) {
@ -925,7 +926,7 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
if (enqueue)
sched_rt_rq_enqueue(rt_rq);
raw_spin_rq_unlock(rq);
rq_unlock(rq, &rf);
}
if (!throttled && (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF))

View File

@ -609,8 +609,8 @@ struct cfs_rq {
s64 runtime_remaining;
u64 throttled_clock;
u64 throttled_clock_task;
u64 throttled_clock_task_time;
u64 throttled_clock_pelt;
u64 throttled_clock_pelt_time;
int throttled;
int throttle_count;
struct list_head throttled_list;
@ -1839,12 +1839,7 @@ static inline void dirty_sched_domain_sysctl(int cpu)
#endif
extern int sched_update_scaling(void);
extern void flush_smp_call_function_from_idle(void);
#else /* !CONFIG_SMP: */
static inline void flush_smp_call_function_from_idle(void) { }
#endif
#endif /* CONFIG_SMP */
#include "stats.h"
@ -2198,6 +2193,8 @@ static inline void set_next_task(struct rq *rq, struct task_struct *next)
*
* include/asm-generic/vmlinux.lds.h
*
* *CAREFUL* they are laid out in *REVERSE* order!!!
*
* Also enforce alignment on the instance, not the type, to guarantee layout.
*/
#define DEFINE_SCHED_CLASS(name) \
@ -2206,17 +2203,16 @@ const struct sched_class name##_sched_class \
__section("__" #name "_sched_class")
/* Defined in include/asm-generic/vmlinux.lds.h */
extern struct sched_class __begin_sched_classes[];
extern struct sched_class __end_sched_classes[];
#define sched_class_highest (__end_sched_classes - 1)
#define sched_class_lowest (__begin_sched_classes - 1)
extern struct sched_class __sched_class_highest[];
extern struct sched_class __sched_class_lowest[];
#define for_class_range(class, _from, _to) \
for (class = (_from); class != (_to); class--)
for (class = (_from); class < (_to); class++)
#define for_each_class(class) \
for_class_range(class, sched_class_highest, sched_class_lowest)
for_class_range(class, __sched_class_highest, __sched_class_lowest)
#define sched_class_above(_a, _b) ((_a) < (_b))
extern const struct sched_class stop_sched_class;
extern const struct sched_class dl_sched_class;
@ -2326,6 +2322,7 @@ extern void resched_cpu(int cpu);
extern struct rt_bandwidth def_rt_bandwidth;
extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq);
extern void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime);
extern void init_dl_task_timer(struct sched_dl_entity *dl_se);
@ -2495,6 +2492,24 @@ unsigned long arch_scale_freq_capacity(int cpu)
}
#endif
#ifdef CONFIG_SCHED_DEBUG
/*
 * double_lock_balance() and double_rq_lock() take the rq locks through
 * raw_spin_rq_lock() rather than rq_lock().  Both therefore finish by
 * calling this helper, which strips RQCF_UPDATED from
 * rq->clock_update_flags so that the WARN_DOUBLE_CLOCK warning stays quiet.
 * Only the two skip bits are preserved.
 */
static inline void double_rq_clock_clear_update(struct rq *rq1, struct rq *rq2)
{
	rq1->clock_update_flags &= (RQCF_REQ_SKIP | RQCF_ACT_SKIP);
#ifdef CONFIG_SMP
	/* With !CONFIG_SMP rq1 == rq2, so the clear above already covers it. */
	rq2->clock_update_flags &= (RQCF_REQ_SKIP | RQCF_ACT_SKIP);
#endif
}
#else /* !CONFIG_SCHED_DEBUG */
static inline void
double_rq_clock_clear_update(struct rq *rq1, struct rq *rq2)
{
}
#endif /* CONFIG_SCHED_DEBUG */
#ifdef CONFIG_SMP
@ -2560,14 +2575,15 @@ static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
__acquires(busiest->lock)
__acquires(this_rq->lock)
{
if (__rq_lockp(this_rq) == __rq_lockp(busiest))
return 0;
if (likely(raw_spin_rq_trylock(busiest)))
if (__rq_lockp(this_rq) == __rq_lockp(busiest) ||
likely(raw_spin_rq_trylock(busiest))) {
double_rq_clock_clear_update(this_rq, busiest);
return 0;
}
if (rq_order_less(this_rq, busiest)) {
raw_spin_rq_lock_nested(busiest, SINGLE_DEPTH_NESTING);
double_rq_clock_clear_update(this_rq, busiest);
return 0;
}
@ -2661,6 +2677,7 @@ static inline void double_rq_lock(struct rq *rq1, struct rq *rq2)
BUG_ON(rq1 != rq2);
raw_spin_rq_lock(rq1);
__acquire(rq2->lock); /* Fake it out ;) */
double_rq_clock_clear_update(rq1, rq2);
}
/*

View File

@ -7,3 +7,9 @@
extern void sched_ttwu_pending(void *arg);
extern void send_call_function_single_ipi(int cpu);
/*
 * flush_smp_call_function_queue() is only declared here when CONFIG_SMP is
 * set; otherwise it is an empty inline stub, so callers need no #ifdef of
 * their own.
 */
#ifdef CONFIG_SMP
extern void flush_smp_call_function_queue(void);
#else
static inline void flush_smp_call_function_queue(void) { }
#endif

View File

@ -96,7 +96,7 @@ static DEFINE_PER_CPU_ALIGNED(struct call_function_data, cfd_data);
static DEFINE_PER_CPU_SHARED_ALIGNED(struct llist_head, call_single_queue);
static void flush_smp_call_function_queue(bool warn_cpu_offline);
static void __flush_smp_call_function_queue(bool warn_cpu_offline);
int smpcfd_prepare_cpu(unsigned int cpu)
{
@ -141,7 +141,7 @@ int smpcfd_dying_cpu(unsigned int cpu)
* ensure that the outgoing CPU doesn't go offline with work
* still pending.
*/
flush_smp_call_function_queue(false);
__flush_smp_call_function_queue(false);
irq_work_run();
return 0;
}
@ -544,11 +544,11 @@ void generic_smp_call_function_single_interrupt(void)
{
cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->gotipi, CFD_SEQ_NOCPU,
smp_processor_id(), CFD_SEQ_GOTIPI);
flush_smp_call_function_queue(true);
__flush_smp_call_function_queue(true);
}
/**
* flush_smp_call_function_queue - Flush pending smp-call-function callbacks
* __flush_smp_call_function_queue - Flush pending smp-call-function callbacks
*
* @warn_cpu_offline: If set to 'true', warn if callbacks were queued on an
* offline CPU. Skip this check if set to 'false'.
@ -561,7 +561,7 @@ void generic_smp_call_function_single_interrupt(void)
* Loop through the call_single_queue and run all the queued callbacks.
* Must be called with interrupts disabled.
*/
static void flush_smp_call_function_queue(bool warn_cpu_offline)
static void __flush_smp_call_function_queue(bool warn_cpu_offline)
{
call_single_data_t *csd, *csd_next;
struct llist_node *entry, *prev;
@ -684,8 +684,22 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline)
smp_processor_id(), CFD_SEQ_HDLEND);
}
void flush_smp_call_function_from_idle(void)
/**
* flush_smp_call_function_queue - Flush pending smp-call-function callbacks
* from task context (idle, migration thread)
*
* When TIF_POLLING_NRFLAG is supported and a CPU is in idle and has it
* set, then remote CPUs can avoid sending IPIs and wake the idle CPU by
* setting TIF_NEED_RESCHED. The idle task on the woken up CPU has to
* handle queued SMP function calls before scheduling.
*
* The migration thread has to ensure that a possibly pending wakeup has
* been handled before it migrates a task.
*/
void flush_smp_call_function_queue(void)
{
unsigned int was_pending;
unsigned long flags;
if (llist_empty(this_cpu_ptr(&call_single_queue)))
@ -694,9 +708,11 @@ void flush_smp_call_function_from_idle(void)
cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->idle, CFD_SEQ_NOCPU,
smp_processor_id(), CFD_SEQ_IDLE);
local_irq_save(flags);
flush_smp_call_function_queue(true);
/* Get the already pending soft interrupts for RT enabled kernels */
was_pending = local_softirq_pending();
__flush_smp_call_function_queue(true);
if (local_softirq_pending())
do_softirq();
do_softirq_post_smp_call_flush(was_pending);
local_irq_restore(flags);
}

View File

@ -297,6 +297,19 @@ static inline void invoke_softirq(void)
wakeup_softirqd();
}
/*
 * A function call run by flush_smp_call_function_queue() may raise a soft
 * interrupt.  That is unwanted on RT kernels, and the one known user that
 * does so (in the block layer) is disabled on RT.  @was_pending is the
 * softirq pending mask sampled before the flush: if the mask differs
 * afterwards, warn once so the source can be investigated, and run the
 * softirq handling.
 */
void do_softirq_post_smp_call_flush(unsigned int was_pending)
{
	if (!WARN_ON_ONCE(was_pending != local_softirq_pending()))
		return;

	invoke_softirq();
}
#else /* CONFIG_PREEMPT_RT */
/*

View File

@ -536,8 +536,6 @@ void stop_machine_park(int cpu)
kthread_park(stopper->thread);
}
extern void sched_set_stop_task(int cpu, struct task_struct *stop);
static void cpu_stop_create(unsigned int cpu)
{
sched_set_stop_task(cpu, per_cpu(cpu_stopper.thread, cpu));

View File

@ -17,6 +17,7 @@
#include <linux/clocksource.h>
#include <linux/jiffies.h>
#include <linux/time.h>
#include <linux/timex.h>
#include <linux/tick.h>
#include <linux/stop_machine.h>
#include <linux/pvclock_gtod.h>
@ -2397,6 +2398,20 @@ static int timekeeping_validate_timex(const struct __kernel_timex *txc)
return 0;
}
/**
 * random_get_entropy_fallback - Read the raw value of the current clocksource
 *
 * Used by random.c on platforms that have no valid random_get_entropy().
 *
 * Return: the raw counter of the monotonic timekeeper's clocksource, or 0
 * while timekeeping is suspended or no clocksource is installed yet.
 */
unsigned long random_get_entropy_fallback(void)
{
	struct tk_read_base *tkr = &tk_core.timekeeper.tkr_mono;
	struct clocksource *clock = READ_ONCE(tkr->clock);

	if (likely(!timekeeping_suspended && clock))
		return clock->read(clock);

	return 0;
}
EXPORT_SYMBOL_GPL(random_get_entropy_fallback);
/**
* do_adjtimex() - Accessor function to NTP __do_adjtimex function

View File

@ -1833,8 +1833,6 @@ void update_process_times(int user_tick)
{
struct task_struct *p = current;
PRANDOM_ADD_NOISE(jiffies, user_tick, p, 0);
/* Note: this timer irq context must be accounted for as well. */
account_process_tick(p, user_tick);
run_local_timers();

View File

@ -4290,17 +4290,11 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
entries,
total,
buf->cpu,
#if defined(CONFIG_PREEMPT_NONE)
"server",
#elif defined(CONFIG_PREEMPT_VOLUNTARY)
"desktop",
#elif defined(CONFIG_PREEMPT)
"preempt",
#elif defined(CONFIG_PREEMPT_RT)
"preempt_rt",
#else
preempt_model_none() ? "server" :
preempt_model_voluntary() ? "desktop" :
preempt_model_full() ? "preempt" :
preempt_model_rt() ? "preempt_rt" :
"unknown",
#endif
/* These are reserved for later use */
0, 0, 0, 0);
#ifdef CONFIG_SMP

View File

@ -1617,8 +1617,7 @@ config WARN_ALL_UNSEEDED_RANDOM
so architecture maintainers really need to do what they can
to get the CRNG seeded sooner after the system is booted.
However, since users cannot do anything actionable to
address this, by default the kernel will issue only a single
warning for the first use of unseeded randomness.
address this, by default this option is disabled.
Say Y here if you want to receive warnings for all uses of
unseeded randomness. This will be of use primarily for

View File

@ -245,25 +245,13 @@ static struct prandom_test2 {
{ 407983964U, 921U, 728767059U },
};
/*
 * Fetch one 32-bit value from the architecture's random helpers.
 * arch_get_random_seed_int() is tried first; arch_get_random_int() is only
 * called when the first call returns false (short-circuit ||).
 * NOTE(review): assumes a true return means "value stored in val" and that
 * a failing helper leaves val alone, in which case 0 is returned - confirm.
 */
static u32 __extract_hwseed(void)
{
unsigned int val = 0;
/* The combined result is deliberately discarded; only val matters. */
(void)(arch_get_random_seed_int(&val) ||
arch_get_random_int(&val));
return val;
}
static void prandom_seed_early(struct rnd_state *state, u32 seed,
bool mix_with_hwseed)
static void prandom_state_selftest_seed(struct rnd_state *state, u32 seed)
{
#define LCG(x) ((x) * 69069U) /* super-duper LCG */
#define HWSEED() (mix_with_hwseed ? __extract_hwseed() : 0)
state->s1 = __seed(HWSEED() ^ LCG(seed), 2U);
state->s2 = __seed(HWSEED() ^ LCG(state->s1), 8U);
state->s3 = __seed(HWSEED() ^ LCG(state->s2), 16U);
state->s4 = __seed(HWSEED() ^ LCG(state->s3), 128U);
state->s1 = __seed(LCG(seed), 2U);
state->s2 = __seed(LCG(state->s1), 8U);
state->s3 = __seed(LCG(state->s2), 16U);
state->s4 = __seed(LCG(state->s3), 128U);
}
static int __init prandom_state_selftest(void)
@ -274,7 +262,7 @@ static int __init prandom_state_selftest(void)
for (i = 0; i < ARRAY_SIZE(test1); i++) {
struct rnd_state state;
prandom_seed_early(&state, test1[i].seed, false);
prandom_state_selftest_seed(&state, test1[i].seed);
prandom_warmup(&state);
if (test1[i].result != prandom_u32_state(&state))
@ -289,7 +277,7 @@ static int __init prandom_state_selftest(void)
for (i = 0; i < ARRAY_SIZE(test2); i++) {
struct rnd_state state;
prandom_seed_early(&state, test2[i].seed, false);
prandom_state_selftest_seed(&state, test2[i].seed);
prandom_warmup(&state);
for (j = 0; j < test2[i].iteration - 1; j++)
@ -310,324 +298,3 @@ static int __init prandom_state_selftest(void)
}
core_initcall(prandom_state_selftest);
#endif
/*
* The prandom_u32() implementation is now completely separate from the
* prandom_state() functions, which are retained (for now) for compatibility.
*
* Because of (ab)use in the networking code for choosing random TCP/UDP port
* numbers, which open DoS possibilities if guessable, we want something
* stronger than a standard PRNG. But the performance requirements of
* the network code do not allow robust crypto for this application.
*
* So this is a homebrew Junior Spaceman implementation, based on the
* lowest-latency trustworthy crypto primitive available, SipHash.
* (The authors of SipHash have not been consulted about this abuse of
* their work.)
*
* Standard SipHash-2-4 uses 2n+4 rounds to hash n words of input to
* one word of output. This abbreviated version uses 2 rounds per word
* of output.
*/
/* Per-CPU generator state: the four words fed through PRND_SIPROUND(). */
struct siprand_state {
	unsigned long v0, v1, v2, v3;
};
static DEFINE_PER_CPU(struct siprand_state, net_rand_state) __latent_entropy;
DEFINE_PER_CPU(unsigned long, net_rand_noise);
EXPORT_PER_CPU_SYMBOL(net_rand_noise);
/*
* This is the core CPRNG function. As "pseudorandom", this is not used
* for truly valuable things, just intended to be a PITA to guess.
* For maximum speed, we do just two SipHash rounds per word. This is
* the same rate as 4 rounds per 64 bits that SipHash normally uses,
* so hopefully it's reasonably secure.
*
* There are two changes from the official SipHash finalization:
* - We omit some constants XORed with v2 in the SipHash spec as irrelevant;
* they are there only to make the output rounds distinct from the input
* rounds, and this application has no input rounds.
* - Rather than returning v0^v1^v2^v3, return v1+v3.
* If you look at the SipHash round, the last operation on v3 is
* "v3 ^= v0", so "v0 ^ v3" just undoes that, a waste of time.
* Likewise "v1 ^= v2". (The rotate of v2 makes a difference, but
* it still cancels out half of the bits in v2 for no benefit.)
* Second, since the last combining operation was xor, continue the
* pattern of alternating xor/add for a tiny bit of extra non-linearity.
*/
/*
 * Advance @s by two PRND_SIPROUND() rounds, folding in this CPU's
 * net_rand_noise before and after, and return v1 + v3 truncated to 32 bits.
 */
static inline u32 siprand_u32(struct siprand_state *s)
{
	unsigned long v0 = s->v0;
	unsigned long v1 = s->v1;
	unsigned long v2 = s->v2;
	unsigned long v3 = s->v3;
	unsigned long n = raw_cpu_read(net_rand_noise);

	/* Noise goes into v3 ahead of the rounds and into v0 after them. */
	v3 ^= n;
	PRND_SIPROUND(v0, v1, v2, v3);
	PRND_SIPROUND(v0, v1, v2, v3);
	v0 ^= n;

	s->v0 = v0;
	s->v1 = v1;
	s->v2 = v2;
	s->v3 = v3;

	return v1 + v3;
}
/**
 * prandom_u32 - pseudo random number generator
 *
 * Produces a 32 bit pseudo-random number from the current CPU's generator
 * state using a fast algorithm suitable for simulation.  This algorithm is
 * NOT considered safe for cryptographic use.
 */
u32 prandom_u32(void)
{
	struct siprand_state *state;
	u32 res;

	state = get_cpu_ptr(&net_rand_state);
	res = siprand_u32(state);
	put_cpu_ptr(&net_rand_state);

	return res;
}
EXPORT_SYMBOL(prandom_u32);
/**
 * prandom_bytes - get the requested number of pseudo-random bytes
 * @buf: where to copy the pseudo-random bytes to
 * @bytes: the requested number of bytes
 *
 * Whole 32-bit words are stored (unaligned-safe) while they fit; a trailing
 * remainder of 1-3 bytes is taken from one more word, low byte first.
 */
void prandom_bytes(void *buf, size_t bytes)
{
	struct siprand_state *state = get_cpu_ptr(&net_rand_state);
	u8 *ptr = buf;
	u32 rem;

	for (; bytes >= sizeof(u32); bytes -= sizeof(u32)) {
		put_unaligned(siprand_u32(state), (u32 *)ptr);
		ptr += sizeof(u32);
	}

	if (bytes > 0) {
		for (rem = siprand_u32(state); bytes > 0; bytes--) {
			*ptr++ = (u8)rem;
			rem >>= BITS_PER_BYTE;
		}
	}

	put_cpu_ptr(&net_rand_state);
}
EXPORT_SYMBOL(prandom_bytes);
/**
 * prandom_seed - add entropy to pseudo random number generator
 * @entropy: entropy value
 *
 * Mixes additional seed material into the prandom state of every possible
 * CPU.  The "entropy" is actually our IP address (the only caller is the
 * network code); the point is not unpredictability but making sure that
 * different machines are initialized differently.
 */
void prandom_seed(u32 entropy)
{
	int i;

	add_device_randomness(&entropy, sizeof(entropy));

	for_each_possible_cpu(i) {
		struct siprand_state *state = per_cpu_ptr(&net_rand_state, i);
		unsigned long v0 = state->v0;
		unsigned long v1 = state->v1;
		unsigned long v2 = state->v2;
		unsigned long v3 = state->v3;

		/* Repeat the mix until none of the four words is zero. */
		for (;;) {
			v3 ^= entropy;
			PRND_SIPROUND(v0, v1, v2, v3);
			PRND_SIPROUND(v0, v1, v2, v3);
			v0 ^= entropy;

			if (likely(v0 && v1 && v2 && v3))
				break;
		}

		WRITE_ONCE(state->v0, v0);
		WRITE_ONCE(state->v1, v1);
		WRITE_ONCE(state->v2, v2);
		WRITE_ONCE(state->v3, v3);
	}
}
EXPORT_SYMBOL(prandom_seed);
/*
 * Seed every possible CPU's state with initially weak values so that the
 * prandom_u32() engine can be started.  v0/v1 come from
 * arch_get_random_long() when it succeeds, otherwise from jiffies and
 * random_get_entropy(); the CPU number is mixed in per CPU.
 */
static int __init prandom_init_early(void)
{
	unsigned long v0, v1, v2, v3;
	int i;

	if (!arch_get_random_long(&v0))
		v0 = jiffies;
	if (!arch_get_random_long(&v1))
		v1 = random_get_entropy();

	v2 = v0 ^ PRND_K0;
	v3 = v1 ^ PRND_K1;

	for_each_possible_cpu(i) {
		struct siprand_state *state = per_cpu_ptr(&net_rand_state, i);

		/* The running v0..v3 carry over from one CPU to the next. */
		v3 ^= i;
		PRND_SIPROUND(v0, v1, v2, v3);
		PRND_SIPROUND(v0, v1, v2, v3);
		v0 ^= i;

		state->v0 = v0;
		state->v1 = v1;
		state->v2 = v2;
		state->v3 = v3;
	}

	return 0;
}
core_initcall(prandom_init_early);
/* Stronger reseeding when available, and periodically thereafter. */
static void prandom_reseed(struct timer_list *unused);
static DEFINE_TIMER(seed_timer, prandom_reseed);
/*
 * Timer callback: give every possible CPU a fresh state drawn from
 * get_random_long(), then re-arm seed_timer for the next round.
 */
static void prandom_reseed(struct timer_list *unused)
{
	unsigned long expires;
	int i;

	/*
	 * Each CPU's PRNG is reinitialized with 128 bits of key.  The CPUs
	 * are not locked, but then somewhat random results are, well,
	 * expected.
	 */
	for_each_possible_cpu(i) {
		struct siprand_state *state = per_cpu_ptr(&net_rand_state, i);
		unsigned long v0, v1, v2, v3;
#if BITS_PER_LONG == 32
		int j;
#endif

		v0 = get_random_long();
		v2 = v0 ^ PRND_K0;
		v1 = get_random_long();
		v3 = v1 ^ PRND_K1;

#if BITS_PER_LONG == 32
		/*
		 * 32-bit machines hash in two extra words to approximate a
		 * 128-bit key length.  The hash does not have that much
		 * security, but this rules out a trivial 64-bit brute force.
		 */
		for (j = 0; j < 2; j++) {
			unsigned long m = get_random_long();

			v3 ^= m;
			PRND_SIPROUND(v0, v1, v2, v3);
			PRND_SIPROUND(v0, v1, v2, v3);
			v0 ^= m;
		}
#endif
		/*
		 * Probably impossible in practice, but a race between this
		 * reseeding and the target CPU writing its state back could
		 * in theory create the all-zero SipHash fixed point.  Never
		 * writing a zero word rules that out.
		 */
		WRITE_ONCE(state->v0, v0 ? v0 : -1ul);
		WRITE_ONCE(state->v1, v1 ? v1 : -1ul);
		WRITE_ONCE(state->v2, v2 ? v2 : -1ul);
		WRITE_ONCE(state->v3, v3 ? v3 : -1ul);
	}

	/* reseed every ~60 seconds, in [40 .. 80) interval with slack */
	expires = round_jiffies(jiffies + 40 * HZ + prandom_u32_max(40 * HZ));
	mod_timer(&seed_timer, expires);
}
/*
 * Notifier callback for "random is ready".  It can be invoked from almost
 * any interrupt, so rather than seeding all CPUs right here (and having to
 * reason about how long that interrupt may be delayed) it only arms
 * seed_timer to fire immediately.  All arguments are ignored.
 */
static int prandom_timer_start(struct notifier_block *nb,
			       unsigned long action, void *data)
{
	mod_timer(&seed_timer, jiffies);

	return 0;
}
#ifdef CONFIG_RANDOM32_SELFTEST
/* Principle: True 32-bit random numbers will all have 16 differing bits on
* average. For each 32-bit number, there are 601M numbers differing by 16
* bits, and 89% of the numbers differ by at least 12 bits. Note that more
* than 16 differing bits also implies a correlation with inverted bits. Thus
* we take 1024 random numbers and compare each of them to the other ones,
* counting the deviation of correlated bits to 16. Constants report 32,
* counters 32-log2(TEST_SIZE), and pure randoms, around 6 or lower. With the
* u32 total, TEST_SIZE may be as large as 4096 samples.
*/
#define TEST_SIZE 1024
/*
 * Boot-time self test for prandom_u32(): draw TEST_SIZE samples, compare
 * every sample with every other one and accumulate the squared deviation of
 * the number of differing bits from 16.  The result is only logged (pr_warn
 * on failure, pr_info on success); the initcall always returns 0, including
 * when the sample buffer cannot be allocated.
 */
static int __init prandom32_state_selftest(void)
{
	unsigned int x, y, bits, samples;
	u32 xor, flip;
	u32 total;
	u32 *data;

	/* kmalloc_array() instead of an open-coded size multiplication. */
	data = kmalloc_array(TEST_SIZE, sizeof(*data), GFP_KERNEL);
	if (!data)
		return 0;

	for (samples = 0; samples < TEST_SIZE; samples++)
		data[samples] = prandom_u32();

	/* flip collects every bit position seen to differ between samples. */
	flip = total = 0;
	for (x = 0; x < samples; x++) {
		for (y = 0; y < samples; y++) {
			if (x == y)
				continue;
			xor = data[x] ^ data[y];
			flip |= xor;
			bits = hweight32(xor);
			/* Unsigned wrap-around still squares correctly. */
			total += (bits - 16) * (bits - 16);
		}
	}

	/* We'll return the average deviation as 2*sqrt(corr/samples), which
	 * is also sqrt(4*corr/samples) which provides a better resolution.
	 */
	bits = int_sqrt(total / (samples * (samples - 1)) * 4);
	if (bits > 6)
		pr_warn("prandom32: self test failed (at least %u bits"
			" correlated, fixed_mask=%#x fixed_value=%#x\n",
			bits, ~flip, data[0] & ~flip);
	else
		pr_info("prandom32: self test passed (less than %u bits"
			" correlated)\n",
			bits+1);
	kfree(data);
	return 0;
}
core_initcall(prandom32_state_selftest);
#endif /* CONFIG_RANDOM32_SELFTEST */
/*
 * Late initcall that arranges for periodic full reseeding to begin once
 * strong random numbers are available, by registering
 * prandom_timer_start() as a random-ready notifier.
 *
 * Returns 0 on success or the error from
 * register_random_ready_notifier(). -EALREADY is not treated as a failure:
 * the timer is kicked directly instead (presumably because the RNG is
 * already ready and no notification will follow).
 */
static int __init prandom_init_late(void)
{
	static struct notifier_block random_ready = {
		.notifier_call = prandom_timer_start
	};
	int err;

	err = register_random_ready_notifier(&random_ready);
	if (err != -EALREADY)
		return err;

	prandom_timer_start(&random_ready, 0, NULL);
	return 0;
}
late_initcall(prandom_init_late);

View File

@ -18,19 +18,13 @@
#include <asm/word-at-a-time.h>
#endif
#define SIPROUND \
do { \
v0 += v1; v1 = rol64(v1, 13); v1 ^= v0; v0 = rol64(v0, 32); \
v2 += v3; v3 = rol64(v3, 16); v3 ^= v2; \
v0 += v3; v3 = rol64(v3, 21); v3 ^= v0; \
v2 += v1; v1 = rol64(v1, 17); v1 ^= v2; v2 = rol64(v2, 32); \
} while (0)
#define SIPROUND SIPHASH_PERMUTATION(v0, v1, v2, v3)
#define PREAMBLE(len) \
u64 v0 = 0x736f6d6570736575ULL; \
u64 v1 = 0x646f72616e646f6dULL; \
u64 v2 = 0x6c7967656e657261ULL; \
u64 v3 = 0x7465646279746573ULL; \
u64 v0 = SIPHASH_CONST_0; \
u64 v1 = SIPHASH_CONST_1; \
u64 v2 = SIPHASH_CONST_2; \
u64 v3 = SIPHASH_CONST_3; \
u64 b = ((u64)(len)) << 56; \
v3 ^= key->key[1]; \
v2 ^= key->key[0]; \
@ -389,19 +383,13 @@ u32 hsiphash_4u32(const u32 first, const u32 second, const u32 third,
}
EXPORT_SYMBOL(hsiphash_4u32);
#else
#define HSIPROUND \
do { \
v0 += v1; v1 = rol32(v1, 5); v1 ^= v0; v0 = rol32(v0, 16); \
v2 += v3; v3 = rol32(v3, 8); v3 ^= v2; \
v0 += v3; v3 = rol32(v3, 7); v3 ^= v0; \
v2 += v1; v1 = rol32(v1, 13); v1 ^= v2; v2 = rol32(v2, 16); \
} while (0)
#define HSIPROUND HSIPHASH_PERMUTATION(v0, v1, v2, v3)
#define HPREAMBLE(len) \
u32 v0 = 0; \
u32 v1 = 0; \
u32 v2 = 0x6c796765U; \
u32 v3 = 0x74656462U; \
u32 v0 = HSIPHASH_CONST_0; \
u32 v1 = HSIPHASH_CONST_1; \
u32 v2 = HSIPHASH_CONST_2; \
u32 v3 = HSIPHASH_CONST_3; \
u32 b = ((u32)(len)) << 24; \
v3 ^= key->key[1]; \
v2 ^= key->key[0]; \

View File

@ -750,61 +750,38 @@ static int __init debug_boot_weak_hash_enable(char *str)
}
early_param("debug_boot_weak_hash", debug_boot_weak_hash_enable);
static DEFINE_STATIC_KEY_TRUE(not_filled_random_ptr_key);
static siphash_key_t ptr_key __read_mostly;
static DEFINE_STATIC_KEY_FALSE(filled_random_ptr_key);
/*
 * Work handler bound to enable_ptr_key_work.
 *
 * NOTE(review): as visible here the body fills ptr_key and then flips two
 * different static keys (not_filled_random_ptr_key and
 * filled_random_ptr_key). This looks like statements from before and after
 * a change shown together - confirm which of them belong to the current code.
 */
static void enable_ptr_key_workfn(struct work_struct *work)
{
/* Fill the pointer-hashing key with random bytes. */
get_random_bytes(&ptr_key, sizeof(ptr_key));
/* Needs to run from preemptible context */
static_branch_disable(&not_filled_random_ptr_key);
static_branch_enable(&filled_random_ptr_key);
}
static DECLARE_WORK(enable_ptr_key_work, enable_ptr_key_workfn);
/*
 * Notifier callback: hands the actual key setup over to
 * enable_ptr_key_work on the unbound system workqueue.
 *
 * The notifier arguments are not used. Always returns 0.
 */
static int fill_random_ptr_key(struct notifier_block *nb,
			       unsigned long action, void *data)
{
	/*
	 * The caller may be an interrupt handler, so nothing is done here
	 * beyond queueing the work item.
	 */
	(void)queue_work(system_unbound_wq, &enable_ptr_key_work);

	return 0;
}
static struct notifier_block random_ready = {
.notifier_call = fill_random_ptr_key
};
/*
 * Early initcall that tries to obtain the pointer-hashing key.
 *
 * Order of attempts:
 *  1. get_random_bytes_arch(); if it delivers the full key the
 *     not_filled_random_ptr_key branch is disabled immediately.
 *  2. Otherwise register the random_ready notifier and wait for it.
 *  3. If registration reports -EALREADY, run the work function directly.
 *
 * Returns 0 on any of the paths above, or the error from
 * register_random_ready_notifier() otherwise.
 */
static int __init initialize_ptr_random(void)
{
	const int wanted = sizeof(ptr_key);
	int err;

	/* Use hw RNG if available. */
	if (get_random_bytes_arch(&ptr_key, wanted) == wanted) {
		static_branch_disable(&not_filled_random_ptr_key);
		return 0;
	}

	err = register_random_ready_notifier(&random_ready);
	if (err == -EALREADY) {
		/* This is in preemptible context */
		enable_ptr_key_workfn(&enable_ptr_key_work);
		err = 0;
	}

	return err;
}
early_initcall(initialize_ptr_random);
/* Maps a pointer to a 32 bit unique identifier. */
static inline int __ptr_to_hashval(const void *ptr, unsigned long *hashval_out)
{
static siphash_key_t ptr_key __read_mostly;
unsigned long hashval;
if (static_branch_unlikely(&not_filled_random_ptr_key))
return -EAGAIN;
if (!static_branch_likely(&filled_random_ptr_key)) {
static bool filled = false;
static DEFINE_SPINLOCK(filling);
static DECLARE_WORK(enable_ptr_key_work, enable_ptr_key_workfn);
unsigned long flags;
if (!system_unbound_wq ||
(!rng_is_initialized() && !rng_has_arch_random()) ||
!spin_trylock_irqsave(&filling, flags))
return -EAGAIN;
if (!filled) {
get_random_bytes(&ptr_key, sizeof(ptr_key));
queue_work(system_unbound_wq, &enable_ptr_key_work);
filled = true;
}
spin_unlock_irqrestore(&filling, flags);
}
#ifdef CONFIG_64BIT
hashval = (unsigned long)siphash_1u64((u64)ptr, &ptr_key);

View File

@ -346,6 +346,38 @@ unsigned long randomize_stack_top(unsigned long stack_top)
#endif
}
/**
 * randomize_page - Generate a random, page aligned address
 * @start:	The smallest acceptable address the caller will take.
 * @range:	The size of the area, starting at @start, within which the
 *		random address must fall.
 *
 * If @start is not page aligned it is rounded up to the next page boundary
 * and @range is shortened by the bytes skipped. If @range does not reach
 * past that boundary it is treated as empty rather than being allowed to
 * wrap around.
 *
 * If @start + @range would overflow, @range is capped.
 *
 * NOTE: Historical use of randomize_range, which this replaces, presumed that
 * @start was already page aligned.  We now align it regardless.
 *
 * Return: A page aligned address within [start, start + range).  When less
 * than one whole page remains after alignment and capping, the page aligned
 * @start is returned.
 */
unsigned long randomize_page(unsigned long start, unsigned long range)
{
	if (!PAGE_ALIGNED(start)) {
		unsigned long skipped = PAGE_ALIGN(start) - start;

		/*
		 * range is unsigned: subtracting a larger gap would wrap to a
		 * huge value and let the result land far outside the window.
		 */
		range = range > skipped ? range - skipped : 0;
		start += skipped;
	}

	if (start > ULONG_MAX - range)
		range = ULONG_MAX - start;

	/* From here on range counts whole pages. */
	range >>= PAGE_SHIFT;

	if (range == 0)
		return start;

	return start + ((get_random_long() % range) << PAGE_SHIFT);
}
#ifdef CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
unsigned long arch_randomize_brk(struct mm_struct *mm)
{

View File

@ -3534,7 +3534,6 @@ static int xmit_one(struct sk_buff *skb, struct net_device *dev,
dev_queue_xmit_nit(skb, dev);
len = skb->len;
PRANDOM_ADD_NOISE(skb, dev, txq, len + jiffies);
trace_net_dev_start_xmit(skb, dev);
rc = netdev_start_xmit(skb, dev, txq, more);
trace_net_dev_xmit(skb, rc, dev, len);
@ -4175,7 +4174,6 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
if (!skb)
goto out;
PRANDOM_ADD_NOISE(skb, dev, txq, jiffies);
HARD_TX_LOCK(dev, txq, cpu);
if (!netif_xmit_stopped(txq)) {
@ -4241,7 +4239,6 @@ int __dev_direct_xmit(struct sk_buff *skb, u16 queue_id)
skb_set_queue_mapping(skb, queue_id);
txq = skb_get_tx_queue(dev, skb);
PRANDOM_ADD_NOISE(skb, dev, txq, jiffies);
local_bh_disable();

View File

@ -536,10 +536,8 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
return ret;
}
if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
prandom_seed((__force u32) ifa->ifa_local);
if (!(ifa->ifa_flags & IFA_F_SECONDARY))
ifap = last_primary;
}
rcu_assign_pointer(ifa->ifa_next, *ifap);
rcu_assign_pointer(*ifap, ifa);

View File

@ -3994,8 +3994,6 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
addrconf_join_solict(dev, &ifp->addr);
prandom_seed((__force u32) ifp->addr.s6_addr32[3]);
read_lock_bh(&idev->lock);
spin_lock(&ifp->lock);
if (ifp->state == INET6_IFADDR_STATE_DEAD)

View File

@ -59,10 +59,12 @@ const char *const lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
[LOCKDOWN_DEBUGFS] = "debugfs access",
[LOCKDOWN_XMON_WR] = "xmon write access",
[LOCKDOWN_BPF_WRITE_USER] = "use of bpf to write user RAM",
[LOCKDOWN_DBG_WRITE_KERNEL] = "use of kgdb/kdb to write kernel RAM",
[LOCKDOWN_INTEGRITY_MAX] = "integrity",
[LOCKDOWN_KCORE] = "/proc/kcore access",
[LOCKDOWN_KPROBES] = "use of kprobes",
[LOCKDOWN_BPF_READ_KERNEL] = "use of bpf to read kernel RAM",
[LOCKDOWN_DBG_READ_KERNEL] = "use of kgdb/kdb to read kernel RAM",
[LOCKDOWN_PERF] = "unsafe use of perf",
[LOCKDOWN_TRACEFS] = "use of tracefs",
[LOCKDOWN_XMON_RW] = "xmon read and write access",