Merge 69fc06f70f ("Merge tag 'objtool-core-2020-06-01' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip") into android-mainline

Baby steps on the way to some 5.8-rc1 merge bisection...

Change-Id: Ib185f04a2838587bb578c7c7b28cb5e50d85eb36
Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
commit 75153325fa
@ -1943,56 +1943,27 @@ invoked from a CPU-hotplug notifier.
Scheduler and RCU
~~~~~~~~~~~~~~~~~

RCU depends on the scheduler, and the scheduler uses RCU to protect some
of its data structures. The preemptible-RCU ``rcu_read_unlock()``
implementation must therefore be written carefully to avoid deadlocks
involving the scheduler's runqueue and priority-inheritance locks. In
particular, ``rcu_read_unlock()`` must tolerate an interrupt where the
interrupt handler invokes both ``rcu_read_lock()`` and
``rcu_read_unlock()``. This possibility requires ``rcu_read_unlock()``
to use negative nesting levels to avoid destructive recursion via
interrupt handler's use of RCU.

This scheduler-RCU requirement came as a `complete
surprise <https://lwn.net/Articles/453002/>`__.

As noted above, RCU makes use of kthreads, and it is necessary to avoid
excessive CPU-time accumulation by these kthreads. This requirement was
no surprise, but RCU's violation of it when running context-switch-heavy
workloads when built with ``CONFIG_NO_HZ_FULL=y`` `did come as a
surprise
RCU makes use of kthreads, and it is necessary to avoid excessive CPU-time
accumulation by these kthreads. This requirement was no surprise, but
RCU's violation of it when running context-switch-heavy workloads when
built with ``CONFIG_NO_HZ_FULL=y`` `did come as a surprise
[PDF] <http://www.rdrop.com/users/paulmck/scalability/paper/BareMetal.2015.01.15b.pdf>`__.
RCU has made good progress towards meeting this requirement, even for
context-switch-heavy ``CONFIG_NO_HZ_FULL=y`` workloads, but there is
room for further improvement.

It is forbidden to hold any of scheduler's runqueue or
priority-inheritance spinlocks across an ``rcu_read_unlock()`` unless
interrupts have been disabled across the entire RCU read-side critical
section, that is, up to and including the matching ``rcu_read_lock()``.
Violating this restriction can result in deadlocks involving these
scheduler spinlocks. There was hope that this restriction might be
lifted when interrupt-disabled calls to ``rcu_read_unlock()`` started
deferring the reporting of the resulting RCU-preempt quiescent state
until the end of the corresponding interrupts-disabled region.
Unfortunately, timely reporting of the corresponding quiescent state to
expedited grace periods requires a call to ``raise_softirq()``, which
can acquire these scheduler spinlocks. In addition, real-time systems
using RCU priority boosting need this restriction to remain in effect
because deferred quiescent-state reporting would also defer deboosting,
which in turn would degrade real-time latencies.
There is no longer any prohibition against holding any of
scheduler's runqueue or priority-inheritance spinlocks across an
``rcu_read_unlock()``, even if interrupts and preemption were enabled
somewhere within the corresponding RCU read-side critical section.
Therefore, it is now perfectly legal to execute ``rcu_read_lock()``
with preemption enabled, acquire one of the scheduler locks, and hold
that lock across the matching ``rcu_read_unlock()``.
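
As a minimal illustrative sketch (not one of this document's own examples;
``my_lock``, ``gp``, ``struct foo`` and ``do_something_with()`` are made-up
stand-ins for a scheduler-style raw spinlock and RCU-protected data), the
newly legal pattern looks like this::

   unsigned long flags;
   struct foo *p;

   rcu_read_lock();                  /* preemption may still be enabled */
   p = rcu_dereference(gp);
   raw_spin_lock_irqsave(&my_lock, flags);
   do_something_with(p);
   rcu_read_unlock();                /* legal while still holding my_lock */
   raw_spin_unlock_irqrestore(&my_lock, flags);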

In theory, if a given RCU read-side critical section could be guaranteed
to be less than one second in duration, holding a scheduler spinlock
across that critical section's ``rcu_read_unlock()`` would require only
that preemption be disabled across the entire RCU read-side critical
section, not interrupts. Unfortunately, given the possibility of vCPU
preemption, long-running interrupts, and so on, it is not possible in
practice to guarantee that a given RCU read-side critical section will
complete in less than one second. Therefore, as noted above, if
scheduler spinlocks are held across a given call to
``rcu_read_unlock()``, interrupts must be disabled across the entire RCU
read-side critical section.
Similarly, the RCU flavor consolidation has removed the need for negative
nesting. The fact that interrupt-disabled regions of code act as RCU
read-side critical sections implicitly avoids earlier issues that used
to result in destructive recursion via interrupt handler's use of RCU.
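
For illustration only (the handler and helpers below are hypothetical), the
nesting that previously required negative nesting levels is simply an
interrupt handler's reader running inside an interrupted reader::

   static void my_irq_handler(void)
   {
           rcu_read_lock();        /* nests inside the interrupted reader */
           handle_my_event();
           rcu_read_unlock();      /* must not end the outer critical section */
   }

   static void my_task_work(void)
   {
           rcu_read_lock();
           do_my_work();           /* an interrupt running my_irq_handler()
                                    * may arrive here */
           rcu_read_unlock();
   }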

Tracing and RCU
~~~~~~~~~~~~~~~

@ -4210,12 +4210,24 @@
                Duration of CPU stall (s) to test RCU CPU stall
                warnings, zero to disable.

        rcutorture.stall_cpu_block= [KNL]
                Sleep while stalling if set.  This will result
                in warnings from preemptible RCU in addition
                to any other stall-related activity.

        rcutorture.stall_cpu_holdoff= [KNL]
                Time to wait (s) after boot before inducing stall.

        rcutorture.stall_cpu_irqsoff= [KNL]
                Disable interrupts while stalling if set.

        rcutorture.stall_gp_kthread= [KNL]
                Duration (s) of forced sleep within RCU
                grace-period kthread to test RCU CPU stall
                warnings, zero to disable.  If both stall_cpu
                and stall_gp_kthread are specified, the
                kthread is starved first, then the CPU.

        rcutorture.stat_interval= [KNL]
                Time (s) between statistics printk()s.
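
        For illustration only (the values below are arbitrary), the
        rcutorture.stall_* tests above can be combined, for example on
        the kernel command line:

                rcutorture.stall_cpu=22 rcutorture.stall_cpu_holdoff=60 rcutorture.stall_gp_kthread=5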

@ -4286,6 +4298,13 @@
                only normal grace-period primitives. No effect
                on CONFIG_TINY_RCU kernels.

        rcupdate.rcu_task_ipi_delay= [KNL]
                Set time in jiffies during which RCU tasks will
                avoid sending IPIs, starting with the beginning
                of a given grace period.  Setting a large
                number avoids disturbing real-time workloads,
                but lengthens grace periods.

        rcupdate.rcu_task_stall_timeout= [KNL]
                Set timeout in jiffies for RCU task stall warning
                messages. Disable with a value less than or equal
@ -13,6 +13,7 @@ The kernel provides a variety of locking primitives which can be divided
into two categories:

- Sleeping locks
- CPU local locks
- Spinning locks

This document conceptually describes these lock types and provides rules
@ -44,9 +45,23 @@ Sleeping lock types:

On PREEMPT_RT kernels, these lock types are converted to sleeping locks:

- local_lock
- spinlock_t
- rwlock_t


CPU local locks
---------------

- local_lock

On non-PREEMPT_RT kernels, local_lock functions are wrappers around
preemption and interrupt disabling primitives. Contrary to other locking
mechanisms, disabling preemption or interrupts are pure CPU local
concurrency control mechanisms and not suited for inter-CPU concurrency
control.
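
As a minimal sketch of such pure CPU-local protection (``my_counter`` is a
made-up per-CPU variable, not an example taken from this document)::

  static DEFINE_PER_CPU(unsigned long, my_counter);

  static void my_counter_inc(void)
  {
          preempt_disable();          /* excludes only this CPU's preemption */
          __this_cpu_inc(my_counter);
          preempt_enable();           /* other CPUs are not serialized at all */
  }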


Spinning locks
--------------

@ -67,6 +82,7 @@ can have suffixes which apply further protections:

 _irqsave/restore()   Save and disable / restore interrupt disabled state
 ===================  ====================================================


Owner semantics
===============

@ -139,6 +155,56 @@ implementation, thus changing the fairness:
 writer from starving readers.


local_lock
==========

local_lock provides a named scope to critical sections which are protected
by disabling preemption or interrupts.

On non-PREEMPT_RT kernels local_lock operations map to the preemption and
interrupt disabling and enabling primitives:

 ===========================      ======================
 local_lock(&llock)               preempt_disable()
 local_unlock(&llock)             preempt_enable()
 local_lock_irq(&llock)           local_irq_disable()
 local_unlock_irq(&llock)         local_irq_enable()
 local_lock_save(&llock)          local_irq_save()
 local_lock_restore(&llock)       local_irq_restore()
 ===========================      ======================

The named scope of local_lock has two advantages over the regular
primitives:

- The lock name allows static analysis and is also a clear documentation
  of the protection scope while the regular primitives are scopeless and
  opaque.

- If lockdep is enabled the local_lock gains a lockmap which allows to
  validate the correctness of the protection. This can detect cases where
  e.g. a function using preempt_disable() as protection mechanism is
  invoked from interrupt or soft-interrupt context. Aside of that
  lockdep_assert_held(&llock) works as with any other locking primitive.
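
A minimal usage sketch, assuming the local_lock API described here
(``local_lock_t``, ``INIT_LOCAL_LOCK()``) and an invented per-CPU
``my_stats`` structure::

  #include <linux/local_lock.h>

  struct my_stats {
          local_lock_t    lock;
          unsigned long   count;
  };

  static DEFINE_PER_CPU(struct my_stats, my_stats) = {
          .lock = INIT_LOCAL_LOCK(lock),
  };

  static void my_stats_inc(void)
  {
          /* preempt_disable() on non-PREEMPT_RT, per-CPU spinlock_t on PREEMPT_RT */
          local_lock(&my_stats.lock);
          this_cpu_inc(my_stats.count);
          local_unlock(&my_stats.lock);
  }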

local_lock and PREEMPT_RT
-------------------------

PREEMPT_RT kernels map local_lock to a per-CPU spinlock_t, thus changing
semantics:

- All spinlock_t changes also apply to local_lock.

local_lock usage
----------------

local_lock should be used in situations where disabling preemption or
interrupts is the appropriate form of concurrency control to protect
per-CPU data structures on a non PREEMPT_RT kernel.

local_lock is not suitable to protect against preemption or interrupts on a
PREEMPT_RT kernel due to the PREEMPT_RT specific spinlock_t semantics.


raw_spinlock_t and spinlock_t
=============================

@ -258,10 +324,82 @@ implementation, thus changing semantics:
PREEMPT_RT caveats
==================

local_lock on RT
----------------

The mapping of local_lock to spinlock_t on PREEMPT_RT kernels has a few
implications. For example, on a non-PREEMPT_RT kernel the following code
sequence works as expected::

  local_lock_irq(&local_lock);
  raw_spin_lock(&lock);

and is fully equivalent to::

  raw_spin_lock_irq(&lock);

On a PREEMPT_RT kernel this code sequence breaks because local_lock_irq()
is mapped to a per-CPU spinlock_t which neither disables interrupts nor
preemption. The following code sequence works perfectly correct on both
PREEMPT_RT and non-PREEMPT_RT kernels::

  local_lock_irq(&local_lock);
  spin_lock(&lock);

Another caveat with local locks is that each local_lock has a specific
protection scope. So the following substitution is wrong::

  func1()
  {
    local_irq_save(flags);    -> local_lock_irqsave(&local_lock_1, flags);
    func3();
    local_irq_restore(flags); -> local_lock_irqrestore(&local_lock_1, flags);
  }

  func2()
  {
    local_irq_save(flags);    -> local_lock_irqsave(&local_lock_2, flags);
    func3();
    local_irq_restore(flags); -> local_lock_irqrestore(&local_lock_2, flags);
  }

  func3()
  {
    lockdep_assert_irqs_disabled();
    access_protected_data();
  }

On a non-PREEMPT_RT kernel this works correctly, but on a PREEMPT_RT kernel
local_lock_1 and local_lock_2 are distinct and cannot serialize the callers
of func3(). Also the lockdep assert will trigger on a PREEMPT_RT kernel
because local_lock_irqsave() does not disable interrupts due to the
PREEMPT_RT-specific semantics of spinlock_t. The correct substitution is::

  func1()
  {
    local_irq_save(flags);    -> local_lock_irqsave(&local_lock, flags);
    func3();
    local_irq_restore(flags); -> local_lock_irqrestore(&local_lock, flags);
  }

  func2()
  {
    local_irq_save(flags);    -> local_lock_irqsave(&local_lock, flags);
    func3();
    local_irq_restore(flags); -> local_lock_irqrestore(&local_lock, flags);
  }

  func3()
  {
    lockdep_assert_held(&local_lock);
    access_protected_data();
  }


spinlock_t and rwlock_t
-----------------------

These changes in spinlock_t and rwlock_t semantics on PREEMPT_RT kernels
The changes in spinlock_t and rwlock_t semantics on PREEMPT_RT kernels
have a few implications. For example, on a non-PREEMPT_RT kernel the
following code sequence works as expected::

@ -282,9 +420,61 @@ local_lock mechanism. Acquiring the local_lock pins the task to a CPU,
allowing things like per-CPU interrupt disabled locks to be acquired.
However, this approach should be used only where absolutely necessary.

A typical scenario is protection of per-CPU variables in thread context::

raw_spinlock_t
--------------
  struct foo *p = get_cpu_ptr(&var1);

  spin_lock(&p->lock);
  p->count += this_cpu_read(var2);

This is correct code on a non-PREEMPT_RT kernel, but on a PREEMPT_RT kernel
this breaks. The PREEMPT_RT-specific change of spinlock_t semantics does
not allow to acquire p->lock because get_cpu_ptr() implicitly disables
preemption. The following substitution works on both kernels::

  struct foo *p;

  migrate_disable();
  p = this_cpu_ptr(&var1);
  spin_lock(&p->lock);
  p->count += this_cpu_read(var2);

On a non-PREEMPT_RT kernel migrate_disable() maps to preempt_disable()
which makes the above code fully equivalent. On a PREEMPT_RT kernel
migrate_disable() ensures that the task is pinned on the current CPU which
in turn guarantees that the per-CPU access to var1 and var2 are staying on
the same CPU.

The migrate_disable() substitution is not valid for the following
scenario::

  func()
  {
    struct foo *p;

    migrate_disable();
    p = this_cpu_ptr(&var1);
    p->val = func2();

While correct on a non-PREEMPT_RT kernel, this breaks on PREEMPT_RT because
here migrate_disable() does not protect against reentrancy from a
preempting task. A correct substitution for this case is::

  func()
  {
    struct foo *p;

    local_lock(&foo_lock);
    p = this_cpu_ptr(&var1);
    p->val = func2();

On a non-PREEMPT_RT kernel this protects against reentrancy by disabling
preemption. On a PREEMPT_RT kernel this is achieved by acquiring the
underlying per-CPU spinlock.


raw_spinlock_t on RT
--------------------

Acquiring a raw_spinlock_t disables preemption and possibly also
interrupts, so the critical section must avoid acquiring a regular
@ -325,22 +515,25 @@ Lock type nesting rules

The most basic rules are:

- Lock types of the same lock category (sleeping, spinning) can nest
  arbitrarily as long as they respect the general lock ordering rules to
  prevent deadlocks.
- Lock types of the same lock category (sleeping, CPU local, spinning)
  can nest arbitrarily as long as they respect the general lock ordering
  rules to prevent deadlocks.

- Sleeping lock types cannot nest inside spinning lock types.
- Sleeping lock types cannot nest inside CPU local and spinning lock types.

- Spinning lock types can nest inside sleeping lock types.
- CPU local and spinning lock types can nest inside sleeping lock types.

- Spinning lock types can nest inside all lock types

These constraints apply both in PREEMPT_RT and otherwise.

The fact that PREEMPT_RT changes the lock category of spinlock_t and
rwlock_t from spinning to sleeping means that they cannot be acquired while
holding a raw spinlock. This results in the following nesting ordering:
rwlock_t from spinning to sleeping and substitutes local_lock with a
per-CPU spinlock_t means that they cannot be acquired while holding a raw
spinlock. This results in the following nesting ordering:

  1) Sleeping locks
  2) spinlock_t and rwlock_t
  2) spinlock_t, rwlock_t, local_lock
  3) raw_spinlock_t and bit spinlocks
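
A hedged sketch of that ordering (all lock variables and
``do_protected_work()`` are invented and assumed to be initialized
elsewhere)::

  mutex_lock(&m);           /* 1) sleeping lock                          */
  spin_lock(&sl);           /* 2) spinlock_t (a sleeping lock on RT)     */
  raw_spin_lock(&rsl);      /* 3) raw_spinlock_t, innermost              */

  do_protected_work();

  raw_spin_unlock(&rsl);
  spin_unlock(&sl);
  mutex_unlock(&m);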

Lockdep will complain if these constraints are violated, both in
@ -229,14 +229,6 @@ Adding support for it is easy: just define the macro in asm/ftrace.h and
pass the return address pointer as the 'retp' argument to
ftrace_push_return_trace().

HAVE_FTRACE_NMI_ENTER
---------------------

If you can't trace NMI functions, then skip this option.

<details to be filled>


HAVE_SYSCALL_TRACEPOINTS
------------------------

@ -32,30 +32,70 @@ u64 smp_irq_stat_cpu(unsigned int cpu);
|
||||
|
||||
struct nmi_ctx {
|
||||
u64 hcr;
|
||||
unsigned int cnt;
|
||||
};
|
||||
|
||||
DECLARE_PER_CPU(struct nmi_ctx, nmi_contexts);
|
||||
|
||||
#define arch_nmi_enter() \
|
||||
do { \
|
||||
if (is_kernel_in_hyp_mode()) { \
|
||||
struct nmi_ctx *nmi_ctx = this_cpu_ptr(&nmi_contexts); \
|
||||
nmi_ctx->hcr = read_sysreg(hcr_el2); \
|
||||
if (!(nmi_ctx->hcr & HCR_TGE)) { \
|
||||
write_sysreg(nmi_ctx->hcr | HCR_TGE, hcr_el2); \
|
||||
isb(); \
|
||||
} \
|
||||
} \
|
||||
} while (0)
|
||||
#define arch_nmi_enter() \
|
||||
do { \
|
||||
struct nmi_ctx *___ctx; \
|
||||
u64 ___hcr; \
|
||||
\
|
||||
if (!is_kernel_in_hyp_mode()) \
|
||||
break; \
|
||||
\
|
||||
___ctx = this_cpu_ptr(&nmi_contexts); \
|
||||
if (___ctx->cnt) { \
|
||||
___ctx->cnt++; \
|
||||
break; \
|
||||
} \
|
||||
\
|
||||
___hcr = read_sysreg(hcr_el2); \
|
||||
if (!(___hcr & HCR_TGE)) { \
|
||||
write_sysreg(___hcr | HCR_TGE, hcr_el2); \
|
||||
isb(); \
|
||||
} \
|
||||
/* \
|
||||
* Make sure the sysreg write is performed before ___ctx->cnt \
|
||||
* is set to 1. NMIs that see cnt == 1 will rely on us. \
|
||||
*/ \
|
||||
barrier(); \
|
||||
___ctx->cnt = 1; \
|
||||
/* \
|
||||
* Make sure ___ctx->cnt is set before we save ___hcr. We \
|
||||
* don't want ___ctx->hcr to be overwritten. \
|
||||
*/ \
|
||||
barrier(); \
|
||||
___ctx->hcr = ___hcr; \
|
||||
} while (0)
|
||||
|
||||
#define arch_nmi_exit() \
|
||||
do { \
|
||||
if (is_kernel_in_hyp_mode()) { \
|
||||
struct nmi_ctx *nmi_ctx = this_cpu_ptr(&nmi_contexts); \
|
||||
if (!(nmi_ctx->hcr & HCR_TGE)) \
|
||||
write_sysreg(nmi_ctx->hcr, hcr_el2); \
|
||||
} \
|
||||
} while (0)
|
||||
#define arch_nmi_exit() \
|
||||
do { \
|
||||
struct nmi_ctx *___ctx; \
|
||||
u64 ___hcr; \
|
||||
\
|
||||
if (!is_kernel_in_hyp_mode()) \
|
||||
break; \
|
||||
\
|
||||
___ctx = this_cpu_ptr(&nmi_contexts); \
|
||||
___hcr = ___ctx->hcr; \
|
||||
/* \
|
||||
* Make sure we read ___ctx->hcr before we release \
|
||||
* ___ctx->cnt as it makes ___ctx->hcr updatable again. \
|
||||
*/ \
|
||||
barrier(); \
|
||||
___ctx->cnt--; \
|
||||
/* \
|
||||
* Make sure ___ctx->cnt release is visible before we \
|
||||
* restore the sysreg. Otherwise a new NMI occurring \
|
||||
* right after write_sysreg() can be fooled and think \
|
||||
* we secured things for it. \
|
||||
*/ \
|
||||
barrier(); \
|
||||
if (!___ctx->cnt && !(___hcr & HCR_TGE)) \
|
||||
write_sysreg(___hcr, hcr_el2); \
|
||||
} while (0)
|
||||
|
||||
static inline void ack_bad_irq(unsigned int irq)
|
||||
{
|
||||
|
@ -251,22 +251,12 @@ asmlinkage __kprobes notrace unsigned long
|
||||
__sdei_handler(struct pt_regs *regs, struct sdei_registered_event *arg)
|
||||
{
|
||||
unsigned long ret;
|
||||
bool do_nmi_exit = false;
|
||||
|
||||
/*
|
||||
* nmi_enter() deals with printk() re-entrance and use of RCU when
|
||||
* RCU believed this CPU was idle. Because critical events can
|
||||
* interrupt normal events, we may already be in_nmi().
|
||||
*/
|
||||
if (!in_nmi()) {
|
||||
nmi_enter();
|
||||
do_nmi_exit = true;
|
||||
}
|
||||
nmi_enter();
|
||||
|
||||
ret = _sdei_handler(regs, arg);
|
||||
|
||||
if (do_nmi_exit)
|
||||
nmi_exit();
|
||||
nmi_exit();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -906,17 +906,13 @@ bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned int esr)
|
||||
|
||||
asmlinkage void do_serror(struct pt_regs *regs, unsigned int esr)
|
||||
{
|
||||
const bool was_in_nmi = in_nmi();
|
||||
|
||||
if (!was_in_nmi)
|
||||
nmi_enter();
|
||||
nmi_enter();
|
||||
|
||||
/* non-RAS errors are not containable */
|
||||
if (!arm64_is_ras_serror(esr) || arm64_is_fatal_ras_serror(regs, esr))
|
||||
arm64_serror_panic(regs, esr);
|
||||
|
||||
if (!was_in_nmi)
|
||||
nmi_exit();
|
||||
nmi_exit();
|
||||
}
|
||||
|
||||
asmlinkage void enter_from_user_mode(void)
|
||||
|
@ -441,15 +441,9 @@ void hv_nmi_check_nonrecoverable(struct pt_regs *regs)
|
||||
void system_reset_exception(struct pt_regs *regs)
|
||||
{
|
||||
unsigned long hsrr0, hsrr1;
|
||||
bool nested = in_nmi();
|
||||
bool saved_hsrrs = false;
|
||||
|
||||
/*
|
||||
* Avoid crashes in case of nested NMI exceptions. Recoverability
|
||||
* is determined by RI and in_nmi
|
||||
*/
|
||||
if (!nested)
|
||||
nmi_enter();
|
||||
nmi_enter();
|
||||
|
||||
/*
|
||||
* System reset can interrupt code where HSRRs are live and MSR[RI]=1.
|
||||
@ -521,8 +515,7 @@ void system_reset_exception(struct pt_regs *regs)
|
||||
mtspr(SPRN_HSRR1, hsrr1);
|
||||
}
|
||||
|
||||
if (!nested)
|
||||
nmi_exit();
|
||||
nmi_exit();
|
||||
|
||||
/* What should we do here? We could issue a shutdown or hard reset. */
|
||||
}
|
||||
@ -823,9 +816,8 @@ int machine_check_generic(struct pt_regs *regs)
|
||||
void machine_check_exception(struct pt_regs *regs)
|
||||
{
|
||||
int recover = 0;
|
||||
bool nested = in_nmi();
|
||||
if (!nested)
|
||||
nmi_enter();
|
||||
|
||||
nmi_enter();
|
||||
|
||||
__this_cpu_inc(irq_stat.mce_exceptions);
|
||||
|
||||
@ -851,8 +843,7 @@ void machine_check_exception(struct pt_regs *regs)
|
||||
if (check_io_access(regs))
|
||||
goto bail;
|
||||
|
||||
if (!nested)
|
||||
nmi_exit();
|
||||
nmi_exit();
|
||||
|
||||
die("Machine check", regs, SIGBUS);
|
||||
|
||||
@ -863,8 +854,7 @@ void machine_check_exception(struct pt_regs *regs)
|
||||
return;
|
||||
|
||||
bail:
|
||||
if (!nested)
|
||||
nmi_exit();
|
||||
nmi_exit();
|
||||
}
|
||||
|
||||
void SMIException(struct pt_regs *regs)
|
||||
|
@ -71,7 +71,6 @@ config SUPERH32
|
||||
select HAVE_FUNCTION_TRACER
|
||||
select HAVE_FTRACE_MCOUNT_RECORD
|
||||
select HAVE_DYNAMIC_FTRACE
|
||||
select HAVE_FTRACE_NMI_ENTER if DYNAMIC_FTRACE
|
||||
select ARCH_WANT_IPC_PARSE_VERSION
|
||||
select HAVE_FUNCTION_GRAPH_TRACER
|
||||
select HAVE_ARCH_KGDB
|
||||
|
@ -170,11 +170,21 @@ BUILD_TRAP_HANDLER(bug)
|
||||
force_sig(SIGTRAP);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_DYNAMIC_FTRACE
|
||||
extern void arch_ftrace_nmi_enter(void);
|
||||
extern void arch_ftrace_nmi_exit(void);
|
||||
#else
|
||||
static inline void arch_ftrace_nmi_enter(void) { }
|
||||
static inline void arch_ftrace_nmi_exit(void) { }
|
||||
#endif
|
||||
|
||||
BUILD_TRAP_HANDLER(nmi)
|
||||
{
|
||||
unsigned int cpu = smp_processor_id();
|
||||
TRAP_HANDLER_DECL;
|
||||
|
||||
arch_ftrace_nmi_enter();
|
||||
|
||||
nmi_enter();
|
||||
nmi_count(cpu)++;
|
||||
|
||||
@ -190,4 +200,6 @@ BUILD_TRAP_HANDLER(nmi)
|
||||
}
|
||||
|
||||
nmi_exit();
|
||||
|
||||
arch_ftrace_nmi_exit();
|
||||
}
|
||||
|
@ -2758,7 +2758,7 @@ SYM_FUNC_START(aesni_xts_crypt8)
|
||||
pxor INC, STATE4
|
||||
movdqu IV, 0x30(OUTP)
|
||||
|
||||
CALL_NOSPEC %r11
|
||||
CALL_NOSPEC r11
|
||||
|
||||
movdqu 0x00(OUTP), INC
|
||||
pxor INC, STATE1
|
||||
@ -2803,7 +2803,7 @@ SYM_FUNC_START(aesni_xts_crypt8)
|
||||
_aesni_gf128mul_x_ble()
|
||||
movups IV, (IVP)
|
||||
|
||||
CALL_NOSPEC %r11
|
||||
CALL_NOSPEC r11
|
||||
|
||||
movdqu 0x40(OUTP), INC
|
||||
pxor INC, STATE1
|
||||
|
@ -1228,7 +1228,7 @@ SYM_FUNC_START_LOCAL(camellia_xts_crypt_16way)
|
||||
vpxor 14 * 16(%rax), %xmm15, %xmm14;
|
||||
vpxor 15 * 16(%rax), %xmm15, %xmm15;
|
||||
|
||||
CALL_NOSPEC %r9;
|
||||
CALL_NOSPEC r9;
|
||||
|
||||
addq $(16 * 16), %rsp;
|
||||
|
||||
|
@ -1339,7 +1339,7 @@ SYM_FUNC_START_LOCAL(camellia_xts_crypt_32way)
|
||||
vpxor 14 * 32(%rax), %ymm15, %ymm14;
|
||||
vpxor 15 * 32(%rax), %ymm15, %ymm15;
|
||||
|
||||
CALL_NOSPEC %r9;
|
||||
CALL_NOSPEC r9;
|
||||
|
||||
addq $(16 * 32), %rsp;
|
||||
|
||||
|
@ -75,7 +75,7 @@
|
||||
|
||||
.text
|
||||
SYM_FUNC_START(crc_pcl)
|
||||
#define bufp %rdi
|
||||
#define bufp rdi
|
||||
#define bufp_dw %edi
|
||||
#define bufp_w %di
|
||||
#define bufp_b %dil
|
||||
@ -105,9 +105,9 @@ SYM_FUNC_START(crc_pcl)
|
||||
## 1) ALIGN:
|
||||
################################################################
|
||||
|
||||
mov bufp, bufptmp # rdi = *buf
|
||||
neg bufp
|
||||
and $7, bufp # calculate the unalignment amount of
|
||||
mov %bufp, bufptmp # rdi = *buf
|
||||
neg %bufp
|
||||
and $7, %bufp # calculate the unalignment amount of
|
||||
# the address
|
||||
je proc_block # Skip if aligned
|
||||
|
||||
@ -123,13 +123,13 @@ SYM_FUNC_START(crc_pcl)
|
||||
do_align:
|
||||
#### Calculate CRC of unaligned bytes of the buffer (if any)
|
||||
movq (bufptmp), tmp # load a quadward from the buffer
|
||||
add bufp, bufptmp # align buffer pointer for quadword
|
||||
add %bufp, bufptmp # align buffer pointer for quadword
|
||||
# processing
|
||||
sub bufp, len # update buffer length
|
||||
sub %bufp, len # update buffer length
|
||||
align_loop:
|
||||
crc32b %bl, crc_init_dw # compute crc32 of 1-byte
|
||||
shr $8, tmp # get next byte
|
||||
dec bufp
|
||||
dec %bufp
|
||||
jne align_loop
|
||||
|
||||
proc_block:
|
||||
@ -169,10 +169,10 @@ continue_block:
|
||||
xor crc2, crc2
|
||||
|
||||
## branch into array
|
||||
lea jump_table(%rip), bufp
|
||||
movzxw (bufp, %rax, 2), len
|
||||
lea crc_array(%rip), bufp
|
||||
lea (bufp, len, 1), bufp
|
||||
lea jump_table(%rip), %bufp
|
||||
movzxw (%bufp, %rax, 2), len
|
||||
lea crc_array(%rip), %bufp
|
||||
lea (%bufp, len, 1), %bufp
|
||||
JMP_NOSPEC bufp
|
||||
|
||||
################################################################
|
||||
@ -218,9 +218,9 @@ LABEL crc_ %i
|
||||
## 4) Combine three results:
|
||||
################################################################
|
||||
|
||||
lea (K_table-8)(%rip), bufp # first entry is for idx 1
|
||||
lea (K_table-8)(%rip), %bufp # first entry is for idx 1
|
||||
shlq $3, %rax # rax *= 8
|
||||
pmovzxdq (bufp,%rax), %xmm0 # 2 consts: K1:K2
|
||||
pmovzxdq (%bufp,%rax), %xmm0 # 2 consts: K1:K2
|
||||
leal (%eax,%eax,2), %eax # rax *= 3 (total *24)
|
||||
subq %rax, tmp # tmp -= rax*24
|
||||
|
||||
|
@ -816,7 +816,7 @@ SYM_CODE_START(ret_from_fork)
|
||||
|
||||
/* kernel thread */
|
||||
1: movl %edi, %eax
|
||||
CALL_NOSPEC %ebx
|
||||
CALL_NOSPEC ebx
|
||||
/*
|
||||
* A kernel thread is allowed to return here after successfully
|
||||
* calling do_execve(). Exit to userspace to complete the execve()
|
||||
@ -1501,7 +1501,7 @@ SYM_CODE_START_LOCAL_NOALIGN(common_exception_read_cr2)
|
||||
|
||||
TRACE_IRQS_OFF
|
||||
movl %esp, %eax # pt_regs pointer
|
||||
CALL_NOSPEC %edi
|
||||
CALL_NOSPEC edi
|
||||
jmp ret_from_exception
|
||||
SYM_CODE_END(common_exception_read_cr2)
|
||||
|
||||
@ -1522,7 +1522,7 @@ SYM_CODE_START_LOCAL_NOALIGN(common_exception)
|
||||
|
||||
TRACE_IRQS_OFF
|
||||
movl %esp, %eax # pt_regs pointer
|
||||
CALL_NOSPEC %edi
|
||||
CALL_NOSPEC edi
|
||||
jmp ret_from_exception
|
||||
SYM_CODE_END(common_exception)
|
||||
|
||||
|
@ -348,7 +348,7 @@ SYM_CODE_START(ret_from_fork)
|
||||
/* kernel thread */
|
||||
UNWIND_HINT_EMPTY
|
||||
movq %r12, %rdi
|
||||
CALL_NOSPEC %rbx
|
||||
CALL_NOSPEC rbx
|
||||
/*
|
||||
* A kernel thread is allowed to return here after successfully
|
||||
* calling do_execve(). Exit to userspace to complete the execve()
|
||||
|
arch/x86/include/asm/GEN-for-each-reg.h (new file, 25 lines)
@ -0,0 +1,25 @@
|
||||
#ifdef CONFIG_64BIT
|
||||
GEN(rax)
|
||||
GEN(rbx)
|
||||
GEN(rcx)
|
||||
GEN(rdx)
|
||||
GEN(rsi)
|
||||
GEN(rdi)
|
||||
GEN(rbp)
|
||||
GEN(r8)
|
||||
GEN(r9)
|
||||
GEN(r10)
|
||||
GEN(r11)
|
||||
GEN(r12)
|
||||
GEN(r13)
|
||||
GEN(r14)
|
||||
GEN(r15)
|
||||
#else
|
||||
GEN(eax)
|
||||
GEN(ebx)
|
||||
GEN(ecx)
|
||||
GEN(edx)
|
||||
GEN(esi)
|
||||
GEN(edi)
|
||||
GEN(ebp)
|
||||
#endif
|
@ -17,24 +17,19 @@ extern void cmpxchg8b_emu(void);
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_RETPOLINE
|
||||
#ifdef CONFIG_X86_32
|
||||
#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_e ## reg(void);
|
||||
#else
|
||||
#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_r ## reg(void);
|
||||
INDIRECT_THUNK(8)
|
||||
INDIRECT_THUNK(9)
|
||||
INDIRECT_THUNK(10)
|
||||
INDIRECT_THUNK(11)
|
||||
INDIRECT_THUNK(12)
|
||||
INDIRECT_THUNK(13)
|
||||
INDIRECT_THUNK(14)
|
||||
INDIRECT_THUNK(15)
|
||||
#endif
|
||||
INDIRECT_THUNK(ax)
|
||||
INDIRECT_THUNK(bx)
|
||||
INDIRECT_THUNK(cx)
|
||||
INDIRECT_THUNK(dx)
|
||||
INDIRECT_THUNK(si)
|
||||
INDIRECT_THUNK(di)
|
||||
INDIRECT_THUNK(bp)
|
||||
|
||||
#define DECL_INDIRECT_THUNK(reg) \
|
||||
extern asmlinkage void __x86_indirect_thunk_ ## reg (void);
|
||||
|
||||
#define DECL_RETPOLINE(reg) \
|
||||
extern asmlinkage void __x86_retpoline_ ## reg (void);
|
||||
|
||||
#undef GEN
|
||||
#define GEN(reg) DECL_INDIRECT_THUNK(reg)
|
||||
#include <asm/GEN-for-each-reg.h>
|
||||
|
||||
#undef GEN
|
||||
#define GEN(reg) DECL_RETPOLINE(reg)
|
||||
#include <asm/GEN-for-each-reg.h>
|
||||
|
||||
#endif /* CONFIG_RETPOLINE */
|
||||
|
@ -4,20 +4,13 @@
|
||||
#define _ASM_X86_NOSPEC_BRANCH_H_
|
||||
|
||||
#include <linux/static_key.h>
|
||||
#include <linux/frame.h>
|
||||
|
||||
#include <asm/alternative.h>
|
||||
#include <asm/alternative-asm.h>
|
||||
#include <asm/cpufeatures.h>
|
||||
#include <asm/msr-index.h>
|
||||
|
||||
/*
|
||||
* This should be used immediately before a retpoline alternative. It tells
|
||||
* objtool where the retpolines are so that it can make sense of the control
|
||||
* flow by just reading the original instruction(s) and ignoring the
|
||||
* alternatives.
|
||||
*/
|
||||
#define ANNOTATE_NOSPEC_ALTERNATIVE \
|
||||
ANNOTATE_IGNORE_ALTERNATIVE
|
||||
#include <asm/unwind_hints.h>
|
||||
|
||||
/*
|
||||
* Fill the CPU return stack buffer.
|
||||
@ -46,21 +39,25 @@
|
||||
#define __FILL_RETURN_BUFFER(reg, nr, sp) \
|
||||
mov $(nr/2), reg; \
|
||||
771: \
|
||||
ANNOTATE_INTRA_FUNCTION_CALL; \
|
||||
call 772f; \
|
||||
773: /* speculation trap */ \
|
||||
UNWIND_HINT_EMPTY; \
|
||||
pause; \
|
||||
lfence; \
|
||||
jmp 773b; \
|
||||
772: \
|
||||
ANNOTATE_INTRA_FUNCTION_CALL; \
|
||||
call 774f; \
|
||||
775: /* speculation trap */ \
|
||||
UNWIND_HINT_EMPTY; \
|
||||
pause; \
|
||||
lfence; \
|
||||
jmp 775b; \
|
||||
774: \
|
||||
add $(BITS_PER_LONG/8) * 2, sp; \
|
||||
dec reg; \
|
||||
jnz 771b; \
|
||||
add $(BITS_PER_LONG/8) * nr, sp;
|
||||
jnz 771b;
|
||||
|
||||
#ifdef __ASSEMBLY__
|
||||
|
||||
@ -76,34 +73,6 @@
|
||||
.popsection
|
||||
.endm
|
||||
|
||||
/*
|
||||
* These are the bare retpoline primitives for indirect jmp and call.
|
||||
* Do not use these directly; they only exist to make the ALTERNATIVE
|
||||
* invocation below less ugly.
|
||||
*/
|
||||
.macro RETPOLINE_JMP reg:req
|
||||
call .Ldo_rop_\@
|
||||
.Lspec_trap_\@:
|
||||
pause
|
||||
lfence
|
||||
jmp .Lspec_trap_\@
|
||||
.Ldo_rop_\@:
|
||||
mov \reg, (%_ASM_SP)
|
||||
ret
|
||||
.endm
|
||||
|
||||
/*
|
||||
* This is a wrapper around RETPOLINE_JMP so the called function in reg
|
||||
* returns to the instruction after the macro.
|
||||
*/
|
||||
.macro RETPOLINE_CALL reg:req
|
||||
jmp .Ldo_call_\@
|
||||
.Ldo_retpoline_jmp_\@:
|
||||
RETPOLINE_JMP \reg
|
||||
.Ldo_call_\@:
|
||||
call .Ldo_retpoline_jmp_\@
|
||||
.endm
|
||||
|
||||
/*
|
||||
* JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple
|
||||
* indirect jmp/call which may be susceptible to the Spectre variant 2
|
||||
@ -111,23 +80,21 @@
|
||||
*/
|
||||
.macro JMP_NOSPEC reg:req
|
||||
#ifdef CONFIG_RETPOLINE
|
||||
ANNOTATE_NOSPEC_ALTERNATIVE
|
||||
ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *\reg), \
|
||||
__stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \
|
||||
__stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *\reg), X86_FEATURE_RETPOLINE_AMD
|
||||
ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \
|
||||
__stringify(jmp __x86_retpoline_\reg), X86_FEATURE_RETPOLINE, \
|
||||
__stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_AMD
|
||||
#else
|
||||
jmp *\reg
|
||||
jmp *%\reg
|
||||
#endif
|
||||
.endm
|
||||
|
||||
.macro CALL_NOSPEC reg:req
|
||||
#ifdef CONFIG_RETPOLINE
|
||||
ANNOTATE_NOSPEC_ALTERNATIVE
|
||||
ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *\reg), \
|
||||
__stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\
|
||||
__stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *\reg), X86_FEATURE_RETPOLINE_AMD
|
||||
ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *%\reg), \
|
||||
__stringify(call __x86_retpoline_\reg), X86_FEATURE_RETPOLINE, \
|
||||
__stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *%\reg), X86_FEATURE_RETPOLINE_AMD
|
||||
#else
|
||||
call *\reg
|
||||
call *%\reg
|
||||
#endif
|
||||
.endm
|
||||
|
||||
@ -137,10 +104,8 @@
|
||||
*/
|
||||
.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
|
||||
#ifdef CONFIG_RETPOLINE
|
||||
ANNOTATE_NOSPEC_ALTERNATIVE
|
||||
ALTERNATIVE "jmp .Lskip_rsb_\@", \
|
||||
__stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \
|
||||
\ftr
|
||||
ALTERNATIVE "jmp .Lskip_rsb_\@", "", \ftr
|
||||
__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)
|
||||
.Lskip_rsb_\@:
|
||||
#endif
|
||||
.endm
|
||||
@ -161,16 +126,16 @@
|
||||
* which is ensured when CONFIG_RETPOLINE is defined.
|
||||
*/
|
||||
# define CALL_NOSPEC \
|
||||
ANNOTATE_NOSPEC_ALTERNATIVE \
|
||||
ALTERNATIVE_2( \
|
||||
ANNOTATE_RETPOLINE_SAFE \
|
||||
"call *%[thunk_target]\n", \
|
||||
"call __x86_indirect_thunk_%V[thunk_target]\n", \
|
||||
"call __x86_retpoline_%V[thunk_target]\n", \
|
||||
X86_FEATURE_RETPOLINE, \
|
||||
"lfence;\n" \
|
||||
ANNOTATE_RETPOLINE_SAFE \
|
||||
"call *%[thunk_target]\n", \
|
||||
X86_FEATURE_RETPOLINE_AMD)
|
||||
|
||||
# define THUNK_TARGET(addr) [thunk_target] "r" (addr)
|
||||
|
||||
#else /* CONFIG_X86_32 */
|
||||
@ -180,7 +145,6 @@
|
||||
* here, anyway.
|
||||
*/
|
||||
# define CALL_NOSPEC \
|
||||
ANNOTATE_NOSPEC_ALTERNATIVE \
|
||||
ALTERNATIVE_2( \
|
||||
ANNOTATE_RETPOLINE_SAFE \
|
||||
"call *%[thunk_target]\n", \
|
||||
|
@ -58,8 +58,7 @@
|
||||
#define ORC_TYPE_CALL 0
|
||||
#define ORC_TYPE_REGS 1
|
||||
#define ORC_TYPE_REGS_IRET 2
|
||||
#define UNWIND_HINT_TYPE_SAVE 3
|
||||
#define UNWIND_HINT_TYPE_RESTORE 4
|
||||
#define UNWIND_HINT_TYPE_RET_OFFSET 3
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
/*
|
||||
|
@ -728,7 +728,6 @@ static inline void sync_core(void)
|
||||
unsigned int tmp;
|
||||
|
||||
asm volatile (
|
||||
UNWIND_HINT_SAVE
|
||||
"mov %%ss, %0\n\t"
|
||||
"pushq %q0\n\t"
|
||||
"pushq %%rsp\n\t"
|
||||
@ -738,7 +737,6 @@ static inline void sync_core(void)
|
||||
"pushq %q0\n\t"
|
||||
"pushq $1f\n\t"
|
||||
"iretq\n\t"
|
||||
UNWIND_HINT_RESTORE
|
||||
"1:"
|
||||
: "=&r" (tmp), ASM_CALL_CONSTRAINT : : "cc", "memory");
|
||||
#endif
|
||||
|
@ -57,8 +57,10 @@ static __always_inline unsigned long smap_save(void)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
asm volatile (ALTERNATIVE("", "pushf; pop %0; " __ASM_CLAC,
|
||||
X86_FEATURE_SMAP)
|
||||
asm volatile ("# smap_save\n\t"
|
||||
ALTERNATIVE("jmp 1f", "", X86_FEATURE_SMAP)
|
||||
"pushf; pop %0; " __ASM_CLAC "\n\t"
|
||||
"1:"
|
||||
: "=rm" (flags) : : "memory", "cc");
|
||||
|
||||
return flags;
|
||||
@ -66,7 +68,10 @@ static __always_inline unsigned long smap_save(void)
|
||||
|
||||
static __always_inline void smap_restore(unsigned long flags)
|
||||
{
|
||||
asm volatile (ALTERNATIVE("", "push %0; popf", X86_FEATURE_SMAP)
|
||||
asm volatile ("# smap_restore\n\t"
|
||||
ALTERNATIVE("jmp 1f", "", X86_FEATURE_SMAP)
|
||||
"push %0; popf\n\t"
|
||||
"1:"
|
||||
: : "g" (flags) : "memory", "cc");
|
||||
}
|
||||
|
||||
|
@ -118,11 +118,6 @@ void smp_spurious_interrupt(struct pt_regs *regs);
|
||||
void smp_error_interrupt(struct pt_regs *regs);
|
||||
asmlinkage void smp_irq_move_cleanup_interrupt(void);
|
||||
|
||||
extern void ist_enter(struct pt_regs *regs);
|
||||
extern void ist_exit(struct pt_regs *regs);
|
||||
extern void ist_begin_non_atomic(struct pt_regs *regs);
|
||||
extern void ist_end_non_atomic(void);
|
||||
|
||||
#ifdef CONFIG_VMAP_STACK
|
||||
void __noreturn handle_stack_overflow(const char *message,
|
||||
struct pt_regs *regs,
|
||||
|
@ -86,32 +86,15 @@
|
||||
UNWIND_HINT sp_offset=\sp_offset
|
||||
.endm
|
||||
|
||||
.macro UNWIND_HINT_SAVE
|
||||
UNWIND_HINT type=UNWIND_HINT_TYPE_SAVE
|
||||
/*
|
||||
* RET_OFFSET: Used on instructions that terminate a function; mostly RETURN
|
||||
* and sibling calls. On these, sp_offset denotes the expected offset from
|
||||
* initial_func_cfi.
|
||||
*/
|
||||
.macro UNWIND_HINT_RET_OFFSET sp_offset=8
|
||||
UNWIND_HINT type=UNWIND_HINT_TYPE_RET_OFFSET sp_offset=\sp_offset
|
||||
.endm
|
||||
|
||||
.macro UNWIND_HINT_RESTORE
|
||||
UNWIND_HINT type=UNWIND_HINT_TYPE_RESTORE
|
||||
.endm
|
||||
|
||||
#else /* !__ASSEMBLY__ */
|
||||
|
||||
#define UNWIND_HINT(sp_reg, sp_offset, type, end) \
|
||||
"987: \n\t" \
|
||||
".pushsection .discard.unwind_hints\n\t" \
|
||||
/* struct unwind_hint */ \
|
||||
".long 987b - .\n\t" \
|
||||
".short " __stringify(sp_offset) "\n\t" \
|
||||
".byte " __stringify(sp_reg) "\n\t" \
|
||||
".byte " __stringify(type) "\n\t" \
|
||||
".byte " __stringify(end) "\n\t" \
|
||||
".balign 4 \n\t" \
|
||||
".popsection\n\t"
|
||||
|
||||
#define UNWIND_HINT_SAVE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_SAVE, 0)
|
||||
|
||||
#define UNWIND_HINT_RESTORE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_RESTORE, 0)
|
||||
|
||||
#endif /* __ASSEMBLY__ */
|
||||
|
||||
#endif /* _ASM_X86_UNWIND_HINTS_H */
|
||||
|
@ -42,6 +42,8 @@
|
||||
#include <linux/export.h>
|
||||
#include <linux/jump_label.h>
|
||||
#include <linux/set_memory.h>
|
||||
#include <linux/task_work.h>
|
||||
#include <linux/hardirq.h>
|
||||
|
||||
#include <asm/intel-family.h>
|
||||
#include <asm/processor.h>
|
||||
@ -1086,23 +1088,6 @@ static void mce_clear_state(unsigned long *toclear)
|
||||
}
|
||||
}
|
||||
|
||||
static int do_memory_failure(struct mce *m)
|
||||
{
|
||||
int flags = MF_ACTION_REQUIRED;
|
||||
int ret;
|
||||
|
||||
pr_err("Uncorrected hardware memory error in user-access at %llx", m->addr);
|
||||
if (!(m->mcgstatus & MCG_STATUS_RIPV))
|
||||
flags |= MF_MUST_KILL;
|
||||
ret = memory_failure(m->addr >> PAGE_SHIFT, flags);
|
||||
if (ret)
|
||||
pr_err("Memory error not recovered");
|
||||
else
|
||||
set_mce_nospec(m->addr >> PAGE_SHIFT);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Cases where we avoid rendezvous handler timeout:
|
||||
* 1) If this CPU is offline.
|
||||
@ -1204,6 +1189,29 @@ static void __mc_scan_banks(struct mce *m, struct mce *final,
|
||||
*m = *final;
|
||||
}
|
||||
|
||||
static void kill_me_now(struct callback_head *ch)
|
||||
{
|
||||
force_sig(SIGBUS);
|
||||
}
|
||||
|
||||
static void kill_me_maybe(struct callback_head *cb)
|
||||
{
|
||||
struct task_struct *p = container_of(cb, struct task_struct, mce_kill_me);
|
||||
int flags = MF_ACTION_REQUIRED;
|
||||
|
||||
pr_err("Uncorrected hardware memory error in user-access at %llx", p->mce_addr);
|
||||
if (!(p->mce_status & MCG_STATUS_RIPV))
|
||||
flags |= MF_MUST_KILL;
|
||||
|
||||
if (!memory_failure(p->mce_addr >> PAGE_SHIFT, flags)) {
|
||||
set_mce_nospec(p->mce_addr >> PAGE_SHIFT);
|
||||
return;
|
||||
}
|
||||
|
||||
pr_err("Memory error not recovered");
|
||||
kill_me_now(cb);
|
||||
}
|
||||
|
||||
/*
|
||||
* The actual machine check handler. This only handles real
|
||||
* exceptions when something got corrupted coming in through int 18.
|
||||
@ -1222,7 +1230,7 @@ static void __mc_scan_banks(struct mce *m, struct mce *final,
|
||||
* backing the user stack, tracing that reads the user stack will cause
|
||||
* potentially infinite recursion.
|
||||
*/
|
||||
void notrace do_machine_check(struct pt_regs *regs, long error_code)
|
||||
void noinstr do_machine_check(struct pt_regs *regs, long error_code)
|
||||
{
|
||||
DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
|
||||
DECLARE_BITMAP(toclear, MAX_NR_BANKS);
|
||||
@ -1259,7 +1267,7 @@ void notrace do_machine_check(struct pt_regs *regs, long error_code)
|
||||
if (__mc_check_crashing_cpu(cpu))
|
||||
return;
|
||||
|
||||
ist_enter(regs);
|
||||
nmi_enter();
|
||||
|
||||
this_cpu_inc(mce_exception_count);
|
||||
|
||||
@ -1352,23 +1360,24 @@ void notrace do_machine_check(struct pt_regs *regs, long error_code)
|
||||
|
||||
/* Fault was in user mode and we need to take some action */
|
||||
if ((m.cs & 3) == 3) {
|
||||
ist_begin_non_atomic(regs);
|
||||
local_irq_enable();
|
||||
/* If this triggers there is no way to recover. Die hard. */
|
||||
BUG_ON(!on_thread_stack() || !user_mode(regs));
|
||||
|
||||
if (kill_it || do_memory_failure(&m))
|
||||
force_sig(SIGBUS);
|
||||
local_irq_disable();
|
||||
ist_end_non_atomic();
|
||||
current->mce_addr = m.addr;
|
||||
current->mce_status = m.mcgstatus;
|
||||
current->mce_kill_me.func = kill_me_maybe;
|
||||
if (kill_it)
|
||||
current->mce_kill_me.func = kill_me_now;
|
||||
task_work_add(current, ¤t->mce_kill_me, true);
|
||||
} else {
|
||||
if (!fixup_exception(regs, X86_TRAP_MC, error_code, 0))
|
||||
mce_panic("Failed kernel mode recovery", &m, msg);
|
||||
}
|
||||
|
||||
out_ist:
|
||||
ist_exit(regs);
|
||||
nmi_exit();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(do_machine_check);
|
||||
NOKPROBE_SYMBOL(do_machine_check);
|
||||
|
||||
#ifndef CONFIG_MEMORY_FAILURE
|
||||
int memory_failure(unsigned long pfn, int flags)
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/smp.h>
|
||||
#include <linux/hardirq.h>
|
||||
|
||||
#include <asm/processor.h>
|
||||
#include <asm/traps.h>
|
||||
@ -24,7 +25,7 @@ static void pentium_machine_check(struct pt_regs *regs, long error_code)
|
||||
{
|
||||
u32 loaddr, hi, lotype;
|
||||
|
||||
ist_enter(regs);
|
||||
nmi_enter();
|
||||
|
||||
rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi);
|
||||
rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi);
|
||||
@ -39,7 +40,7 @@ static void pentium_machine_check(struct pt_regs *regs, long error_code)
|
||||
|
||||
add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
|
||||
|
||||
ist_exit(regs);
|
||||
nmi_exit();
|
||||
}
|
||||
|
||||
/* Set up machine check reporting for processors with Intel style MCE: */
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/hardirq.h>
|
||||
|
||||
#include <asm/processor.h>
|
||||
#include <asm/traps.h>
|
||||
@ -18,12 +19,12 @@
|
||||
/* Machine check handler for WinChip C6: */
|
||||
static void winchip_machine_check(struct pt_regs *regs, long error_code)
|
||||
{
|
||||
ist_enter(regs);
|
||||
nmi_enter();
|
||||
|
||||
pr_emerg("CPU0: Machine Check Exception.\n");
|
||||
add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
|
||||
|
||||
ist_exit(regs);
|
||||
nmi_exit();
|
||||
}
|
||||
|
||||
/* Set up machine check reporting on the Winchip C6 series */
|
||||
|
@ -282,7 +282,8 @@ static inline void tramp_free(void *tramp) { }
|
||||
|
||||
/* Defined as markers to the end of the ftrace default trampolines */
|
||||
extern void ftrace_regs_caller_end(void);
|
||||
extern void ftrace_epilogue(void);
|
||||
extern void ftrace_regs_caller_ret(void);
|
||||
extern void ftrace_caller_end(void);
|
||||
extern void ftrace_caller_op_ptr(void);
|
||||
extern void ftrace_regs_caller_op_ptr(void);
|
||||
|
||||
@ -334,7 +335,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
|
||||
call_offset = (unsigned long)ftrace_regs_call;
|
||||
} else {
|
||||
start_offset = (unsigned long)ftrace_caller;
|
||||
end_offset = (unsigned long)ftrace_epilogue;
|
||||
end_offset = (unsigned long)ftrace_caller_end;
|
||||
op_offset = (unsigned long)ftrace_caller_op_ptr;
|
||||
call_offset = (unsigned long)ftrace_call;
|
||||
}
|
||||
@ -366,6 +367,13 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
|
||||
if (WARN_ON(ret < 0))
|
||||
goto fail;
|
||||
|
||||
if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) {
|
||||
ip = trampoline + (ftrace_regs_caller_ret - ftrace_regs_caller);
|
||||
ret = probe_kernel_read(ip, (void *)retq, RET_SIZE);
|
||||
if (WARN_ON(ret < 0))
|
||||
goto fail;
|
||||
}
|
||||
|
||||
/*
|
||||
* The address of the ftrace_ops that is used for this trampoline
|
||||
* is stored at the end of the trampoline. This will be used to
|
||||
@ -433,7 +441,7 @@ void set_ftrace_ops_ro(void)
|
||||
end_offset = (unsigned long)ftrace_regs_caller_end;
|
||||
} else {
|
||||
start_offset = (unsigned long)ftrace_caller;
|
||||
end_offset = (unsigned long)ftrace_epilogue;
|
||||
end_offset = (unsigned long)ftrace_caller_end;
|
||||
}
|
||||
size = end_offset - start_offset;
|
||||
size = size + RET_SIZE + sizeof(void *);
|
||||
|
@ -189,5 +189,5 @@ return_to_handler:
|
||||
movl %eax, %ecx
|
||||
popl %edx
|
||||
popl %eax
|
||||
JMP_NOSPEC %ecx
|
||||
JMP_NOSPEC ecx
|
||||
#endif
|
||||
|
@ -23,7 +23,7 @@
|
||||
#endif /* CONFIG_FRAME_POINTER */
|
||||
|
||||
/* Size of stack used to save mcount regs in save_mcount_regs */
|
||||
#define MCOUNT_REG_SIZE (SS+8 + MCOUNT_FRAME_SIZE)
|
||||
#define MCOUNT_REG_SIZE (FRAME_SIZE + MCOUNT_FRAME_SIZE)
|
||||
|
||||
/*
|
||||
* gcc -pg option adds a call to 'mcount' in most functions.
|
||||
@ -77,7 +77,7 @@
|
||||
/*
|
||||
* We add enough stack to save all regs.
|
||||
*/
|
||||
subq $(MCOUNT_REG_SIZE - MCOUNT_FRAME_SIZE), %rsp
|
||||
subq $(FRAME_SIZE), %rsp
|
||||
movq %rax, RAX(%rsp)
|
||||
movq %rcx, RCX(%rsp)
|
||||
movq %rdx, RDX(%rsp)
|
||||
@ -157,8 +157,12 @@ SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL)
|
||||
* think twice before adding any new code or changing the
|
||||
* layout here.
|
||||
*/
|
||||
SYM_INNER_LABEL(ftrace_epilogue, SYM_L_GLOBAL)
|
||||
SYM_INNER_LABEL(ftrace_caller_end, SYM_L_GLOBAL)
|
||||
|
||||
jmp ftrace_epilogue
|
||||
SYM_FUNC_END(ftrace_caller);
|
||||
|
||||
SYM_FUNC_START(ftrace_epilogue)
|
||||
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
|
||||
SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL)
|
||||
jmp ftrace_stub
|
||||
@ -170,14 +174,12 @@ SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL)
|
||||
*/
|
||||
SYM_INNER_LABEL_ALIGN(ftrace_stub, SYM_L_WEAK)
|
||||
retq
|
||||
SYM_FUNC_END(ftrace_caller)
|
||||
SYM_FUNC_END(ftrace_epilogue)
|
||||
|
||||
SYM_FUNC_START(ftrace_regs_caller)
|
||||
/* Save the current flags before any operations that can change them */
|
||||
pushfq
|
||||
|
||||
UNWIND_HINT_SAVE
|
||||
|
||||
/* added 8 bytes to save flags */
|
||||
save_mcount_regs 8
|
||||
/* save_mcount_regs fills in first two parameters */
|
||||
@ -233,10 +235,13 @@ SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL)
|
||||
movq ORIG_RAX(%rsp), %rax
|
||||
movq %rax, MCOUNT_REG_SIZE-8(%rsp)
|
||||
|
||||
/* If ORIG_RAX is anything but zero, make this a call to that */
|
||||
/*
|
||||
* If ORIG_RAX is anything but zero, make this a call to that.
|
||||
* See arch_ftrace_set_direct_caller().
|
||||
*/
|
||||
movq ORIG_RAX(%rsp), %rax
|
||||
cmpq $0, %rax
|
||||
je 1f
|
||||
testq %rax, %rax
|
||||
jz 1f
|
||||
|
||||
/* Swap the flags with orig_rax */
|
||||
movq MCOUNT_REG_SIZE(%rsp), %rdi
|
||||
@ -244,20 +249,14 @@ SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL)
|
||||
movq %rax, MCOUNT_REG_SIZE(%rsp)
|
||||
|
||||
restore_mcount_regs 8
|
||||
/* Restore flags */
|
||||
popfq
|
||||
|
||||
jmp 2f
|
||||
SYM_INNER_LABEL(ftrace_regs_caller_ret, SYM_L_GLOBAL);
|
||||
UNWIND_HINT_RET_OFFSET
|
||||
jmp ftrace_epilogue
|
||||
|
||||
1: restore_mcount_regs
|
||||
|
||||
|
||||
2:
|
||||
/*
|
||||
* The stack layout is nondetermistic here, depending on which path was
|
||||
* taken. This confuses objtool and ORC, rightfully so. For now,
|
||||
* pretend the stack always looks like the non-direct case.
|
||||
*/
|
||||
UNWIND_HINT_RESTORE
|
||||
|
||||
/* Restore flags */
|
||||
popfq
|
||||
|
||||
@ -268,7 +267,6 @@ SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL)
|
||||
* to the return.
|
||||
*/
|
||||
SYM_INNER_LABEL(ftrace_regs_caller_end, SYM_L_GLOBAL)
|
||||
|
||||
jmp ftrace_epilogue
|
||||
|
||||
SYM_FUNC_END(ftrace_regs_caller)
|
||||
@ -303,7 +301,7 @@ trace:
|
||||
* function tracing is enabled.
|
||||
*/
|
||||
movq ftrace_trace_function, %r8
|
||||
CALL_NOSPEC %r8
|
||||
CALL_NOSPEC r8
|
||||
restore_mcount_regs
|
||||
|
||||
jmp fgraph_trace
|
||||
@ -340,6 +338,6 @@ SYM_CODE_START(return_to_handler)
|
||||
movq 8(%rsp), %rdx
|
||||
movq (%rsp), %rax
|
||||
addq $24, %rsp
|
||||
JMP_NOSPEC %rdi
|
||||
JMP_NOSPEC rdi
|
||||
SYM_CODE_END(return_to_handler)
|
||||
#endif
|
||||
|
@ -37,10 +37,12 @@
|
||||
#include <linux/mm.h>
|
||||
#include <linux/smp.h>
|
||||
#include <linux/io.h>
|
||||
#include <linux/hardirq.h>
|
||||
#include <linux/atomic.h>
|
||||
|
||||
#include <asm/stacktrace.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/debugreg.h>
|
||||
#include <linux/atomic.h>
|
||||
#include <asm/text-patching.h>
|
||||
#include <asm/ftrace.h>
|
||||
#include <asm/traps.h>
|
||||
@ -82,78 +84,6 @@ static inline void cond_local_irq_disable(struct pt_regs *regs)
|
||||
local_irq_disable();
|
||||
}
|
||||
|
||||
/*
|
||||
* In IST context, we explicitly disable preemption. This serves two
|
||||
* purposes: it makes it much less likely that we would accidentally
|
||||
* schedule in IST context and it will force a warning if we somehow
|
||||
* manage to schedule by accident.
|
||||
*/
|
||||
void ist_enter(struct pt_regs *regs)
|
||||
{
|
||||
if (user_mode(regs)) {
|
||||
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
|
||||
} else {
|
||||
/*
|
||||
* We might have interrupted pretty much anything. In
|
||||
* fact, if we're a machine check, we can even interrupt
|
||||
* NMI processing. We don't want in_nmi() to return true,
|
||||
* but we need to notify RCU.
|
||||
*/
|
||||
rcu_nmi_enter();
|
||||
}
|
||||
|
||||
preempt_disable();
|
||||
|
||||
/* This code is a bit fragile. Test it. */
|
||||
RCU_LOCKDEP_WARN(!rcu_is_watching(), "ist_enter didn't work");
|
||||
}
|
||||
NOKPROBE_SYMBOL(ist_enter);
|
||||
|
||||
void ist_exit(struct pt_regs *regs)
|
||||
{
|
||||
preempt_enable_no_resched();
|
||||
|
||||
if (!user_mode(regs))
|
||||
rcu_nmi_exit();
|
||||
}
|
||||
|
||||
/**
|
||||
* ist_begin_non_atomic() - begin a non-atomic section in an IST exception
|
||||
* @regs: regs passed to the IST exception handler
|
||||
*
|
||||
* IST exception handlers normally cannot schedule. As a special
|
||||
* exception, if the exception interrupted userspace code (i.e.
|
||||
* user_mode(regs) would return true) and the exception was not
|
||||
* a double fault, it can be safe to schedule. ist_begin_non_atomic()
|
||||
* begins a non-atomic section within an ist_enter()/ist_exit() region.
|
||||
* Callers are responsible for enabling interrupts themselves inside
|
||||
* the non-atomic section, and callers must call ist_end_non_atomic()
|
||||
* before ist_exit().
|
||||
*/
|
||||
void ist_begin_non_atomic(struct pt_regs *regs)
|
||||
{
|
||||
BUG_ON(!user_mode(regs));
|
||||
|
||||
/*
|
||||
* Sanity check: we need to be on the normal thread stack. This
|
||||
* will catch asm bugs and any attempt to use ist_preempt_enable
|
||||
* from double_fault.
|
||||
*/
|
||||
BUG_ON(!on_thread_stack());
|
||||
|
||||
preempt_enable_no_resched();
|
||||
}
|
||||
|
||||
/**
|
||||
* ist_end_non_atomic() - begin a non-atomic section in an IST exception
|
||||
*
|
||||
* Ends a non-atomic section started with ist_begin_non_atomic().
|
||||
*/
|
||||
void ist_end_non_atomic(void)
|
||||
{
|
||||
preempt_disable();
|
||||
}
|
||||
|
||||
int is_valid_bugaddr(unsigned long addr)
|
||||
{
|
||||
unsigned short ud;
|
||||
@ -363,7 +293,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsign
|
||||
* The net result is that our #GP handler will think that we
|
||||
* entered from usermode with the bad user context.
|
||||
*
|
||||
* No need for ist_enter here because we don't use RCU.
|
||||
* No need for nmi_enter() here because we don't use RCU.
|
||||
*/
|
||||
if (((long)regs->sp >> P4D_SHIFT) == ESPFIX_PGD_ENTRY &&
|
||||
regs->cs == __KERNEL_CS &&
|
||||
@ -398,7 +328,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsign
|
||||
}
|
||||
#endif
|
||||
|
||||
ist_enter(regs);
|
||||
nmi_enter();
|
||||
notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV);
|
||||
|
||||
tsk->thread.error_code = error_code;
|
||||
@ -592,19 +522,13 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Unlike any other non-IST entry, we can be called from a kprobe in
|
||||
* non-CONTEXT_KERNEL kernel mode or even during context tracking
|
||||
* state changes. Make sure that we wake up RCU even if we're coming
|
||||
* from kernel code.
|
||||
*
|
||||
* This means that we can't schedule even if we came from a
|
||||
* preemptible kernel context. That's okay.
|
||||
* Unlike any other non-IST entry, we can be called from pretty much
|
||||
* any location in the kernel through kprobes -- text_poke() will most
|
||||
* likely be handled by poke_int3_handler() above. This means this
|
||||
* handler is effectively NMI-like.
|
||||
*/
|
||||
if (!user_mode(regs)) {
|
||||
rcu_nmi_enter();
|
||||
preempt_disable();
|
||||
}
|
||||
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
|
||||
if (!user_mode(regs))
|
||||
nmi_enter();
|
||||
|
||||
#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
|
||||
if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
|
||||
@ -626,10 +550,8 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
|
||||
cond_local_irq_disable(regs);
|
||||
|
||||
exit:
|
||||
if (!user_mode(regs)) {
|
||||
preempt_enable_no_resched();
|
||||
rcu_nmi_exit();
|
||||
}
|
||||
if (!user_mode(regs))
|
||||
nmi_exit();
|
||||
}
|
||||
NOKPROBE_SYMBOL(do_int3);
|
||||
|
||||
@ -733,7 +655,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
|
||||
unsigned long dr6;
|
||||
int si_code;
|
||||
|
||||
ist_enter(regs);
|
||||
nmi_enter();
|
||||
|
||||
get_debugreg(dr6, 6);
|
||||
/*
|
||||
@ -826,7 +748,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
|
||||
debug_stack_usage_dec();
|
||||
|
||||
exit:
|
||||
ist_exit(regs);
|
||||
nmi_exit();
|
||||
}
|
||||
NOKPROBE_SYMBOL(do_debug);
|
||||
|
||||
|
@ -153,7 +153,7 @@ SYM_FUNC_START(csum_partial)
|
||||
negl %ebx
|
||||
lea 45f(%ebx,%ebx,2), %ebx
|
||||
testl %esi, %esi
|
||||
JMP_NOSPEC %ebx
|
||||
JMP_NOSPEC ebx
|
||||
|
||||
# Handle 2-byte-aligned regions
|
||||
20: addw (%esi), %ax
|
||||
@ -436,7 +436,7 @@ SYM_FUNC_START(csum_partial_copy_generic)
|
||||
andl $-32,%edx
|
||||
lea 3f(%ebx,%ebx), %ebx
|
||||
testl %esi, %esi
|
||||
JMP_NOSPEC %ebx
|
||||
JMP_NOSPEC ebx
|
||||
1: addl $64,%esi
|
||||
addl $64,%edi
|
||||
SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl)
|
||||
|
@ -7,15 +7,31 @@
|
||||
#include <asm/alternative-asm.h>
|
||||
#include <asm/export.h>
|
||||
#include <asm/nospec-branch.h>
|
||||
#include <asm/unwind_hints.h>
|
||||
#include <asm/frame.h>
|
||||
|
||||
.macro THUNK reg
|
||||
.section .text.__x86.indirect_thunk
|
||||
|
||||
.align 32
|
||||
SYM_FUNC_START(__x86_indirect_thunk_\reg)
|
||||
CFI_STARTPROC
|
||||
JMP_NOSPEC %\reg
|
||||
CFI_ENDPROC
|
||||
JMP_NOSPEC \reg
|
||||
SYM_FUNC_END(__x86_indirect_thunk_\reg)
|
||||
|
||||
SYM_FUNC_START_NOALIGN(__x86_retpoline_\reg)
|
||||
ANNOTATE_INTRA_FUNCTION_CALL
|
||||
call .Ldo_rop_\@
|
||||
.Lspec_trap_\@:
|
||||
UNWIND_HINT_EMPTY
|
||||
pause
|
||||
lfence
|
||||
jmp .Lspec_trap_\@
|
||||
.Ldo_rop_\@:
|
||||
mov %\reg, (%_ASM_SP)
|
||||
UNWIND_HINT_RET_OFFSET
|
||||
ret
|
||||
SYM_FUNC_END(__x86_retpoline_\reg)
|
||||
|
||||
.endm
|
||||
|
||||
/*
|
||||
@ -24,25 +40,24 @@ SYM_FUNC_END(__x86_indirect_thunk_\reg)
|
||||
* only see one instance of "__x86_indirect_thunk_\reg" rather
|
||||
* than one per register with the correct names. So we do it
|
||||
* the simple and nasty way...
|
||||
*
|
||||
* Worse, you can only have a single EXPORT_SYMBOL per line,
|
||||
* and CPP can't insert newlines, so we have to repeat everything
|
||||
* at least twice.
|
||||
*/
|
||||
#define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym)
|
||||
#define EXPORT_THUNK(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg)
|
||||
#define GENERATE_THUNK(reg) THUNK reg ; EXPORT_THUNK(reg)
|
||||
|
||||
GENERATE_THUNK(_ASM_AX)
|
||||
GENERATE_THUNK(_ASM_BX)
|
||||
GENERATE_THUNK(_ASM_CX)
|
||||
GENERATE_THUNK(_ASM_DX)
|
||||
GENERATE_THUNK(_ASM_SI)
|
||||
GENERATE_THUNK(_ASM_DI)
|
||||
GENERATE_THUNK(_ASM_BP)
|
||||
#ifdef CONFIG_64BIT
|
||||
GENERATE_THUNK(r8)
|
||||
GENERATE_THUNK(r9)
|
||||
GENERATE_THUNK(r10)
|
||||
GENERATE_THUNK(r11)
|
||||
GENERATE_THUNK(r12)
|
||||
GENERATE_THUNK(r13)
|
||||
GENERATE_THUNK(r14)
|
||||
GENERATE_THUNK(r15)
|
||||
#endif
|
||||
#define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym)
|
||||
#define EXPORT_THUNK(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg)
|
||||
#define EXPORT_RETPOLINE(reg) __EXPORT_THUNK(__x86_retpoline_ ## reg)
|
||||
|
||||
#undef GEN
|
||||
#define GEN(reg) THUNK reg
|
||||
#include <asm/GEN-for-each-reg.h>
|
||||
|
||||
#undef GEN
|
||||
#define GEN(reg) EXPORT_THUNK(reg)
|
||||
#include <asm/GEN-for-each-reg.h>
|
||||
|
||||
#undef GEN
|
||||
#define GEN(reg) EXPORT_RETPOLINE(reg)
|
||||
#include <asm/GEN-for-each-reg.h>
|
||||
|
@@ -21,7 +21,7 @@ SYM_FUNC_START(__efi_call)
mov %r8, %r9
mov %rcx, %r8
mov %rsi, %rcx
CALL_NOSPEC %rdi
CALL_NOSPEC rdi
leave
ret
SYM_FUNC_END(__efi_call)

@ -37,19 +37,16 @@ static void zcomp_strm_free(struct zcomp_strm *zstrm)
|
||||
if (!IS_ERR_OR_NULL(zstrm->tfm))
|
||||
crypto_free_comp(zstrm->tfm);
|
||||
free_pages((unsigned long)zstrm->buffer, 1);
|
||||
kfree(zstrm);
|
||||
zstrm->tfm = NULL;
|
||||
zstrm->buffer = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* allocate new zcomp_strm structure with ->tfm initialized by
|
||||
* backend, return NULL on error
|
||||
* Initialize zcomp_strm structure with ->tfm initialized by backend, and
|
||||
* ->buffer. Return a negative value on error.
|
||||
*/
|
||||
static struct zcomp_strm *zcomp_strm_alloc(struct zcomp *comp)
|
||||
static int zcomp_strm_init(struct zcomp_strm *zstrm, struct zcomp *comp)
|
||||
{
|
||||
struct zcomp_strm *zstrm = kmalloc(sizeof(*zstrm), GFP_KERNEL);
|
||||
if (!zstrm)
|
||||
return NULL;
|
||||
|
||||
zstrm->tfm = crypto_alloc_comp(comp->name, 0, 0);
|
||||
/*
|
||||
* allocate 2 pages. 1 for compressed data, plus 1 extra for the
|
||||
@ -58,9 +55,9 @@ static struct zcomp_strm *zcomp_strm_alloc(struct zcomp *comp)
|
||||
zstrm->buffer = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1);
|
||||
if (IS_ERR_OR_NULL(zstrm->tfm) || !zstrm->buffer) {
|
||||
zcomp_strm_free(zstrm);
|
||||
zstrm = NULL;
|
||||
return -ENOMEM;
|
||||
}
|
||||
return zstrm;
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool zcomp_available_algorithm(const char *comp)
|
||||
@ -113,12 +110,13 @@ ssize_t zcomp_available_show(const char *comp, char *buf)
|
||||
|
||||
struct zcomp_strm *zcomp_stream_get(struct zcomp *comp)
|
||||
{
|
||||
return *get_cpu_ptr(comp->stream);
|
||||
local_lock(&comp->stream->lock);
|
||||
return this_cpu_ptr(comp->stream);
|
||||
}
|
||||
|
||||
void zcomp_stream_put(struct zcomp *comp)
|
||||
{
|
||||
put_cpu_ptr(comp->stream);
|
||||
local_unlock(&comp->stream->lock);
|
||||
}
|
||||
|
||||
int zcomp_compress(struct zcomp_strm *zstrm,
|
||||
@ -159,17 +157,15 @@ int zcomp_cpu_up_prepare(unsigned int cpu, struct hlist_node *node)
|
||||
{
|
||||
struct zcomp *comp = hlist_entry(node, struct zcomp, node);
|
||||
struct zcomp_strm *zstrm;
|
||||
int ret;
|
||||
|
||||
if (WARN_ON(*per_cpu_ptr(comp->stream, cpu)))
|
||||
return 0;
|
||||
zstrm = per_cpu_ptr(comp->stream, cpu);
|
||||
local_lock_init(&zstrm->lock);
|
||||
|
||||
zstrm = zcomp_strm_alloc(comp);
|
||||
if (IS_ERR_OR_NULL(zstrm)) {
|
||||
ret = zcomp_strm_init(zstrm, comp);
|
||||
if (ret)
|
||||
pr_err("Can't allocate a compression stream\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
*per_cpu_ptr(comp->stream, cpu) = zstrm;
|
||||
return 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
int zcomp_cpu_dead(unsigned int cpu, struct hlist_node *node)
|
||||
@ -177,10 +173,8 @@ int zcomp_cpu_dead(unsigned int cpu, struct hlist_node *node)
|
||||
struct zcomp *comp = hlist_entry(node, struct zcomp, node);
|
||||
struct zcomp_strm *zstrm;
|
||||
|
||||
zstrm = *per_cpu_ptr(comp->stream, cpu);
|
||||
if (!IS_ERR_OR_NULL(zstrm))
|
||||
zcomp_strm_free(zstrm);
|
||||
*per_cpu_ptr(comp->stream, cpu) = NULL;
|
||||
zstrm = per_cpu_ptr(comp->stream, cpu);
|
||||
zcomp_strm_free(zstrm);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -188,7 +182,7 @@ static int zcomp_init(struct zcomp *comp)
|
||||
{
|
||||
int ret;
|
||||
|
||||
comp->stream = alloc_percpu(struct zcomp_strm *);
|
||||
comp->stream = alloc_percpu(struct zcomp_strm);
|
||||
if (!comp->stream)
|
||||
return -ENOMEM;
|
||||
|
||||
|
@ -5,8 +5,11 @@
|
||||
|
||||
#ifndef _ZCOMP_H_
|
||||
#define _ZCOMP_H_
|
||||
#include <linux/local_lock.h>
|
||||
|
||||
struct zcomp_strm {
|
||||
/* The members ->buffer and ->tfm are protected by ->lock. */
|
||||
local_lock_t lock;
|
||||
/* compression/decompression buffer */
|
||||
void *buffer;
|
||||
struct crypto_comp *tfm;
|
||||
@ -14,7 +17,7 @@ struct zcomp_strm {
|
||||
|
||||
/* dynamic per-device compression frontend */
|
||||
struct zcomp {
|
||||
struct zcomp_strm * __percpu *stream;
|
||||
struct zcomp_strm __percpu *stream;
|
||||
const char *name;
|
||||
struct hlist_node node;
|
||||
};
|
||||
|
@ -18,6 +18,7 @@
|
||||
#include <linux/pid_namespace.h>
|
||||
|
||||
#include <linux/cn_proc.h>
|
||||
#include <linux/local_lock.h>
|
||||
|
||||
/*
|
||||
* Size of a cn_msg followed by a proc_event structure. Since the
|
||||
@ -38,25 +39,31 @@ static inline struct cn_msg *buffer_to_cn_msg(__u8 *buffer)
|
||||
static atomic_t proc_event_num_listeners = ATOMIC_INIT(0);
|
||||
static struct cb_id cn_proc_event_id = { CN_IDX_PROC, CN_VAL_PROC };
|
||||
|
||||
/* proc_event_counts is used as the sequence number of the netlink message */
|
||||
static DEFINE_PER_CPU(__u32, proc_event_counts) = { 0 };
|
||||
/* local_event.count is used as the sequence number of the netlink message */
|
||||
struct local_event {
|
||||
local_lock_t lock;
|
||||
__u32 count;
|
||||
};
|
||||
static DEFINE_PER_CPU(struct local_event, local_event) = {
|
||||
.lock = INIT_LOCAL_LOCK(lock),
|
||||
};
|
||||
|
||||
static inline void send_msg(struct cn_msg *msg)
|
||||
{
|
||||
preempt_disable();
|
||||
local_lock(&local_event.lock);
|
||||
|
||||
msg->seq = __this_cpu_inc_return(proc_event_counts) - 1;
|
||||
msg->seq = __this_cpu_inc_return(local_event.count) - 1;
|
||||
((struct proc_event *)msg->data)->cpu = smp_processor_id();
|
||||
|
||||
/*
|
||||
* Preemption remains disabled during send to ensure the messages are
|
||||
* ordered according to their sequence numbers.
|
||||
* local_lock() disables preemption during send to ensure the messages
|
||||
* are ordered according to their sequence numbers.
|
||||
*
|
||||
* If cn_netlink_send() fails, the data is not sent.
|
||||
*/
|
||||
cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_NOWAIT);
|
||||
|
||||
preempt_enable();
|
||||
local_unlock(&local_event.lock);
|
||||
}
|
||||
|
||||
void proc_fork_connector(struct task_struct *task)
|
||||
|
@@ -5499,7 +5499,7 @@ struct drm_dp_aux *drm_dp_mst_dsc_aux_for_port(struct drm_dp_mst_port *port)
{
struct drm_dp_mst_port *immediate_upstream_port;
struct drm_dp_mst_port *fec_port;
struct drm_dp_desc desc = { 0 };
struct drm_dp_desc desc = { };
u8 endpoint_fec;
u8 endpoint_dsc;

@ -8,6 +8,7 @@
|
||||
#include <linux/slab.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/buffer_head.h>
|
||||
#include <linux/local_lock.h>
|
||||
|
||||
#include "squashfs_fs.h"
|
||||
#include "squashfs_fs_sb.h"
|
||||
@ -20,7 +21,8 @@
|
||||
*/
|
||||
|
||||
struct squashfs_stream {
|
||||
void *stream;
|
||||
void *stream;
|
||||
local_lock_t lock;
|
||||
};
|
||||
|
||||
void *squashfs_decompressor_create(struct squashfs_sb_info *msblk,
|
||||
@ -41,6 +43,7 @@ void *squashfs_decompressor_create(struct squashfs_sb_info *msblk,
|
||||
err = PTR_ERR(stream->stream);
|
||||
goto out;
|
||||
}
|
||||
local_lock_init(&stream->lock);
|
||||
}
|
||||
|
||||
kfree(comp_opts);
|
||||
@ -75,12 +78,16 @@ void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk)
|
||||
int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh,
|
||||
int b, int offset, int length, struct squashfs_page_actor *output)
|
||||
{
|
||||
struct squashfs_stream __percpu *percpu =
|
||||
(struct squashfs_stream __percpu *) msblk->stream;
|
||||
struct squashfs_stream *stream = get_cpu_ptr(percpu);
|
||||
int res = msblk->decompressor->decompress(msblk, stream->stream, bh, b,
|
||||
offset, length, output);
|
||||
put_cpu_ptr(stream);
|
||||
struct squashfs_stream *stream;
|
||||
int res;
|
||||
|
||||
local_lock(&msblk->stream->lock);
|
||||
stream = this_cpu_ptr(msblk->stream);
|
||||
|
||||
res = msblk->decompressor->decompress(msblk, stream->stream, bh, b,
|
||||
offset, length, output);
|
||||
|
||||
local_unlock(&msblk->stream->lock);
|
||||
|
||||
if (res < 0)
|
||||
ERROR("%s decompression failed, data probably corrupt\n",
|
||||
|
@@ -15,9 +15,20 @@
static void __used __section(.discard.func_stack_frame_non_standard) \
*__func_stack_frame_non_standard_##func = func

/*
* This macro indicates that the following intra-function call is valid.
* Any non-annotated intra-function call will cause objtool to issue a warning.
*/
#define ANNOTATE_INTRA_FUNCTION_CALL \
999: \
.pushsection .discard.intra_function_calls; \
.long 999b; \
.popsection;

#else /* !CONFIG_STACK_VALIDATION */

#define STACK_FRAME_NON_STANDARD(func)
#define ANNOTATE_INTRA_FUNCTION_CALL

#endif /* CONFIG_STACK_VALIDATION */

@ -2,15 +2,6 @@
|
||||
#ifndef _LINUX_FTRACE_IRQ_H
|
||||
#define _LINUX_FTRACE_IRQ_H
|
||||
|
||||
|
||||
#ifdef CONFIG_FTRACE_NMI_ENTER
|
||||
extern void arch_ftrace_nmi_enter(void);
|
||||
extern void arch_ftrace_nmi_exit(void);
|
||||
#else
|
||||
static inline void arch_ftrace_nmi_enter(void) { }
|
||||
static inline void arch_ftrace_nmi_exit(void) { }
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_HWLAT_TRACER
|
||||
extern bool trace_hwlat_callback_enabled;
|
||||
extern void trace_hwlat_callback(bool enter);
|
||||
@ -22,12 +13,10 @@ static inline void ftrace_nmi_enter(void)
|
||||
if (trace_hwlat_callback_enabled)
|
||||
trace_hwlat_callback(true);
|
||||
#endif
|
||||
arch_ftrace_nmi_enter();
|
||||
}
|
||||
|
||||
static inline void ftrace_nmi_exit(void)
|
||||
{
|
||||
arch_ftrace_nmi_exit();
|
||||
#ifdef CONFIG_HWLAT_TRACER
|
||||
if (trace_hwlat_callback_enabled)
|
||||
trace_hwlat_callback(false);
|
||||
|
@ -2,31 +2,28 @@
|
||||
#ifndef LINUX_HARDIRQ_H
|
||||
#define LINUX_HARDIRQ_H
|
||||
|
||||
#include <linux/context_tracking_state.h>
|
||||
#include <linux/preempt.h>
|
||||
#include <linux/lockdep.h>
|
||||
#include <linux/ftrace_irq.h>
|
||||
#include <linux/vtime.h>
|
||||
#include <asm/hardirq.h>
|
||||
|
||||
|
||||
extern void synchronize_irq(unsigned int irq);
|
||||
extern bool synchronize_hardirq(unsigned int irq);
|
||||
|
||||
#if defined(CONFIG_TINY_RCU)
|
||||
|
||||
static inline void rcu_nmi_enter(void)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void rcu_nmi_exit(void)
|
||||
{
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NO_HZ_FULL
|
||||
void __rcu_irq_enter_check_tick(void);
|
||||
#else
|
||||
extern void rcu_nmi_enter(void);
|
||||
extern void rcu_nmi_exit(void);
|
||||
static inline void __rcu_irq_enter_check_tick(void) { }
|
||||
#endif
|
||||
|
||||
static __always_inline void rcu_irq_enter_check_tick(void)
|
||||
{
|
||||
if (context_tracking_enabled())
|
||||
__rcu_irq_enter_check_tick();
|
||||
}
|
||||
|
||||
/*
|
||||
* It is safe to do non-atomic ops on ->hardirq_context,
|
||||
* because NMI handlers may not preempt and the ops are
|
||||
@ -65,14 +62,34 @@ extern void irq_exit(void);
|
||||
#define arch_nmi_exit() do { } while (0)
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_TINY_RCU
|
||||
static inline void rcu_nmi_enter(void) { }
|
||||
static inline void rcu_nmi_exit(void) { }
|
||||
#else
|
||||
extern void rcu_nmi_enter(void);
|
||||
extern void rcu_nmi_exit(void);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* NMI vs Tracing
|
||||
* --------------
|
||||
*
|
||||
* We must not land in a tracer until (or after) we've changed preempt_count
|
||||
* such that in_nmi() becomes true. To that effect all NMI C entry points must
|
||||
* be marked 'notrace' and call nmi_enter() as soon as possible.
|
||||
*/
|
||||
|
||||
/*
|
||||
* nmi_enter() can nest up to 15 times; see NMI_BITS.
|
||||
*/
|
||||
#define nmi_enter() \
|
||||
do { \
|
||||
arch_nmi_enter(); \
|
||||
printk_nmi_enter(); \
|
||||
lockdep_off(); \
|
||||
ftrace_nmi_enter(); \
|
||||
BUG_ON(in_nmi()); \
|
||||
preempt_count_add(NMI_OFFSET + HARDIRQ_OFFSET); \
|
||||
BUG_ON(in_nmi() == NMI_MASK); \
|
||||
__preempt_count_add(NMI_OFFSET + HARDIRQ_OFFSET); \
|
||||
rcu_nmi_enter(); \
|
||||
lockdep_hardirq_enter(); \
|
||||
} while (0)
|
||||
@ -82,7 +99,7 @@ extern void irq_exit(void);
|
||||
lockdep_hardirq_exit(); \
|
||||
rcu_nmi_exit(); \
|
||||
BUG_ON(!in_nmi()); \
|
||||
preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET); \
|
||||
__preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET); \
|
||||
ftrace_nmi_exit(); \
|
||||
lockdep_on(); \
|
||||
printk_nmi_exit(); \
|
||||
|
@@ -171,7 +171,7 @@ static inline bool idr_is_empty(const struct idr *idr)
*/
static inline void idr_preload_end(void)
{
preempt_enable();
local_unlock(&radix_tree_preloads.lock);
}

/**

include/linux/local_lock.h (new file, 54 lines)
@@ -0,0 +1,54 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_LOCAL_LOCK_H
#define _LINUX_LOCAL_LOCK_H

#include <linux/local_lock_internal.h>

/**
* local_lock_init - Runtime initialize a lock instance
*/
#define local_lock_init(lock) __local_lock_init(lock)

/**
* local_lock - Acquire a per CPU local lock
* @lock: The lock variable
*/
#define local_lock(lock) __local_lock(lock)

/**
* local_lock_irq - Acquire a per CPU local lock and disable interrupts
* @lock: The lock variable
*/
#define local_lock_irq(lock) __local_lock_irq(lock)

/**
* local_lock_irqsave - Acquire a per CPU local lock, save and disable
* interrupts
* @lock: The lock variable
* @flags: Storage for interrupt flags
*/
#define local_lock_irqsave(lock, flags) \
__local_lock_irqsave(lock, flags)

/**
* local_unlock - Release a per CPU local lock
* @lock: The lock variable
*/
#define local_unlock(lock) __local_unlock(lock)

/**
* local_unlock_irq - Release a per CPU local lock and enable interrupts
* @lock: The lock variable
*/
#define local_unlock_irq(lock) __local_unlock_irq(lock)

/**
* local_unlock_irqrestore - Release a per CPU local lock and restore
* interrupt flags
* @lock: The lock variable
* @flags: Interrupt flags to restore
*/
#define local_unlock_irqrestore(lock, flags) \
__local_unlock_irqrestore(lock, flags)

#endif
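
For orientation, a minimal usage sketch of the local_lock API defined above (and implemented in local_lock_internal.h just below). This is not part of the commit; the names my_pcpu_state and my_pcpu_inc are illustrative. It mirrors the pattern the zcomp, squashfs and cn_proc hunks elsewhere in this diff convert to: per-CPU data bundled with a local_lock_t and only touched between local_lock() and local_unlock().

#include <linux/local_lock.h>
#include <linux/percpu.h>

/* Illustrative per-CPU state; ->lock protects ->count on the local CPU. */
struct my_pcpu_state {
	local_lock_t lock;
	unsigned long count;
};

static DEFINE_PER_CPU(struct my_pcpu_state, my_pcpu_state) = {
	.lock = INIT_LOCAL_LOCK(lock),
};

static void my_pcpu_inc(void)
{
	struct my_pcpu_state *s;

	/* In this tree __local_lock() is preempt_disable() plus lockdep annotations. */
	local_lock(&my_pcpu_state.lock);
	s = this_cpu_ptr(&my_pcpu_state);
	s->count++;
	local_unlock(&my_pcpu_state.lock);
}
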
include/linux/local_lock_internal.h (new file, 90 lines)
@@ -0,0 +1,90 @@
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _LINUX_LOCAL_LOCK_H
|
||||
# error "Do not include directly, include linux/local_lock.h"
|
||||
#endif
|
||||
|
||||
#include <linux/percpu-defs.h>
|
||||
#include <linux/lockdep.h>
|
||||
|
||||
typedef struct {
|
||||
#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
||||
struct lockdep_map dep_map;
|
||||
struct task_struct *owner;
|
||||
#endif
|
||||
} local_lock_t;
|
||||
|
||||
#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
||||
# define LL_DEP_MAP_INIT(lockname) \
|
||||
.dep_map = { \
|
||||
.name = #lockname, \
|
||||
.wait_type_inner = LD_WAIT_CONFIG, \
|
||||
}
|
||||
#else
|
||||
# define LL_DEP_MAP_INIT(lockname)
|
||||
#endif
|
||||
|
||||
#define INIT_LOCAL_LOCK(lockname) { LL_DEP_MAP_INIT(lockname) }
|
||||
|
||||
#define __local_lock_init(lock) \
|
||||
do { \
|
||||
static struct lock_class_key __key; \
|
||||
\
|
||||
debug_check_no_locks_freed((void *)lock, sizeof(*lock));\
|
||||
lockdep_init_map_wait(&(lock)->dep_map, #lock, &__key, 0, LD_WAIT_CONFIG);\
|
||||
} while (0)
|
||||
|
||||
#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
||||
static inline void local_lock_acquire(local_lock_t *l)
|
||||
{
|
||||
lock_map_acquire(&l->dep_map);
|
||||
DEBUG_LOCKS_WARN_ON(l->owner);
|
||||
l->owner = current;
|
||||
}
|
||||
|
||||
static inline void local_lock_release(local_lock_t *l)
|
||||
{
|
||||
DEBUG_LOCKS_WARN_ON(l->owner != current);
|
||||
l->owner = NULL;
|
||||
lock_map_release(&l->dep_map);
|
||||
}
|
||||
|
||||
#else /* CONFIG_DEBUG_LOCK_ALLOC */
|
||||
static inline void local_lock_acquire(local_lock_t *l) { }
|
||||
static inline void local_lock_release(local_lock_t *l) { }
|
||||
#endif /* !CONFIG_DEBUG_LOCK_ALLOC */
|
||||
|
||||
#define __local_lock(lock) \
|
||||
do { \
|
||||
preempt_disable(); \
|
||||
local_lock_acquire(this_cpu_ptr(lock)); \
|
||||
} while (0)
|
||||
|
||||
#define __local_lock_irq(lock) \
|
||||
do { \
|
||||
local_irq_disable(); \
|
||||
local_lock_acquire(this_cpu_ptr(lock)); \
|
||||
} while (0)
|
||||
|
||||
#define __local_lock_irqsave(lock, flags) \
|
||||
do { \
|
||||
local_irq_save(flags); \
|
||||
local_lock_acquire(this_cpu_ptr(lock)); \
|
||||
} while (0)
|
||||
|
||||
#define __local_unlock(lock) \
|
||||
do { \
|
||||
local_lock_release(this_cpu_ptr(lock)); \
|
||||
preempt_enable(); \
|
||||
} while (0)
|
||||
|
||||
#define __local_unlock_irq(lock) \
|
||||
do { \
|
||||
local_lock_release(this_cpu_ptr(lock)); \
|
||||
local_irq_enable(); \
|
||||
} while (0)
|
||||
|
||||
#define __local_unlock_irqrestore(lock, flags) \
|
||||
do { \
|
||||
local_lock_release(this_cpu_ptr(lock)); \
|
||||
local_irq_restore(flags); \
|
||||
} while (0)
|
@ -308,8 +308,27 @@ extern void lockdep_set_selftest_task(struct task_struct *task);
|
||||
|
||||
extern void lockdep_init_task(struct task_struct *task);
|
||||
|
||||
extern void lockdep_off(void);
|
||||
extern void lockdep_on(void);
|
||||
/*
|
||||
* Split the recrursion counter in two to readily detect 'off' vs recursion.
|
||||
*/
|
||||
#define LOCKDEP_RECURSION_BITS 16
|
||||
#define LOCKDEP_OFF (1U << LOCKDEP_RECURSION_BITS)
|
||||
#define LOCKDEP_RECURSION_MASK (LOCKDEP_OFF - 1)
|
||||
|
||||
/*
|
||||
* lockdep_{off,on}() are macros to avoid tracing and kprobes; not inlines due
|
||||
* to header dependencies.
|
||||
*/
|
||||
|
||||
#define lockdep_off() \
|
||||
do { \
|
||||
current->lockdep_recursion += LOCKDEP_OFF; \
|
||||
} while (0)
|
||||
|
||||
#define lockdep_on() \
|
||||
do { \
|
||||
current->lockdep_recursion -= LOCKDEP_OFF; \
|
||||
} while (0)
|
||||
|
||||
extern void lockdep_register_key(struct lock_class_key *key);
|
||||
extern void lockdep_unregister_key(struct lock_class_key *key);
|
||||
|
@@ -26,13 +26,13 @@
* PREEMPT_MASK: 0x000000ff
* SOFTIRQ_MASK: 0x0000ff00
* HARDIRQ_MASK: 0x000f0000
* NMI_MASK: 0x00100000
* NMI_MASK: 0x00f00000
* PREEMPT_NEED_RESCHED: 0x80000000
*/
#define PREEMPT_BITS 8
#define SOFTIRQ_BITS 8
#define HARDIRQ_BITS 4
#define NMI_BITS 1
#define NMI_BITS 4

#define PREEMPT_SHIFT 0
#define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS)

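The hardirq.h hunk earlier in this diff notes that nmi_enter() can now nest up to 15 times; with NMI_BITS raised to 4 in the preempt.h hunk above, that limit follows directly. A quick illustrative check (these lines are a sketch of the preempt.h arithmetic, not additional diff content):

#define NMI_BITS	4
#define NMI_SHIFT	(HARDIRQ_SHIFT + HARDIRQ_BITS)		/* 16 + 4 = 20, as in preempt.h */
#define NMI_MASK	(((1UL << NMI_BITS) - 1) << NMI_SHIFT)	/* 0xf << 20 = 0x00f00000 */
/* The counter field holds (1 << NMI_BITS) - 1 == 15 nested nmi_enter() calls. */
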
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/xarray.h>
|
||||
#include <linux/local_lock.h>
|
||||
|
||||
/* Keep unconverted code working */
|
||||
#define radix_tree_root xarray
|
||||
#define radix_tree_node xa_node
|
||||
|
||||
struct radix_tree_preload {
|
||||
local_lock_t lock;
|
||||
unsigned nr;
|
||||
/* nodes->parent points to next preallocated node */
|
||||
struct radix_tree_node *nodes;
|
||||
};
|
||||
DECLARE_PER_CPU(struct radix_tree_preload, radix_tree_preloads);
|
||||
|
||||
/*
|
||||
* The bottom two bits of the slot determine how the remaining bits in the
|
||||
* slot are interpreted:
|
||||
@ -245,7 +254,7 @@ int radix_tree_tagged(const struct radix_tree_root *, unsigned int tag);
|
||||
|
||||
static inline void radix_tree_preload_end(void)
|
||||
{
|
||||
preempt_enable();
|
||||
local_unlock(&radix_tree_preloads.lock);
|
||||
}
|
||||
|
||||
void __rcu **idr_get_free(struct radix_tree_root *root,
|
||||
|
@@ -371,7 +371,7 @@ static inline void list_splice_tail_init_rcu(struct list_head *list,
* @pos: the type * to use as a loop cursor.
* @head: the head for your list.
* @member: the name of the list_head within the struct.
* @cond...: optional lockdep expression if called from non-RCU protection.
* @cond: optional lockdep expression if called from non-RCU protection.
*
* This list-traversal primitive may safely run concurrently with
* the _rcu list-mutation primitives such as list_add_rcu()
@@ -646,7 +646,7 @@ static inline void hlist_add_behind_rcu(struct hlist_node *n,
* @pos: the type * to use as a loop cursor.
* @head: the head for your list.
* @member: the name of the hlist_node within the struct.
* @cond...: optional lockdep expression if called from non-RCU protection.
* @cond: optional lockdep expression if called from non-RCU protection.
*
* This list-traversal primitive may safely run concurrently with
* the _rcu list-mutation primitives such as hlist_add_head_rcu()

@ -37,6 +37,7 @@
|
||||
/* Exported common interfaces */
|
||||
void call_rcu(struct rcu_head *head, rcu_callback_t func);
|
||||
void rcu_barrier_tasks(void);
|
||||
void rcu_barrier_tasks_rude(void);
|
||||
void synchronize_rcu(void);
|
||||
|
||||
#ifdef CONFIG_PREEMPT_RCU
|
||||
@ -129,25 +130,57 @@ static inline void rcu_init_nohz(void) { }
|
||||
* Note a quasi-voluntary context switch for RCU-tasks's benefit.
|
||||
* This is a macro rather than an inline function to avoid #include hell.
|
||||
*/
|
||||
#ifdef CONFIG_TASKS_RCU
|
||||
#define rcu_tasks_qs(t) \
|
||||
do { \
|
||||
if (READ_ONCE((t)->rcu_tasks_holdout)) \
|
||||
WRITE_ONCE((t)->rcu_tasks_holdout, false); \
|
||||
#ifdef CONFIG_TASKS_RCU_GENERIC
|
||||
|
||||
# ifdef CONFIG_TASKS_RCU
|
||||
# define rcu_tasks_classic_qs(t, preempt) \
|
||||
do { \
|
||||
if (!(preempt) && READ_ONCE((t)->rcu_tasks_holdout)) \
|
||||
WRITE_ONCE((t)->rcu_tasks_holdout, false); \
|
||||
} while (0)
|
||||
#define rcu_note_voluntary_context_switch(t) rcu_tasks_qs(t)
|
||||
void call_rcu_tasks(struct rcu_head *head, rcu_callback_t func);
|
||||
void synchronize_rcu_tasks(void);
|
||||
# else
|
||||
# define rcu_tasks_classic_qs(t, preempt) do { } while (0)
|
||||
# define call_rcu_tasks call_rcu
|
||||
# define synchronize_rcu_tasks synchronize_rcu
|
||||
# endif
|
||||
|
||||
# ifdef CONFIG_TASKS_RCU_TRACE
|
||||
# define rcu_tasks_trace_qs(t) \
|
||||
do { \
|
||||
if (!likely(READ_ONCE((t)->trc_reader_checked)) && \
|
||||
!unlikely(READ_ONCE((t)->trc_reader_nesting))) { \
|
||||
smp_store_release(&(t)->trc_reader_checked, true); \
|
||||
smp_mb(); /* Readers partitioned by store. */ \
|
||||
} \
|
||||
} while (0)
|
||||
# else
|
||||
# define rcu_tasks_trace_qs(t) do { } while (0)
|
||||
# endif
|
||||
|
||||
#define rcu_tasks_qs(t, preempt) \
|
||||
do { \
|
||||
rcu_tasks_classic_qs((t), (preempt)); \
|
||||
rcu_tasks_trace_qs((t)); \
|
||||
} while (0)
|
||||
|
||||
# ifdef CONFIG_TASKS_RUDE_RCU
|
||||
void call_rcu_tasks_rude(struct rcu_head *head, rcu_callback_t func);
|
||||
void synchronize_rcu_tasks_rude(void);
|
||||
# endif
|
||||
|
||||
#define rcu_note_voluntary_context_switch(t) rcu_tasks_qs(t, false)
|
||||
void exit_tasks_rcu_start(void);
|
||||
void exit_tasks_rcu_finish(void);
|
||||
#else /* #ifdef CONFIG_TASKS_RCU */
|
||||
#define rcu_tasks_qs(t) do { } while (0)
|
||||
#else /* #ifdef CONFIG_TASKS_RCU_GENERIC */
|
||||
#define rcu_tasks_qs(t, preempt) do { } while (0)
|
||||
#define rcu_note_voluntary_context_switch(t) do { } while (0)
|
||||
#define call_rcu_tasks call_rcu
|
||||
#define synchronize_rcu_tasks synchronize_rcu
|
||||
static inline void exit_tasks_rcu_start(void) { }
|
||||
static inline void exit_tasks_rcu_finish(void) { }
|
||||
#endif /* #else #ifdef CONFIG_TASKS_RCU */
|
||||
#endif /* #else #ifdef CONFIG_TASKS_RCU_GENERIC */
|
||||
|
||||
/**
|
||||
* cond_resched_tasks_rcu_qs - Report potential quiescent states to RCU
|
||||
@ -158,7 +191,7 @@ static inline void exit_tasks_rcu_finish(void) { }
|
||||
*/
|
||||
#define cond_resched_tasks_rcu_qs() \
|
||||
do { \
|
||||
rcu_tasks_qs(current); \
|
||||
rcu_tasks_qs(current, false); \
|
||||
cond_resched(); \
|
||||
} while (0)
|
||||
|
||||
|
include/linux/rcupdate_trace.h (new file, 88 lines)
@@ -0,0 +1,88 @@
/* SPDX-License-Identifier: GPL-2.0+ */
/*
* Read-Copy Update mechanism for mutual exclusion, adapted for tracing.
*
* Copyright (C) 2020 Paul E. McKenney.
*/

#ifndef __LINUX_RCUPDATE_TRACE_H
#define __LINUX_RCUPDATE_TRACE_H

#include <linux/sched.h>
#include <linux/rcupdate.h>

#ifdef CONFIG_DEBUG_LOCK_ALLOC

extern struct lockdep_map rcu_trace_lock_map;

static inline int rcu_read_lock_trace_held(void)
{
return lock_is_held(&rcu_trace_lock_map);
}

#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */

static inline int rcu_read_lock_trace_held(void)
{
return 1;
}

#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */

#ifdef CONFIG_TASKS_TRACE_RCU

void rcu_read_unlock_trace_special(struct task_struct *t, int nesting);

/**
* rcu_read_lock_trace - mark beginning of RCU-trace read-side critical section
*
* When synchronize_rcu_trace() is invoked by one task, then that task
* is guaranteed to block until all other tasks exit their read-side
* critical sections. Similarly, if call_rcu_trace() is invoked on one
* task while other tasks are within RCU read-side critical sections,
* invocation of the corresponding RCU callback is deferred until after
* the all the other tasks exit their critical sections.
*
* For more details, please see the documentation for rcu_read_lock().
*/
static inline void rcu_read_lock_trace(void)
{
struct task_struct *t = current;

WRITE_ONCE(t->trc_reader_nesting, READ_ONCE(t->trc_reader_nesting) + 1);
if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB) &&
t->trc_reader_special.b.need_mb)
smp_mb(); // Pairs with update-side barriers
rcu_lock_acquire(&rcu_trace_lock_map);
}

/**
* rcu_read_unlock_trace - mark end of RCU-trace read-side critical section
*
* Pairs with a preceding call to rcu_read_lock_trace(), and nesting is
* allowed. Invoking a rcu_read_unlock_trace() when there is no matching
* rcu_read_lock_trace() is verboten, and will result in lockdep complaints.
*
* For more details, please see the documentation for rcu_read_unlock().
*/
static inline void rcu_read_unlock_trace(void)
{
int nesting;
struct task_struct *t = current;

rcu_lock_release(&rcu_trace_lock_map);
nesting = READ_ONCE(t->trc_reader_nesting) - 1;
if (likely(!READ_ONCE(t->trc_reader_special.s)) || nesting) {
WRITE_ONCE(t->trc_reader_nesting, nesting);
return; // We assume shallow reader nesting.
}
rcu_read_unlock_trace_special(t, nesting);
}

void call_rcu_tasks_trace(struct rcu_head *rhp, rcu_callback_t func);
void synchronize_rcu_tasks_trace(void);
void rcu_barrier_tasks_trace(void);

#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */

#endif /* __LINUX_RCUPDATE_TRACE_H */
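
A minimal sketch of how the markers above pair with an updater, using only the API declared in this new header plus rcu_dereference_check(). The names traced_cfg, cur_cfg, read_cfg_value() and update_cfg() are illustrative, not from this diff, and updates are assumed to be serialized by the caller.

#include <linux/rcupdate.h>
#include <linux/rcupdate_trace.h>
#include <linux/slab.h>

struct traced_cfg {
	int value;
};

static struct traced_cfg __rcu *cur_cfg;

/* Reader: usable from the contexts Tasks Trace RCU tracks, e.g. tracing hooks. */
static int read_cfg_value(void)
{
	struct traced_cfg *cfg;
	int val = -1;

	rcu_read_lock_trace();
	cfg = rcu_dereference_check(cur_cfg, rcu_read_lock_trace_held());
	if (cfg)
		val = cfg->value;
	rcu_read_unlock_trace();
	return val;
}

/* Updater: publish the new version, wait out all trace readers, then free. */
static void update_cfg(struct traced_cfg *new_cfg)
{
	struct traced_cfg *old;

	old = rcu_replace_pointer(cur_cfg, new_cfg, true); /* updates serialized elsewhere */
	synchronize_rcu_tasks_trace();
	kfree(old);
}
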
@@ -31,4 +31,23 @@ do { \

#define wait_rcu_gp(...) _wait_rcu_gp(false, __VA_ARGS__)

/**
* synchronize_rcu_mult - Wait concurrently for multiple grace periods
* @...: List of call_rcu() functions for different grace periods to wait on
*
* This macro waits concurrently for multiple types of RCU grace periods.
* For example, synchronize_rcu_mult(call_rcu, call_rcu_tasks) would wait
* on concurrent RCU and RCU-tasks grace periods. Waiting on a given SRCU
* domain requires you to write a wrapper function for that SRCU domain's
* call_srcu() function, with this wrapper supplying the pointer to the
* corresponding srcu_struct.
*
* The first argument tells Tiny RCU's _wait_rcu_gp() not to
* bother waiting for RCU. The reason for this is because anywhere
* synchronize_rcu_mult() can be called is automatically already a full
* grace period.
*/
#define synchronize_rcu_mult(...) \
_wait_rcu_gp(IS_ENABLED(CONFIG_TINY_RCU), __VA_ARGS__)

#endif /* _LINUX_SCHED_RCUPDATE_WAIT_H */
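
The kernel-doc above notes that waiting on an SRCU domain with synchronize_rcu_mult() requires a call_srcu() wrapper that supplies the srcu_struct. A minimal sketch of such a wrapper follows; my_srcu, call_my_srcu() and wait_for_rcu_and_my_srcu() are illustrative names, not part of this diff.

#include <linux/rcupdate_wait.h>
#include <linux/srcu.h>

/* Illustrative SRCU domain; any existing srcu_struct would do. */
DEFINE_STATIC_SRCU(my_srcu);

/* Wrapper with the call_rcu()-style signature that synchronize_rcu_mult() expects. */
static void call_my_srcu(struct rcu_head *head, rcu_callback_t func)
{
	call_srcu(&my_srcu, head, func);
}

static void wait_for_rcu_and_my_srcu(void)
{
	/* Waits for a normal RCU grace period and an SRCU grace period concurrently. */
	synchronize_rcu_mult(call_rcu, call_my_srcu);
}
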
@ -49,7 +49,7 @@ static inline void rcu_softirq_qs(void)
|
||||
#define rcu_note_context_switch(preempt) \
|
||||
do { \
|
||||
rcu_qs(); \
|
||||
rcu_tasks_qs(current); \
|
||||
rcu_tasks_qs(current, (preempt)); \
|
||||
} while (0)
|
||||
|
||||
static inline int rcu_needs_cpu(u64 basemono, u64 *nextevt)
|
||||
@ -71,6 +71,8 @@ static inline void rcu_irq_enter(void) { }
|
||||
static inline void rcu_irq_exit_irqson(void) { }
|
||||
static inline void rcu_irq_enter_irqson(void) { }
|
||||
static inline void rcu_irq_exit(void) { }
|
||||
static inline void rcu_irq_exit_preempt(void) { }
|
||||
static inline void rcu_irq_exit_check_preempt(void) { }
|
||||
static inline void exit_rcu(void) { }
|
||||
static inline bool rcu_preempt_need_deferred_qs(struct task_struct *t)
|
||||
{
|
||||
@ -85,8 +87,10 @@ static inline void rcu_scheduler_starting(void) { }
|
||||
static inline void rcu_end_inkernel_boot(void) { }
|
||||
static inline bool rcu_inkernel_boot_has_ended(void) { return true; }
|
||||
static inline bool rcu_is_watching(void) { return true; }
|
||||
static inline bool __rcu_is_watching(void) { return true; }
|
||||
static inline void rcu_momentary_dyntick_idle(void) { }
|
||||
static inline void kfree_rcu_scheduler_running(void) { }
|
||||
static inline bool rcu_gp_might_be_stalled(void) { return false; }
|
||||
|
||||
/* Avoid RCU read-side critical sections leaking across. */
|
||||
static inline void rcu_all_qs(void) { barrier(); }
|
||||
|
@ -39,6 +39,7 @@ void rcu_barrier(void);
|
||||
bool rcu_eqs_special_set(int cpu);
|
||||
void rcu_momentary_dyntick_idle(void);
|
||||
void kfree_rcu_scheduler_running(void);
|
||||
bool rcu_gp_might_be_stalled(void);
|
||||
unsigned long get_state_synchronize_rcu(void);
|
||||
void cond_synchronize_rcu(unsigned long oldstate);
|
||||
|
||||
@ -46,9 +47,16 @@ void rcu_idle_enter(void);
|
||||
void rcu_idle_exit(void);
|
||||
void rcu_irq_enter(void);
|
||||
void rcu_irq_exit(void);
|
||||
void rcu_irq_exit_preempt(void);
|
||||
void rcu_irq_enter_irqson(void);
|
||||
void rcu_irq_exit_irqson(void);
|
||||
|
||||
#ifdef CONFIG_PROVE_RCU
|
||||
void rcu_irq_exit_check_preempt(void);
|
||||
#else
|
||||
static inline void rcu_irq_exit_check_preempt(void) { }
|
||||
#endif
|
||||
|
||||
void exit_rcu(void);
|
||||
|
||||
void rcu_scheduler_starting(void);
|
||||
@ -56,6 +64,7 @@ extern int rcu_scheduler_active __read_mostly;
|
||||
void rcu_end_inkernel_boot(void);
|
||||
bool rcu_inkernel_boot_has_ended(void);
|
||||
bool rcu_is_watching(void);
|
||||
bool __rcu_is_watching(void);
|
||||
#ifndef CONFIG_PREEMPTION
|
||||
void rcu_all_qs(void);
|
||||
#endif
|
||||
|
@ -613,7 +613,7 @@ union rcu_special {
|
||||
u8 blocked;
|
||||
u8 need_qs;
|
||||
u8 exp_hint; /* Hint for performance. */
|
||||
u8 deferred_qs;
|
||||
u8 need_mb; /* Readers need smp_mb(). */
|
||||
} b; /* Bits. */
|
||||
u32 s; /* Set of bits. */
|
||||
};
|
||||
@ -724,6 +724,14 @@ struct task_struct {
|
||||
struct list_head rcu_tasks_holdout_list;
|
||||
#endif /* #ifdef CONFIG_TASKS_RCU */
|
||||
|
||||
#ifdef CONFIG_TASKS_TRACE_RCU
|
||||
int trc_reader_nesting;
|
||||
int trc_ipi_to_cpu;
|
||||
union rcu_special trc_reader_special;
|
||||
bool trc_reader_checked;
|
||||
struct list_head trc_holdout_list;
|
||||
#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
|
||||
|
||||
struct sched_info sched_info;
|
||||
|
||||
struct list_head tasks;
|
||||
@ -1293,6 +1301,12 @@ struct task_struct {
|
||||
unsigned long prev_lowest_stack;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_MCE
|
||||
u64 mce_addr;
|
||||
u64 mce_status;
|
||||
struct callback_head mce_kill_me;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* New fields for task_struct should be added above here, so that
|
||||
* they are included in the randomized portion of task_struct.
|
||||
|
@@ -337,6 +337,7 @@ extern void activate_page(struct page *);
extern void mark_page_accessed(struct page *);
extern void lru_add_drain(void);
extern void lru_add_drain_cpu(int cpu);
extern void lru_add_drain_cpu_zone(struct zone *zone);
extern void lru_add_drain_all(void);
extern void rotate_reclaimable_page(struct page *page);
extern void deactivate_file_page(struct page *page);

@@ -89,7 +89,7 @@ void _torture_stop_kthread(char *m, struct task_struct **tp);
#ifdef CONFIG_PREEMPTION
#define torture_preempt_schedule() preempt_schedule()
#else
#define torture_preempt_schedule()
#define torture_preempt_schedule() do { } while (0)
#endif

#endif /* __LINUX_TORTURE_H */

@@ -1149,4 +1149,6 @@ int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, i
(wait)->flags = 0; \
} while (0)

bool try_invoke_on_locked_down_task(struct task_struct *p, bool (*func)(struct task_struct *t, void *arg), void *arg);

#endif /* _LINUX_WAIT_H */

@ -142,6 +142,11 @@ struct task_struct init_task
|
||||
.rcu_tasks_holdout_list = LIST_HEAD_INIT(init_task.rcu_tasks_holdout_list),
|
||||
.rcu_tasks_idle_cpu = -1,
|
||||
#endif
|
||||
#ifdef CONFIG_TASKS_TRACE_RCU
|
||||
.trc_reader_nesting = 0,
|
||||
.trc_reader_special.s = 0,
|
||||
.trc_holdout_list = LIST_HEAD_INIT(init_task.trc_holdout_list),
|
||||
#endif
|
||||
#ifdef CONFIG_CPUSETS
|
||||
.mems_allowed_seq = SEQCNT_ZERO(init_task.mems_allowed_seq),
|
||||
#endif
|
||||
|
@ -1694,6 +1694,11 @@ static inline void rcu_copy_process(struct task_struct *p)
|
||||
INIT_LIST_HEAD(&p->rcu_tasks_holdout_list);
|
||||
p->rcu_tasks_idle_cpu = -1;
|
||||
#endif /* #ifdef CONFIG_TASKS_RCU */
|
||||
#ifdef CONFIG_TASKS_TRACE_RCU
|
||||
p->trc_reader_nesting = 0;
|
||||
p->trc_reader_special.s = 0;
|
||||
INIT_LIST_HEAD(&p->trc_holdout_list);
|
||||
#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
|
||||
}
|
||||
|
||||
struct pid *pidfd_pid(const struct file *file)
|
||||
|
@ -393,25 +393,6 @@ void lockdep_init_task(struct task_struct *task)
|
||||
task->lockdep_recursion = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Split the recrursion counter in two to readily detect 'off' vs recursion.
|
||||
*/
|
||||
#define LOCKDEP_RECURSION_BITS 16
|
||||
#define LOCKDEP_OFF (1U << LOCKDEP_RECURSION_BITS)
|
||||
#define LOCKDEP_RECURSION_MASK (LOCKDEP_OFF - 1)
|
||||
|
||||
void lockdep_off(void)
|
||||
{
|
||||
current->lockdep_recursion += LOCKDEP_OFF;
|
||||
}
|
||||
EXPORT_SYMBOL(lockdep_off);
|
||||
|
||||
void lockdep_on(void)
|
||||
{
|
||||
current->lockdep_recursion -= LOCKDEP_OFF;
|
||||
}
|
||||
EXPORT_SYMBOL(lockdep_on);
|
||||
|
||||
static inline void lockdep_recursion_finish(void)
|
||||
{
|
||||
if (WARN_ON_ONCE(--current->lockdep_recursion))
|
||||
@ -489,7 +470,7 @@ struct lock_trace {
|
||||
struct hlist_node hash_entry;
|
||||
u32 hash;
|
||||
u32 nr_entries;
|
||||
unsigned long entries[0] __aligned(sizeof(unsigned long));
|
||||
unsigned long entries[] __aligned(sizeof(unsigned long));
|
||||
};
|
||||
#define LOCK_TRACE_SIZE_IN_LONGS \
|
||||
(sizeof(struct lock_trace) / sizeof(unsigned long))
|
||||
|
@ -141,7 +141,6 @@ static void fixup_rt_mutex_waiters(struct rt_mutex *lock)
|
||||
* set up.
|
||||
*/
|
||||
#ifndef CONFIG_DEBUG_RT_MUTEXES
|
||||
# define rt_mutex_cmpxchg_relaxed(l,c,n) (cmpxchg_relaxed(&l->owner, c, n) == c)
|
||||
# define rt_mutex_cmpxchg_acquire(l,c,n) (cmpxchg_acquire(&l->owner, c, n) == c)
|
||||
# define rt_mutex_cmpxchg_release(l,c,n) (cmpxchg_release(&l->owner, c, n) == c)
|
||||
|
||||
@ -202,7 +201,6 @@ static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
|
||||
}
|
||||
|
||||
#else
|
||||
# define rt_mutex_cmpxchg_relaxed(l,c,n) (0)
|
||||
# define rt_mutex_cmpxchg_acquire(l,c,n) (0)
|
||||
# define rt_mutex_cmpxchg_release(l,c,n) (0)
|
||||
|
||||
|
@@ -6,9 +6,11 @@

#ifdef CONFIG_PRINTK

#define PRINTK_SAFE_CONTEXT_MASK 0x3fffffff
#define PRINTK_NMI_DIRECT_CONTEXT_MASK 0x40000000
#define PRINTK_NMI_CONTEXT_MASK 0x80000000
#define PRINTK_SAFE_CONTEXT_MASK 0x007ffffff
#define PRINTK_NMI_DIRECT_CONTEXT_MASK 0x008000000
#define PRINTK_NMI_CONTEXT_MASK 0xff0000000

#define PRINTK_NMI_CONTEXT_OFFSET 0x010000000

extern raw_spinlock_t logbuf_lock;

@ -10,6 +10,7 @@
|
||||
#include <linux/cpumask.h>
|
||||
#include <linux/irq_work.h>
|
||||
#include <linux/printk.h>
|
||||
#include <linux/kprobes.h>
|
||||
|
||||
#include "internal.h"
|
||||
|
||||
@ -293,14 +294,14 @@ static __printf(1, 0) int vprintk_nmi(const char *fmt, va_list args)
|
||||
return printk_safe_log_store(s, fmt, args);
|
||||
}
|
||||
|
||||
void notrace printk_nmi_enter(void)
|
||||
void noinstr printk_nmi_enter(void)
|
||||
{
|
||||
this_cpu_or(printk_context, PRINTK_NMI_CONTEXT_MASK);
|
||||
this_cpu_add(printk_context, PRINTK_NMI_CONTEXT_OFFSET);
|
||||
}
|
||||
|
||||
void notrace printk_nmi_exit(void)
|
||||
void noinstr printk_nmi_exit(void)
|
||||
{
|
||||
this_cpu_and(printk_context, ~PRINTK_NMI_CONTEXT_MASK);
|
||||
this_cpu_sub(printk_context, PRINTK_NMI_CONTEXT_OFFSET);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -70,13 +70,37 @@ config TREE_SRCU
|
||||
help
|
||||
This option selects the full-fledged version of SRCU.
|
||||
|
||||
config TASKS_RCU_GENERIC
|
||||
def_bool TASKS_RCU || TASKS_RUDE_RCU || TASKS_TRACE_RCU
|
||||
select SRCU
|
||||
help
|
||||
This option enables generic infrastructure code supporting
|
||||
task-based RCU implementations. Not for manual selection.
|
||||
|
||||
config TASKS_RCU
|
||||
def_bool PREEMPTION
|
||||
select SRCU
|
||||
help
|
||||
This option enables a task-based RCU implementation that uses
|
||||
only voluntary context switch (not preemption!), idle, and
|
||||
user-mode execution as quiescent states.
|
||||
user-mode execution as quiescent states. Not for manual selection.
|
||||
|
||||
config TASKS_RUDE_RCU
|
||||
def_bool 0
|
||||
help
|
||||
This option enables a task-based RCU implementation that uses
|
||||
only context switch (including preemption) and user-mode
|
||||
execution as quiescent states. It forces IPIs and context
|
||||
switches on all online CPUs, including idle ones, so use
|
||||
with caution.
|
||||
|
||||
config TASKS_TRACE_RCU
|
||||
def_bool 0
|
||||
help
|
||||
This option enables a task-based RCU implementation that uses
|
||||
explicit rcu_read_lock_trace() read-side markers, and allows
|
||||
these readers to appear in the idle loop as well as on the CPU
|
||||
hotplug code paths. It can force IPIs on online CPUs, including
|
||||
idle ones, so use with caution.
|
||||
|
||||
config RCU_STALL_COMMON
|
||||
def_bool TREE_RCU
|
||||
@ -210,4 +234,22 @@ config RCU_NOCB_CPU
|
||||
Say Y here if you want to help to debug reduced OS jitter.
|
||||
Say N here if you are unsure.
|
||||
|
||||
config TASKS_TRACE_RCU_READ_MB
|
||||
bool "Tasks Trace RCU readers use memory barriers in user and idle"
|
||||
depends on RCU_EXPERT
|
||||
default PREEMPT_RT || NR_CPUS < 8
|
||||
help
|
||||
Use this option to further reduce the number of IPIs sent
|
||||
to CPUs executing in userspace or idle during tasks trace
|
||||
RCU grace periods. Given that a reasonable setting of
|
||||
the rcupdate.rcu_task_ipi_delay kernel boot parameter
|
||||
eliminates such IPIs for many workloads, proper setting
|
||||
of this Kconfig option is important mostly for aggressive
|
||||
real-time installations and for battery-powered devices,
|
||||
hence the default chosen above.
|
||||
|
||||
Say Y here if you hate IPIs.
|
||||
Say N here if you hate read-side memory barriers.
|
||||
Take the default if you are unsure.
|
||||
|
||||
endmenu # "RCU Subsystem"
|
||||
|
@ -29,6 +29,8 @@ config RCU_PERF_TEST
|
||||
select TORTURE_TEST
|
||||
select SRCU
|
||||
select TASKS_RCU
|
||||
select TASKS_RUDE_RCU
|
||||
select TASKS_TRACE_RCU
|
||||
default n
|
||||
help
|
||||
This option provides a kernel module that runs performance
|
||||
@ -46,6 +48,8 @@ config RCU_TORTURE_TEST
|
||||
select TORTURE_TEST
|
||||
select SRCU
|
||||
select TASKS_RCU
|
||||
select TASKS_RUDE_RCU
|
||||
select TASKS_TRACE_RCU
|
||||
default n
|
||||
help
|
||||
This option provides a kernel module that runs torture tests
|
||||
|
@ -431,6 +431,7 @@ bool rcu_gp_is_expedited(void); /* Internal RCU use. */
|
||||
void rcu_expedite_gp(void);
|
||||
void rcu_unexpedite_gp(void);
|
||||
void rcupdate_announce_bootup_oddness(void);
|
||||
void show_rcu_tasks_gp_kthreads(void);
|
||||
void rcu_request_urgent_qs_task(struct task_struct *t);
|
||||
#endif /* #else #ifdef CONFIG_TINY_RCU */
|
||||
|
||||
@ -441,6 +442,8 @@ void rcu_request_urgent_qs_task(struct task_struct *t);
|
||||
enum rcutorture_type {
|
||||
RCU_FLAVOR,
|
||||
RCU_TASKS_FLAVOR,
|
||||
RCU_TASKS_RUDE_FLAVOR,
|
||||
RCU_TASKS_TRACING_FLAVOR,
|
||||
RCU_TRIVIAL_FLAVOR,
|
||||
SRCU_FLAVOR,
|
||||
INVALID_RCU_FLAVOR
|
||||
@ -454,6 +457,7 @@ void do_trace_rcu_torture_read(const char *rcutorturename,
|
||||
unsigned long secs,
|
||||
unsigned long c_old,
|
||||
unsigned long c);
|
||||
void rcu_gp_set_torture_wait(int duration);
|
||||
#else
|
||||
static inline void rcutorture_get_gp_data(enum rcutorture_type test_type,
|
||||
int *flags, unsigned long *gp_seq)
|
||||
@ -471,6 +475,7 @@ void do_trace_rcu_torture_read(const char *rcutorturename,
|
||||
#define do_trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \
|
||||
do { } while (0)
|
||||
#endif
|
||||
static inline void rcu_gp_set_torture_wait(int duration) { }
|
||||
#endif
|
||||
|
||||
#if IS_ENABLED(CONFIG_RCU_TORTURE_TEST) || IS_MODULE(CONFIG_RCU_TORTURE_TEST)
|
||||
@ -498,6 +503,7 @@ void srcutorture_get_gp_data(enum rcutorture_type test_type,
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_TINY_RCU
|
||||
static inline bool rcu_dynticks_zero_in_eqs(int cpu, int *vp) { return false; }
|
||||
static inline unsigned long rcu_get_gp_seq(void) { return 0; }
|
||||
static inline unsigned long rcu_exp_batches_completed(void) { return 0; }
|
||||
static inline unsigned long
|
||||
@ -507,6 +513,7 @@ static inline void show_rcu_gp_kthreads(void) { }
|
||||
static inline int rcu_get_gp_kthreads_prio(void) { return 0; }
|
||||
static inline void rcu_fwd_progress_check(unsigned long j) { }
|
||||
#else /* #ifdef CONFIG_TINY_RCU */
|
||||
bool rcu_dynticks_zero_in_eqs(int cpu, int *vp);
|
||||
unsigned long rcu_get_gp_seq(void);
|
||||
unsigned long rcu_exp_batches_completed(void);
|
||||
unsigned long srcu_batches_completed(struct srcu_struct *sp);
|
||||
|
@ -88,6 +88,7 @@ torture_param(bool, shutdown, RCUPERF_SHUTDOWN,
|
||||
torture_param(int, verbose, 1, "Enable verbose debugging printk()s");
|
||||
torture_param(int, writer_holdoff, 0, "Holdoff (us) between GPs, zero to disable");
|
||||
torture_param(int, kfree_rcu_test, 0, "Do we run a kfree_rcu() perf test?");
|
||||
torture_param(int, kfree_mult, 1, "Multiple of kfree_obj size to allocate.");
|
||||
|
||||
static char *perf_type = "rcu";
|
||||
module_param(perf_type, charp, 0444);
|
||||
@ -635,7 +636,7 @@ kfree_perf_thread(void *arg)
|
||||
}
|
||||
|
||||
for (i = 0; i < kfree_alloc_num; i++) {
|
||||
alloc_ptr = kmalloc(sizeof(struct kfree_obj), GFP_KERNEL);
|
||||
alloc_ptr = kmalloc(kfree_mult * sizeof(struct kfree_obj), GFP_KERNEL);
|
||||
if (!alloc_ptr)
|
||||
return -ENOMEM;
|
||||
|
||||
@ -722,6 +723,8 @@ kfree_perf_init(void)
|
||||
schedule_timeout_uninterruptible(1);
|
||||
}
|
||||
|
||||
pr_alert("kfree object size=%lu\n", kfree_mult * sizeof(struct kfree_obj));
|
||||
|
||||
kfree_reader_tasks = kcalloc(kfree_nrealthreads, sizeof(kfree_reader_tasks[0]),
|
||||
GFP_KERNEL);
|
||||
if (kfree_reader_tasks == NULL) {
|
||||
|
@ -20,7 +20,7 @@
|
||||
#include <linux/err.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/smp.h>
|
||||
#include <linux/rcupdate.h>
|
||||
#include <linux/rcupdate_wait.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/sched/signal.h>
|
||||
#include <uapi/linux/sched/types.h>
|
||||
@ -45,12 +45,25 @@
|
||||
#include <linux/sched/sysctl.h>
|
||||
#include <linux/oom.h>
|
||||
#include <linux/tick.h>
|
||||
#include <linux/rcupdate_trace.h>
|
||||
|
||||
#include "rcu.h"
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.ibm.com> and Josh Triplett <josh@joshtriplett.org>");
|
||||
|
||||
#ifndef data_race
|
||||
#define data_race(expr) \
|
||||
({ \
|
||||
expr; \
|
||||
})
|
||||
#endif
|
||||
#ifndef ASSERT_EXCLUSIVE_WRITER
|
||||
#define ASSERT_EXCLUSIVE_WRITER(var) do { } while (0)
|
||||
#endif
|
||||
#ifndef ASSERT_EXCLUSIVE_ACCESS
|
||||
#define ASSERT_EXCLUSIVE_ACCESS(var) do { } while (0)
|
||||
#endif
|
||||
|
||||
/* Bits for ->extendables field, extendables param, and related definitions. */
|
||||
#define RCUTORTURE_RDR_SHIFT 8 /* Put SRCU index in upper bits. */
|
||||
@ -102,6 +115,9 @@ torture_param(int, stall_cpu, 0, "Stall duration (s), zero to disable.");
|
||||
torture_param(int, stall_cpu_holdoff, 10,
|
||||
"Time to wait before starting stall (s).");
|
||||
torture_param(int, stall_cpu_irqsoff, 0, "Disable interrupts while stalling.");
|
||||
torture_param(int, stall_cpu_block, 0, "Sleep while stalling.");
|
||||
torture_param(int, stall_gp_kthread, 0,
|
||||
"Grace-period kthread stall duration (s).");
|
||||
torture_param(int, stat_interval, 60,
|
||||
"Number of seconds between stats printk()s");
|
||||
torture_param(int, stutter, 5, "Number of seconds to run/halt test");
|
||||
@ -665,6 +681,11 @@ static void rcu_tasks_torture_deferred_free(struct rcu_torture *p)
|
||||
call_rcu_tasks(&p->rtort_rcu, rcu_torture_cb);
|
||||
}
|
||||
|
||||
static void synchronize_rcu_mult_test(void)
|
||||
{
|
||||
synchronize_rcu_mult(call_rcu_tasks, call_rcu);
|
||||
}
|
||||
|
||||
static struct rcu_torture_ops tasks_ops = {
|
||||
.ttype = RCU_TASKS_FLAVOR,
|
||||
.init = rcu_sync_torture_init,
|
||||
@ -674,7 +695,7 @@ static struct rcu_torture_ops tasks_ops = {
|
||||
.get_gp_seq = rcu_no_completed,
|
||||
.deferred_free = rcu_tasks_torture_deferred_free,
|
||||
.sync = synchronize_rcu_tasks,
|
||||
.exp_sync = synchronize_rcu_tasks,
|
||||
.exp_sync = synchronize_rcu_mult_test,
|
||||
.call = call_rcu_tasks,
|
||||
.cb_barrier = rcu_barrier_tasks,
|
||||
.fqs = NULL,
|
||||
@ -725,6 +746,72 @@ static struct rcu_torture_ops trivial_ops = {
|
||||
.name = "trivial"
|
||||
};
|
||||
|
||||
/*
|
||||
* Definitions for rude RCU-tasks torture testing.
|
||||
*/
|
||||
|
||||
static void rcu_tasks_rude_torture_deferred_free(struct rcu_torture *p)
|
||||
{
|
||||
call_rcu_tasks_rude(&p->rtort_rcu, rcu_torture_cb);
|
||||
}
|
||||
|
||||
static struct rcu_torture_ops tasks_rude_ops = {
|
||||
.ttype = RCU_TASKS_RUDE_FLAVOR,
|
||||
.init = rcu_sync_torture_init,
|
||||
.readlock = rcu_torture_read_lock_trivial,
|
||||
.read_delay = rcu_read_delay, /* just reuse rcu's version. */
|
||||
.readunlock = rcu_torture_read_unlock_trivial,
|
||||
.get_gp_seq = rcu_no_completed,
|
||||
.deferred_free = rcu_tasks_rude_torture_deferred_free,
|
||||
.sync = synchronize_rcu_tasks_rude,
|
||||
.exp_sync = synchronize_rcu_tasks_rude,
|
||||
.call = call_rcu_tasks_rude,
|
||||
.cb_barrier = rcu_barrier_tasks_rude,
|
||||
.fqs = NULL,
|
||||
.stats = NULL,
|
||||
.irq_capable = 1,
|
||||
.name = "tasks-rude"
|
||||
};
|
||||
|
||||
/*
|
||||
* Definitions for tracing RCU-tasks torture testing.
|
||||
*/
|
||||
|
||||
static int tasks_tracing_torture_read_lock(void)
|
||||
{
|
||||
rcu_read_lock_trace();
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void tasks_tracing_torture_read_unlock(int idx)
|
||||
{
|
||||
rcu_read_unlock_trace();
|
||||
}
|
||||
|
||||
static void rcu_tasks_tracing_torture_deferred_free(struct rcu_torture *p)
|
||||
{
|
||||
call_rcu_tasks_trace(&p->rtort_rcu, rcu_torture_cb);
|
||||
}
|
||||
|
||||
static struct rcu_torture_ops tasks_tracing_ops = {
|
||||
.ttype = RCU_TASKS_TRACING_FLAVOR,
|
||||
.init = rcu_sync_torture_init,
|
||||
.readlock = tasks_tracing_torture_read_lock,
|
||||
.read_delay = srcu_read_delay, /* just reuse srcu's version. */
|
||||
.readunlock = tasks_tracing_torture_read_unlock,
|
||||
.get_gp_seq = rcu_no_completed,
|
||||
.deferred_free = rcu_tasks_tracing_torture_deferred_free,
|
||||
.sync = synchronize_rcu_tasks_trace,
|
||||
.exp_sync = synchronize_rcu_tasks_trace,
|
||||
.call = call_rcu_tasks_trace,
|
||||
.cb_barrier = rcu_barrier_tasks_trace,
|
||||
.fqs = NULL,
|
||||
.stats = NULL,
|
||||
.irq_capable = 1,
|
||||
.slow_gps = 1,
|
||||
.name = "tasks-tracing"
|
||||
};
|
||||
|
||||
static unsigned long rcutorture_seq_diff(unsigned long new, unsigned long old)
|
||||
{
|
||||
if (!cur_ops->gp_diff)
|
||||
@ -734,7 +821,7 @@ static unsigned long rcutorture_seq_diff(unsigned long new, unsigned long old)
|
||||
|
||||
static bool __maybe_unused torturing_tasks(void)
|
||||
{
|
||||
return cur_ops == &tasks_ops;
|
||||
return cur_ops == &tasks_ops || cur_ops == &tasks_rude_ops;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -833,7 +920,7 @@ static int rcu_torture_boost(void *arg)
|
||||
|
||||
/* Wait for the next test interval. */
|
||||
oldstarttime = boost_starttime;
|
||||
while (ULONG_CMP_LT(jiffies, oldstarttime)) {
|
||||
while (time_before(jiffies, oldstarttime)) {
|
||||
schedule_timeout_interruptible(oldstarttime - jiffies);
|
||||
stutter_wait("rcu_torture_boost");
|
||||
if (torture_must_stop())
|
||||
@ -843,7 +930,7 @@ static int rcu_torture_boost(void *arg)
|
||||
/* Do one boost-test interval. */
|
||||
endtime = oldstarttime + test_boost_duration * HZ;
|
||||
call_rcu_time = jiffies;
|
||||
while (ULONG_CMP_LT(jiffies, endtime)) {
|
||||
while (time_before(jiffies, endtime)) {
|
||||
/* If we don't have a callback in flight, post one. */
|
||||
if (!smp_load_acquire(&rbi.inflight)) {
|
||||
/* RCU core before ->inflight = 1. */
|
||||
@ -914,7 +1001,7 @@ rcu_torture_fqs(void *arg)
|
||||
VERBOSE_TOROUT_STRING("rcu_torture_fqs task started");
|
||||
do {
|
||||
fqs_resume_time = jiffies + fqs_stutter * HZ;
|
||||
while (ULONG_CMP_LT(jiffies, fqs_resume_time) &&
|
||||
while (time_before(jiffies, fqs_resume_time) &&
|
||||
!kthread_should_stop()) {
|
||||
schedule_timeout_interruptible(1);
|
||||
}
|
||||
@ -1147,6 +1234,7 @@ static void rcutorture_one_extend(int *readstate, int newstate,
|
||||
struct torture_random_state *trsp,
|
||||
struct rt_read_seg *rtrsp)
|
||||
{
|
||||
unsigned long flags;
|
||||
int idxnew = -1;
|
||||
int idxold = *readstate;
|
||||
int statesnew = ~*readstate & newstate;
|
||||
@ -1181,8 +1269,15 @@ static void rcutorture_one_extend(int *readstate, int newstate,
|
||||
rcu_read_unlock_bh();
|
||||
if (statesold & RCUTORTURE_RDR_SCHED)
|
||||
rcu_read_unlock_sched();
|
||||
if (statesold & RCUTORTURE_RDR_RCU)
|
||||
if (statesold & RCUTORTURE_RDR_RCU) {
|
||||
bool lockit = !statesnew && !(torture_random(trsp) & 0xffff);
|
||||
|
||||
if (lockit)
|
||||
raw_spin_lock_irqsave(¤t->pi_lock, flags);
|
||||
cur_ops->readunlock(idxold >> RCUTORTURE_RDR_SHIFT);
|
||||
if (lockit)
|
||||
raw_spin_unlock_irqrestore(¤t->pi_lock, flags);
|
||||
}
|
||||
|
||||
/* Delay if neither beginning nor end and there was a change. */
|
||||
if ((statesnew || statesold) && *readstate && newstate)
|
||||
@ -1283,6 +1378,7 @@ static bool rcu_torture_one_read(struct torture_random_state *trsp)
|
||||
rcu_read_lock_bh_held() ||
|
||||
rcu_read_lock_sched_held() ||
|
||||
srcu_read_lock_held(srcu_ctlp) ||
|
||||
rcu_read_lock_trace_held() ||
|
||||
torturing_tasks());
|
||||
if (p == NULL) {
|
||||
/* Wait for rcu_torture_writer to get underway */
|
||||
@ -1444,9 +1540,9 @@ rcu_torture_stats_print(void)
|
||||
atomic_long_read(&n_rcu_torture_timers));
|
||||
torture_onoff_stats();
|
||||
pr_cont("barrier: %ld/%ld:%ld\n",
|
||||
n_barrier_successes,
|
||||
n_barrier_attempts,
|
||||
n_rcu_torture_barrier_error);
|
||||
data_race(n_barrier_successes),
|
||||
data_race(n_barrier_attempts),
|
||||
data_race(n_rcu_torture_barrier_error));
|
||||
|
||||
pr_alert("%s%s ", torture_type, TORTURE_FLAG);
|
||||
if (atomic_read(&n_rcu_torture_mberror) ||
|
||||
@ -1536,6 +1632,7 @@ rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, const char *tag)
|
||||
"test_boost=%d/%d test_boost_interval=%d "
|
||||
"test_boost_duration=%d shutdown_secs=%d "
|
||||
"stall_cpu=%d stall_cpu_holdoff=%d stall_cpu_irqsoff=%d "
|
||||
"stall_cpu_block=%d "
|
||||
"n_barrier_cbs=%d "
|
||||
"onoff_interval=%d onoff_holdoff=%d\n",
|
||||
torture_type, tag, nrealreaders, nfakewriters,
|
||||
@ -1544,6 +1641,7 @@ rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, const char *tag)
|
||||
test_boost, cur_ops->can_boost,
|
||||
test_boost_interval, test_boost_duration, shutdown_secs,
|
||||
stall_cpu, stall_cpu_holdoff, stall_cpu_irqsoff,
|
||||
stall_cpu_block,
|
||||
n_barrier_cbs,
|
||||
onoff_interval, onoff_holdoff);
|
||||
}
|
||||
@ -1599,6 +1697,7 @@ static int rcutorture_booster_init(unsigned int cpu)
|
||||
*/
|
||||
static int rcu_torture_stall(void *args)
{
int idx;
unsigned long stop_at;

VERBOSE_TOROUT_STRING("rcu_torture_stall task started");
@@ -1607,26 +1706,37 @@ static int rcu_torture_stall(void *args)
schedule_timeout_interruptible(stall_cpu_holdoff * HZ);
VERBOSE_TOROUT_STRING("rcu_torture_stall end holdoff");
}
if (!kthread_should_stop()) {
if (!kthread_should_stop() && stall_gp_kthread > 0) {
VERBOSE_TOROUT_STRING("rcu_torture_stall begin GP stall");
rcu_gp_set_torture_wait(stall_gp_kthread * HZ);
for (idx = 0; idx < stall_gp_kthread + 2; idx++) {
if (kthread_should_stop())
break;
schedule_timeout_uninterruptible(HZ);
}
}
if (!kthread_should_stop() && stall_cpu > 0) {
VERBOSE_TOROUT_STRING("rcu_torture_stall begin CPU stall");
stop_at = ktime_get_seconds() + stall_cpu;
/* RCU CPU stall is expected behavior in following code. */
rcu_read_lock();
idx = cur_ops->readlock();
if (stall_cpu_irqsoff)
local_irq_disable();
else
else if (!stall_cpu_block)
preempt_disable();
pr_alert("rcu_torture_stall start on CPU %d.\n",
smp_processor_id());
raw_smp_processor_id());
while (ULONG_CMP_LT((unsigned long)ktime_get_seconds(),
stop_at))
continue; /* Induce RCU CPU stall warning. */
if (stall_cpu_block)
schedule_timeout_uninterruptible(HZ);
if (stall_cpu_irqsoff)
local_irq_enable();
else
else if (!stall_cpu_block)
preempt_enable();
rcu_read_unlock();
pr_alert("rcu_torture_stall end.\n");
cur_ops->readunlock(idx);
}
pr_alert("rcu_torture_stall end.\n");
torture_shutdown_absorb("rcu_torture_stall");
while (!kthread_should_stop())
schedule_timeout_interruptible(10 * HZ);
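
The CPU-stall loop in this hunk spins until a deadline held in an unsigned counter and compares against it with ULONG_CMP_LT(), so the test stays correct even if the counter wraps. As a rough illustration only (plain userspace C, with made-up names, not the kernel's implementation), the same wrap-safe idea can be expressed by looking at the signed difference:

#include <stdio.h>

/* Wrap-safe "a < b" for unsigned counters, in the spirit of the kernel's
 * ULONG_CMP_LT()/time_before(): compare the signed difference rather than
 * the raw values. */
#define WRAP_SAFE_LT(a, b) ((long)((a) - (b)) < 0)

int main(void)
{
	unsigned long stop_at = 5;              /* deadline after a wrap */
	unsigned long now = (unsigned long)-2;  /* counter about to wrap */

	/* "now" is numerically huge, yet still before the deadline. */
	while (WRAP_SAFE_LT(now, stop_at))
		now++;                          /* stand-in for time passing */
	printf("deadline reached at %lu\n", now);
	return 0;
}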
@@ -1636,7 +1746,7 @@ static int rcu_torture_stall(void *args)
/* Spawn CPU-stall kthread, if stall_cpu specified. */
static int __init rcu_torture_stall_init(void)
{
if (stall_cpu <= 0)
if (stall_cpu <= 0 && stall_gp_kthread <= 0)
return 0;
return torture_create_kthread(rcu_torture_stall, NULL, stall_task);
}
@@ -1692,8 +1802,8 @@ struct rcu_fwd {
unsigned long rcu_launder_gp_seq_start;
};

struct rcu_fwd *rcu_fwds;
bool rcu_fwd_emergency_stop;
static struct rcu_fwd *rcu_fwds;
static bool rcu_fwd_emergency_stop;

static void rcu_torture_fwd_cb_hist(struct rcu_fwd *rfp)
{
@@ -2400,7 +2510,8 @@ rcu_torture_init(void)
int firsterr = 0;
static struct rcu_torture_ops *torture_ops[] = {
&rcu_ops, &rcu_busted_ops, &srcu_ops, &srcud_ops,
&busted_srcud_ops, &tasks_ops, &trivial_ops,
&busted_srcud_ops, &tasks_ops, &tasks_rude_ops,
&tasks_tracing_ops, &trivial_ops,
};

if (!torture_init_begin(torture_type, verbose))

@@ -29,6 +29,19 @@
#include "rcu.h"
#include "rcu_segcblist.h"

#ifndef data_race
#define data_race(expr) \
({ \
expr; \
})
#endif
#ifndef ASSERT_EXCLUSIVE_WRITER
#define ASSERT_EXCLUSIVE_WRITER(var) do { } while (0)
#endif
#ifndef ASSERT_EXCLUSIVE_ACCESS
#define ASSERT_EXCLUSIVE_ACCESS(var) do { } while (0)
#endif

/* Holdoff in nanoseconds for auto-expediting. */
#define DEFAULT_SRCU_EXP_HOLDOFF (25 * 1000)
static ulong exp_holdoff = DEFAULT_SRCU_EXP_HOLDOFF;
@@ -1268,8 +1281,8 @@ void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf)
struct srcu_data *sdp;

sdp = per_cpu_ptr(ssp->sda, cpu);
u0 = sdp->srcu_unlock_count[!idx];
u1 = sdp->srcu_unlock_count[idx];
u0 = data_race(sdp->srcu_unlock_count[!idx]);
u1 = data_race(sdp->srcu_unlock_count[idx]);

/*
* Make sure that a lock is always counted if the corresponding
@@ -1277,8 +1290,8 @@ void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf)
*/
smp_rmb();

l0 = sdp->srcu_lock_count[!idx];
l1 = sdp->srcu_lock_count[idx];
l0 = data_race(sdp->srcu_lock_count[!idx]);
l1 = data_race(sdp->srcu_lock_count[idx]);

c0 = l0 - u0;
c1 = l1 - u1;
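
The #ifndef data_race block earlier in this hunk gives the RCU code a no-op fallback when the KCSAN annotation is not available, so intentionally racy diagnostic reads still compile everywhere. A loose userspace illustration of the same pattern (illustrative names, not kernel code):

#include <stdio.h>

/* Fallback in the spirit of the block above: when no race-detector
 * annotation exists, data_race() simply evaluates its argument, so
 * callers can annotate diagnostics-only reads unconditionally. */
#ifndef data_race
#define data_race(expr) \
({ \
	expr; \
})
#endif

static unsigned long stats_counter; /* hypothetical counter updated elsewhere */

int main(void)
{
	/* A diagnostics-only read that tolerates racing writers. */
	unsigned long snapshot = data_race(stats_counter);

	printf("counter snapshot: %lu\n", snapshot);
	return 0;
}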
kernel/rcu/tasks.h: new file, 1193 lines (diff suppressed because it is too large)
@ -67,6 +67,19 @@
|
||||
#endif
|
||||
#define MODULE_PARAM_PREFIX "rcutree."
|
||||
|
||||
#ifndef data_race
|
||||
#define data_race(expr) \
|
||||
({ \
|
||||
expr; \
|
||||
})
|
||||
#endif
|
||||
#ifndef ASSERT_EXCLUSIVE_WRITER
|
||||
#define ASSERT_EXCLUSIVE_WRITER(var) do { } while (0)
|
||||
#endif
|
||||
#ifndef ASSERT_EXCLUSIVE_ACCESS
|
||||
#define ASSERT_EXCLUSIVE_ACCESS(var) do { } while (0)
|
||||
#endif
|
||||
|
||||
/* Data structures. */
|
||||
|
||||
/*
|
||||
@ -75,9 +88,6 @@
|
||||
*/
|
||||
#define RCU_DYNTICK_CTRL_MASK 0x1
|
||||
#define RCU_DYNTICK_CTRL_CTR (RCU_DYNTICK_CTRL_MASK + 1)
|
||||
#ifndef rcu_eqs_special_exit
|
||||
#define rcu_eqs_special_exit() do { } while (0)
|
||||
#endif
|
||||
|
||||
static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, rcu_data) = {
|
||||
.dynticks_nesting = 1,
|
||||
@ -100,7 +110,7 @@ static struct rcu_state rcu_state = {
|
||||
static bool dump_tree;
|
||||
module_param(dump_tree, bool, 0444);
|
||||
/* By default, use RCU_SOFTIRQ instead of rcuc kthreads. */
|
||||
static bool use_softirq = 1;
|
||||
static bool use_softirq = true;
|
||||
module_param(use_softirq, bool, 0444);
|
||||
/* Control rcu_node-tree auto-balancing at boot time. */
|
||||
static bool rcu_fanout_exact;
|
||||
@ -225,9 +235,11 @@ void rcu_softirq_qs(void)
|
||||
|
||||
/*
|
||||
* Record entry into an extended quiescent state. This is only to be
|
||||
* called when not already in an extended quiescent state.
|
||||
* called when not already in an extended quiescent state, that is,
|
||||
* RCU is watching prior to the call to this function and is no longer
|
||||
* watching upon return.
|
||||
*/
|
||||
static void rcu_dynticks_eqs_enter(void)
|
||||
static noinstr void rcu_dynticks_eqs_enter(void)
|
||||
{
|
||||
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
|
||||
int seq;
|
||||
@ -237,8 +249,9 @@ static void rcu_dynticks_eqs_enter(void)
|
||||
* critical sections, and we also must force ordering with the
|
||||
* next idle sojourn.
|
||||
*/
|
||||
rcu_dynticks_task_trace_enter(); // Before ->dynticks update!
|
||||
seq = atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks);
|
||||
/* Better be in an extended quiescent state! */
|
||||
// RCU is no longer watching. Better be in extended quiescent state!
|
||||
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
|
||||
(seq & RCU_DYNTICK_CTRL_CTR));
|
||||
/* Better not have special action (TLB flush) pending! */
|
||||
@ -248,9 +261,10 @@ static void rcu_dynticks_eqs_enter(void)
|
||||
|
||||
/*
|
||||
* Record exit from an extended quiescent state. This is only to be
|
||||
* called from an extended quiescent state.
|
||||
* called from an extended quiescent state, that is, RCU is not watching
|
||||
* prior to the call to this function and is watching upon return.
|
||||
*/
|
||||
static void rcu_dynticks_eqs_exit(void)
|
||||
static noinstr void rcu_dynticks_eqs_exit(void)
|
||||
{
|
||||
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
|
||||
int seq;
|
||||
@ -261,13 +275,13 @@ static void rcu_dynticks_eqs_exit(void)
|
||||
* critical section.
|
||||
*/
|
||||
seq = atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks);
|
||||
// RCU is now watching. Better not be in an extended quiescent state!
|
||||
rcu_dynticks_task_trace_exit(); // After ->dynticks update!
|
||||
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
|
||||
!(seq & RCU_DYNTICK_CTRL_CTR));
|
||||
if (seq & RCU_DYNTICK_CTRL_MASK) {
|
||||
atomic_andnot(RCU_DYNTICK_CTRL_MASK, &rdp->dynticks);
|
||||
smp_mb__after_atomic(); /* _exit after clearing mask. */
|
||||
/* Prefer duplicate flushes to losing a flush. */
|
||||
rcu_eqs_special_exit();
|
||||
}
|
||||
}
|
||||
|
||||
@ -295,7 +309,7 @@ static void rcu_dynticks_eqs_online(void)
|
||||
*
|
||||
* No ordering, as we are sampling CPU-local information.
|
||||
*/
|
||||
static bool rcu_dynticks_curr_cpu_in_eqs(void)
|
||||
static __always_inline bool rcu_dynticks_curr_cpu_in_eqs(void)
|
||||
{
|
||||
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
|
||||
|
||||
@ -332,6 +346,28 @@ static bool rcu_dynticks_in_eqs_since(struct rcu_data *rdp, int snap)
|
||||
return snap != rcu_dynticks_snap(rdp);
|
||||
}
|
||||
|
||||
/*
|
||||
* Return true if the referenced integer is zero while the specified
|
||||
* CPU remains within a single extended quiescent state.
|
||||
*/
|
||||
bool rcu_dynticks_zero_in_eqs(int cpu, int *vp)
|
||||
{
|
||||
struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
|
||||
int snap;
|
||||
|
||||
// If not quiescent, force back to earlier extended quiescent state.
|
||||
snap = atomic_read(&rdp->dynticks) & ~(RCU_DYNTICK_CTRL_MASK |
|
||||
RCU_DYNTICK_CTRL_CTR);
|
||||
|
||||
smp_rmb(); // Order ->dynticks and *vp reads.
|
||||
if (READ_ONCE(*vp))
|
||||
return false; // Non-zero, so report failure;
|
||||
smp_rmb(); // Order *vp read and ->dynticks re-read.
|
||||
|
||||
// If still in the same extended quiescent state, we are good!
|
||||
return snap == (atomic_read(&rdp->dynticks) & ~RCU_DYNTICK_CTRL_MASK);
|
||||
}
|
||||
|
||||
/*
|
||||
* Set the special (bottom) bit of the specified CPU so that it
|
||||
* will take special action (such as flushing its TLB) on the
|
||||
@ -382,16 +418,23 @@ void rcu_momentary_dyntick_idle(void)
|
||||
EXPORT_SYMBOL_GPL(rcu_momentary_dyntick_idle);
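
A few lines up, the new rcu_dynticks_zero_in_eqs() helper snapshots the ->dynticks counter, checks the target value, and then re-reads the counter to confirm the CPU stayed within a single extended quiescent state. The same snapshot/recheck idiom can be sketched with C11 atomics (a simplified illustration with made-up names, not the kernel's data structures):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_uint state_counter; /* bumped whenever the "state" changes */
static atomic_int watched_value;

/* Return true only if watched_value was zero while state_counter did not
 * move, i.e. the observation happened inside one stable window. */
static bool zero_in_stable_window(void)
{
	unsigned int snap = atomic_load(&state_counter);

	if (atomic_load(&watched_value) != 0)
		return false;                  /* non-zero, report failure */
	/* Re-read the counter: if it changed, the window closed. */
	return snap == atomic_load(&state_counter);
}

int main(void)
{
	printf("zero in window: %d\n", zero_in_stable_window());
	return 0;
}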
|
||||
|
||||
/**
|
||||
* rcu_is_cpu_rrupt_from_idle - see if interrupted from idle
|
||||
* rcu_is_cpu_rrupt_from_idle - see if 'interrupted' from idle
|
||||
*
|
||||
* If the current CPU is idle and running at a first-level (not nested)
|
||||
* interrupt from idle, return true. The caller must have at least
|
||||
* disabled preemption.
|
||||
* interrupt, or directly, from idle, return true.
|
||||
*
|
||||
* The caller must have at least disabled IRQs.
|
||||
*/
|
||||
static int rcu_is_cpu_rrupt_from_idle(void)
|
||||
{
|
||||
/* Called only from within the scheduling-clock interrupt */
|
||||
lockdep_assert_in_irq();
|
||||
long nesting;
|
||||
|
||||
/*
|
||||
* Usually called from the tick; but also used from smp_function_call()
|
||||
* for expedited grace periods. This latter can result in running from
|
||||
* the idle task, instead of an actual IPI.
|
||||
*/
|
||||
lockdep_assert_irqs_disabled();
|
||||
|
||||
/* Check for counter underflows */
|
||||
RCU_LOCKDEP_WARN(__this_cpu_read(rcu_data.dynticks_nesting) < 0,
|
||||
@ -400,9 +443,15 @@ static int rcu_is_cpu_rrupt_from_idle(void)
|
||||
"RCU dynticks_nmi_nesting counter underflow/zero!");
|
||||
|
||||
/* Are we at first interrupt nesting level? */
|
||||
if (__this_cpu_read(rcu_data.dynticks_nmi_nesting) != 1)
|
||||
nesting = __this_cpu_read(rcu_data.dynticks_nmi_nesting);
|
||||
if (nesting > 1)
|
||||
return false;
|
||||
|
||||
/*
|
||||
* If we're not in an interrupt, we must be in the idle task!
|
||||
*/
|
||||
WARN_ON_ONCE(!nesting && !is_idle_task(current));
|
||||
|
||||
/* Does CPU appear to be idle from an RCU standpoint? */
|
||||
return __this_cpu_read(rcu_data.dynticks_nesting) == 0;
|
||||
}
|
||||
@ -562,7 +611,7 @@ EXPORT_SYMBOL_GPL(rcutorture_get_gp_data);
|
||||
* the possibility of usermode upcalls having messed up our count
|
||||
* of interrupt nesting level during the prior busy period.
|
||||
*/
|
||||
static void rcu_eqs_enter(bool user)
|
||||
static noinstr void rcu_eqs_enter(bool user)
|
||||
{
|
||||
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
|
||||
|
||||
@ -571,19 +620,24 @@ static void rcu_eqs_enter(bool user)
|
||||
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
|
||||
rdp->dynticks_nesting == 0);
|
||||
if (rdp->dynticks_nesting != 1) {
|
||||
// RCU will still be watching, so just do accounting and leave.
|
||||
rdp->dynticks_nesting--;
|
||||
return;
|
||||
}
|
||||
|
||||
lockdep_assert_irqs_disabled();
|
||||
instrumentation_begin();
|
||||
trace_rcu_dyntick(TPS("Start"), rdp->dynticks_nesting, 0, atomic_read(&rdp->dynticks));
|
||||
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
|
||||
rdp = this_cpu_ptr(&rcu_data);
|
||||
do_nocb_deferred_wakeup(rdp);
|
||||
rcu_prepare_for_idle();
|
||||
rcu_preempt_deferred_qs(current);
|
||||
instrumentation_end();
|
||||
WRITE_ONCE(rdp->dynticks_nesting, 0); /* Avoid irq-access tearing. */
|
||||
// RCU is watching here ...
|
||||
rcu_dynticks_eqs_enter();
|
||||
// ... but is no longer watching here.
|
||||
rcu_dynticks_task_enter();
|
||||
}
|
||||
|
||||
@ -616,23 +670,25 @@ void rcu_idle_enter(void)
|
||||
* If you add or remove a call to rcu_user_enter(), be sure to test with
|
||||
* CONFIG_RCU_EQS_DEBUG=y.
|
||||
*/
|
||||
void rcu_user_enter(void)
|
||||
noinstr void rcu_user_enter(void)
|
||||
{
|
||||
lockdep_assert_irqs_disabled();
|
||||
rcu_eqs_enter(true);
|
||||
}
|
||||
#endif /* CONFIG_NO_HZ_FULL */
|
||||
|
||||
/*
|
||||
/**
|
||||
* rcu_nmi_exit - inform RCU of exit from NMI context
|
||||
*
|
||||
* If we are returning from the outermost NMI handler that interrupted an
|
||||
* RCU-idle period, update rdp->dynticks and rdp->dynticks_nmi_nesting
|
||||
* to let the RCU grace-period handling know that the CPU is back to
|
||||
* being RCU-idle.
|
||||
*
|
||||
* If you add or remove a call to rcu_nmi_exit_common(), be sure to test
|
||||
* If you add or remove a call to rcu_nmi_exit(), be sure to test
|
||||
* with CONFIG_RCU_EQS_DEBUG=y.
|
||||
*/
|
||||
static __always_inline void rcu_nmi_exit_common(bool irq)
|
||||
noinstr void rcu_nmi_exit(void)
|
||||
{
|
||||
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
|
||||
|
||||
@ -649,37 +705,32 @@ static __always_inline void rcu_nmi_exit_common(bool irq)
|
||||
* leave it in non-RCU-idle state.
|
||||
*/
|
||||
if (rdp->dynticks_nmi_nesting != 1) {
|
||||
instrumentation_begin();
|
||||
trace_rcu_dyntick(TPS("--="), rdp->dynticks_nmi_nesting, rdp->dynticks_nmi_nesting - 2,
|
||||
atomic_read(&rdp->dynticks));
|
||||
WRITE_ONCE(rdp->dynticks_nmi_nesting, /* No store tearing. */
|
||||
rdp->dynticks_nmi_nesting - 2);
|
||||
instrumentation_end();
|
||||
return;
|
||||
}
|
||||
|
||||
instrumentation_begin();
|
||||
/* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */
|
||||
trace_rcu_dyntick(TPS("Startirq"), rdp->dynticks_nmi_nesting, 0, atomic_read(&rdp->dynticks));
|
||||
WRITE_ONCE(rdp->dynticks_nmi_nesting, 0); /* Avoid store tearing. */
|
||||
|
||||
if (irq)
|
||||
if (!in_nmi())
|
||||
rcu_prepare_for_idle();
|
||||
instrumentation_end();
|
||||
|
||||
// RCU is watching here ...
|
||||
rcu_dynticks_eqs_enter();
|
||||
// ... but is no longer watching here.
|
||||
|
||||
if (irq)
|
||||
if (!in_nmi())
|
||||
rcu_dynticks_task_enter();
|
||||
}
|
||||
|
||||
/**
|
||||
* rcu_nmi_exit - inform RCU of exit from NMI context
|
||||
*
|
||||
* If you add or remove a call to rcu_nmi_exit(), be sure to test
|
||||
* with CONFIG_RCU_EQS_DEBUG=y.
|
||||
*/
|
||||
void rcu_nmi_exit(void)
|
||||
{
|
||||
rcu_nmi_exit_common(false);
|
||||
}
|
||||
|
||||
/**
|
||||
* rcu_irq_exit - inform RCU that current CPU is exiting irq towards idle
|
||||
*
|
||||
@ -699,12 +750,52 @@ void rcu_nmi_exit(void)
|
||||
* If you add or remove a call to rcu_irq_exit(), be sure to test with
|
||||
* CONFIG_RCU_EQS_DEBUG=y.
|
||||
*/
|
||||
void rcu_irq_exit(void)
|
||||
void noinstr rcu_irq_exit(void)
|
||||
{
|
||||
lockdep_assert_irqs_disabled();
|
||||
rcu_nmi_exit_common(true);
|
||||
rcu_nmi_exit();
|
||||
}
|
||||
|
||||
/**
|
||||
* rcu_irq_exit_preempt - Inform RCU that current CPU is exiting irq
|
||||
* towards in kernel preemption
|
||||
*
|
||||
* Same as rcu_irq_exit() but has a sanity check that scheduling is safe
|
||||
* from RCU point of view. Invoked from return from interrupt before kernel
|
||||
* preemption.
|
||||
*/
|
||||
void rcu_irq_exit_preempt(void)
|
||||
{
|
||||
lockdep_assert_irqs_disabled();
|
||||
rcu_nmi_exit();
|
||||
|
||||
RCU_LOCKDEP_WARN(__this_cpu_read(rcu_data.dynticks_nesting) <= 0,
|
||||
"RCU dynticks_nesting counter underflow/zero!");
|
||||
RCU_LOCKDEP_WARN(__this_cpu_read(rcu_data.dynticks_nmi_nesting) !=
|
||||
DYNTICK_IRQ_NONIDLE,
|
||||
"Bad RCU dynticks_nmi_nesting counter\n");
|
||||
RCU_LOCKDEP_WARN(rcu_dynticks_curr_cpu_in_eqs(),
|
||||
"RCU in extended quiescent state!");
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PROVE_RCU
|
||||
/**
|
||||
* rcu_irq_exit_check_preempt - Validate that scheduling is possible
|
||||
*/
|
||||
void rcu_irq_exit_check_preempt(void)
|
||||
{
|
||||
lockdep_assert_irqs_disabled();
|
||||
|
||||
RCU_LOCKDEP_WARN(__this_cpu_read(rcu_data.dynticks_nesting) <= 0,
|
||||
"RCU dynticks_nesting counter underflow/zero!");
|
||||
RCU_LOCKDEP_WARN(__this_cpu_read(rcu_data.dynticks_nmi_nesting) !=
|
||||
DYNTICK_IRQ_NONIDLE,
|
||||
"Bad RCU dynticks_nmi_nesting counter\n");
|
||||
RCU_LOCKDEP_WARN(rcu_dynticks_curr_cpu_in_eqs(),
|
||||
"RCU in extended quiescent state!");
|
||||
}
|
||||
#endif /* #ifdef CONFIG_PROVE_RCU */
|
||||
|
||||
/*
|
||||
* Wrapper for rcu_irq_exit() where interrupts are enabled.
|
||||
*
|
||||
@ -728,7 +819,7 @@ void rcu_irq_exit_irqson(void)
|
||||
* allow for the possibility of usermode upcalls messing up our count of
|
||||
* interrupt nesting level during the busy period that is just now starting.
|
||||
*/
|
||||
static void rcu_eqs_exit(bool user)
|
||||
static void noinstr rcu_eqs_exit(bool user)
|
||||
{
|
||||
struct rcu_data *rdp;
|
||||
long oldval;
|
||||
@ -738,17 +829,22 @@ static void rcu_eqs_exit(bool user)
|
||||
oldval = rdp->dynticks_nesting;
|
||||
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && oldval < 0);
|
||||
if (oldval) {
|
||||
// RCU was already watching, so just do accounting and leave.
|
||||
rdp->dynticks_nesting++;
|
||||
return;
|
||||
}
|
||||
rcu_dynticks_task_exit();
|
||||
// RCU is not watching here ...
|
||||
rcu_dynticks_eqs_exit();
|
||||
// ... but is watching here.
|
||||
instrumentation_begin();
|
||||
rcu_cleanup_after_idle();
|
||||
trace_rcu_dyntick(TPS("End"), rdp->dynticks_nesting, 1, atomic_read(&rdp->dynticks));
|
||||
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
|
||||
WRITE_ONCE(rdp->dynticks_nesting, 1);
|
||||
WARN_ON_ONCE(rdp->dynticks_nmi_nesting);
|
||||
WRITE_ONCE(rdp->dynticks_nmi_nesting, DYNTICK_IRQ_NONIDLE);
|
||||
instrumentation_end();
|
||||
}
|
||||
|
||||
/**
|
||||
@ -779,14 +875,75 @@ void rcu_idle_exit(void)
|
||||
* If you add or remove a call to rcu_user_exit(), be sure to test with
|
||||
* CONFIG_RCU_EQS_DEBUG=y.
|
||||
*/
|
||||
void rcu_user_exit(void)
|
||||
void noinstr rcu_user_exit(void)
|
||||
{
|
||||
rcu_eqs_exit(1);
|
||||
}
|
||||
|
||||
/**
|
||||
* __rcu_irq_enter_check_tick - Enable scheduler tick on CPU if RCU needs it.
|
||||
*
|
||||
* The scheduler tick is not normally enabled when CPUs enter the kernel
|
||||
* from nohz_full userspace execution. After all, nohz_full userspace
|
||||
* execution is an RCU quiescent state and the time executing in the kernel
|
||||
* is quite short. Except of course when it isn't. And it is not hard to
|
||||
* cause a large system to spend tens of seconds or even minutes looping
|
||||
* in the kernel, which can cause a number of problems, include RCU CPU
|
||||
* stall warnings.
|
||||
*
|
||||
* Therefore, if a nohz_full CPU fails to report a quiescent state
|
||||
* in a timely manner, the RCU grace-period kthread sets that CPU's
|
||||
* ->rcu_urgent_qs flag with the expectation that the next interrupt or
|
||||
* exception will invoke this function, which will turn on the scheduler
|
||||
* tick, which will enable RCU to detect that CPU's quiescent states,
|
||||
* for example, due to cond_resched() calls in CONFIG_PREEMPT=n kernels.
|
||||
* The tick will be disabled once a quiescent state is reported for
|
||||
* this CPU.
|
||||
*
|
||||
* Of course, in carefully tuned systems, there might never be an
|
||||
* interrupt or exception. In that case, the RCU grace-period kthread
|
||||
* will eventually cause one to happen. However, in less carefully
|
||||
* controlled environments, this function allows RCU to get what it
|
||||
* needs without creating otherwise useless interruptions.
|
||||
*/
|
||||
void __rcu_irq_enter_check_tick(void)
|
||||
{
|
||||
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
|
||||
|
||||
// Enabling the tick is unsafe in NMI handlers.
|
||||
if (WARN_ON_ONCE(in_nmi()))
|
||||
return;
|
||||
|
||||
RCU_LOCKDEP_WARN(rcu_dynticks_curr_cpu_in_eqs(),
|
||||
"Illegal rcu_irq_enter_check_tick() from extended quiescent state");
|
||||
|
||||
if (!tick_nohz_full_cpu(rdp->cpu) ||
|
||||
!READ_ONCE(rdp->rcu_urgent_qs) ||
|
||||
READ_ONCE(rdp->rcu_forced_tick)) {
|
||||
// RCU doesn't need nohz_full help from this CPU, or it is
|
||||
// already getting that help.
|
||||
return;
|
||||
}
|
||||
|
||||
// We get here only when not in an extended quiescent state and
|
||||
// from interrupts (as opposed to NMIs). Therefore, (1) RCU is
|
||||
// already watching and (2) The fact that we are in an interrupt
|
||||
// handler and that the rcu_node lock is an irq-disabled lock
|
||||
// prevents self-deadlock. So we can safely recheck under the lock.
|
||||
// Note that the nohz_full state currently cannot change.
|
||||
raw_spin_lock_rcu_node(rdp->mynode);
|
||||
if (rdp->rcu_urgent_qs && !rdp->rcu_forced_tick) {
|
||||
// A nohz_full CPU is in the kernel and RCU needs a
|
||||
// quiescent state. Turn on the tick!
|
||||
WRITE_ONCE(rdp->rcu_forced_tick, true);
|
||||
tick_dep_set_cpu(rdp->cpu, TICK_DEP_BIT_RCU);
|
||||
}
|
||||
raw_spin_unlock_rcu_node(rdp->mynode);
|
||||
}
|
||||
#endif /* CONFIG_NO_HZ_FULL */
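
__rcu_irq_enter_check_tick() above first tests ->rcu_urgent_qs and ->rcu_forced_tick locklessly and only then takes the rcu_node lock and rechecks before forcing the tick on. That "cheap lockless filter, then recheck under the lock" shape can be sketched generically with pthreads (a simplified illustration, not the kernel code):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t node_lock = PTHREAD_MUTEX_INITIALIZER;
static bool urgent;        /* request flag set by another thread */
static bool already_acted; /* set once the expensive action is done */

/* Perform the expensive action at most once, and only when requested. */
static void maybe_act(void)
{
	/* Cheap, lockless filter for the common case. */
	if (!urgent || already_acted)
		return;

	pthread_mutex_lock(&node_lock);
	/* Recheck under the lock: another thread may have acted already. */
	if (urgent && !already_acted) {
		already_acted = true;
		printf("acting on the request\n");
	}
	pthread_mutex_unlock(&node_lock);
}

int main(void)
{
	urgent = true;
	maybe_act();
	maybe_act(); /* second call is a no-op */
	return 0;
}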
|
||||
|
||||
/**
|
||||
* rcu_nmi_enter_common - inform RCU of entry to NMI context
|
||||
* rcu_nmi_enter - inform RCU of entry to NMI context
|
||||
* @irq: Is this call from rcu_irq_enter?
|
||||
*
|
||||
* If the CPU was idle from RCU's viewpoint, update rdp->dynticks and
|
||||
@ -795,10 +952,10 @@ void rcu_user_exit(void)
|
||||
* long as the nesting level does not overflow an int. (You will probably
|
||||
* run out of stack space first.)
|
||||
*
|
||||
* If you add or remove a call to rcu_nmi_enter_common(), be sure to test
|
||||
* If you add or remove a call to rcu_nmi_enter(), be sure to test
|
||||
* with CONFIG_RCU_EQS_DEBUG=y.
|
||||
*/
|
||||
static __always_inline void rcu_nmi_enter_common(bool irq)
|
||||
noinstr void rcu_nmi_enter(void)
|
||||
{
|
||||
long incby = 2;
|
||||
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
|
||||
@ -816,44 +973,32 @@ static __always_inline void rcu_nmi_enter_common(bool irq)
|
||||
*/
|
||||
if (rcu_dynticks_curr_cpu_in_eqs()) {
|
||||
|
||||
if (irq)
|
||||
if (!in_nmi())
|
||||
rcu_dynticks_task_exit();
|
||||
|
||||
// RCU is not watching here ...
|
||||
rcu_dynticks_eqs_exit();
|
||||
// ... but is watching here.
|
||||
|
||||
if (irq)
|
||||
if (!in_nmi())
|
||||
rcu_cleanup_after_idle();
|
||||
|
||||
incby = 1;
|
||||
} else if (irq && tick_nohz_full_cpu(rdp->cpu) &&
|
||||
rdp->dynticks_nmi_nesting == DYNTICK_IRQ_NONIDLE &&
|
||||
READ_ONCE(rdp->rcu_urgent_qs) &&
|
||||
!READ_ONCE(rdp->rcu_forced_tick)) {
|
||||
raw_spin_lock_rcu_node(rdp->mynode);
|
||||
// Recheck under lock.
|
||||
if (rdp->rcu_urgent_qs && !rdp->rcu_forced_tick) {
|
||||
WRITE_ONCE(rdp->rcu_forced_tick, true);
|
||||
tick_dep_set_cpu(rdp->cpu, TICK_DEP_BIT_RCU);
|
||||
}
|
||||
raw_spin_unlock_rcu_node(rdp->mynode);
|
||||
} else if (!in_nmi()) {
|
||||
instrumentation_begin();
|
||||
rcu_irq_enter_check_tick();
|
||||
instrumentation_end();
|
||||
}
|
||||
instrumentation_begin();
|
||||
trace_rcu_dyntick(incby == 1 ? TPS("Endirq") : TPS("++="),
|
||||
rdp->dynticks_nmi_nesting,
|
||||
rdp->dynticks_nmi_nesting + incby, atomic_read(&rdp->dynticks));
|
||||
instrumentation_end();
|
||||
WRITE_ONCE(rdp->dynticks_nmi_nesting, /* Prevent store tearing. */
|
||||
rdp->dynticks_nmi_nesting + incby);
|
||||
barrier();
|
||||
}
|
||||
|
||||
/**
|
||||
* rcu_nmi_enter - inform RCU of entry to NMI context
|
||||
*/
|
||||
void rcu_nmi_enter(void)
|
||||
{
|
||||
rcu_nmi_enter_common(false);
|
||||
}
|
||||
NOKPROBE_SYMBOL(rcu_nmi_enter);
|
||||
|
||||
/**
|
||||
* rcu_irq_enter - inform RCU that current CPU is entering irq away from idle
|
||||
*
|
||||
@ -876,10 +1021,10 @@ NOKPROBE_SYMBOL(rcu_nmi_enter);
|
||||
* If you add or remove a call to rcu_irq_enter(), be sure to test with
|
||||
* CONFIG_RCU_EQS_DEBUG=y.
|
||||
*/
|
||||
void rcu_irq_enter(void)
|
||||
noinstr void rcu_irq_enter(void)
|
||||
{
|
||||
lockdep_assert_irqs_disabled();
|
||||
rcu_nmi_enter_common(true);
|
||||
rcu_nmi_enter();
|
||||
}
|
||||
|
||||
/*
|
||||
@ -913,6 +1058,11 @@ static void rcu_disable_urgency_upon_qs(struct rcu_data *rdp)
|
||||
}
|
||||
}
|
||||
|
||||
noinstr bool __rcu_is_watching(void)
|
||||
{
|
||||
return !rcu_dynticks_curr_cpu_in_eqs();
|
||||
}
|
||||
|
||||
/**
|
||||
* rcu_is_watching - see if RCU thinks that the current CPU is not idle
|
||||
*
|
||||
@ -921,7 +1071,7 @@ static void rcu_disable_urgency_upon_qs(struct rcu_data *rdp)
|
||||
* if the current CPU is not in its idle loop or is in an interrupt or
|
||||
* NMI handler, return true.
|
||||
*/
|
||||
bool notrace rcu_is_watching(void)
|
||||
bool rcu_is_watching(void)
|
||||
{
|
||||
bool ret;
|
||||
|
||||
@ -973,12 +1123,12 @@ bool rcu_lockdep_current_cpu_online(void)
|
||||
|
||||
if (in_nmi() || !rcu_scheduler_fully_active)
|
||||
return true;
|
||||
preempt_disable();
|
||||
preempt_disable_notrace();
|
||||
rdp = this_cpu_ptr(&rcu_data);
|
||||
rnp = rdp->mynode;
|
||||
if (rdp->grpmask & rcu_rnp_online_cpus(rnp))
|
||||
ret = true;
|
||||
preempt_enable();
|
||||
preempt_enable_notrace();
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online);
|
||||
@ -1217,7 +1367,7 @@ static bool rcu_start_this_gp(struct rcu_node *rnp_start, struct rcu_data *rdp,
|
||||
trace_rcu_this_gp(rnp, rdp, gp_seq_req, TPS("NoGPkthread"));
|
||||
goto unlock_out;
|
||||
}
|
||||
trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, TPS("newreq"));
|
||||
trace_rcu_grace_period(rcu_state.name, data_race(rcu_state.gp_seq), TPS("newreq"));
|
||||
ret = true; /* Caller must wake GP kthread. */
|
||||
unlock_out:
|
||||
/* Push furthest requested GP to leaf node and rcu_data structure. */
|
||||
@ -1473,6 +1623,31 @@ static void rcu_gp_slow(int delay)
|
||||
schedule_timeout_uninterruptible(delay);
|
||||
}
|
||||
|
||||
static unsigned long sleep_duration;
|
||||
|
||||
/* Allow rcutorture to stall the grace-period kthread. */
|
||||
void rcu_gp_set_torture_wait(int duration)
|
||||
{
|
||||
if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST) && duration > 0)
|
||||
WRITE_ONCE(sleep_duration, duration);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rcu_gp_set_torture_wait);
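
rcu_gp_set_torture_wait() publishes a delay that rcu_gp_torture_wait() later consumes with xchg(), so each request is honored at most once. A minimal C11 sketch of that consume-exactly-once handoff (illustrative names, not the kernel API):

#include <stdatomic.h>
#include <stdio.h>

static atomic_ulong requested_delay; /* 0 means "no request pending" */

/* Producer side: publish a request. */
static void set_request(unsigned long duration)
{
	if (duration > 0)
		atomic_store(&requested_delay, duration);
}

/* Consumer side: take the request and clear it in one atomic step, so a
 * single request is acted on exactly once. */
static void consume_request(void)
{
	unsigned long duration = atomic_exchange(&requested_delay, 0UL);

	if (duration > 0)
		printf("would wait %lu ticks\n", duration);
}

int main(void)
{
	set_request(25);
	consume_request(); /* prints once */
	consume_request(); /* nothing pending, does nothing */
	return 0;
}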
|
||||
|
||||
/* Actually implement the aforementioned wait. */
|
||||
static void rcu_gp_torture_wait(void)
|
||||
{
|
||||
unsigned long duration;
|
||||
|
||||
if (!IS_ENABLED(CONFIG_RCU_TORTURE_TEST))
|
||||
return;
|
||||
duration = xchg(&sleep_duration, 0UL);
|
||||
if (duration > 0) {
|
||||
pr_alert("%s: Waiting %lu jiffies\n", __func__, duration);
|
||||
schedule_timeout_uninterruptible(duration);
|
||||
pr_alert("%s: Wait complete\n", __func__);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Initialize a new grace period. Return false if no grace period required.
|
||||
*/
|
||||
@ -1506,6 +1681,7 @@ static bool rcu_gp_init(void)
|
||||
record_gp_stall_check_time();
|
||||
/* Record GP times before starting GP, hence rcu_seq_start(). */
|
||||
rcu_seq_start(&rcu_state.gp_seq);
|
||||
ASSERT_EXCLUSIVE_WRITER(rcu_state.gp_seq);
|
||||
trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, TPS("start"));
|
||||
raw_spin_unlock_irq_rcu_node(rnp);
|
||||
|
||||
@ -1611,12 +1787,16 @@ static bool rcu_gp_fqs_check_wake(int *gfp)
|
||||
{
|
||||
struct rcu_node *rnp = rcu_get_root();
|
||||
|
||||
/* Someone like call_rcu() requested a force-quiescent-state scan. */
|
||||
// If under overload conditions, force an immediate FQS scan.
|
||||
if (*gfp & RCU_GP_FLAG_OVLD)
|
||||
return true;
|
||||
|
||||
// Someone like call_rcu() requested a force-quiescent-state scan.
|
||||
*gfp = READ_ONCE(rcu_state.gp_flags);
|
||||
if (*gfp & RCU_GP_FLAG_FQS)
|
||||
return true;
|
||||
|
||||
/* The current grace period has completed. */
|
||||
// The current grace period has completed.
|
||||
if (!READ_ONCE(rnp->qsmask) && !rcu_preempt_blocked_readers_cgp(rnp))
|
||||
return true;
|
||||
|
||||
@ -1654,13 +1834,15 @@ static void rcu_gp_fqs(bool first_time)
|
||||
static void rcu_gp_fqs_loop(void)
|
||||
{
|
||||
bool first_gp_fqs;
|
||||
int gf;
|
||||
int gf = 0;
|
||||
unsigned long j;
|
||||
int ret;
|
||||
struct rcu_node *rnp = rcu_get_root();
|
||||
|
||||
first_gp_fqs = true;
|
||||
j = READ_ONCE(jiffies_till_first_fqs);
|
||||
if (rcu_state.cbovld)
|
||||
gf = RCU_GP_FLAG_OVLD;
|
||||
ret = 0;
|
||||
for (;;) {
|
||||
if (!ret) {
|
||||
@ -1673,6 +1855,7 @@ static void rcu_gp_fqs_loop(void)
|
||||
rcu_state.gp_state = RCU_GP_WAIT_FQS;
|
||||
ret = swait_event_idle_timeout_exclusive(
|
||||
rcu_state.gp_wq, rcu_gp_fqs_check_wake(&gf), j);
|
||||
rcu_gp_torture_wait();
|
||||
rcu_state.gp_state = RCU_GP_DOING_FQS;
|
||||
/* Locking provides needed memory barriers. */
|
||||
/* If grace period done, leave loop. */
|
||||
@ -1680,12 +1863,16 @@ static void rcu_gp_fqs_loop(void)
|
||||
!rcu_preempt_blocked_readers_cgp(rnp))
|
||||
break;
|
||||
/* If time for quiescent-state forcing, do it. */
|
||||
if (ULONG_CMP_GE(jiffies, rcu_state.jiffies_force_qs) ||
|
||||
if (!time_after(rcu_state.jiffies_force_qs, jiffies) ||
|
||||
(gf & RCU_GP_FLAG_FQS)) {
|
||||
trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq,
|
||||
TPS("fqsstart"));
|
||||
rcu_gp_fqs(first_gp_fqs);
|
||||
first_gp_fqs = false;
|
||||
gf = 0;
|
||||
if (first_gp_fqs) {
|
||||
first_gp_fqs = false;
|
||||
gf = rcu_state.cbovld ? RCU_GP_FLAG_OVLD : 0;
|
||||
}
|
||||
trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq,
|
||||
TPS("fqsend"));
|
||||
cond_resched_tasks_rcu_qs();
|
||||
@ -1705,6 +1892,7 @@ static void rcu_gp_fqs_loop(void)
|
||||
j = 1;
|
||||
else
|
||||
j = rcu_state.jiffies_force_qs - j;
|
||||
gf = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1781,6 +1969,7 @@ static void rcu_gp_cleanup(void)
|
||||
/* Declare grace period done, trace first to use old GP number. */
|
||||
trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, TPS("end"));
|
||||
rcu_seq_end(&rcu_state.gp_seq);
|
||||
ASSERT_EXCLUSIVE_WRITER(rcu_state.gp_seq);
|
||||
rcu_state.gp_state = RCU_GP_IDLE;
|
||||
/* Check for GP requests since above loop. */
|
||||
rdp = this_cpu_ptr(&rcu_data);
|
||||
@ -1821,6 +2010,7 @@ static int __noreturn rcu_gp_kthread(void *unused)
|
||||
swait_event_idle_exclusive(rcu_state.gp_wq,
|
||||
READ_ONCE(rcu_state.gp_flags) &
|
||||
RCU_GP_FLAG_INIT);
|
||||
rcu_gp_torture_wait();
|
||||
rcu_state.gp_state = RCU_GP_DONE_GPS;
|
||||
/* Locking provides needed memory barrier. */
|
||||
if (rcu_gp_init())
|
||||
@ -2811,6 +3001,8 @@ struct kfree_rcu_cpu {
|
||||
struct delayed_work monitor_work;
|
||||
bool monitor_todo;
|
||||
bool initialized;
|
||||
// Number of objects for which GP not started
|
||||
int count;
|
||||
};
|
||||
|
||||
static DEFINE_PER_CPU(struct kfree_rcu_cpu, krc);
|
||||
@ -2924,6 +3116,8 @@ static inline bool queue_kfree_rcu_work(struct kfree_rcu_cpu *krcp)
|
||||
krcp->head = NULL;
|
||||
}
|
||||
|
||||
WRITE_ONCE(krcp->count, 0);
|
||||
|
||||
/*
|
||||
* One work is per one batch, so there are two "free channels",
|
||||
* "bhead_free" and "head_free" the batch can handle. It can be
|
||||
@ -3060,6 +3254,8 @@ void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
|
||||
krcp->head = head;
|
||||
}
|
||||
|
||||
WRITE_ONCE(krcp->count, krcp->count + 1);
|
||||
|
||||
// Set timer to drain after KFREE_DRAIN_JIFFIES.
|
||||
if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING &&
|
||||
!krcp->monitor_todo) {
|
||||
@ -3074,6 +3270,56 @@ void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kfree_call_rcu);
|
||||
|
||||
static unsigned long
|
||||
kfree_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
|
||||
{
|
||||
int cpu;
|
||||
unsigned long count = 0;
|
||||
|
||||
/* Snapshot count of all CPUs */
|
||||
for_each_online_cpu(cpu) {
|
||||
struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
|
||||
|
||||
count += READ_ONCE(krcp->count);
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
static unsigned long
|
||||
kfree_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
|
||||
{
|
||||
int cpu, freed = 0;
|
||||
unsigned long flags;
|
||||
|
||||
for_each_online_cpu(cpu) {
|
||||
int count;
|
||||
struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
|
||||
|
||||
count = krcp->count;
|
||||
spin_lock_irqsave(&krcp->lock, flags);
|
||||
if (krcp->monitor_todo)
|
||||
kfree_rcu_drain_unlock(krcp, flags);
|
||||
else
|
||||
spin_unlock_irqrestore(&krcp->lock, flags);
|
||||
|
||||
sc->nr_to_scan -= count;
|
||||
freed += count;
|
||||
|
||||
if (sc->nr_to_scan <= 0)
|
||||
break;
|
||||
}
|
||||
|
||||
return freed;
|
||||
}
|
||||
|
||||
static struct shrinker kfree_rcu_shrinker = {
|
||||
.count_objects = kfree_rcu_shrink_count,
|
||||
.scan_objects = kfree_rcu_shrink_scan,
|
||||
.batch = 0,
|
||||
.seeks = DEFAULT_SEEKS,
|
||||
};
|
||||
|
||||
void __init kfree_rcu_scheduler_running(void)
|
||||
{
|
||||
int cpu;
|
||||
@ -3599,6 +3845,7 @@ void rcu_cpu_starting(unsigned int cpu)
|
||||
nbits = bitmap_weight(&oldmask, BITS_PER_LONG);
|
||||
/* Allow lockless access for expedited grace periods. */
|
||||
smp_store_release(&rcu_state.ncpus, rcu_state.ncpus + nbits); /* ^^^ */
|
||||
ASSERT_EXCLUSIVE_WRITER(rcu_state.ncpus);
|
||||
rcu_gpnum_ovf(rnp, rdp); /* Offline-induced counter wrap? */
|
||||
rdp->rcu_onl_gp_seq = READ_ONCE(rcu_state.gp_seq);
|
||||
rdp->rcu_onl_gp_flags = READ_ONCE(rcu_state.gp_flags);
|
||||
@ -3994,6 +4241,8 @@ static void __init kfree_rcu_batch_init(void)
|
||||
INIT_DELAYED_WORK(&krcp->monitor_work, kfree_rcu_monitor);
|
||||
krcp->initialized = true;
|
||||
}
|
||||
if (register_shrinker(&kfree_rcu_shrinker))
|
||||
pr_err("Failed to register kfree_rcu() shrinker!\n");
|
||||
}
|
||||
|
||||
void __init rcu_init(void)
|
||||
|
@ -359,6 +359,7 @@ struct rcu_state {
|
||||
/* Values for rcu_state structure's gp_flags field. */
|
||||
#define RCU_GP_FLAG_INIT 0x1 /* Need grace-period initialization. */
|
||||
#define RCU_GP_FLAG_FQS 0x2 /* Need grace-period quiescent-state forcing. */
|
||||
#define RCU_GP_FLAG_OVLD 0x4 /* Experiencing callback overload. */
|
||||
|
||||
/* Values for rcu_state structure's gp_state field. */
|
||||
#define RCU_GP_IDLE 0 /* Initial state and no GP in progress. */
|
||||
@ -454,6 +455,8 @@ static void rcu_bind_gp_kthread(void);
|
||||
static bool rcu_nohz_full_cpu(void);
|
||||
static void rcu_dynticks_task_enter(void);
|
||||
static void rcu_dynticks_task_exit(void);
|
||||
static void rcu_dynticks_task_trace_enter(void);
|
||||
static void rcu_dynticks_task_trace_exit(void);
|
||||
|
||||
/* Forward declarations for tree_stall.h */
|
||||
static void record_gp_stall_check_time(void);
|
||||
|
@ -150,7 +150,7 @@ static void __maybe_unused sync_exp_reset_tree(void)
|
||||
static bool sync_rcu_exp_done(struct rcu_node *rnp)
|
||||
{
|
||||
raw_lockdep_assert_held_rcu_node(rnp);
|
||||
return rnp->exp_tasks == NULL &&
|
||||
return READ_ONCE(rnp->exp_tasks) == NULL &&
|
||||
READ_ONCE(rnp->expmask) == 0;
|
||||
}
|
||||
|
||||
@ -373,7 +373,7 @@ static void sync_rcu_exp_select_node_cpus(struct work_struct *wp)
|
||||
* until such time as the ->expmask bits are cleared.
|
||||
*/
|
||||
if (rcu_preempt_has_tasks(rnp))
|
||||
rnp->exp_tasks = rnp->blkd_tasks.next;
|
||||
WRITE_ONCE(rnp->exp_tasks, rnp->blkd_tasks.next);
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
|
||||
/* IPI the remaining CPUs for expedited quiescent state. */
|
||||
@ -542,8 +542,8 @@ static void synchronize_rcu_expedited_wait(void)
|
||||
}
|
||||
pr_cont(" } %lu jiffies s: %lu root: %#lx/%c\n",
|
||||
jiffies - jiffies_start, rcu_state.expedited_sequence,
|
||||
READ_ONCE(rnp_root->expmask),
|
||||
".T"[!!rnp_root->exp_tasks]);
|
||||
data_race(rnp_root->expmask),
|
||||
".T"[!!data_race(rnp_root->exp_tasks)]);
|
||||
if (ndetected) {
|
||||
pr_err("blocking rcu_node structures:");
|
||||
rcu_for_each_node_breadth_first(rnp) {
|
||||
@ -553,8 +553,8 @@ static void synchronize_rcu_expedited_wait(void)
|
||||
continue;
|
||||
pr_cont(" l=%u:%d-%d:%#lx/%c",
|
||||
rnp->level, rnp->grplo, rnp->grphi,
|
||||
READ_ONCE(rnp->expmask),
|
||||
".T"[!!rnp->exp_tasks]);
|
||||
data_race(rnp->expmask),
|
||||
".T"[!!data_race(rnp->exp_tasks)]);
|
||||
}
|
||||
pr_cont("\n");
|
||||
}
|
||||
@ -639,6 +639,7 @@ static void wait_rcu_exp_gp(struct work_struct *wp)
|
||||
*/
|
||||
static void rcu_exp_handler(void *unused)
|
||||
{
|
||||
int depth = rcu_preempt_depth();
|
||||
unsigned long flags;
|
||||
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
|
||||
struct rcu_node *rnp = rdp->mynode;
|
||||
@ -649,7 +650,7 @@ static void rcu_exp_handler(void *unused)
|
||||
* critical section. If also enabled or idle, immediately
|
||||
* report the quiescent state, otherwise defer.
|
||||
*/
|
||||
if (!rcu_preempt_depth()) {
|
||||
if (!depth) {
|
||||
if (!(preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK)) ||
|
||||
rcu_dynticks_curr_cpu_in_eqs()) {
|
||||
rcu_report_exp_rdp(rdp);
|
||||
@ -673,7 +674,7 @@ static void rcu_exp_handler(void *unused)
|
||||
* can have caused this quiescent state to already have been
|
||||
* reported, so we really do need to check ->expmask.
|
||||
*/
|
||||
if (rcu_preempt_depth() > 0) {
|
||||
if (depth > 0) {
|
||||
raw_spin_lock_irqsave_rcu_node(rnp, flags);
|
||||
if (rnp->expmask & rdp->grpmask) {
|
||||
rdp->exp_deferred_qs = true;
|
||||
@ -683,30 +684,8 @@ static void rcu_exp_handler(void *unused)
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* The final and least likely case is where the interrupted
|
||||
* code was just about to or just finished exiting the RCU-preempt
|
||||
* read-side critical section, and no, we can't tell which.
|
||||
* So either way, set ->deferred_qs to flag later code that
|
||||
* a quiescent state is required.
|
||||
*
|
||||
* If the CPU is fully enabled (or if some buggy RCU-preempt
|
||||
* read-side critical section is being used from idle), just
|
||||
* invoke rcu_preempt_deferred_qs() to immediately report the
|
||||
* quiescent state. We cannot use rcu_read_unlock_special()
|
||||
* because we are in an interrupt handler, which will cause that
|
||||
* function to take an early exit without doing anything.
|
||||
*
|
||||
* Otherwise, force a context switch after the CPU enables everything.
|
||||
*/
|
||||
rdp->exp_deferred_qs = true;
|
||||
if (!(preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK)) ||
|
||||
WARN_ON_ONCE(rcu_dynticks_curr_cpu_in_eqs())) {
|
||||
rcu_preempt_deferred_qs(t);
|
||||
} else {
|
||||
set_tsk_need_resched(t);
|
||||
set_preempt_need_resched();
|
||||
}
|
||||
// Finally, negative nesting depth should not happen.
|
||||
WARN_ON_ONCE(1);
|
||||
}
|
||||
|
||||
/* PREEMPTION=y, so no PREEMPTION=n expedited grace period to clean up after. */
|
||||
@ -721,17 +700,20 @@ static void sync_sched_exp_online_cleanup(int cpu)
|
||||
*/
|
||||
static int rcu_print_task_exp_stall(struct rcu_node *rnp)
|
||||
{
|
||||
struct task_struct *t;
|
||||
unsigned long flags;
|
||||
int ndetected = 0;
|
||||
struct task_struct *t;
|
||||
|
||||
if (!rnp->exp_tasks)
|
||||
if (!READ_ONCE(rnp->exp_tasks))
|
||||
return 0;
|
||||
raw_spin_lock_irqsave_rcu_node(rnp, flags);
|
||||
t = list_entry(rnp->exp_tasks->prev,
|
||||
struct task_struct, rcu_node_entry);
|
||||
list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
|
||||
pr_cont(" P%d", t->pid);
|
||||
ndetected++;
|
||||
}
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
return ndetected;
|
||||
}
|
||||
|
||||
|
@ -226,7 +226,7 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
|
||||
WARN_ON_ONCE(rnp->completedqs == rnp->gp_seq);
|
||||
}
|
||||
if (!rnp->exp_tasks && (blkd_state & RCU_EXP_BLKD))
|
||||
rnp->exp_tasks = &t->rcu_node_entry;
|
||||
WRITE_ONCE(rnp->exp_tasks, &t->rcu_node_entry);
|
||||
WARN_ON_ONCE(!(blkd_state & RCU_GP_BLKD) !=
|
||||
!(rnp->qsmask & rdp->grpmask));
|
||||
WARN_ON_ONCE(!(blkd_state & RCU_EXP_BLKD) !=
|
||||
@ -331,6 +331,7 @@ void rcu_note_context_switch(bool preempt)
|
||||
rcu_qs();
|
||||
if (rdp->exp_deferred_qs)
|
||||
rcu_report_exp_rdp(rdp);
|
||||
rcu_tasks_qs(current, preempt);
|
||||
trace_rcu_utilization(TPS("End context switch"));
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rcu_note_context_switch);
|
||||
@ -345,9 +346,7 @@ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
|
||||
return READ_ONCE(rnp->gp_tasks) != NULL;
|
||||
}
|
||||
|
||||
/* Bias and limit values for ->rcu_read_lock_nesting. */
|
||||
#define RCU_NEST_BIAS INT_MAX
|
||||
#define RCU_NEST_NMAX (-INT_MAX / 2)
|
||||
/* limit value for ->rcu_read_lock_nesting. */
|
||||
#define RCU_NEST_PMAX (INT_MAX / 2)
|
||||
|
||||
static void rcu_preempt_read_enter(void)
|
||||
@ -355,9 +354,9 @@ static void rcu_preempt_read_enter(void)
|
||||
current->rcu_read_lock_nesting++;
|
||||
}
|
||||
|
||||
static void rcu_preempt_read_exit(void)
|
||||
static int rcu_preempt_read_exit(void)
|
||||
{
|
||||
current->rcu_read_lock_nesting--;
|
||||
return --current->rcu_read_lock_nesting;
|
||||
}
|
||||
|
||||
static void rcu_preempt_depth_set(int val)
|
||||
@ -390,21 +389,15 @@ void __rcu_read_unlock(void)
|
||||
{
|
||||
struct task_struct *t = current;
|
||||
|
||||
if (rcu_preempt_depth() != 1) {
|
||||
rcu_preempt_read_exit();
|
||||
} else {
|
||||
if (rcu_preempt_read_exit() == 0) {
|
||||
barrier(); /* critical section before exit code. */
|
||||
rcu_preempt_depth_set(-RCU_NEST_BIAS);
|
||||
barrier(); /* assign before ->rcu_read_unlock_special load */
|
||||
if (unlikely(READ_ONCE(t->rcu_read_unlock_special.s)))
|
||||
rcu_read_unlock_special(t);
|
||||
barrier(); /* ->rcu_read_unlock_special load before assign */
|
||||
rcu_preempt_depth_set(0);
|
||||
}
|
||||
if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
|
||||
int rrln = rcu_preempt_depth();
|
||||
|
||||
WARN_ON_ONCE(rrln < 0 && rrln > RCU_NEST_NMAX);
|
||||
WARN_ON_ONCE(rrln < 0 || rrln > RCU_NEST_PMAX);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__rcu_read_unlock);
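
In the reworked __rcu_read_unlock() above, rcu_preempt_read_exit() now returns the decremented nesting count, so only the outermost unlock takes the special-case path and the old negative-nesting bias disappears. A toy sketch of that shape (plain C, hypothetical names, not the RCU implementation):

#include <stdio.h>

static int read_nesting; /* per-"task" reader nesting depth */

static void toy_read_lock(void)
{
	read_nesting++;
}

/* Return the depth after the decrement, mirroring how the reworked exit
 * path reports when the outermost reader leaves its critical section. */
static int toy_read_exit(void)
{
	return --read_nesting;
}

static void toy_read_unlock(void)
{
	if (toy_read_exit() == 0)
		printf("outermost unlock: run deferred work here\n");
}

int main(void)
{
	toy_read_lock();
	toy_read_lock();
	toy_read_unlock(); /* inner: nothing special */
	toy_read_unlock(); /* outermost: special path */
	return 0;
}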
|
||||
@ -500,12 +493,12 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
|
||||
if (&t->rcu_node_entry == rnp->gp_tasks)
|
||||
WRITE_ONCE(rnp->gp_tasks, np);
|
||||
if (&t->rcu_node_entry == rnp->exp_tasks)
|
||||
rnp->exp_tasks = np;
|
||||
WRITE_ONCE(rnp->exp_tasks, np);
|
||||
if (IS_ENABLED(CONFIG_RCU_BOOST)) {
|
||||
/* Snapshot ->boost_mtx ownership w/rnp->lock held. */
|
||||
drop_boost_mutex = rt_mutex_owner(&rnp->boost_mtx) == t;
|
||||
if (&t->rcu_node_entry == rnp->boost_tasks)
|
||||
rnp->boost_tasks = np;
|
||||
WRITE_ONCE(rnp->boost_tasks, np);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -556,7 +549,7 @@ static bool rcu_preempt_need_deferred_qs(struct task_struct *t)
|
||||
{
|
||||
return (__this_cpu_read(rcu_data.exp_deferred_qs) ||
|
||||
READ_ONCE(t->rcu_read_unlock_special.s)) &&
|
||||
rcu_preempt_depth() <= 0;
|
||||
rcu_preempt_depth() == 0;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -569,16 +562,11 @@ static bool rcu_preempt_need_deferred_qs(struct task_struct *t)
|
||||
static void rcu_preempt_deferred_qs(struct task_struct *t)
|
||||
{
|
||||
unsigned long flags;
|
||||
bool couldrecurse = rcu_preempt_depth() >= 0;
|
||||
|
||||
if (!rcu_preempt_need_deferred_qs(t))
|
||||
return;
|
||||
if (couldrecurse)
|
||||
rcu_preempt_depth_set(rcu_preempt_depth() - RCU_NEST_BIAS);
|
||||
local_irq_save(flags);
|
||||
rcu_preempt_deferred_qs_irqrestore(t, flags);
|
||||
if (couldrecurse)
|
||||
rcu_preempt_depth_set(rcu_preempt_depth() + RCU_NEST_BIAS);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -615,19 +603,18 @@ static void rcu_read_unlock_special(struct task_struct *t)
|
||||
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
|
||||
struct rcu_node *rnp = rdp->mynode;
|
||||
|
||||
exp = (t->rcu_blocked_node && t->rcu_blocked_node->exp_tasks) ||
|
||||
(rdp->grpmask & READ_ONCE(rnp->expmask)) ||
|
||||
tick_nohz_full_cpu(rdp->cpu);
|
||||
exp = (t->rcu_blocked_node &&
|
||||
READ_ONCE(t->rcu_blocked_node->exp_tasks)) ||
|
||||
(rdp->grpmask & READ_ONCE(rnp->expmask));
|
||||
// Need to defer quiescent state until everything is enabled.
|
||||
if (irqs_were_disabled && use_softirq &&
|
||||
(in_interrupt() ||
|
||||
(exp && !t->rcu_read_unlock_special.b.deferred_qs))) {
|
||||
// Using softirq, safe to awaken, and we get
|
||||
// no help from enabling irqs, unlike bh/preempt.
|
||||
if (use_softirq && (in_irq() || (exp && !irqs_were_disabled))) {
|
||||
// Using softirq, safe to awaken, and either the
|
||||
// wakeup is free or there is an expedited GP.
|
||||
raise_softirq_irqoff(RCU_SOFTIRQ);
|
||||
} else {
|
||||
// Enabling BH or preempt does reschedule, so...
|
||||
// Also if no expediting or NO_HZ_FULL, slow is OK.
|
||||
// Also if no expediting, slow is OK.
|
||||
// Plus nohz_full CPUs eventually get tick enabled.
|
||||
set_tsk_need_resched(current);
|
||||
set_preempt_need_resched();
|
||||
if (IS_ENABLED(CONFIG_IRQ_WORK) && irqs_were_disabled &&
|
||||
@ -640,7 +627,6 @@ static void rcu_read_unlock_special(struct task_struct *t)
|
||||
irq_work_queue_on(&rdp->defer_qs_iw, rdp->cpu);
|
||||
}
|
||||
}
|
||||
t->rcu_read_unlock_special.b.deferred_qs = true;
|
||||
local_irq_restore(flags);
|
||||
return;
|
||||
}
|
||||
@ -699,7 +685,7 @@ static void rcu_flavor_sched_clock_irq(int user)
|
||||
} else if (rcu_preempt_need_deferred_qs(t)) {
|
||||
rcu_preempt_deferred_qs(t); /* Report deferred QS. */
|
||||
return;
|
||||
} else if (!rcu_preempt_depth()) {
|
||||
} else if (!WARN_ON_ONCE(rcu_preempt_depth())) {
|
||||
rcu_qs(); /* Report immediate QS. */
|
||||
return;
|
||||
}
|
||||
@ -760,8 +746,8 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
|
||||
pr_info("%s: %d:%d ->qsmask %#lx ->qsmaskinit %#lx ->qsmaskinitnext %#lx\n",
|
||||
__func__, rnp1->grplo, rnp1->grphi, rnp1->qsmask, rnp1->qsmaskinit, rnp1->qsmaskinitnext);
|
||||
pr_info("%s: ->gp_tasks %p ->boost_tasks %p ->exp_tasks %p\n",
|
||||
__func__, READ_ONCE(rnp->gp_tasks), rnp->boost_tasks,
|
||||
rnp->exp_tasks);
|
||||
__func__, READ_ONCE(rnp->gp_tasks), data_race(rnp->boost_tasks),
|
||||
READ_ONCE(rnp->exp_tasks));
|
||||
pr_info("%s: ->blkd_tasks", __func__);
|
||||
i = 0;
|
||||
list_for_each(lhp, &rnp->blkd_tasks) {
|
||||
@ -854,8 +840,7 @@ void rcu_note_context_switch(bool preempt)
|
||||
this_cpu_write(rcu_data.rcu_urgent_qs, false);
|
||||
if (unlikely(raw_cpu_read(rcu_data.rcu_need_heavy_qs)))
|
||||
rcu_momentary_dyntick_idle();
|
||||
if (!preempt)
|
||||
rcu_tasks_qs(current);
|
||||
rcu_tasks_qs(current, preempt);
|
||||
out:
|
||||
trace_rcu_utilization(TPS("End context switch"));
|
||||
}
|
||||
@ -1036,7 +1021,8 @@ static int rcu_boost_kthread(void *arg)
|
||||
for (;;) {
|
||||
WRITE_ONCE(rnp->boost_kthread_status, RCU_KTHREAD_WAITING);
|
||||
trace_rcu_utilization(TPS("End boost kthread@rcu_wait"));
|
||||
rcu_wait(rnp->boost_tasks || rnp->exp_tasks);
|
||||
rcu_wait(READ_ONCE(rnp->boost_tasks) ||
|
||||
READ_ONCE(rnp->exp_tasks));
|
||||
trace_rcu_utilization(TPS("Start boost kthread@rcu_wait"));
|
||||
WRITE_ONCE(rnp->boost_kthread_status, RCU_KTHREAD_RUNNING);
|
||||
more2boost = rcu_boost(rnp);
|
||||
@ -1079,9 +1065,9 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
|
||||
(rnp->gp_tasks != NULL &&
|
||||
rnp->boost_tasks == NULL &&
|
||||
rnp->qsmask == 0 &&
|
||||
(ULONG_CMP_GE(jiffies, rnp->boost_time) || rcu_state.cbovld))) {
|
||||
(!time_after(rnp->boost_time, jiffies) || rcu_state.cbovld))) {
|
||||
if (rnp->exp_tasks == NULL)
|
||||
rnp->boost_tasks = rnp->gp_tasks;
|
||||
WRITE_ONCE(rnp->boost_tasks, rnp->gp_tasks);
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
rcu_wake_cond(rnp->boost_kthread_task,
|
||||
READ_ONCE(rnp->boost_kthread_status));
|
||||
@ -2536,7 +2522,7 @@ static bool rcu_nohz_full_cpu(void)
|
||||
#ifdef CONFIG_NO_HZ_FULL
|
||||
if (tick_nohz_full_cpu(smp_processor_id()) &&
|
||||
(!rcu_gp_in_progress() ||
|
||||
ULONG_CMP_LT(jiffies, READ_ONCE(rcu_state.gp_start) + HZ)))
|
||||
time_before(jiffies, READ_ONCE(rcu_state.gp_start) + HZ)))
|
||||
return true;
|
||||
#endif /* #ifdef CONFIG_NO_HZ_FULL */
|
||||
return false;
|
||||
@ -2553,7 +2539,7 @@ static void rcu_bind_gp_kthread(void)
|
||||
}
|
||||
|
||||
/* Record the current task on dyntick-idle entry. */
|
||||
static void rcu_dynticks_task_enter(void)
|
||||
static void noinstr rcu_dynticks_task_enter(void)
|
||||
{
|
||||
#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
|
||||
WRITE_ONCE(current->rcu_tasks_idle_cpu, smp_processor_id());
|
||||
@ -2561,9 +2547,27 @@ static void rcu_dynticks_task_enter(void)
|
||||
}
|
||||
|
||||
/* Record no current task on dyntick-idle exit. */
|
||||
static void rcu_dynticks_task_exit(void)
|
||||
static void noinstr rcu_dynticks_task_exit(void)
|
||||
{
|
||||
#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
|
||||
WRITE_ONCE(current->rcu_tasks_idle_cpu, -1);
|
||||
#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
|
||||
}
|
||||
|
||||
/* Turn on heavyweight RCU tasks trace readers on idle/user entry. */
|
||||
static void rcu_dynticks_task_trace_enter(void)
|
||||
{
|
||||
#ifdef CONFIG_TASKS_RCU_TRACE
|
||||
if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB))
|
||||
current->trc_reader_special.b.need_mb = true;
|
||||
#endif /* #ifdef CONFIG_TASKS_RCU_TRACE */
|
||||
}
|
||||
|
||||
/* Turn off heavyweight RCU tasks trace readers on idle/user exit. */
|
||||
static void rcu_dynticks_task_trace_exit(void)
|
||||
{
|
||||
#ifdef CONFIG_TASKS_RCU_TRACE
|
||||
if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB))
|
||||
current->trc_reader_special.b.need_mb = false;
|
||||
#endif /* #ifdef CONFIG_TASKS_RCU_TRACE */
|
||||
}
|
||||
|
@ -15,10 +15,12 @@
|
||||
int sysctl_panic_on_rcu_stall __read_mostly;
|
||||
|
||||
#ifdef CONFIG_PROVE_RCU
|
||||
#define RCU_STALL_DELAY_DELTA (5 * HZ)
|
||||
#define RCU_STALL_DELAY_DELTA (5 * HZ)
|
||||
#else
|
||||
#define RCU_STALL_DELAY_DELTA 0
|
||||
#define RCU_STALL_DELAY_DELTA 0
|
||||
#endif
|
||||
#define RCU_STALL_MIGHT_DIV 8
|
||||
#define RCU_STALL_MIGHT_MIN (2 * HZ)
|
||||
|
||||
/* Limit-check stall timeouts specified at boottime and runtime. */
|
||||
int rcu_jiffies_till_stall_check(void)
|
||||
@ -40,6 +42,36 @@ int rcu_jiffies_till_stall_check(void)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rcu_jiffies_till_stall_check);
|
||||
|
||||
/**
|
||||
* rcu_gp_might_be_stalled - Is it likely that the grace period is stalled?
|
||||
*
|
||||
* Returns @true if the current grace period is sufficiently old that
|
||||
* it is reasonable to assume that it might be stalled. This can be
|
||||
* useful when deciding whether to allocate memory to enable RCU-mediated
|
||||
* freeing on the one hand or just invoking synchronize_rcu() on the other.
|
||||
* The latter is preferable when the grace period is stalled.
|
||||
*
|
||||
* Note that sampling of the .gp_start and .gp_seq fields must be done
|
||||
* carefully to avoid false positives at the beginnings and ends of
|
||||
* grace periods.
|
||||
*/
|
||||
bool rcu_gp_might_be_stalled(void)
|
||||
{
|
||||
unsigned long d = rcu_jiffies_till_stall_check() / RCU_STALL_MIGHT_DIV;
|
||||
unsigned long j = jiffies;
|
||||
|
||||
if (d < RCU_STALL_MIGHT_MIN)
|
||||
d = RCU_STALL_MIGHT_MIN;
|
||||
smp_mb(); // jiffies before .gp_seq to avoid false positives.
|
||||
if (!rcu_gp_in_progress())
|
||||
return false;
|
||||
// Long delays at this point avoids false positive, but a delay
|
||||
// of ULONG_MAX/4 jiffies voids your no-false-positive warranty.
|
||||
smp_mb(); // .gp_seq before second .gp_start
|
||||
// And ditto here.
|
||||
return !time_before(j, READ_ONCE(rcu_state.gp_start) + d);
|
||||
}
|
||||
|
||||
/* Don't do RCU CPU stall warnings during long sysrq printouts. */
|
||||
void rcu_sysrq_start(void)
|
||||
{
|
||||
@ -104,8 +136,8 @@ static void record_gp_stall_check_time(void)
|
||||
|
||||
WRITE_ONCE(rcu_state.gp_start, j);
|
||||
j1 = rcu_jiffies_till_stall_check();
|
||||
/* Record ->gp_start before ->jiffies_stall. */
|
||||
smp_store_release(&rcu_state.jiffies_stall, j + j1); /* ^^^ */
|
||||
smp_mb(); // ->gp_start before ->jiffies_stall and caller's ->gp_seq.
|
||||
WRITE_ONCE(rcu_state.jiffies_stall, j + j1);
|
||||
rcu_state.jiffies_resched = j + j1 / 2;
|
||||
rcu_state.n_force_qs_gpstart = READ_ONCE(rcu_state.n_force_qs);
|
||||
}
|
||||
@ -192,14 +224,40 @@ static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
}
|
||||
|
||||
// Communicate task state back to the RCU CPU stall warning request.
|
||||
struct rcu_stall_chk_rdr {
|
||||
int nesting;
|
||||
union rcu_special rs;
|
||||
bool on_blkd_list;
|
||||
};
|
||||
|
||||
/*
|
||||
* Report out the state of a not-running task that is stalling the
|
||||
* current RCU grace period.
|
||||
*/
|
||||
static bool check_slow_task(struct task_struct *t, void *arg)
|
||||
{
|
||||
struct rcu_node *rnp;
|
||||
struct rcu_stall_chk_rdr *rscrp = arg;
|
||||
|
||||
if (task_curr(t))
|
||||
return false; // It is running, so decline to inspect it.
|
||||
rscrp->nesting = t->rcu_read_lock_nesting;
|
||||
rscrp->rs = t->rcu_read_unlock_special;
|
||||
rnp = t->rcu_blocked_node;
|
||||
rscrp->on_blkd_list = !list_empty(&t->rcu_node_entry);
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Scan the current list of tasks blocked within RCU read-side critical
|
||||
* sections, printing out the tid of each.
|
||||
*/
|
||||
static int rcu_print_task_stall(struct rcu_node *rnp)
|
||||
{
|
||||
struct task_struct *t;
|
||||
int ndetected = 0;
|
||||
struct rcu_stall_chk_rdr rscr;
|
||||
struct task_struct *t;
|
||||
|
||||
if (!rcu_preempt_blocked_readers_cgp(rnp))
|
||||
return 0;
|
||||
@ -208,7 +266,15 @@ static int rcu_print_task_stall(struct rcu_node *rnp)
|
||||
t = list_entry(rnp->gp_tasks->prev,
|
||||
struct task_struct, rcu_node_entry);
|
||||
list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
|
||||
pr_cont(" P%d", t->pid);
|
||||
if (!try_invoke_on_locked_down_task(t, check_slow_task, &rscr))
|
||||
pr_cont(" P%d", t->pid);
|
||||
else
|
||||
pr_cont(" P%d/%d:%c%c%c%c",
|
||||
t->pid, rscr.nesting,
|
||||
".b"[rscr.rs.b.blocked],
|
||||
".q"[rscr.rs.b.need_qs],
|
||||
".e"[rscr.rs.b.exp_hint],
|
||||
".l"[rscr.on_blkd_list]);
|
||||
ndetected++;
|
||||
}
|
||||
pr_cont("\n");
|
||||
@ -299,6 +365,16 @@ static const char *gp_state_getname(short gs)
|
||||
return gp_state_names[gs];
|
||||
}
|
||||
|
||||
/* Is the RCU grace-period kthread being starved of CPU time? */
|
||||
static bool rcu_is_gp_kthread_starving(unsigned long *jp)
|
||||
{
|
||||
unsigned long j = jiffies - READ_ONCE(rcu_state.gp_activity);
|
||||
|
||||
if (jp)
|
||||
*jp = j;
|
||||
return j > 2 * HZ;
|
||||
}
|
||||
|
||||
/*
|
||||
* Print out diagnostic information for the specified stalled CPU.
|
||||
*
|
||||
@ -313,6 +389,7 @@ static const char *gp_state_getname(short gs)
|
||||
static void print_cpu_stall_info(int cpu)
|
||||
{
|
||||
unsigned long delta;
|
||||
bool falsepositive;
|
||||
char fast_no_hz[72];
|
||||
struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
|
||||
char *ticks_title;
|
||||
@ -333,7 +410,9 @@ static void print_cpu_stall_info(int cpu)
|
||||
}
|
||||
print_cpu_stall_fast_no_hz(fast_no_hz, cpu);
|
||||
delta = rcu_seq_ctr(rdp->mynode->gp_seq - rdp->rcu_iw_gp_seq);
|
||||
pr_err("\t%d-%c%c%c%c: (%lu %s) idle=%03x/%ld/%#lx softirq=%u/%u fqs=%ld %s\n",
|
||||
falsepositive = rcu_is_gp_kthread_starving(NULL) &&
|
||||
rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp));
|
||||
pr_err("\t%d-%c%c%c%c: (%lu %s) idle=%03x/%ld/%#lx softirq=%u/%u fqs=%ld %s%s\n",
|
||||
cpu,
|
||||
"O."[!!cpu_online(cpu)],
|
||||
"o."[!!(rdp->grpmask & rdp->mynode->qsmaskinit)],
|
||||
@ -345,8 +424,9 @@ static void print_cpu_stall_info(int cpu)
|
||||
rcu_dynticks_snap(rdp) & 0xfff,
|
||||
rdp->dynticks_nesting, rdp->dynticks_nmi_nesting,
|
||||
rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu),
|
||||
READ_ONCE(rcu_state.n_force_qs) - rcu_state.n_force_qs_gpstart,
|
||||
fast_no_hz);
|
||||
data_race(rcu_state.n_force_qs) - rcu_state.n_force_qs_gpstart,
|
||||
fast_no_hz,
|
||||
falsepositive ? " (false positive?)" : "");
|
||||
}
|
||||
|
||||
/* Complain about starvation of grace-period kthread. */
|
||||
@ -355,15 +435,15 @@ static void rcu_check_gp_kthread_starvation(void)
|
||||
struct task_struct *gpk = rcu_state.gp_kthread;
|
||||
unsigned long j;
|
||||
|
||||
j = jiffies - READ_ONCE(rcu_state.gp_activity);
|
||||
if (j > 2 * HZ) {
|
||||
if (rcu_is_gp_kthread_starving(&j)) {
|
||||
pr_err("%s kthread starved for %ld jiffies! g%ld f%#x %s(%d) ->state=%#lx ->cpu=%d\n",
|
||||
rcu_state.name, j,
|
||||
(long)rcu_seq_current(&rcu_state.gp_seq),
|
||||
READ_ONCE(rcu_state.gp_flags),
|
||||
data_race(rcu_state.gp_flags),
|
||||
gp_state_getname(rcu_state.gp_state), rcu_state.gp_state,
|
||||
gpk ? gpk->state : ~0, gpk ? task_cpu(gpk) : -1);
|
||||
if (gpk) {
|
||||
pr_err("\tUnless %s kthread gets sufficient CPU time, OOM is now expected behavior.\n", rcu_state.name);
|
||||
pr_err("RCU grace-period kthread stack dump:\n");
|
||||
sched_show_task(gpk);
|
||||
wake_up_process(gpk);
|
||||
@ -371,7 +451,7 @@ static void rcu_check_gp_kthread_starvation(void)
|
||||
}
|
||||
}
|
||||
|
||||
static void print_other_cpu_stall(unsigned long gp_seq)
|
||||
static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps)
|
||||
{
|
||||
int cpu;
|
||||
unsigned long flags;
|
||||
@ -408,7 +488,7 @@ static void print_other_cpu_stall(unsigned long gp_seq)
|
||||
for_each_possible_cpu(cpu)
|
||||
totqlen += rcu_get_n_cbs_cpu(cpu);
|
||||
pr_cont("\t(detected by %d, t=%ld jiffies, g=%ld, q=%lu)\n",
|
||||
smp_processor_id(), (long)(jiffies - rcu_state.gp_start),
|
||||
smp_processor_id(), (long)(jiffies - gps),
|
||||
(long)rcu_seq_current(&rcu_state.gp_seq), totqlen);
|
||||
if (ndetected) {
|
||||
rcu_dump_cpu_stacks();
|
||||
@ -421,13 +501,11 @@ static void print_other_cpu_stall(unsigned long gp_seq)
|
||||
pr_err("INFO: Stall ended before state dump start\n");
|
||||
} else {
|
||||
j = jiffies;
|
||||
gpa = READ_ONCE(rcu_state.gp_activity);
|
||||
gpa = data_race(rcu_state.gp_activity);
|
||||
pr_err("All QSes seen, last %s kthread activity %ld (%ld-%ld), jiffies_till_next_fqs=%ld, root ->qsmask %#lx\n",
|
||||
rcu_state.name, j - gpa, j, gpa,
|
||||
READ_ONCE(jiffies_till_next_fqs),
|
||||
data_race(jiffies_till_next_fqs),
|
||||
rcu_get_root()->qsmask);
|
||||
/* In this case, the current CPU might be at fault. */
|
||||
sched_show_task(current);
|
||||
}
|
||||
}
|
||||
/* Rewrite if needed in case of slow consoles. */
|
||||
@ -442,7 +520,7 @@ static void print_other_cpu_stall(unsigned long gp_seq)
|
||||
rcu_force_quiescent_state(); /* Kick them all. */
|
||||
}
|
||||
|
||||
static void print_cpu_stall(void)
|
||||
static void print_cpu_stall(unsigned long gps)
|
||||
{
|
||||
int cpu;
|
||||
unsigned long flags;
|
||||
@ -467,7 +545,7 @@ static void print_cpu_stall(void)
|
||||
for_each_possible_cpu(cpu)
|
||||
totqlen += rcu_get_n_cbs_cpu(cpu);
|
||||
pr_cont("\t(t=%lu jiffies g=%ld q=%lu)\n",
|
||||
jiffies - rcu_state.gp_start,
|
||||
jiffies - gps,
|
||||
(long)rcu_seq_current(&rcu_state.gp_seq), totqlen);
|
||||
|
||||
rcu_check_gp_kthread_starvation();
|
||||
@ -546,7 +624,7 @@ static void check_cpu_stall(struct rcu_data *rdp)
|
||||
cmpxchg(&rcu_state.jiffies_stall, js, jn) == js) {
|
||||
|
||||
/* We haven't checked in, so go dump stack. */
|
||||
print_cpu_stall();
|
||||
print_cpu_stall(gps);
|
||||
if (rcu_cpu_stall_ftrace_dump)
|
||||
rcu_ftrace_dump(DUMP_ALL);
|
||||
|
||||
@ -555,7 +633,7 @@ static void check_cpu_stall(struct rcu_data *rdp)
|
||||
cmpxchg(&rcu_state.jiffies_stall, js, jn) == js) {
|
||||
|
||||
/* They had a few time units to dump stack, so complain. */
|
||||
print_other_cpu_stall(gs2);
|
||||
print_other_cpu_stall(gs2, gps);
|
||||
if (rcu_cpu_stall_ftrace_dump)
|
||||
rcu_ftrace_dump(DUMP_ALL);
|
||||
}
|
||||
@ -581,23 +659,23 @@ void show_rcu_gp_kthreads(void)
|
||||
struct task_struct *t = READ_ONCE(rcu_state.gp_kthread);
|
||||
|
||||
j = jiffies;
|
||||
ja = j - READ_ONCE(rcu_state.gp_activity);
|
||||
jr = j - READ_ONCE(rcu_state.gp_req_activity);
|
||||
jw = j - READ_ONCE(rcu_state.gp_wake_time);
|
||||
ja = j - data_race(rcu_state.gp_activity);
|
||||
jr = j - data_race(rcu_state.gp_req_activity);
|
||||
jw = j - data_race(rcu_state.gp_wake_time);
|
||||
pr_info("%s: wait state: %s(%d) ->state: %#lx delta ->gp_activity %lu ->gp_req_activity %lu ->gp_wake_time %lu ->gp_wake_seq %ld ->gp_seq %ld ->gp_seq_needed %ld ->gp_flags %#x\n",
|
||||
rcu_state.name, gp_state_getname(rcu_state.gp_state),
|
||||
rcu_state.gp_state, t ? t->state : 0x1ffffL,
|
||||
ja, jr, jw, (long)READ_ONCE(rcu_state.gp_wake_seq),
|
||||
(long)READ_ONCE(rcu_state.gp_seq),
|
||||
(long)READ_ONCE(rcu_get_root()->gp_seq_needed),
|
||||
READ_ONCE(rcu_state.gp_flags));
|
||||
ja, jr, jw, (long)data_race(rcu_state.gp_wake_seq),
|
||||
(long)data_race(rcu_state.gp_seq),
|
||||
(long)data_race(rcu_get_root()->gp_seq_needed),
|
||||
data_race(rcu_state.gp_flags));
|
||||
rcu_for_each_node_breadth_first(rnp) {
|
||||
if (ULONG_CMP_GE(READ_ONCE(rcu_state.gp_seq),
|
||||
READ_ONCE(rnp->gp_seq_needed)))
|
||||
continue;
|
||||
pr_info("\trcu_node %d:%d ->gp_seq %ld ->gp_seq_needed %ld\n",
|
||||
rnp->grplo, rnp->grphi, (long)READ_ONCE(rnp->gp_seq),
|
||||
(long)READ_ONCE(rnp->gp_seq_needed));
|
||||
rnp->grplo, rnp->grphi, (long)data_race(rnp->gp_seq),
|
||||
(long)data_race(rnp->gp_seq_needed));
|
||||
if (!rcu_is_leaf_node(rnp))
|
||||
continue;
|
||||
for_each_leaf_node_possible_cpu(rnp, cpu) {
|
||||
@ -607,7 +685,7 @@ void show_rcu_gp_kthreads(void)
|
||||
READ_ONCE(rdp->gp_seq_needed)))
|
||||
continue;
|
||||
pr_info("\tcpu %d ->gp_seq_needed %ld\n",
|
||||
cpu, (long)READ_ONCE(rdp->gp_seq_needed));
|
||||
cpu, (long)data_race(rdp->gp_seq_needed));
|
||||
}
|
||||
}
|
||||
for_each_possible_cpu(cpu) {
|
||||
@ -615,7 +693,7 @@ void show_rcu_gp_kthreads(void)
|
||||
if (rcu_segcblist_is_offloaded(&rdp->cblist))
|
||||
show_rcu_nocb_state(rdp);
|
||||
}
|
||||
/* sched_show_task(rcu_state.gp_kthread); */
|
||||
show_rcu_tasks_gp_kthreads();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(show_rcu_gp_kthreads);
|
||||
|
||||
|
@ -41,6 +41,7 @@
|
||||
#include <linux/sched/isolation.h>
|
||||
#include <linux/kprobes.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/irq_work.h>
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
|
||||
@ -51,6 +52,19 @@
|
||||
#endif
|
||||
#define MODULE_PARAM_PREFIX "rcupdate."
|
||||
|
||||
#ifndef data_race
#define data_race(expr) \
	({ \
		expr; \
	})
#endif
#ifndef ASSERT_EXCLUSIVE_WRITER
#define ASSERT_EXCLUSIVE_WRITER(var) do { } while (0)
#endif
#ifndef ASSERT_EXCLUSIVE_ACCESS
#define ASSERT_EXCLUSIVE_ACCESS(var) do { } while (0)
#endif
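(These appear to be fallback stubs so that data_race() and the ASSERT_EXCLUSIVE_*() annotations can be used unconditionally in the diagnostics elsewhere in this patch, even where the headers do not yet define them. data_race() marks a read or write whose race is intentional, so KCSAN does not report it. A minimal sketch of the intent; "shared_count" and report_progress() are invented for illustration:)

	static unsigned long shared_count;

	static void report_progress(void)
	{
		/* Diagnostic-only read: the race on shared_count is intentional. */
		unsigned long snap = data_race(shared_count);

		pr_info("progress so far: %lu\n", snap);
	}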
|
||||
#ifndef CONFIG_TINY_RCU
|
||||
module_param(rcu_expedited, int, 0);
|
||||
module_param(rcu_normal, int, 0);
|
||||
@ -63,12 +77,12 @@ module_param(rcu_normal_after_boot, int, 0);
|
||||
* rcu_read_lock_held_common() - might we be in RCU-sched read-side critical section?
|
||||
* @ret: Best guess answer if lockdep cannot be relied on
|
||||
*
|
||||
* Returns true if lockdep must be ignored, in which case *ret contains
|
||||
* Returns true if lockdep must be ignored, in which case ``*ret`` contains
|
||||
* the best guess described below. Otherwise returns false, in which
|
||||
* case *ret tells the caller nothing and the caller should instead
|
||||
* case ``*ret`` tells the caller nothing and the caller should instead
|
||||
* consult lockdep.
|
||||
*
|
||||
* If CONFIG_DEBUG_LOCK_ALLOC is selected, set *ret to nonzero iff in an
|
||||
* If CONFIG_DEBUG_LOCK_ALLOC is selected, set ``*ret`` to nonzero iff in an
|
||||
* RCU-sched read-side critical section. In absence of
|
||||
* CONFIG_DEBUG_LOCK_ALLOC, this assumes we are in an RCU-sched read-side
|
||||
* critical section unless it can prove otherwise. Note that disabling
|
||||
@ -82,7 +96,7 @@ module_param(rcu_normal_after_boot, int, 0);
|
||||
*
|
||||
* Note that if the CPU is in the idle loop from an RCU point of view (ie:
|
||||
* that we are in the section between rcu_idle_enter() and rcu_idle_exit())
|
||||
* then rcu_read_lock_held() sets *ret to false even if the CPU did an
|
||||
* then rcu_read_lock_held() sets ``*ret`` to false even if the CPU did an
|
||||
* rcu_read_lock(). The reason for this is that RCU ignores CPUs that are
|
||||
* in such a section, considering these as in extended quiescent state,
|
||||
* so such a CPU is effectively never in an RCU read-side critical section
|
||||
@ -98,15 +112,15 @@ module_param(rcu_normal_after_boot, int, 0);
|
||||
static bool rcu_read_lock_held_common(bool *ret)
|
||||
{
|
||||
if (!debug_lockdep_rcu_enabled()) {
|
||||
*ret = 1;
|
||||
*ret = true;
|
||||
return true;
|
||||
}
|
||||
if (!rcu_is_watching()) {
|
||||
*ret = 0;
|
||||
*ret = false;
|
||||
return true;
|
||||
}
|
||||
if (!rcu_lockdep_current_cpu_online()) {
|
||||
*ret = 0;
|
||||
*ret = false;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
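(For context, this helper is consumed by the various *_held() predicates in this file; a caller typically has roughly the following shape. Sketch only, showing how the "best guess" in *ret is folded in:)

	int rcu_read_lock_sched_held(void)
	{
		bool ret;

		if (rcu_read_lock_held_common(&ret))
			return ret;	/* lockdep unusable: trust the guess */
		return lock_is_held(&rcu_sched_lock_map) || !preemptible();
	}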
@ -270,13 +284,12 @@ struct lockdep_map rcu_callback_map =
|
||||
STATIC_LOCKDEP_MAP_INIT("rcu_callback", &rcu_callback_key);
|
||||
EXPORT_SYMBOL_GPL(rcu_callback_map);
|
||||
|
||||
int notrace debug_lockdep_rcu_enabled(void)
|
||||
noinstr int notrace debug_lockdep_rcu_enabled(void)
|
||||
{
|
||||
return rcu_scheduler_active != RCU_SCHEDULER_INACTIVE && debug_locks &&
|
||||
current->lockdep_recursion == 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled);
|
||||
NOKPROBE_SYMBOL(debug_lockdep_rcu_enabled);
|
||||
|
||||
/**
|
||||
* rcu_read_lock_held() - might we be in RCU read-side critical section?
|
||||
@ -501,370 +514,6 @@ int rcu_cpu_stall_suppress_at_boot __read_mostly; // !0 = suppress boot stalls.
|
||||
EXPORT_SYMBOL_GPL(rcu_cpu_stall_suppress_at_boot);
|
||||
module_param(rcu_cpu_stall_suppress_at_boot, int, 0444);
|
||||
|
||||
#ifdef CONFIG_TASKS_RCU
|
||||
|
||||
/*
|
||||
* Simple variant of RCU whose quiescent states are voluntary context
|
||||
* switch, cond_resched_rcu_qs(), user-space execution, and idle.
|
||||
* As such, grace periods can take one good long time. There are no
|
||||
* read-side primitives similar to rcu_read_lock() and rcu_read_unlock()
|
||||
* because this implementation is intended to get the system into a safe
|
||||
* state for some of the manipulations involved in tracing and the like.
|
||||
* Finally, this implementation does not support high call_rcu_tasks()
|
||||
* rates from multiple CPUs. If this is required, per-CPU callback lists
|
||||
* will be needed.
|
||||
*/
|
||||
|
||||
/* Global list of callbacks and associated lock. */
|
||||
static struct rcu_head *rcu_tasks_cbs_head;
|
||||
static struct rcu_head **rcu_tasks_cbs_tail = &rcu_tasks_cbs_head;
|
||||
static DECLARE_WAIT_QUEUE_HEAD(rcu_tasks_cbs_wq);
|
||||
static DEFINE_RAW_SPINLOCK(rcu_tasks_cbs_lock);
|
||||
|
||||
/* Track exiting tasks in order to allow them to be waited for. */
|
||||
DEFINE_STATIC_SRCU(tasks_rcu_exit_srcu);
|
||||
|
||||
/* Control stall timeouts. Disable with <= 0, otherwise jiffies till stall. */
|
||||
#define RCU_TASK_STALL_TIMEOUT (HZ * 60 * 10)
|
||||
static int rcu_task_stall_timeout __read_mostly = RCU_TASK_STALL_TIMEOUT;
|
||||
module_param(rcu_task_stall_timeout, int, 0644);
|
||||
|
||||
static struct task_struct *rcu_tasks_kthread_ptr;
|
||||
|
||||
/**
|
||||
* call_rcu_tasks() - Queue an RCU callback for invocation after a task-based grace period
|
||||
* @rhp: structure to be used for queueing the RCU updates.
|
||||
* @func: actual callback function to be invoked after the grace period
|
||||
*
|
||||
* The callback function will be invoked some time after a full grace
|
||||
* period elapses, in other words after all currently executing RCU
|
||||
* read-side critical sections have completed. call_rcu_tasks() assumes
|
||||
* that the read-side critical sections end at a voluntary context
|
||||
* switch (not a preemption!), cond_resched_rcu_qs(), entry into idle,
|
||||
* or transition to usermode execution. As such, there are no read-side
|
||||
* primitives analogous to rcu_read_lock() and rcu_read_unlock() because
|
||||
* this primitive is intended to determine that all tasks have passed
|
||||
* through a safe state, not so much for data-structure synchronization.
|
||||
*
|
||||
* See the description of call_rcu() for more detailed information on
|
||||
* memory ordering guarantees.
|
||||
*/
|
||||
void call_rcu_tasks(struct rcu_head *rhp, rcu_callback_t func)
|
||||
{
|
||||
unsigned long flags;
|
||||
bool needwake;
|
||||
|
||||
rhp->next = NULL;
|
||||
rhp->func = func;
|
||||
raw_spin_lock_irqsave(&rcu_tasks_cbs_lock, flags);
|
||||
needwake = !rcu_tasks_cbs_head;
|
||||
WRITE_ONCE(*rcu_tasks_cbs_tail, rhp);
|
||||
rcu_tasks_cbs_tail = &rhp->next;
|
||||
raw_spin_unlock_irqrestore(&rcu_tasks_cbs_lock, flags);
|
||||
/* We can't create the thread unless interrupts are enabled. */
|
||||
if (needwake && READ_ONCE(rcu_tasks_kthread_ptr))
|
||||
wake_up(&rcu_tasks_cbs_wq);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(call_rcu_tasks);
|
||||
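(A caller-side sketch of the API documented above; the my_patch_site type and helper below are invented for illustration:)

	struct my_patch_site {
		struct rcu_head rh;
		void *old_code;
	};

	static void my_patch_site_free(struct rcu_head *rhp)
	{
		kfree(container_of(rhp, struct my_patch_site, rh));
	}

	/* When retiring a site, once no task can still be executing in it: */
	call_rcu_tasks(&site->rh, my_patch_site_free);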
|
||||
/**
|
||||
* synchronize_rcu_tasks - wait until an rcu-tasks grace period has elapsed.
|
||||
*
|
||||
* Control will return to the caller some time after a full rcu-tasks
|
||||
* grace period has elapsed, in other words after all currently
|
||||
* executing rcu-tasks read-side critical sections have elapsed. These
|
||||
* read-side critical sections are delimited by calls to schedule(),
|
||||
* cond_resched_tasks_rcu_qs(), idle execution, userspace execution, calls
|
||||
* to synchronize_rcu_tasks(), and (in theory, anyway) cond_resched().
|
||||
*
|
||||
* This is a very specialized primitive, intended only for a few uses in
|
||||
* tracing and other situations requiring manipulation of function
|
||||
* preambles and profiling hooks. The synchronize_rcu_tasks() function
|
||||
* is not (yet) intended for heavy use from multiple CPUs.
|
||||
*
|
||||
* Note that this guarantee implies further memory-ordering guarantees.
|
||||
* On systems with more than one CPU, when synchronize_rcu_tasks() returns,
|
||||
* each CPU is guaranteed to have executed a full memory barrier since the
|
||||
* end of its last RCU-tasks read-side critical section whose beginning
|
||||
* preceded the call to synchronize_rcu_tasks(). In addition, each CPU
|
||||
* having an RCU-tasks read-side critical section that extends beyond
|
||||
* the return from synchronize_rcu_tasks() is guaranteed to have executed
|
||||
* a full memory barrier after the beginning of synchronize_rcu_tasks()
|
||||
* and before the beginning of that RCU-tasks read-side critical section.
|
||||
* Note that these guarantees include CPUs that are offline, idle, or
|
||||
* executing in user mode, as well as CPUs that are executing in the kernel.
|
||||
*
|
||||
* Furthermore, if CPU A invoked synchronize_rcu_tasks(), which returned
|
||||
* to its caller on CPU B, then both CPU A and CPU B are guaranteed
|
||||
* to have executed a full memory barrier during the execution of
|
||||
* synchronize_rcu_tasks() -- even if CPU A and CPU B are the same CPU
|
||||
* (but again only if the system has more than one CPU).
|
||||
*/
|
||||
void synchronize_rcu_tasks(void)
|
||||
{
|
||||
/* Complain if the scheduler has not started. */
|
||||
RCU_LOCKDEP_WARN(rcu_scheduler_active == RCU_SCHEDULER_INACTIVE,
|
||||
"synchronize_rcu_tasks called too soon");
|
||||
|
||||
/* Wait for the grace period. */
|
||||
wait_rcu_gp(call_rcu_tasks);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(synchronize_rcu_tasks);
|
||||
|
||||
/**
|
||||
* rcu_barrier_tasks - Wait for in-flight call_rcu_tasks() callbacks.
|
||||
*
|
||||
* Although the current implementation is guaranteed to wait, it is not
|
||||
* obligated to, for example, if there are no pending callbacks.
|
||||
*/
|
||||
void rcu_barrier_tasks(void)
|
||||
{
|
||||
/* There is only one callback queue, so this is easy. ;-) */
|
||||
synchronize_rcu_tasks();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rcu_barrier_tasks);
|
||||
|
||||
/* See if tasks are still holding out, complain if so. */
|
||||
static void check_holdout_task(struct task_struct *t,
|
||||
bool needreport, bool *firstreport)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
if (!READ_ONCE(t->rcu_tasks_holdout) ||
|
||||
t->rcu_tasks_nvcsw != READ_ONCE(t->nvcsw) ||
|
||||
!READ_ONCE(t->on_rq) ||
|
||||
(IS_ENABLED(CONFIG_NO_HZ_FULL) &&
|
||||
!is_idle_task(t) && t->rcu_tasks_idle_cpu >= 0)) {
|
||||
WRITE_ONCE(t->rcu_tasks_holdout, false);
|
||||
list_del_init(&t->rcu_tasks_holdout_list);
|
||||
put_task_struct(t);
|
||||
return;
|
||||
}
|
||||
rcu_request_urgent_qs_task(t);
|
||||
if (!needreport)
|
||||
return;
|
||||
if (*firstreport) {
|
||||
pr_err("INFO: rcu_tasks detected stalls on tasks:\n");
|
||||
*firstreport = false;
|
||||
}
|
||||
cpu = task_cpu(t);
|
||||
pr_alert("%p: %c%c nvcsw: %lu/%lu holdout: %d idle_cpu: %d/%d\n",
|
||||
t, ".I"[is_idle_task(t)],
|
||||
"N."[cpu < 0 || !tick_nohz_full_cpu(cpu)],
|
||||
t->rcu_tasks_nvcsw, t->nvcsw, t->rcu_tasks_holdout,
|
||||
t->rcu_tasks_idle_cpu, cpu);
|
||||
sched_show_task(t);
|
||||
}
|
||||
|
||||
/* RCU-tasks kthread that detects grace periods and invokes callbacks. */
|
||||
static int __noreturn rcu_tasks_kthread(void *arg)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct task_struct *g, *t;
|
||||
unsigned long lastreport;
|
||||
struct rcu_head *list;
|
||||
struct rcu_head *next;
|
||||
LIST_HEAD(rcu_tasks_holdouts);
|
||||
int fract;
|
||||
|
||||
/* Run on housekeeping CPUs by default. Sysadm can move if desired. */
|
||||
housekeeping_affine(current, HK_FLAG_RCU);
|
||||
|
||||
/*
|
||||
* Each pass through the following loop makes one check for
|
||||
* newly arrived callbacks, and, if there are some, waits for
|
||||
* one RCU-tasks grace period and then invokes the callbacks.
|
||||
* This loop is terminated by the system going down. ;-)
|
||||
*/
|
||||
for (;;) {
|
||||
|
||||
/* Pick up any new callbacks. */
|
||||
raw_spin_lock_irqsave(&rcu_tasks_cbs_lock, flags);
|
||||
list = rcu_tasks_cbs_head;
|
||||
rcu_tasks_cbs_head = NULL;
|
||||
rcu_tasks_cbs_tail = &rcu_tasks_cbs_head;
|
||||
raw_spin_unlock_irqrestore(&rcu_tasks_cbs_lock, flags);
|
||||
|
||||
/* If there were none, wait a bit and start over. */
|
||||
if (!list) {
|
||||
wait_event_interruptible(rcu_tasks_cbs_wq,
|
||||
READ_ONCE(rcu_tasks_cbs_head));
|
||||
if (!rcu_tasks_cbs_head) {
|
||||
WARN_ON(signal_pending(current));
|
||||
schedule_timeout_interruptible(HZ/10);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* Wait for all pre-existing t->on_rq and t->nvcsw
|
||||
* transitions to complete. Invoking synchronize_rcu()
|
||||
* suffices because all these transitions occur with
|
||||
* interrupts disabled. Without this synchronize_rcu(),
|
||||
* a read-side critical section that started before the
|
||||
* grace period might be incorrectly seen as having started
|
||||
* after the grace period.
|
||||
*
|
||||
* This synchronize_rcu() also dispenses with the
|
||||
* need for a memory barrier on the first store to
|
||||
* ->rcu_tasks_holdout, as it forces the store to happen
|
||||
* after the beginning of the grace period.
|
||||
*/
|
||||
synchronize_rcu();
|
||||
|
||||
/*
|
||||
* There were callbacks, so we need to wait for an
|
||||
* RCU-tasks grace period. Start off by scanning
|
||||
* the task list for tasks that are not already
|
||||
* voluntarily blocked. Mark these tasks and make
|
||||
* a list of them in rcu_tasks_holdouts.
|
||||
*/
|
||||
rcu_read_lock();
|
||||
for_each_process_thread(g, t) {
|
||||
if (t != current && READ_ONCE(t->on_rq) &&
|
||||
!is_idle_task(t)) {
|
||||
get_task_struct(t);
|
||||
t->rcu_tasks_nvcsw = READ_ONCE(t->nvcsw);
|
||||
WRITE_ONCE(t->rcu_tasks_holdout, true);
|
||||
list_add(&t->rcu_tasks_holdout_list,
|
||||
&rcu_tasks_holdouts);
|
||||
}
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
/*
|
||||
* Wait for tasks that are in the process of exiting.
|
||||
* This does only part of the job, ensuring that all
|
||||
* tasks that were previously exiting reach the point
|
||||
* where they have disabled preemption, allowing the
|
||||
* later synchronize_rcu() to finish the job.
|
||||
*/
|
||||
synchronize_srcu(&tasks_rcu_exit_srcu);
|
||||
|
||||
/*
|
||||
* Each pass through the following loop scans the list
|
||||
* of holdout tasks, removing any that are no longer
|
||||
* holdouts. When the list is empty, we are done.
|
||||
*/
|
||||
lastreport = jiffies;
|
||||
|
||||
/* Start off with HZ/10 wait and slowly back off to 1 HZ wait. */
|
||||
fract = 10;
|
||||
|
||||
for (;;) {
|
||||
bool firstreport;
|
||||
bool needreport;
|
||||
int rtst;
|
||||
struct task_struct *t1;
|
||||
|
||||
if (list_empty(&rcu_tasks_holdouts))
|
||||
break;
|
||||
|
||||
/* Slowly back off waiting for holdouts */
|
||||
schedule_timeout_interruptible(HZ/fract);
|
||||
|
||||
if (fract > 1)
|
||||
fract--;
|
||||
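			/*
			 * Net effect of the backoff above: successive sleeps of
			 * HZ/10, HZ/9, ..., HZ/2, and then HZ per pass once
			 * fract reaches 1.
			 */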
|
||||
rtst = READ_ONCE(rcu_task_stall_timeout);
|
||||
needreport = rtst > 0 &&
|
||||
time_after(jiffies, lastreport + rtst);
|
||||
if (needreport)
|
||||
lastreport = jiffies;
|
||||
firstreport = true;
|
||||
WARN_ON(signal_pending(current));
|
||||
list_for_each_entry_safe(t, t1, &rcu_tasks_holdouts,
|
||||
rcu_tasks_holdout_list) {
|
||||
check_holdout_task(t, needreport, &firstreport);
|
||||
cond_resched();
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Because ->on_rq and ->nvcsw are not guaranteed
|
||||
* to have full memory barriers prior to them in the
|
||||
* schedule() path, memory reordering on other CPUs could
|
||||
* cause their RCU-tasks read-side critical sections to
|
||||
* extend past the end of the grace period. However,
|
||||
* because these ->nvcsw updates are carried out with
|
||||
* interrupts disabled, we can use synchronize_rcu()
|
||||
* to force the needed ordering on all such CPUs.
|
||||
*
|
||||
* This synchronize_rcu() also confines all
|
||||
* ->rcu_tasks_holdout accesses to be within the grace
|
||||
* period, avoiding the need for memory barriers for
|
||||
* ->rcu_tasks_holdout accesses.
|
||||
*
|
||||
* In addition, this synchronize_rcu() waits for exiting
|
||||
* tasks to complete their final preempt_disable() region
|
||||
* of execution, cleaning up after the synchronize_srcu()
|
||||
* above.
|
||||
*/
|
||||
synchronize_rcu();
|
||||
|
||||
/* Invoke the callbacks. */
|
||||
while (list) {
|
||||
next = list->next;
|
||||
local_bh_disable();
|
||||
list->func(list);
|
||||
local_bh_enable();
|
||||
list = next;
|
||||
cond_resched();
|
||||
}
|
||||
/* Paranoid sleep to keep this from entering a tight loop */
|
||||
schedule_timeout_uninterruptible(HZ/10);
|
||||
}
|
||||
}
|
||||
|
||||
/* Spawn rcu_tasks_kthread() at core_initcall() time. */
|
||||
static int __init rcu_spawn_tasks_kthread(void)
|
||||
{
|
||||
struct task_struct *t;
|
||||
|
||||
t = kthread_run(rcu_tasks_kthread, NULL, "rcu_tasks_kthread");
|
||||
if (WARN_ONCE(IS_ERR(t), "%s: Could not start Tasks-RCU grace-period kthread, OOM is now expected behavior\n", __func__))
|
||||
return 0;
|
||||
smp_mb(); /* Ensure others see full kthread. */
|
||||
WRITE_ONCE(rcu_tasks_kthread_ptr, t);
|
||||
return 0;
|
||||
}
|
||||
core_initcall(rcu_spawn_tasks_kthread);
|
||||
|
||||
/* Do the srcu_read_lock() for the above synchronize_srcu(). */
|
||||
void exit_tasks_rcu_start(void) __acquires(&tasks_rcu_exit_srcu)
|
||||
{
|
||||
preempt_disable();
|
||||
current->rcu_tasks_idx = __srcu_read_lock(&tasks_rcu_exit_srcu);
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
/* Do the srcu_read_unlock() for the above synchronize_srcu(). */
|
||||
void exit_tasks_rcu_finish(void) __releases(&tasks_rcu_exit_srcu)
|
||||
{
|
||||
preempt_disable();
|
||||
__srcu_read_unlock(&tasks_rcu_exit_srcu, current->rcu_tasks_idx);
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
#endif /* #ifdef CONFIG_TASKS_RCU */
|
||||
|
||||
#ifndef CONFIG_TINY_RCU
|
||||
|
||||
/*
|
||||
* Print any non-default Tasks RCU settings.
|
||||
*/
|
||||
static void __init rcu_tasks_bootup_oddness(void)
|
||||
{
|
||||
#ifdef CONFIG_TASKS_RCU
|
||||
if (rcu_task_stall_timeout != RCU_TASK_STALL_TIMEOUT)
|
||||
pr_info("\tTasks-RCU CPU stall warnings timeout set to %d (rcu_task_stall_timeout).\n", rcu_task_stall_timeout);
|
||||
else
|
||||
pr_info("\tTasks RCU enabled.\n");
|
||||
#endif /* #ifdef CONFIG_TASKS_RCU */
|
||||
}
|
||||
|
||||
#endif /* #ifndef CONFIG_TINY_RCU */
|
||||
|
||||
#ifdef CONFIG_PROVE_RCU
|
||||
|
||||
/*
|
||||
@ -935,6 +584,8 @@ late_initcall(rcu_verify_early_boot_tests);
|
||||
void rcu_early_boot_tests(void) {}
|
||||
#endif /* CONFIG_PROVE_RCU */
|
||||
|
||||
#include "tasks.h"
|
||||
|
||||
#ifndef CONFIG_TINY_RCU
|
||||
|
||||
/*
|
||||
|
@ -2562,6 +2562,8 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
|
||||
*
|
||||
* Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in
|
||||
* __schedule(). See the comment for smp_mb__after_spinlock().
|
||||
*
|
||||
* A similar smp_rmb() lives in try_invoke_on_locked_down_task().
|
||||
*/
|
||||
smp_rmb();
|
||||
if (p->on_rq && ttwu_remote(p, wake_flags))
|
||||
@ -2635,6 +2637,52 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
|
||||
return success;
|
||||
}
|
||||
|
||||
/**
|
||||
* try_invoke_on_locked_down_task - Invoke a function on task in fixed state
|
||||
* @p: Process for which the function is to be invoked.
|
||||
* @func: Function to invoke.
|
||||
* @arg: Argument to function.
|
||||
*
|
||||
* If the specified task can be quickly locked into a definite state
|
||||
* (either sleeping or on a given runqueue), arrange to keep it in that
|
||||
* state while invoking @func(@arg). This function can use ->on_rq and
|
||||
* task_curr() to work out what the state is, if required. Given that
|
||||
* @func can be invoked with a runqueue lock held, it had better be quite
|
||||
* lightweight.
|
||||
*
|
||||
* Returns:
|
||||
* @false if the task slipped out from under the locks.
|
||||
* @true if the task was locked onto a runqueue or is sleeping.
|
||||
* However, @func can override this by returning @false.
|
||||
*/
|
||||
bool try_invoke_on_locked_down_task(struct task_struct *p, bool (*func)(struct task_struct *t, void *arg), void *arg)
|
||||
{
|
||||
bool ret = false;
|
||||
struct rq_flags rf;
|
||||
struct rq *rq;
|
||||
|
||||
lockdep_assert_irqs_enabled();
|
||||
raw_spin_lock_irq(&p->pi_lock);
|
||||
if (p->on_rq) {
|
||||
rq = __task_rq_lock(p, &rf);
|
||||
if (task_rq(p) == rq)
|
||||
ret = func(p, arg);
|
||||
rq_unlock(rq, &rf);
|
||||
} else {
|
||||
switch (p->state) {
|
||||
case TASK_RUNNING:
|
||||
case TASK_WAKING:
|
||||
break;
|
||||
default:
|
||||
smp_rmb(); // See smp_rmb() comment in try_to_wake_up().
|
||||
if (!p->on_rq)
|
||||
ret = func(p, arg);
|
||||
}
|
||||
}
|
||||
raw_spin_unlock_irq(&p->pi_lock);
|
||||
return ret;
|
||||
}
|
||||
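(Usage sketch: the RCU stall-warning code earlier in this patch calls this helper with a lightweight callback, roughly as below. The callback must stay cheap because it can run under a runqueue lock; the struct name for rscr is assumed here:)

	struct rcu_stall_chk_rdr rscr;

	if (!try_invoke_on_locked_down_task(t, check_slow_task, &rscr))
		pr_cont(" P%d", t->pid);		/* state could not be pinned */
	else
		pr_cont(" P%d/%d", t->pid, rscr.nesting);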
|
||||
/**
|
||||
* wake_up_process - Wake up a specific process
|
||||
* @p: The process to be woken up.
|
||||
|
@ -10,11 +10,6 @@ config USER_STACKTRACE_SUPPORT
|
||||
config NOP_TRACER
|
||||
bool
|
||||
|
||||
config HAVE_FTRACE_NMI_ENTER
|
||||
bool
|
||||
help
|
||||
See Documentation/trace/ftrace-design.rst
|
||||
|
||||
config HAVE_FUNCTION_TRACER
|
||||
bool
|
||||
help
|
||||
@ -72,11 +67,6 @@ config RING_BUFFER
|
||||
select TRACE_CLOCK
|
||||
select IRQ_WORK
|
||||
|
||||
config FTRACE_NMI_ENTER
|
||||
bool
|
||||
depends on HAVE_FTRACE_NMI_ENTER
|
||||
default y
|
||||
|
||||
config EVENT_TRACING
|
||||
select CONTEXT_SWITCH_TRACER
|
||||
select GLOB
|
||||
@ -158,6 +148,7 @@ config FUNCTION_TRACER
|
||||
select CONTEXT_SWITCH_TRACER
|
||||
select GLOB
|
||||
select TASKS_RCU if PREEMPTION
|
||||
select TASKS_RUDE_RCU
|
||||
help
|
||||
Enable the kernel to trace every kernel function. This is done
|
||||
by using a compiler feature to insert a small, 5-byte No-Operation
|
||||
|
@ -160,17 +160,6 @@ static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip,
|
||||
op->saved_func(ip, parent_ip, op, regs);
|
||||
}
|
||||
|
||||
static void ftrace_sync(struct work_struct *work)
|
||||
{
|
||||
/*
|
||||
* This function is just a stub to implement a hard force
|
||||
* of synchronize_rcu(). This requires synchronizing
|
||||
* tasks even in userspace and idle.
|
||||
*
|
||||
* Yes, function tracing is rude.
|
||||
*/
|
||||
}
|
||||
|
||||
static void ftrace_sync_ipi(void *data)
|
||||
{
|
||||
/* Probably not needed, but do it anyway */
|
||||
@ -256,7 +245,7 @@ static void update_ftrace_function(void)
|
||||
* Make sure all CPUs see this. Yes this is slow, but static
|
||||
* tracing is slow and nasty to have enabled.
|
||||
*/
|
||||
schedule_on_each_cpu(ftrace_sync);
|
||||
synchronize_rcu_tasks_rude();
|
||||
/* Now all cpus are using the list ops. */
|
||||
function_trace_op = set_function_trace_op;
|
||||
/* Make sure the function_trace_op is visible on all CPUs */
|
||||
@ -2932,7 +2921,7 @@ int ftrace_shutdown(struct ftrace_ops *ops, int command)
|
||||
* infrastructure to do the synchronization, thus we must do it
|
||||
* ourselves.
|
||||
*/
|
||||
schedule_on_each_cpu(ftrace_sync);
|
||||
synchronize_rcu_tasks_rude();
|
||||
|
||||
/*
|
||||
* When the kernel is preemptive, tasks can be preempted
|
||||
@ -5888,7 +5877,7 @@ ftrace_graph_release(struct inode *inode, struct file *file)
|
||||
* infrastructure to do the synchronization, thus we must do it
|
||||
* ourselves.
|
||||
*/
|
||||
schedule_on_each_cpu(ftrace_sync);
|
||||
synchronize_rcu_tasks_rude();
|
||||
|
||||
free_ftrace_hash(old_hash);
|
||||
}
|
||||
|
@ -369,6 +369,11 @@ config STACK_VALIDATION
|
||||
For more information, see
|
||||
tools/objtool/Documentation/stack-validation.txt.
|
||||
|
||||
config VMLINUX_VALIDATION
|
||||
bool
|
||||
depends on STACK_VALIDATION && DEBUG_ENTRY && !PARAVIRT
|
||||
default y
|
||||
|
||||
config DEBUG_FORCE_WEAK_PER_CPU
|
||||
bool "Force weak per-cpu definitions"
|
||||
depends on DEBUG_KERNEL
|
||||
|
@ -20,6 +20,7 @@
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/kmemleak.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/local_lock.h>
|
||||
#include <linux/preempt.h> /* in_interrupt() */
|
||||
#include <linux/radix-tree.h>
|
||||
#include <linux/rcupdate.h>
|
||||
@ -27,7 +28,6 @@
|
||||
#include <linux/string.h>
|
||||
#include <linux/xarray.h>
|
||||
|
||||
|
||||
/*
|
||||
* Radix tree node cache.
|
||||
*/
|
||||
@ -58,12 +58,10 @@ struct kmem_cache *radix_tree_node_cachep;
|
||||
/*
|
||||
* Per-cpu pool of preloaded nodes
|
||||
*/
|
||||
struct radix_tree_preload {
|
||||
unsigned nr;
|
||||
/* nodes->parent points to next preallocated node */
|
||||
struct radix_tree_node *nodes;
|
||||
DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = {
|
||||
.lock = INIT_LOCAL_LOCK(lock),
|
||||
};
|
||||
static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, };
|
||||
EXPORT_PER_CPU_SYMBOL_GPL(radix_tree_preloads);
|
||||
|
||||
static inline struct radix_tree_node *entry_to_node(void *ptr)
|
||||
{
|
||||
@ -332,14 +330,14 @@ static __must_check int __radix_tree_preload(gfp_t gfp_mask, unsigned nr)
|
||||
*/
|
||||
gfp_mask &= ~__GFP_ACCOUNT;
|
||||
|
||||
preempt_disable();
|
||||
local_lock(&radix_tree_preloads.lock);
|
||||
rtp = this_cpu_ptr(&radix_tree_preloads);
|
||||
while (rtp->nr < nr) {
|
||||
preempt_enable();
|
||||
local_unlock(&radix_tree_preloads.lock);
|
||||
node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
|
||||
if (node == NULL)
|
||||
goto out;
|
||||
preempt_disable();
|
||||
local_lock(&radix_tree_preloads.lock);
|
||||
rtp = this_cpu_ptr(&radix_tree_preloads);
|
||||
if (rtp->nr < nr) {
|
||||
node->parent = rtp->nodes;
|
||||
@ -381,7 +379,7 @@ int radix_tree_maybe_preload(gfp_t gfp_mask)
|
||||
if (gfpflags_allow_blocking(gfp_mask))
|
||||
return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE);
|
||||
/* Preloading doesn't help anything with this gfp mask, skip it */
|
||||
preempt_disable();
|
||||
local_lock(&radix_tree_preloads.lock);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(radix_tree_maybe_preload);
|
||||
@ -1470,7 +1468,7 @@ EXPORT_SYMBOL(radix_tree_tagged);
|
||||
void idr_preload(gfp_t gfp_mask)
|
||||
{
|
||||
if (__radix_tree_preload(gfp_mask, IDR_PRELOAD_SIZE))
|
||||
preempt_disable();
|
||||
local_lock(&radix_tree_preloads.lock);
|
||||
}
|
||||
EXPORT_SYMBOL(idr_preload);
|
||||
|
||||
|
@ -2243,15 +2243,11 @@ compact_zone(struct compact_control *cc, struct capture_control *capc)
|
||||
* would succeed.
|
||||
*/
|
||||
if (cc->order > 0 && last_migrated_pfn) {
|
||||
int cpu;
|
||||
unsigned long current_block_start =
|
||||
block_start_pfn(cc->migrate_pfn, cc->order);
|
||||
|
||||
if (last_migrated_pfn < current_block_start) {
|
||||
cpu = get_cpu();
|
||||
lru_add_drain_cpu(cpu);
|
||||
drain_local_pages(cc->zone);
|
||||
put_cpu();
|
||||
lru_add_drain_cpu_zone(cc->zone);
|
||||
/* No more flushing until we migrate again */
|
||||
last_migrated_pfn = 0;
|
||||
}
|
||||
|
mm/swap.c
@ -35,6 +35,7 @@
|
||||
#include <linux/uio.h>
|
||||
#include <linux/hugetlb.h>
|
||||
#include <linux/page_idle.h>
|
||||
#include <linux/local_lock.h>
|
||||
|
||||
#include "internal.h"
|
||||
|
||||
@ -44,14 +45,32 @@
|
||||
/* How many pages do we try to swap or page in/out together? */
|
||||
int page_cluster;
|
||||
|
||||
static DEFINE_PER_CPU(struct pagevec, lru_add_pvec);
|
||||
static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);
|
||||
static DEFINE_PER_CPU(struct pagevec, lru_deactivate_file_pvecs);
|
||||
static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs);
|
||||
static DEFINE_PER_CPU(struct pagevec, lru_lazyfree_pvecs);
|
||||
/* Protecting only lru_rotate.pvec which requires disabling interrupts */
|
||||
struct lru_rotate {
|
||||
local_lock_t lock;
|
||||
struct pagevec pvec;
|
||||
};
|
||||
static DEFINE_PER_CPU(struct lru_rotate, lru_rotate) = {
|
||||
.lock = INIT_LOCAL_LOCK(lock),
|
||||
};
|
||||
|
||||
/*
|
||||
* The following struct pagevec are grouped together because they are protected
|
||||
* by disabling preemption (and interrupts remain enabled).
|
||||
*/
|
||||
struct lru_pvecs {
|
||||
local_lock_t lock;
|
||||
struct pagevec lru_add;
|
||||
struct pagevec lru_deactivate_file;
|
||||
struct pagevec lru_deactivate;
|
||||
struct pagevec lru_lazyfree;
|
||||
#ifdef CONFIG_SMP
|
||||
static DEFINE_PER_CPU(struct pagevec, activate_page_pvecs);
|
||||
struct pagevec activate_page;
|
||||
#endif
|
||||
};
|
||||
static DEFINE_PER_CPU(struct lru_pvecs, lru_pvecs) = {
|
||||
.lock = INIT_LOCAL_LOCK(lock),
|
||||
};
|
||||
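(The pattern being introduced in this file: the per-CPU pagevecs gain an explicit local_lock_t, so the locking scope is visible to lockdep and to PREEMPT_RT instead of being implied by get_cpu_var()/preempt_disable(). A minimal self-contained sketch of the same conversion, with invented names:)

	#include <linux/local_lock.h>
	#include <linux/percpu.h>

	struct my_pcpu_cache {
		local_lock_t lock;
		int count;
	};
	static DEFINE_PER_CPU(struct my_pcpu_cache, my_pcpu_cache) = {
		.lock = INIT_LOCAL_LOCK(lock),
	};

	static void my_cache_add(void)
	{
		local_lock(&my_pcpu_cache.lock);	/* was: preempt_disable() */
		this_cpu_inc(my_pcpu_cache.count);
		local_unlock(&my_pcpu_cache.lock);	/* was: preempt_enable() */
	}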
|
||||
/*
|
||||
* This path almost never happens for VM activity - pages are normally
|
||||
@ -254,11 +273,11 @@ void rotate_reclaimable_page(struct page *page)
|
||||
unsigned long flags;
|
||||
|
||||
get_page(page);
|
||||
local_irq_save(flags);
|
||||
pvec = this_cpu_ptr(&lru_rotate_pvecs);
|
||||
local_lock_irqsave(&lru_rotate.lock, flags);
|
||||
pvec = this_cpu_ptr(&lru_rotate.pvec);
|
||||
if (!pagevec_add(pvec, page) || PageCompound(page))
|
||||
pagevec_move_tail(pvec);
|
||||
local_irq_restore(flags);
|
||||
local_unlock_irqrestore(&lru_rotate.lock, flags);
|
||||
}
|
||||
}
|
||||
|
||||
@ -293,7 +312,7 @@ static void __activate_page(struct page *page, struct lruvec *lruvec,
|
||||
#ifdef CONFIG_SMP
|
||||
static void activate_page_drain(int cpu)
|
||||
{
|
||||
struct pagevec *pvec = &per_cpu(activate_page_pvecs, cpu);
|
||||
struct pagevec *pvec = &per_cpu(lru_pvecs.activate_page, cpu);
|
||||
|
||||
if (pagevec_count(pvec))
|
||||
pagevec_lru_move_fn(pvec, __activate_page, NULL);
|
||||
@ -301,19 +320,21 @@ static void activate_page_drain(int cpu)
|
||||
|
||||
static bool need_activate_page_drain(int cpu)
|
||||
{
|
||||
return pagevec_count(&per_cpu(activate_page_pvecs, cpu)) != 0;
|
||||
return pagevec_count(&per_cpu(lru_pvecs.activate_page, cpu)) != 0;
|
||||
}
|
||||
|
||||
void activate_page(struct page *page)
|
||||
{
|
||||
page = compound_head(page);
|
||||
if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
|
||||
struct pagevec *pvec = &get_cpu_var(activate_page_pvecs);
|
||||
struct pagevec *pvec;
|
||||
|
||||
local_lock(&lru_pvecs.lock);
|
||||
pvec = this_cpu_ptr(&lru_pvecs.activate_page);
|
||||
get_page(page);
|
||||
if (!pagevec_add(pvec, page) || PageCompound(page))
|
||||
pagevec_lru_move_fn(pvec, __activate_page, NULL);
|
||||
put_cpu_var(activate_page_pvecs);
|
||||
local_unlock(&lru_pvecs.lock);
|
||||
}
|
||||
}
|
||||
|
||||
@ -335,9 +356,12 @@ void activate_page(struct page *page)
|
||||
|
||||
static void __lru_cache_activate_page(struct page *page)
|
||||
{
|
||||
struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
|
||||
struct pagevec *pvec;
|
||||
int i;
|
||||
|
||||
local_lock(&lru_pvecs.lock);
|
||||
pvec = this_cpu_ptr(&lru_pvecs.lru_add);
|
||||
|
||||
/*
|
||||
* Search backwards on the optimistic assumption that the page being
|
||||
* activated has just been added to this pagevec. Note that only
|
||||
@ -357,7 +381,7 @@ static void __lru_cache_activate_page(struct page *page)
|
||||
}
|
||||
}
|
||||
|
||||
put_cpu_var(lru_add_pvec);
|
||||
local_unlock(&lru_pvecs.lock);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -385,7 +409,7 @@ void mark_page_accessed(struct page *page)
|
||||
} else if (!PageActive(page)) {
|
||||
/*
|
||||
* If the page is on the LRU, queue it for activation via
|
||||
* activate_page_pvecs. Otherwise, assume the page is on a
|
||||
* lru_pvecs.activate_page. Otherwise, assume the page is on a
|
||||
* pagevec, mark it active and it'll be moved to the active
|
||||
* LRU on the next drain.
|
||||
*/
|
||||
@ -404,12 +428,14 @@ EXPORT_SYMBOL(mark_page_accessed);
|
||||
|
||||
static void __lru_cache_add(struct page *page)
|
||||
{
|
||||
struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
|
||||
struct pagevec *pvec;
|
||||
|
||||
local_lock(&lru_pvecs.lock);
|
||||
pvec = this_cpu_ptr(&lru_pvecs.lru_add);
|
||||
get_page(page);
|
||||
if (!pagevec_add(pvec, page) || PageCompound(page))
|
||||
__pagevec_lru_add(pvec);
|
||||
put_cpu_var(lru_add_pvec);
|
||||
local_unlock(&lru_pvecs.lock);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -593,30 +619,30 @@ static void lru_lazyfree_fn(struct page *page, struct lruvec *lruvec,
|
||||
*/
|
||||
void lru_add_drain_cpu(int cpu)
|
||||
{
|
||||
struct pagevec *pvec = &per_cpu(lru_add_pvec, cpu);
|
||||
struct pagevec *pvec = &per_cpu(lru_pvecs.lru_add, cpu);
|
||||
|
||||
if (pagevec_count(pvec))
|
||||
__pagevec_lru_add(pvec);
|
||||
|
||||
pvec = &per_cpu(lru_rotate_pvecs, cpu);
|
||||
pvec = &per_cpu(lru_rotate.pvec, cpu);
|
||||
if (pagevec_count(pvec)) {
|
||||
unsigned long flags;
|
||||
|
||||
/* No harm done if a racing interrupt already did this */
|
||||
local_irq_save(flags);
|
||||
local_lock_irqsave(&lru_rotate.lock, flags);
|
||||
pagevec_move_tail(pvec);
|
||||
local_irq_restore(flags);
|
||||
local_unlock_irqrestore(&lru_rotate.lock, flags);
|
||||
}
|
||||
|
||||
pvec = &per_cpu(lru_deactivate_file_pvecs, cpu);
|
||||
pvec = &per_cpu(lru_pvecs.lru_deactivate_file, cpu);
|
||||
if (pagevec_count(pvec))
|
||||
pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL);
|
||||
|
||||
pvec = &per_cpu(lru_deactivate_pvecs, cpu);
|
||||
pvec = &per_cpu(lru_pvecs.lru_deactivate, cpu);
|
||||
if (pagevec_count(pvec))
|
||||
pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
|
||||
|
||||
pvec = &per_cpu(lru_lazyfree_pvecs, cpu);
|
||||
pvec = &per_cpu(lru_pvecs.lru_lazyfree, cpu);
|
||||
if (pagevec_count(pvec))
|
||||
pagevec_lru_move_fn(pvec, lru_lazyfree_fn, NULL);
|
||||
|
||||
@ -641,11 +667,14 @@ void deactivate_file_page(struct page *page)
|
||||
return;
|
||||
|
||||
if (likely(get_page_unless_zero(page))) {
|
||||
struct pagevec *pvec = &get_cpu_var(lru_deactivate_file_pvecs);
|
||||
struct pagevec *pvec;
|
||||
|
||||
local_lock(&lru_pvecs.lock);
|
||||
pvec = this_cpu_ptr(&lru_pvecs.lru_deactivate_file);
|
||||
|
||||
if (!pagevec_add(pvec, page) || PageCompound(page))
|
||||
pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL);
|
||||
put_cpu_var(lru_deactivate_file_pvecs);
|
||||
local_unlock(&lru_pvecs.lock);
|
||||
}
|
||||
}
|
||||
|
||||
@ -660,12 +689,14 @@ void deactivate_file_page(struct page *page)
|
||||
void deactivate_page(struct page *page)
|
||||
{
|
||||
if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) {
|
||||
struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);
|
||||
struct pagevec *pvec;
|
||||
|
||||
local_lock(&lru_pvecs.lock);
|
||||
pvec = this_cpu_ptr(&lru_pvecs.lru_deactivate);
|
||||
get_page(page);
|
||||
if (!pagevec_add(pvec, page) || PageCompound(page))
|
||||
pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
|
||||
put_cpu_var(lru_deactivate_pvecs);
|
||||
local_unlock(&lru_pvecs.lock);
|
||||
}
|
||||
}
|
||||
|
||||
@ -680,19 +711,30 @@ void mark_page_lazyfree(struct page *page)
|
||||
{
|
||||
if (PageLRU(page) && PageAnon(page) && PageSwapBacked(page) &&
|
||||
!PageSwapCache(page) && !PageUnevictable(page)) {
|
||||
struct pagevec *pvec = &get_cpu_var(lru_lazyfree_pvecs);
|
||||
struct pagevec *pvec;
|
||||
|
||||
local_lock(&lru_pvecs.lock);
|
||||
pvec = this_cpu_ptr(&lru_pvecs.lru_lazyfree);
|
||||
get_page(page);
|
||||
if (!pagevec_add(pvec, page) || PageCompound(page))
|
||||
pagevec_lru_move_fn(pvec, lru_lazyfree_fn, NULL);
|
||||
put_cpu_var(lru_lazyfree_pvecs);
|
||||
local_unlock(&lru_pvecs.lock);
|
||||
}
|
||||
}
|
||||
|
||||
void lru_add_drain(void)
|
||||
{
|
||||
lru_add_drain_cpu(get_cpu());
|
||||
put_cpu();
|
||||
local_lock(&lru_pvecs.lock);
|
||||
lru_add_drain_cpu(smp_processor_id());
|
||||
local_unlock(&lru_pvecs.lock);
|
||||
}
|
||||
|
||||
void lru_add_drain_cpu_zone(struct zone *zone)
|
||||
{
|
||||
local_lock(&lru_pvecs.lock);
|
||||
lru_add_drain_cpu(smp_processor_id());
|
||||
drain_local_pages(zone);
|
||||
local_unlock(&lru_pvecs.lock);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
@ -743,11 +785,11 @@ void lru_add_drain_all(void)
|
||||
for_each_online_cpu(cpu) {
|
||||
struct work_struct *work = &per_cpu(lru_add_drain_work, cpu);
|
||||
|
||||
if (pagevec_count(&per_cpu(lru_add_pvec, cpu)) ||
|
||||
pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) ||
|
||||
pagevec_count(&per_cpu(lru_deactivate_file_pvecs, cpu)) ||
|
||||
pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) ||
|
||||
pagevec_count(&per_cpu(lru_lazyfree_pvecs, cpu)) ||
|
||||
if (pagevec_count(&per_cpu(lru_pvecs.lru_add, cpu)) ||
|
||||
pagevec_count(&per_cpu(lru_rotate.pvec, cpu)) ||
|
||||
pagevec_count(&per_cpu(lru_pvecs.lru_deactivate_file, cpu)) ||
|
||||
pagevec_count(&per_cpu(lru_pvecs.lru_deactivate, cpu)) ||
|
||||
pagevec_count(&per_cpu(lru_pvecs.lru_lazyfree, cpu)) ||
|
||||
need_activate_page_drain(cpu)) {
|
||||
INIT_WORK(work, lru_add_drain_per_cpu);
|
||||
queue_work_on(cpu, mm_percpu_wq, work);
|
||||
|
@ -20,18 +20,22 @@ static unsigned long my_ip = (unsigned long)schedule;
|
||||
|
||||
asm (
|
||||
" .pushsection .text, \"ax\", @progbits\n"
|
||||
" .type my_tramp1, @function\n"
|
||||
" my_tramp1:"
|
||||
" pushq %rbp\n"
|
||||
" movq %rsp, %rbp\n"
|
||||
" call my_direct_func1\n"
|
||||
" leave\n"
|
||||
" .size my_tramp1, .-my_tramp1\n"
|
||||
" ret\n"
|
||||
" .type my_tramp2, @function\n"
|
||||
" my_tramp2:"
|
||||
" pushq %rbp\n"
|
||||
" movq %rsp, %rbp\n"
|
||||
" call my_direct_func2\n"
|
||||
" leave\n"
|
||||
" ret\n"
|
||||
" .size my_tramp2, .-my_tramp2\n"
|
||||
" .popsection\n"
|
||||
);
|
||||
|
||||
|
@ -15,6 +15,7 @@ extern void my_tramp(void *);
|
||||
|
||||
asm (
|
||||
" .pushsection .text, \"ax\", @progbits\n"
|
||||
" .type my_tramp, @function\n"
|
||||
" my_tramp:"
|
||||
" pushq %rbp\n"
|
||||
" movq %rsp, %rbp\n"
|
||||
@ -27,6 +28,7 @@ asm (
|
||||
" popq %rdi\n"
|
||||
" leave\n"
|
||||
" ret\n"
|
||||
" .size my_tramp, .-my_tramp\n"
|
||||
" .popsection\n"
|
||||
);
|
||||
|
||||
|
@ -13,6 +13,7 @@ extern void my_tramp(void *);
|
||||
|
||||
asm (
|
||||
" .pushsection .text, \"ax\", @progbits\n"
|
||||
" .type my_tramp, @function\n"
|
||||
" my_tramp:"
|
||||
" pushq %rbp\n"
|
||||
" movq %rsp, %rbp\n"
|
||||
@ -21,6 +22,7 @@ asm (
|
||||
" popq %rdi\n"
|
||||
" leave\n"
|
||||
" ret\n"
|
||||
" .size my_tramp, .-my_tramp\n"
|
||||
" .popsection\n"
|
||||
);
|
||||
|
||||
|
@ -102,6 +102,29 @@ recordmcount()
|
||||
fi
|
||||
}
|
||||
|
||||
objtool_link()
|
||||
{
|
||||
local objtoolopt;
|
||||
|
||||
if [ -n "${CONFIG_VMLINUX_VALIDATION}" ]; then
|
||||
objtoolopt="check"
|
||||
if [ -z "${CONFIG_FRAME_POINTER}" ]; then
|
||||
objtoolopt="${objtoolopt} --no-fp"
|
||||
fi
|
||||
if [ -n "${CONFIG_GCOV_KERNEL}" ]; then
|
||||
objtoolopt="${objtoolopt} --no-unreachable"
|
||||
fi
|
||||
if [ -n "${CONFIG_RETPOLINE}" ]; then
|
||||
objtoolopt="${objtoolopt} --retpoline"
|
||||
fi
|
||||
if [ -n "${CONFIG_X86_SMAP}" ]; then
|
||||
objtoolopt="${objtoolopt} --uaccess"
|
||||
fi
|
||||
info OBJTOOL ${1}
|
||||
tools/objtool/objtool ${objtoolopt} ${1}
|
||||
fi
|
||||
}
|
||||
|
||||
# Link of vmlinux
|
||||
# ${1} - output file
|
||||
# ${2}, ${3}, ... - optional extra .o files
|
||||
@ -307,6 +330,7 @@ ${MAKE} -f "${srctree}/scripts/Makefile.build" obj=init need-builtin=1
|
||||
|
||||
#link vmlinux.o
|
||||
modpost_link vmlinux.o
|
||||
objtool_link vmlinux.o
|
||||
|
||||
# modpost vmlinux.o to check for section mismatches
|
||||
${MAKE} -f "${srctree}/scripts/Makefile.modpost" MODPOST_VMLINUX=1
|
||||
|
@ -58,8 +58,7 @@
|
||||
#define ORC_TYPE_CALL 0
|
||||
#define ORC_TYPE_REGS 1
|
||||
#define ORC_TYPE_REGS_IRET 2
|
||||
#define UNWIND_HINT_TYPE_SAVE 3
|
||||
#define UNWIND_HINT_TYPE_RESTORE 4
|
||||
#define UNWIND_HINT_TYPE_RET_OFFSET 3
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
/*
|
||||
|
@ -1,11 +1,16 @@
|
||||
objtool-y += arch/$(SRCARCH)/
|
||||
|
||||
objtool-y += weak.o
|
||||
|
||||
objtool-$(SUBCMD_CHECK) += check.o
|
||||
objtool-$(SUBCMD_CHECK) += special.o
|
||||
objtool-$(SUBCMD_ORC) += check.o
|
||||
objtool-$(SUBCMD_ORC) += orc_gen.o
|
||||
objtool-$(SUBCMD_ORC) += orc_dump.o
|
||||
|
||||
objtool-y += builtin-check.o
|
||||
objtool-y += builtin-orc.o
|
||||
objtool-y += check.o
|
||||
objtool-y += orc_gen.o
|
||||
objtool-y += orc_dump.o
|
||||
objtool-y += elf.o
|
||||
objtool-y += special.o
|
||||
objtool-y += objtool.o
|
||||
|
||||
objtool-y += libstring.o
|
||||
|
@ -289,6 +289,47 @@ they mean, and suggestions for how to fix them.
|
||||
might be corrupt due to a gcc bug. For more details, see:
|
||||
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70646
|
||||
|
||||
9. file.o: warning: objtool: funcA() call to funcB() with UACCESS enabled
|
||||
|
||||
This means that an unexpected call to a non-whitelisted function exists
|
||||
outside of arch-specific guards.
|
||||
X86: SMAP (stac/clac): __uaccess_begin()/__uaccess_end()
|
||||
ARM: PAN: uaccess_enable()/uaccess_disable()
|
||||
|
||||
These functions should be called to denote a minimal critical section around
|
||||
access to __user variables. See also: https://lwn.net/Articles/517475/
|
||||
|
||||
The intention of the warning is to prevent calls to funcB() from eventually
|
||||
calling schedule(), potentially leaking the AC flags state, and not
|
||||
restoring them correctly.
|
||||
|
||||
It also helps verify that there are no unexpected calls to funcB() which may
|
||||
access user space pages with protections against doing so disabled.
|
||||
|
||||
To fix, either:
|
||||
1) remove explicit calls to funcB() from funcA().
|
||||
2) add the correct guards before and after calls to low level functions like
|
||||
__get_user_size()/__put_user_size().
|
||||
3) add funcB to uaccess_safe_builtin whitelist in tools/objtool/check.c, if
|
||||
funcB obviously does not call schedule(), and is marked notrace (since
|
||||
function tracing inserts additional calls, which is not obvious from the
|
||||
sources).
|
||||
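   As an illustration of option 2, the guarded section should look roughly
   like the following (sketch only; user_access_begin()/unsafe_put_user()
   are the generic helpers, the surrounding variables are invented):

	if (!user_access_begin(uptr, sizeof(*uptr)))
		return -EFAULT;
	unsafe_put_user(val, uptr, efault);	/* nothing here may call schedule() */
	user_access_end();
	return 0;
   efault:
	user_access_end();
	return -EFAULT;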
|
||||
10. file.o: warning: func()+0x5c: alternative modifies stack
|
||||
|
||||
This means that an alternative includes instructions that modify the
|
||||
stack. The problem is that there is only one ORC unwind table, this means
|
||||
that the ORC unwind entries must be valid for each of the alternatives.
|
||||
The easiest way to enforce this is to ensure alternatives do not contain
|
||||
any ORC entries, which in turn implies the above constraint.
|
||||
|
||||
11. file.o: warning: unannotated intra-function call
|
||||
|
||||
This warning means that a direct call is done to a destination which
|
||||
is not at the beginning of a function. If this is a legit call, you
|
||||
can remove this warning by putting the ANNOTATE_INTRA_FUNCTION_CALL
|
||||
directive right before the call.
|
||||
|
||||
|
||||
If the error doesn't seem to make sense, it could be a bug in objtool.
|
||||
Feel free to ask the objtool maintainer for help.
|
||||
|
@ -35,7 +35,8 @@ all: $(OBJTOOL)
|
||||
|
||||
INCLUDES := -I$(srctree)/tools/include \
|
||||
-I$(srctree)/tools/arch/$(HOSTARCH)/include/uapi \
|
||||
-I$(srctree)/tools/arch/$(SRCARCH)/include
|
||||
-I$(srctree)/tools/arch/$(SRCARCH)/include \
|
||||
-I$(srctree)/tools/objtool/arch/$(SRCARCH)/include
|
||||
WARNINGS := $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -Wno-packed
|
||||
CFLAGS := -Werror $(WARNINGS) $(KBUILD_HOSTCFLAGS) -g $(INCLUDES) $(LIBELF_FLAGS)
|
||||
LDFLAGS += $(LIBELF_LIBS) $(LIBSUBCMD) $(KBUILD_HOSTLDFLAGS)
|
||||
@ -45,14 +46,24 @@ elfshdr := $(shell echo '$(pound)include <libelf.h>' | $(CC) $(CFLAGS) -x c -E -
|
||||
CFLAGS += $(if $(elfshdr),,-DLIBELF_USE_DEPRECATED)
|
||||
|
||||
AWK = awk
|
||||
|
||||
SUBCMD_CHECK := n
|
||||
SUBCMD_ORC := n
|
||||
|
||||
ifeq ($(SRCARCH),x86)
|
||||
SUBCMD_CHECK := y
|
||||
SUBCMD_ORC := y
|
||||
endif
|
||||
|
||||
export SUBCMD_CHECK SUBCMD_ORC
|
||||
export srctree OUTPUT CFLAGS SRCARCH AWK
|
||||
include $(srctree)/tools/build/Makefile.include
|
||||
|
||||
$(OBJTOOL_IN): fixdep FORCE
|
||||
@$(CONFIG_SHELL) ./sync-check.sh
|
||||
@$(MAKE) $(build)=objtool
|
||||
|
||||
$(OBJTOOL): $(LIBSUBCMD) $(OBJTOOL_IN)
|
||||
@$(CONFIG_SHELL) ./sync-check.sh
|
||||
$(QUIET_LINK)$(CC) $(OBJTOOL_IN) $(LDFLAGS) -o $@
|
||||
|
||||
|
||||
|
@ -8,9 +8,11 @@
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <linux/list.h>
|
||||
#include "elf.h"
|
||||
#include "objtool.h"
|
||||
#include "cfi.h"
|
||||
|
||||
#include <asm/orc_types.h>
|
||||
|
||||
enum insn_type {
|
||||
INSN_JUMP_CONDITIONAL,
|
||||
INSN_JUMP_UNCONDITIONAL,
|
||||
@ -20,7 +22,6 @@ enum insn_type {
|
||||
INSN_CALL_DYNAMIC,
|
||||
INSN_RETURN,
|
||||
INSN_CONTEXT_SWITCH,
|
||||
INSN_STACK,
|
||||
INSN_BUG,
|
||||
INSN_NOP,
|
||||
INSN_STAC,
|
||||
@ -64,15 +65,23 @@ struct op_src {
|
||||
struct stack_op {
|
||||
struct op_dest dest;
|
||||
struct op_src src;
|
||||
struct list_head list;
|
||||
};
|
||||
|
||||
void arch_initial_func_cfi_state(struct cfi_state *state);
|
||||
struct instruction;
|
||||
|
||||
int arch_decode_instruction(struct elf *elf, struct section *sec,
|
||||
void arch_initial_func_cfi_state(struct cfi_init_state *state);
|
||||
|
||||
int arch_decode_instruction(const struct elf *elf, const struct section *sec,
|
||||
unsigned long offset, unsigned int maxlen,
|
||||
unsigned int *len, enum insn_type *type,
|
||||
unsigned long *immediate, struct stack_op *op);
|
||||
unsigned long *immediate,
|
||||
struct list_head *ops_list);
|
||||
|
||||
bool arch_callee_saved_reg(unsigned char reg);
|
||||
|
||||
unsigned long arch_jump_destination(struct instruction *insn);
|
||||
|
||||
unsigned long arch_dest_rela_offset(int addend);
|
||||
|
||||
#endif /* _ARCH_H */
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include "../../../arch/x86/lib/inat.c"
|
||||
#include "../../../arch/x86/lib/insn.c"
|
||||
|
||||
#include "../../check.h"
|
||||
#include "../../elf.h"
|
||||
#include "../../arch.h"
|
||||
#include "../../warn.h"
|
||||
@ -26,7 +27,7 @@ static unsigned char op_to_cfi_reg[][2] = {
|
||||
{CFI_DI, CFI_R15},
|
||||
};
|
||||
|
||||
static int is_x86_64(struct elf *elf)
|
||||
static int is_x86_64(const struct elf *elf)
|
||||
{
|
||||
switch (elf->ehdr.e_machine) {
|
||||
case EM_X86_64:
|
||||
@ -66,16 +67,34 @@ bool arch_callee_saved_reg(unsigned char reg)
|
||||
}
|
||||
}
|
||||
|
||||
int arch_decode_instruction(struct elf *elf, struct section *sec,
|
||||
unsigned long arch_dest_rela_offset(int addend)
|
||||
{
|
||||
return addend + 4;
|
||||
}
|
||||
|
||||
unsigned long arch_jump_destination(struct instruction *insn)
|
||||
{
|
||||
return insn->offset + insn->len + insn->immediate;
|
||||
}
|
||||
|
||||
#define ADD_OP(op) \
	if (!(op = calloc(1, sizeof(*op)))) \
		return -1; \
	else for (list_add_tail(&op->list, ops_list); op; op = NULL)
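(This macro uses the dangling else-for idiom: the for clause runs the attached block exactly once with op freshly allocated and already queued on ops_list, then clears op so the loop exits; the braces after ADD_OP(op) in the decoder below are that block. A stripped-down illustration of the same single-iteration trick, with invented names:)

	#define ADD_ITEM(p)						\
		if (!((p) = calloc(1, sizeof(*(p)))))			\
			return -1;					\
		else for (list_add_tail(&(p)->list, &items); (p); (p) = NULL)

	struct item *it;

	ADD_ITEM(it) {
		it->value = 42;		/* runs once; "it" is valid and queued here */
	}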
|
||||
int arch_decode_instruction(const struct elf *elf, const struct section *sec,
|
||||
unsigned long offset, unsigned int maxlen,
|
||||
unsigned int *len, enum insn_type *type,
|
||||
unsigned long *immediate, struct stack_op *op)
|
||||
unsigned long *immediate,
|
||||
struct list_head *ops_list)
|
||||
{
|
||||
struct insn insn;
|
||||
int x86_64, sign;
|
||||
unsigned char op1, op2, rex = 0, rex_b = 0, rex_r = 0, rex_w = 0,
|
||||
rex_x = 0, modrm = 0, modrm_mod = 0, modrm_rm = 0,
|
||||
modrm_reg = 0, sib = 0;
|
||||
struct stack_op *op = NULL;
|
||||
struct symbol *sym;
|
||||
|
||||
x86_64 = is_x86_64(elf);
|
||||
if (x86_64 == -1)
|
||||
@ -85,7 +104,7 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
|
||||
insn_get_length(&insn);
|
||||
|
||||
if (!insn_complete(&insn)) {
|
||||
WARN_FUNC("can't decode instruction", sec, offset);
|
||||
WARN("can't decode instruction at %s:0x%lx", sec->name, offset);
|
||||
return -1;
|
||||
}
|
||||
|
||||
@ -123,40 +142,44 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
|
||||
if (rex_w && !rex_b && modrm_mod == 3 && modrm_rm == 4) {
|
||||
|
||||
/* add/sub reg, %rsp */
|
||||
*type = INSN_STACK;
|
||||
op->src.type = OP_SRC_ADD;
|
||||
op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
|
||||
op->dest.type = OP_DEST_REG;
|
||||
op->dest.reg = CFI_SP;
|
||||
ADD_OP(op) {
|
||||
op->src.type = OP_SRC_ADD;
|
||||
op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
|
||||
op->dest.type = OP_DEST_REG;
|
||||
op->dest.reg = CFI_SP;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case 0x50 ... 0x57:
|
||||
|
||||
/* push reg */
|
||||
*type = INSN_STACK;
|
||||
op->src.type = OP_SRC_REG;
|
||||
op->src.reg = op_to_cfi_reg[op1 & 0x7][rex_b];
|
||||
op->dest.type = OP_DEST_PUSH;
|
||||
ADD_OP(op) {
|
||||
op->src.type = OP_SRC_REG;
|
||||
op->src.reg = op_to_cfi_reg[op1 & 0x7][rex_b];
|
||||
op->dest.type = OP_DEST_PUSH;
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
case 0x58 ... 0x5f:
|
||||
|
||||
/* pop reg */
|
||||
*type = INSN_STACK;
|
||||
op->src.type = OP_SRC_POP;
|
||||
op->dest.type = OP_DEST_REG;
|
||||
op->dest.reg = op_to_cfi_reg[op1 & 0x7][rex_b];
|
||||
ADD_OP(op) {
|
||||
op->src.type = OP_SRC_POP;
|
||||
op->dest.type = OP_DEST_REG;
|
||||
op->dest.reg = op_to_cfi_reg[op1 & 0x7][rex_b];
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
case 0x68:
|
||||
case 0x6a:
|
||||
/* push immediate */
|
||||
*type = INSN_STACK;
|
||||
op->src.type = OP_SRC_CONST;
|
||||
op->dest.type = OP_DEST_PUSH;
|
||||
ADD_OP(op) {
|
||||
op->src.type = OP_SRC_CONST;
|
||||
op->dest.type = OP_DEST_PUSH;
|
||||
}
|
||||
break;
|
||||
|
||||
case 0x70 ... 0x7f:
|
||||
@ -170,12 +193,13 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,

    if (modrm == 0xe4) {
        /* and imm, %rsp */
        *type = INSN_STACK;
        op->src.type = OP_SRC_AND;
        op->src.reg = CFI_SP;
        op->src.offset = insn.immediate.value;
        op->dest.type = OP_DEST_REG;
        op->dest.reg = CFI_SP;
        ADD_OP(op) {
            op->src.type = OP_SRC_AND;
            op->src.reg = CFI_SP;
            op->src.offset = insn.immediate.value;
            op->dest.type = OP_DEST_REG;
            op->dest.reg = CFI_SP;
        }
        break;
    }

@ -187,34 +211,37 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
        break;

    /* add/sub imm, %rsp */
    *type = INSN_STACK;
    op->src.type = OP_SRC_ADD;
    op->src.reg = CFI_SP;
    op->src.offset = insn.immediate.value * sign;
    op->dest.type = OP_DEST_REG;
    op->dest.reg = CFI_SP;
    ADD_OP(op) {
        op->src.type = OP_SRC_ADD;
        op->src.reg = CFI_SP;
        op->src.offset = insn.immediate.value * sign;
        op->dest.type = OP_DEST_REG;
        op->dest.reg = CFI_SP;
    }
    break;

case 0x89:
    if (rex_w && !rex_r && modrm_mod == 3 && modrm_reg == 4) {

        /* mov %rsp, reg */
        *type = INSN_STACK;
        op->src.type = OP_SRC_REG;
        op->src.reg = CFI_SP;
        op->dest.type = OP_DEST_REG;
        op->dest.reg = op_to_cfi_reg[modrm_rm][rex_b];
        ADD_OP(op) {
            op->src.type = OP_SRC_REG;
            op->src.reg = CFI_SP;
            op->dest.type = OP_DEST_REG;
            op->dest.reg = op_to_cfi_reg[modrm_rm][rex_b];
        }
        break;
    }

    if (rex_w && !rex_b && modrm_mod == 3 && modrm_rm == 4) {

        /* mov reg, %rsp */
        *type = INSN_STACK;
        op->src.type = OP_SRC_REG;
        op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
        op->dest.type = OP_DEST_REG;
        op->dest.reg = CFI_SP;
        ADD_OP(op) {
            op->src.type = OP_SRC_REG;
            op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
            op->dest.type = OP_DEST_REG;
            op->dest.reg = CFI_SP;
        }
        break;
    }

@ -224,22 +251,24 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
            (modrm_mod == 1 || modrm_mod == 2) && modrm_rm == 5) {

        /* mov reg, disp(%rbp) */
        *type = INSN_STACK;
        op->src.type = OP_SRC_REG;
        op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
        op->dest.type = OP_DEST_REG_INDIRECT;
        op->dest.reg = CFI_BP;
        op->dest.offset = insn.displacement.value;
        ADD_OP(op) {
            op->src.type = OP_SRC_REG;
            op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
            op->dest.type = OP_DEST_REG_INDIRECT;
            op->dest.reg = CFI_BP;
            op->dest.offset = insn.displacement.value;
        }

    } else if (rex_w && !rex_b && modrm_rm == 4 && sib == 0x24) {

        /* mov reg, disp(%rsp) */
        *type = INSN_STACK;
        op->src.type = OP_SRC_REG;
        op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
        op->dest.type = OP_DEST_REG_INDIRECT;
        op->dest.reg = CFI_SP;
        op->dest.offset = insn.displacement.value;
        ADD_OP(op) {
            op->src.type = OP_SRC_REG;
            op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
            op->dest.type = OP_DEST_REG_INDIRECT;
            op->dest.reg = CFI_SP;
            op->dest.offset = insn.displacement.value;
        }
    }

    break;
@ -248,23 +277,25 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
    if (rex_w && !rex_b && modrm_mod == 1 && modrm_rm == 5) {

        /* mov disp(%rbp), reg */
        *type = INSN_STACK;
        op->src.type = OP_SRC_REG_INDIRECT;
        op->src.reg = CFI_BP;
        op->src.offset = insn.displacement.value;
        op->dest.type = OP_DEST_REG;
        op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
        ADD_OP(op) {
            op->src.type = OP_SRC_REG_INDIRECT;
            op->src.reg = CFI_BP;
            op->src.offset = insn.displacement.value;
            op->dest.type = OP_DEST_REG;
            op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
        }

    } else if (rex_w && !rex_b && sib == 0x24 &&
               modrm_mod != 3 && modrm_rm == 4) {

        /* mov disp(%rsp), reg */
        *type = INSN_STACK;
        op->src.type = OP_SRC_REG_INDIRECT;
        op->src.reg = CFI_SP;
        op->src.offset = insn.displacement.value;
        op->dest.type = OP_DEST_REG;
        op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
        ADD_OP(op) {
            op->src.type = OP_SRC_REG_INDIRECT;
            op->src.reg = CFI_SP;
            op->src.offset = insn.displacement.value;
            op->dest.type = OP_DEST_REG;
            op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
        }
    }

    break;
@ -272,28 +303,30 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
case 0x8d:
    if (sib == 0x24 && rex_w && !rex_b && !rex_x) {

        *type = INSN_STACK;
        if (!insn.displacement.value) {
            /* lea (%rsp), reg */
            op->src.type = OP_SRC_REG;
        } else {
            /* lea disp(%rsp), reg */
            op->src.type = OP_SRC_ADD;
            op->src.offset = insn.displacement.value;
        ADD_OP(op) {
            if (!insn.displacement.value) {
                /* lea (%rsp), reg */
                op->src.type = OP_SRC_REG;
            } else {
                /* lea disp(%rsp), reg */
                op->src.type = OP_SRC_ADD;
                op->src.offset = insn.displacement.value;
            }
            op->src.reg = CFI_SP;
            op->dest.type = OP_DEST_REG;
            op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
        }
        op->src.reg = CFI_SP;
        op->dest.type = OP_DEST_REG;
        op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];

    } else if (rex == 0x48 && modrm == 0x65) {

        /* lea disp(%rbp), %rsp */
        *type = INSN_STACK;
        op->src.type = OP_SRC_ADD;
        op->src.reg = CFI_BP;
        op->src.offset = insn.displacement.value;
        op->dest.type = OP_DEST_REG;
        op->dest.reg = CFI_SP;
        ADD_OP(op) {
            op->src.type = OP_SRC_ADD;
            op->src.reg = CFI_BP;
            op->src.offset = insn.displacement.value;
            op->dest.type = OP_DEST_REG;
            op->dest.reg = CFI_SP;
        }

    } else if (rex == 0x49 && modrm == 0x62 &&
               insn.displacement.value == -8) {
@ -304,12 +337,13 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
         * Restoring rsp back to its original value after a
         * stack realignment.
         */
        *type = INSN_STACK;
        op->src.type = OP_SRC_ADD;
        op->src.reg = CFI_R10;
        op->src.offset = -8;
        op->dest.type = OP_DEST_REG;
        op->dest.reg = CFI_SP;
        ADD_OP(op) {
            op->src.type = OP_SRC_ADD;
            op->src.reg = CFI_R10;
            op->src.offset = -8;
            op->dest.type = OP_DEST_REG;
            op->dest.reg = CFI_SP;
        }

    } else if (rex == 0x49 && modrm == 0x65 &&
               insn.displacement.value == -16) {
@ -320,21 +354,23 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
         * Restoring rsp back to its original value after a
         * stack realignment.
         */
        *type = INSN_STACK;
        op->src.type = OP_SRC_ADD;
        op->src.reg = CFI_R13;
        op->src.offset = -16;
        op->dest.type = OP_DEST_REG;
        op->dest.reg = CFI_SP;
        ADD_OP(op) {
            op->src.type = OP_SRC_ADD;
            op->src.reg = CFI_R13;
            op->src.offset = -16;
            op->dest.type = OP_DEST_REG;
            op->dest.reg = CFI_SP;
        }
    }

    break;

case 0x8f:
    /* pop to mem */
    *type = INSN_STACK;
    op->src.type = OP_SRC_POP;
    op->dest.type = OP_DEST_MEM;
    ADD_OP(op) {
        op->src.type = OP_SRC_POP;
        op->dest.type = OP_DEST_MEM;
    }
    break;

case 0x90:
@ -343,16 +379,18 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,

case 0x9c:
    /* pushf */
    *type = INSN_STACK;
    op->src.type = OP_SRC_CONST;
    op->dest.type = OP_DEST_PUSHF;
    ADD_OP(op) {
        op->src.type = OP_SRC_CONST;
        op->dest.type = OP_DEST_PUSHF;
    }
    break;

case 0x9d:
    /* popf */
    *type = INSN_STACK;
    op->src.type = OP_SRC_POPF;
    op->dest.type = OP_DEST_MEM;
    ADD_OP(op) {
        op->src.type = OP_SRC_POPF;
        op->dest.type = OP_DEST_MEM;
    }
    break;

case 0x0f:
@ -387,16 +425,18 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
    } else if (op2 == 0xa0 || op2 == 0xa8) {

        /* push fs/gs */
        *type = INSN_STACK;
        op->src.type = OP_SRC_CONST;
        op->dest.type = OP_DEST_PUSH;
        ADD_OP(op) {
            op->src.type = OP_SRC_CONST;
            op->dest.type = OP_DEST_PUSH;
        }

    } else if (op2 == 0xa1 || op2 == 0xa9) {

        /* pop fs/gs */
        *type = INSN_STACK;
        op->src.type = OP_SRC_POP;
        op->dest.type = OP_DEST_MEM;
        ADD_OP(op) {
            op->src.type = OP_SRC_POP;
            op->dest.type = OP_DEST_MEM;
        }
    }

    break;
@ -409,8 +449,8 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
     * mov bp, sp
     * pop bp
     */
    *type = INSN_STACK;
    op->dest.type = OP_DEST_LEAVE;
    ADD_OP(op)
        op->dest.type = OP_DEST_LEAVE;

    break;

@ -429,14 +469,41 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
    *type = INSN_RETURN;
    break;

case 0xcf: /* iret */
    /*
     * Handle sync_core(), which has an IRET to self.
     * All other IRET are in STT_NONE entry code.
     */
    sym = find_symbol_containing(sec, offset);
    if (sym && sym->type == STT_FUNC) {
        ADD_OP(op) {
            /* add $40, %rsp */
            op->src.type = OP_SRC_ADD;
            op->src.reg = CFI_SP;
            op->src.offset = 5*8;
            op->dest.type = OP_DEST_REG;
            op->dest.reg = CFI_SP;
        }
        break;
    }

    /* fallthrough */

case 0xca: /* retf */
case 0xcb: /* retf */
case 0xcf: /* iret */
    *type = INSN_CONTEXT_SWITCH;
    break;

case 0xe8:
    *type = INSN_CALL;
    /*
     * For the impact on the stack, a CALL behaves like
     * a PUSH of an immediate value (the return address).
     */
    ADD_OP(op) {
        op->src.type = OP_SRC_CONST;
        op->dest.type = OP_DEST_PUSH;
    }
    break;

case 0xfc:
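The sync_core() special case in the hunk above models an IRET-to-self as "add $40, %rsp": a 64-bit iretq pops five 8-byte slots (RIP, CS, RFLAGS, RSP, SS), hence the 5*8 offset fed into OP_SRC_ADD. The structure below is purely illustrative of that frame layout on x86-64 (it assumes an LP64 target and is not a type objtool defines):

/* Illustration: the five 8-byte slots a 64-bit iretq consumes. */
struct iretq_frame {
    unsigned long ip;       /* RIP */
    unsigned long cs;       /* CS, zero-extended to 8 bytes */
    unsigned long flags;    /* RFLAGS */
    unsigned long sp;       /* RSP */
    unsigned long ss;       /* SS, zero-extended to 8 bytes */
};

_Static_assert(sizeof(struct iretq_frame) == 5 * 8,
               "iretq pops 40 bytes in 64-bit mode");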
@ -464,9 +531,10 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
    else if (modrm_reg == 6) {

        /* push from mem */
        *type = INSN_STACK;
        op->src.type = OP_SRC_CONST;
        op->dest.type = OP_DEST_PUSH;
        ADD_OP(op) {
            op->src.type = OP_SRC_CONST;
            op->dest.type = OP_DEST_PUSH;
        }
    }

    break;
@ -480,7 +548,7 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
    return 0;
}

void arch_initial_func_cfi_state(struct cfi_state *state)
void arch_initial_func_cfi_state(struct cfi_init_state *state)
{
    int i;
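Every stack-affecting opcode above now wraps its operand assignments in ADD_OP(op) { ... } instead of setting *type = INSN_STACK and filling a single preallocated op, which is what lets one instruction carry several stack operations. The macro itself is defined outside these hunks; the fragment below is only a minimal, self-contained sketch of how such an allocate-append-then-fill macro can work (the field names, the list plumbing, and record_push_reg() are simplified assumptions, not objtool's actual definitions):

#include <stdlib.h>

struct stack_op {
    int src_type, src_reg, src_offset;
    int dest_type, dest_reg, dest_offset;
    struct stack_op *next;
};

/* Allocate a new op, append it at *tail, then run the attached block
 * (or statement) exactly once so the caller can fill in the fields. */
#define ADD_OP(op)                                              \
    if (!((op) = calloc(1, sizeof(*(op)))))                     \
        return -1;                                              \
    else for (*tail = (op), tail = &(op)->next; (op); (op) = NULL)

/* Example use, mirroring the "push reg" case above. */
static int record_push_reg(struct stack_op **tail, int cfi_reg)
{
    struct stack_op *op;

    ADD_OP(op) {
        op->src_type = 1;       /* stands in for OP_SRC_REG */
        op->src_reg = cfi_reg;
        op->dest_type = 2;      /* stands in for OP_DEST_PUSH */
    }
    return 0;
}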
25  tools/objtool/arch/x86/include/cfi_regs.h  Normal file
@ -0,0 +1,25 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */

#ifndef _OBJTOOL_CFI_REGS_H
#define _OBJTOOL_CFI_REGS_H

#define CFI_AX 0
#define CFI_DX 1
#define CFI_CX 2
#define CFI_BX 3
#define CFI_SI 4
#define CFI_DI 5
#define CFI_BP 6
#define CFI_SP 7
#define CFI_R8 8
#define CFI_R9 9
#define CFI_R10 10
#define CFI_R11 11
#define CFI_R12 12
#define CFI_R13 13
#define CFI_R14 14
#define CFI_R15 15
#define CFI_RA 16
#define CFI_NUM_REGS 17

#endif /* _OBJTOOL_CFI_REGS_H */
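The numbering in this new header (AX=0, DX=1, CX=2, BX=3, SI=4, DI=5, BP=6, SP=7, R8-R15=8-15, RA=16) follows the x86-64 DWARF register numbering used by unwind metadata, which is why DX and CX are swapped relative to the instruction encoding. A hypothetical debug helper, not part of objtool, showing how these constants can be turned into printable names:

#include "cfi_regs.h"

static const char *cfi_reg_name(int reg)
{
    static const char * const names[CFI_NUM_REGS] = {
        [CFI_AX] = "rax",  [CFI_DX] = "rdx",  [CFI_CX] = "rcx",
        [CFI_BX] = "rbx",  [CFI_SI] = "rsi",  [CFI_DI] = "rdi",
        [CFI_BP] = "rbp",  [CFI_SP] = "rsp",  [CFI_R8] = "r8",
        [CFI_R9] = "r9",   [CFI_R10] = "r10", [CFI_R11] = "r11",
        [CFI_R12] = "r12", [CFI_R13] = "r13", [CFI_R14] = "r14",
        [CFI_R15] = "r15", [CFI_RA] = "ra",
    };

    return (reg >= 0 && reg < CFI_NUM_REGS) ? names[reg] : "?";
}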
@ -14,10 +14,11 @@
 */

#include <subcmd/parse-options.h>
#include <string.h>
#include "builtin.h"
#include "check.h"
#include "objtool.h"

bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats;
bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux;

static const char * const check_usage[] = {
    "objtool check [<options>] file.o",
@ -32,12 +33,14 @@ const struct option check_options[] = {
    OPT_BOOLEAN('b', "backtrace", &backtrace, "unwind on error"),
    OPT_BOOLEAN('a', "uaccess", &uaccess, "enable uaccess checking"),
    OPT_BOOLEAN('s', "stats", &stats, "print statistics"),
    OPT_BOOLEAN('d', "duplicate", &validate_dup, "duplicate validation for vmlinux.o"),
    OPT_BOOLEAN('l', "vmlinux", &vmlinux, "vmlinux.o validation"),
    OPT_END(),
};

int cmd_check(int argc, const char **argv)
{
    const char *objname;
    const char *objname, *s;

    argc = parse_options(argc, argv, check_options, check_usage, 0);

@ -46,5 +49,9 @@ int cmd_check(int argc, const char **argv)

    objname = argv[0];

    s = strstr(objname, "vmlinux.o");
    if (s && !s[9])
        vmlinux = true;

    return check(objname, false);
}
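The new lines in cmd_check() above turn vmlinux mode on automatically when the object is named vmlinux.o: strstr() locates the substring and, because "vmlinux.o" is nine characters long, !s[9] confirms that nothing follows it, i.e. the name ends there. A standalone illustration of the same suffix test (the helper name is hypothetical, not objtool's):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static bool is_vmlinux_object(const char *objname)
{
    const char *s = strstr(objname, "vmlinux.o");

    return s && !s[9];      /* strlen("vmlinux.o") == 9 */
}

int main(void)
{
    printf("%d\n", is_vmlinux_object("vmlinux.o"));         /* 1 */
    printf("%d\n", is_vmlinux_object("./build/vmlinux.o")); /* 1 */
    printf("%d\n", is_vmlinux_object("vmlinux.o.orig"));    /* 0 */
    return 0;
}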
@ -14,8 +14,7 @@

#include <string.h>
#include "builtin.h"
#include "check.h"

#include "objtool.h"

static const char *orc_usage[] = {
    "objtool orc generate [<options>] file.o",
@ -8,7 +8,7 @@
#include <subcmd/parse-options.h>

extern const struct option check_options[];
extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats;
extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux;

extern int cmd_check(int argc, const char **argv);
extern int cmd_orc(int argc, const char **argv);
@ -6,38 +6,33 @@
#ifndef _OBJTOOL_CFI_H
#define _OBJTOOL_CFI_H

#include "cfi_regs.h"

#define CFI_UNDEFINED -1
#define CFI_CFA -2
#define CFI_SP_INDIRECT -3
#define CFI_BP_INDIRECT -4

#define CFI_AX 0
#define CFI_DX 1
#define CFI_CX 2
#define CFI_BX 3
#define CFI_SI 4
#define CFI_DI 5
#define CFI_BP 6
#define CFI_SP 7
#define CFI_R8 8
#define CFI_R9 9
#define CFI_R10 10
#define CFI_R11 11
#define CFI_R12 12
#define CFI_R13 13
#define CFI_R14 14
#define CFI_R15 15
#define CFI_RA 16
#define CFI_NUM_REGS 17

struct cfi_reg {
    int base;
    int offset;
};

struct cfi_state {
    struct cfi_reg cfa;
struct cfi_init_state {
    struct cfi_reg regs[CFI_NUM_REGS];
    struct cfi_reg cfa;
};

struct cfi_state {
    struct cfi_reg regs[CFI_NUM_REGS];
    struct cfi_reg vals[CFI_NUM_REGS];
    struct cfi_reg cfa;
    int stack_size;
    int drap_reg, drap_offset;
    unsigned char type;
    bool bp_scratch;
    bool drap;
    bool end;
};

#endif /* _OBJTOOL_CFI_H */
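The cfi.h hunk above moves the register numbers into the new cfi_regs.h and splits the old structure in two: cfi_init_state holds only the architecture's initial register and CFA rules (matching the new arch_initial_func_cfi_state() prototype earlier in this diff), while cfi_state keeps the per-instruction tracking data (regs[], the vals[] scratch entries, DRAP bookkeeping, and so on). A minimal, hypothetical sketch of seeding a cfi_state from an initial state; the helper name and the reset logic are assumptions, not code from this commit:

#include <stdbool.h>
#include <string.h>
#include "cfi.h"

static void cfi_state_seed(struct cfi_state *cfi,
                           const struct cfi_init_state *init)
{
    memset(cfi, 0, sizeof(*cfi));
    memcpy(cfi->regs, init->regs, sizeof(cfi->regs));
    cfi->cfa = init->cfa;

    /* vals[] is scratch tracking; start every entry undefined. */
    for (int i = 0; i < CFI_NUM_REGS; i++) {
        cfi->vals[i].base = CFI_UNDEFINED;
        cfi->vals[i].offset = 0;
    }
}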
File diff suppressed because it is too large.
Some files were not shown because too many files have changed in this diff.