rcu: Remove expedited GP funnel-lock bypass
Commit #cdacbe1f91264 ("rcu: Add fastpath bypassing funnel locking") turns out to be a pessimization at high load because it forces a tree full of tasks to wait for an expedited grace period that they probably do not need. This commit therefore removes this optimization.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
parent 4f41530245
commit e2fd9d3584
Documentation/RCU/trace.txt
@@ -237,17 +237,17 @@ o "ktl" is the low-order 16 bits (in hexadecimal) of the count of
 The output of "cat rcu/rcu_preempt/rcuexp" looks as follows:
 
-s=21872 wd0=0 wd1=0 wd2=0 wd3=5 n=0 enq=0 sc=21872
+s=21872 wd1=0 wd2=0 wd3=5 n=0 enq=0 sc=21872
 
 These fields are as follows:
 
 o	"s" is the sequence number, with an odd number indicating that
 	an expedited grace period is in progress.
 
-o	"wd0", "wd1", "wd2", and "wd3" are the number of times that an
-	attempt to start an expedited grace period found that someone
-	else had completed an expedited grace period that satisfies the
-	attempted request.  "Our work is done."
+o	"wd1", "wd2", and "wd3" are the number of times that an attempt
+	to start an expedited grace period found that someone else had
+	completed an expedited grace period that satisfies the attempted
+	request.  "Our work is done."
 
 o	"n" is number of times that a concurrent CPU-hotplug operation
 	forced a fallback to a normal grace period.
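For readers unfamiliar with the convention, "s" and "sc" follow RCU's even/odd sequence-counter scheme: the counter is incremented once when a grace period starts (going odd) and once when it ends (going even), so sc = s/2 counts completed expedited grace periods. Below is a minimal single-threaded userspace sketch of that scheme, assuming the (seq + 3) & ~0x1 round-up used by the kernel's snapshot helpers of this era; seq_snap() and seq_done() are illustrative names, not kernel APIs.

#include <stdbool.h>
#include <stdio.h>

static unsigned long exp_seq;	/* even: idle, odd: GP in progress */

/* Snapshot the counter value that would satisfy a new request. */
static unsigned long seq_snap(void)
{
	/* Round up past any grace period already in flight. */
	return (exp_seq + 3) & ~0x1UL;
}

/* Has a grace period satisfying snapshot s already completed? */
static bool seq_done(unsigned long s)
{
	return exp_seq >= s;
}

int main(void)
{
	unsigned long s = seq_snap();	/* here: 2 */

	exp_seq++;			/* GP starts: counter goes odd */
	printf("s=%lu (odd: in progress), done=%d\n", exp_seq, seq_done(s));
	exp_seq++;			/* GP ends: counter goes even */
	printf("s=%lu sc=%lu done=%d\n", exp_seq, exp_seq / 2, seq_done(s));
	return 0;
}

When seq_done() becomes true before a task reaches the root of the funnel, its work has been done for it; those are exactly the events the "wd" counters record.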
kernel/rcu/tree.c
@@ -3616,25 +3616,6 @@ static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
 	struct rcu_node *rnp0;
 	struct rcu_node *rnp1 = NULL;
 
-	/*
-	 * First try directly acquiring the root lock in order to reduce
-	 * latency in the common case where expedited grace periods are
-	 * rare.  We check mutex_is_locked() to avoid pathological levels of
-	 * memory contention on ->exp_funnel_mutex in the heavy-load case.
-	 */
-	rnp0 = rcu_get_root(rsp);
-	if (!mutex_is_locked(&rnp0->exp_funnel_mutex)) {
-		if (mutex_trylock(&rnp0->exp_funnel_mutex)) {
-			trace_rcu_exp_funnel_lock(rsp->name, rnp0->level,
-						  rnp0->grplo, rnp0->grphi,
-						  TPS("acq"));
-			if (sync_exp_work_done(rsp, rnp0, NULL,
-					       &rdp->expedited_workdone0, s))
-				return NULL;
-			return rnp0;
-		}
-	}
-
 	/*
 	 * Each pass through the following loop works its way
 	 * up the rcu_node tree, returning if others have done the
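With the trylock bypass removed, every request takes the funnel walk implemented by the loop just below the deleted hunk: acquire a node's mutex, release the one below it, and bail out early if another task's grace period already covers the request. The following is a rough userspace sketch of that funnel-locking pattern using pthreads; the node layout is illustrative, and work_done() is a hypothetical stand-in for sync_exp_work_done(), not a kernel API.

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

struct node {
	struct node *parent;			/* NULL at the root */
	pthread_mutex_t funnel_mutex;
};

static atomic_ulong completed_seq;

/* Stand-in for sync_exp_work_done(): true once a grace period
 * satisfying snapshot s has completed while we were waiting. */
static bool work_done(unsigned long s)
{
	return atomic_load(&completed_seq) >= s;
}

/* Walk from a leaf toward the root, holding at most one node mutex
 * at a time.  Returns the root with its mutex held (caller runs the
 * expedited GP), or NULL if someone else's GP already satisfied us. */
static struct node *funnel_lock(struct node *leaf, unsigned long s)
{
	struct node *np, *held = NULL;

	for (np = leaf; np; np = np->parent) {
		if (work_done(s)) {
			if (held)
				pthread_mutex_unlock(&held->funnel_mutex);
			return NULL;	/* "Our work is done." */
		}
		pthread_mutex_lock(&np->funnel_mutex);
		if (held)
			pthread_mutex_unlock(&held->funnel_mutex);
		held = np;
	}
	return held;
}

The point of the funnel is that contending tasks queue on low-level node mutexes rather than all piling onto the root, and most of them discover via the work_done() check that they no longer need a grace period at all; the removed bypass defeated that batching by letting one task seize the root directly.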
kernel/rcu/tree.h
@@ -388,7 +388,6 @@ struct rcu_data {
 	struct rcu_head oom_head;
 #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
 	struct mutex exp_funnel_mutex;
-	atomic_long_t expedited_workdone0;	/* # done by others #0. */
 	atomic_long_t expedited_workdone1;	/* # done by others #1. */
 	atomic_long_t expedited_workdone2;	/* # done by others #2. */
 	atomic_long_t expedited_workdone3;	/* # done by others #3. */
kernel/rcu/tree_trace.c
@@ -185,17 +185,16 @@ static int show_rcuexp(struct seq_file *m, void *v)
 	int cpu;
 	struct rcu_state *rsp = (struct rcu_state *)m->private;
 	struct rcu_data *rdp;
-	unsigned long s0 = 0, s1 = 0, s2 = 0, s3 = 0;
+	unsigned long s1 = 0, s2 = 0, s3 = 0;
 
 	for_each_possible_cpu(cpu) {
 		rdp = per_cpu_ptr(rsp->rda, cpu);
-		s0 += atomic_long_read(&rdp->expedited_workdone0);
 		s1 += atomic_long_read(&rdp->expedited_workdone1);
 		s2 += atomic_long_read(&rdp->expedited_workdone2);
 		s3 += atomic_long_read(&rdp->expedited_workdone3);
 	}
-	seq_printf(m, "s=%lu wd0=%lu wd1=%lu wd2=%lu wd3=%lu n=%lu enq=%d sc=%lu\n",
-		   rsp->expedited_sequence, s0, s1, s2, s3,
+	seq_printf(m, "s=%lu wd1=%lu wd2=%lu wd3=%lu n=%lu enq=%d sc=%lu\n",
+		   rsp->expedited_sequence, s1, s2, s3,
 		   atomic_long_read(&rsp->expedited_normal),
 		   atomic_read(&rsp->expedited_need_qs),
 		   rsp->expedited_sequence / 2);
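show_rcuexp() simply sums the surviving per-CPU counters and emits one stats line in the new wd1..wd3 format. A self-contained C11 model of that aggregation follows; NR_CPUS, the counter values, and the struct layout are made up for illustration, and the n/enq fields are omitted for brevity.

#include <stdatomic.h>
#include <stdio.h>

#define NR_CPUS 4

struct pcpu_stats {
	atomic_long wd1, wd2, wd3;	/* "done by others" counters */
};

static struct pcpu_stats stats[NR_CPUS];
static unsigned long expedited_sequence = 21872;

int main(void)
{
	unsigned long s1 = 0, s2 = 0, s3 = 0;
	int cpu;

	atomic_fetch_add(&stats[2].wd3, 5);	/* pretend CPU 2 saw 5 hits */

	/* Aggregate across all CPUs, as show_rcuexp() does. */
	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		s1 += atomic_load(&stats[cpu].wd1);
		s2 += atomic_load(&stats[cpu].wd2);
		s3 += atomic_load(&stats[cpu].wd3);
	}
	printf("s=%lu wd1=%lu wd2=%lu wd3=%lu sc=%lu\n",
	       expedited_sequence, s1, s2, s3, expedited_sequence / 2);
	return 0;
}

Run as written, this prints "s=21872 wd1=0 wd2=0 wd3=5 sc=10936", matching the sample line in the updated documentation above.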