ANDROID: sched: Introducing PELT multiplier

The new sysctl sched_pelt_multiplier allows a user to set a clock
multiplier of x2 or x4 (x1 being the default). This clock multiplier
artificially speeds up PELT ramp-up/down, similarly to a shorter half-life.
Indeed, if we write PELT as a first-order filter:

  y(t) = G * (1 - exp(-t/tau))

then we can see that multiplying the time by a constant X is the same as
dividing the time constant tau by X:

  y(t) = G * (1 - exp(-(t*X)/tau))
  y(t) = G * (1 - exp(-t/(tau/X)))

Tau being half-life/ln(2), multiplying the PELT time is the same as
dividing the half-life:

  - x1: 32ms half-life
  - x2: 16ms half-life
  - x4: 8ms  half-life
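
As a quick numerical sanity check (an editorial sketch, not part of the
patch; the decay() helper below is a simple exponential stand-in for the
PELT signal, not kernel code), feeding a clock that runs X times faster
into the 32ms filter gives exactly the same result as keeping the clock
and using a 32/X ms half-life:

#include <math.h>
#include <stdio.h>

/* Remaining fraction of a unit signal after t_ms milliseconds of decay. */
static double decay(double t_ms, double halflife_ms)
{
        return exp(-t_ms * log(2.0) / halflife_ms);
}

int main(void)
{
        double t = 100.0;   /* wall-clock time in ms */
        int x = 2;          /* PELT multiplier */

        /* clock sped up by x, default 32ms half-life */
        double sped_up_clock = decay(t * x, 32.0);
        /* normal clock, half-life divided by x */
        double shorter_halflife = decay(t, 32.0 / x);

        printf("%f %f\n", sped_up_clock, shorter_halflife);  /* identical */
        return 0;
}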

Internally, a new clock is created: clock_task_mult (a per-CPU counter in
this implementation, read through rq_clock_task_mult()). It sits in the
clock hierarchy between rq->clock_task and rq->clock_pelt.
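
For orientation, the resulting clock chain can be sketched as follows (an
editorial reading of the diff below, not wording from the patch):

  rq->clock                 raw clock, irq/steal time included
    -> rq->clock_task       irq and steal time removed
      -> clock_task_mult    clock_task advanced by delta << sched_pelt_lshift
        -> rq->clock_pelt   scaled for CPU frequency/capacity invariance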

Bug: 177593580
Bug: 237219700
Change-Id: I67e6ca7994bebea22bf75732ee11d2b10e0d6b7e
Suggested-by: Morten Rasmussen <morten.rasmussen@arm.com>
Signed-off-by: Vincent Donnefort <vincent.donnefort@arm.com>
Signed-off-by: JianMin Liu <jian-min.liu@mediatek.com>
JianMin Liu 2022-06-29 21:13:56 +08:00, committed by Todd Kjos
parent b2e5773ea4
commit 4442801a43
6 changed files with 80 additions and 5 deletions

include/linux/sched/sysctl.h

@@ -92,6 +92,13 @@ int sysctl_numa_balancing(struct ctl_table *table, int write, void *buffer,
 int sysctl_schedstats(struct ctl_table *table, int write, void *buffer,
 		size_t *lenp, loff_t *ppos);

+#ifdef CONFIG_SMP
+extern unsigned int sysctl_sched_pelt_multiplier;
+int sched_pelt_multiplier(struct ctl_table *table, int write, void *buffer,
+		size_t *lenp, loff_t *ppos);
+#endif
+
 #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
 extern unsigned int sysctl_sched_energy_aware;
 int sched_energy_aware_handler(struct ctl_table *table, int write,

kernel/sched/fair.c

@@ -4788,7 +4788,7 @@ static int tg_unthrottle_up(struct task_group *tg, void *data)
 	cfs_rq->throttle_count--;
 	if (!cfs_rq->throttle_count) {
-		cfs_rq->throttled_clock_task_time += rq_clock_task(rq) -
+		cfs_rq->throttled_clock_task_time += rq_clock_task_mult(rq) -
 					     cfs_rq->throttled_clock_task;

 		/* Add cfs_rq with already running entity in the list */
@@ -4806,7 +4806,7 @@ static int tg_throttle_down(struct task_group *tg, void *data)
 	/* group is entering throttled state, stop time */
 	if (!cfs_rq->throttle_count) {
-		cfs_rq->throttled_clock_task = rq_clock_task(rq);
+		cfs_rq->throttled_clock_task = rq_clock_task_mult(rq);
 		list_del_leaf_cfs_rq(cfs_rq);
 	}
 	cfs_rq->throttle_count++;
@@ -5224,7 +5224,7 @@ static void sync_throttle(struct task_group *tg, int cpu)
 	pcfs_rq = tg->parent->cfs_rq[cpu];

 	cfs_rq->throttle_count = pcfs_rq->throttle_count;
-	cfs_rq->throttled_clock_task = rq_clock_task(cpu_rq(cpu));
+	cfs_rq->throttled_clock_task = rq_clock_task_mult(cpu_rq(cpu));
 }

 /* conditionally throttle active cfs_rq's from put_prev_entity() */

kernel/sched/pelt.c

@@ -531,3 +531,45 @@ int update_irq_load_avg(struct rq *rq, u64 running)
 	return ret;
 }
 #endif
+
+DEFINE_PER_CPU(u64, clock_task_mult);
+
+unsigned int sysctl_sched_pelt_multiplier = 1;
+__read_mostly unsigned int sched_pelt_lshift;
+
+int sched_pelt_multiplier(struct ctl_table *table, int write, void *buffer,
+			  size_t *lenp, loff_t *ppos)
+{
+	static DEFINE_MUTEX(mutex);
+	unsigned int old;
+	int ret;
+
+	mutex_lock(&mutex);
+	old = sysctl_sched_pelt_multiplier;
+	ret = proc_dointvec(table, write, buffer, lenp, ppos);
+	if (ret)
+		goto undo;
+	if (!write)
+		goto done;
+
+	switch (sysctl_sched_pelt_multiplier) {
+	case 1:
+		fallthrough;
+	case 2:
+		fallthrough;
+	case 4:
+		WRITE_ONCE(sched_pelt_lshift,
+			   sysctl_sched_pelt_multiplier >> 1);
+		goto done;
+	default:
+		ret = -EINVAL;
+	}
+
+undo:
+	sysctl_sched_pelt_multiplier = old;
+done:
+	mutex_unlock(&mutex);
+
+	return ret;
+}
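
The handler accepts only 1, 2 or 4 and stores the multiplier as a shift
count; the small illustrative program below (mine, not part of the patch)
shows how the >> 1 maps those values onto the left shift later applied in
update_rq_clock_pelt(). Note the mapping is only correct because those
three values are the only ones the handler lets through.

#include <stdio.h>

int main(void)
{
        unsigned int mult[] = { 1, 2, 4 };   /* only values the handler accepts */

        for (int i = 0; i < 3; i++) {
                unsigned int lshift = mult[i] >> 1;   /* 1 -> 0, 2 -> 1, 4 -> 2 */
                printf("x%u multiplier -> delta <<= %u (delta * %u)\n",
                       mult[i], lshift, 1u << lshift);
        }
        return 0;
}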

kernel/sched/pelt.h

@@ -61,6 +61,8 @@ static inline void cfs_se_util_change(struct sched_avg *avg)
 	WRITE_ONCE(avg->util_est.enqueued, enqueued);
 }

+extern unsigned int sched_pelt_lshift;
+
 /*
  * The clock_pelt scales the time to reflect the effective amount of
  * computation done during the running delta time but then sync back to
@@ -75,9 +77,13 @@ static inline void cfs_se_util_change(struct sched_avg *avg)
  */
 static inline void update_rq_clock_pelt(struct rq *rq, s64 delta)
 {
+	delta <<= READ_ONCE(sched_pelt_lshift);
+
+	per_cpu(clock_task_mult, rq->cpu) += delta;
+
 	if (unlikely(is_idle_task(rq->curr))) {
 		/* The rq is idle, we can sync to clock_task */
-		rq->clock_pelt = rq_clock_task(rq);
+		rq->clock_pelt = rq_clock_task_mult(rq);
 		return;
 	}
@@ -129,7 +135,8 @@ static inline void update_idle_rq_clock_pelt(struct rq *rq)
 	 * rq's clock_task.
 	 */
 	if (util_sum >= divider)
-		rq->lost_idle_time += rq_clock_task(rq) - rq->clock_pelt;
+		rq->lost_idle_time += rq_clock_task_mult(rq) -
+			rq->clock_pelt;
 }

 static inline u64 rq_clock_pelt(struct rq *rq)

kernel/sched/sched.h

@@ -1193,6 +1193,16 @@ static inline u64 rq_clock_task(struct rq *rq)
 	return rq->clock_task;
 }

+DECLARE_PER_CPU(u64, clock_task_mult);
+
+static inline u64 rq_clock_task_mult(struct rq *rq)
+{
+	lockdep_assert_held(&rq->lock);
+	assert_clock_updated(rq);
+
+	return per_cpu(clock_task_mult, rq->cpu);
+}
+
 /**
  * By default the decay is the default pelt decay period.
  * The decay shift can change the decay period in

kernel/sysctl.c

@@ -1829,6 +1829,15 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= sched_rr_handler,
 	},
+#ifdef CONFIG_SMP
+	{
+		.procname	= "sched_pelt_multiplier",
+		.data		= &sysctl_sched_pelt_multiplier,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= sched_pelt_multiplier,
+	},
+#endif
 #ifdef CONFIG_UCLAMP_TASK
 	{
 		.procname	= "sched_util_clamp_min",
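
Since the entry is added to kern_table, the knob should appear as
/proc/sys/kernel/sched_pelt_multiplier (path inferred from the table
above, not stated in the patch). A minimal userspace sketch switching to
the 16ms half-life:

#include <stdio.h>

int main(void)
{
        /* Path assumed from the kern_table entry; valid values are 1, 2 or 4. */
        FILE *f = fopen("/proc/sys/kernel/sched_pelt_multiplier", "w");

        if (!f) {
                perror("sched_pelt_multiplier");
                return 1;
        }
        fprintf(f, "2\n");   /* x2 => 16ms half-life */
        fclose(f);
        return 0;
}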