Merge "sched/walt: Introduce shared rail sibling"

commit 826f36ec77
Author: qctecmdr
Committed by: Gerrit - the friendly Code Review server
Date: 2023-07-18 19:06:28 -07:00
2 changed files with 95 additions and 20 deletions

View File

@@ -67,6 +67,7 @@ static struct irq_work walt_cpufreq_irq_work;
 struct irq_work walt_migration_irq_work;
 unsigned int walt_rotation_enabled;
 cpumask_t asym_cap_sibling_cpus = CPU_MASK_NONE;
+cpumask_t shared_rail_sibling_cpus = CPU_MASK_NONE;
 unsigned int __read_mostly sched_ravg_window = 20000000;
 int min_possible_cluster_id;
@@ -83,6 +84,8 @@ unsigned int __read_mostly sched_init_task_load_windows;
  */
 unsigned int __read_mostly sched_load_granule;
+int enable_shared_rail_boost;
 u64 walt_sched_clock(void)
 {
 	if (unlikely(walt_clock_suspended))
@@ -678,14 +681,42 @@ __cpu_util_freq_walt(int cpu, struct walt_cpu_load *walt_load, unsigned int *reason)
 	return (util >= capacity) ? capacity : util;
 }
+#define ADJUSTED_SHARED_RAIL_UTIL(orig, prime, x) \
+	(max(orig, mult_frac(prime, x, 100)))
+#define PRIME_FACTOR 90
 unsigned long
 cpu_util_freq_walt(int cpu, struct walt_cpu_load *walt_load, unsigned int *reason)
 {
 	struct walt_cpu_load wl_other = {0};
+	struct walt_cpu_load wl_prime = {0};
 	unsigned long util = 0, util_other = 0;
 	unsigned long capacity = capacity_orig_of(cpu);
-	int i;
-	unsigned long max_nl = 0, max_pl = 0;
+	int i, mpct = PRIME_FACTOR;
+	unsigned long max_nl_other = 0, max_pl_other = 0;
+	bool shared_rail = false;
+	if (cpumask_test_cpu(cpu, &shared_rail_sibling_cpus) &&
+			enable_shared_rail_boost) {
+		for_each_cpu(i, &shared_rail_sibling_cpus) {
+			if (i == (num_possible_cpus() - 1))
+				util = __cpu_util_freq_walt(i, &wl_prime, reason);
+			else {
+				util_other = max(util_other,
+						__cpu_util_freq_walt(i, &wl_other, reason));
+				max_nl_other = max(max_nl_other, wl_other.nl);
+				max_pl_other = max(max_pl_other, wl_other.pl);
+			}
+		}
+		if (cpu == (num_possible_cpus() - 1))
+			mpct = 100;
+		util = ADJUSTED_SHARED_RAIL_UTIL(util_other, util, mpct);
+		walt_load->nl = ADJUSTED_SHARED_RAIL_UTIL(max_nl_other, wl_prime.nl, mpct);
+		walt_load->pl = ADJUSTED_SHARED_RAIL_UTIL(max_pl_other, wl_prime.pl, mpct);
+		shared_rail = true;
+	}
 	if (!cpumask_test_cpu(cpu, &asym_cap_sibling_cpus))
 		goto finish;
@@ -695,19 +726,22 @@ cpu_util_freq_walt(int cpu, struct walt_cpu_load *walt_load, unsigned int *reason)
 	for_each_cpu(i, &asym_cap_sibling_cpus) {
 		if (i == cpu)
-			util = __cpu_util_freq_walt(cpu, walt_load, reason);
+			util = max(util, __cpu_util_freq_walt(cpu, walt_load, reason));
 		else {
 			util_other = max(util_other, __cpu_util_freq_walt(i, &wl_other, reason));
-			max_nl = max(max_nl, wl_other.nl);
-			max_pl = max(max_pl, wl_other.pl);
+			max_nl_other = max(max_nl_other, wl_other.nl);
+			max_pl_other = max(max_pl_other, wl_other.pl);
 		}
 	}
 	util = max(util, util_other);
-	walt_load->nl = max(walt_load->nl, max_nl);
-	walt_load->pl = max(walt_load->pl, max_pl);
+	walt_load->nl = max(walt_load->nl, max_nl_other);
+	walt_load->pl = max(walt_load->pl, max_pl_other);
 	return (util >= capacity) ? capacity : util;
 finish:
+	if (shared_rail)
+		return (util >= capacity) ? capacity : util;
 	return __cpu_util_freq_walt(cpu, walt_load, reason);
 }
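For readers tracing the math in the hunks above: ADJUSTED_SHARED_RAIL_UTIL() lifts the shared-rail frequency vote to whichever is higher, the non-prime siblings' utilization or PRIME_FACTOR percent (90%) of the prime CPU's utilization, and when the vote is evaluated for the prime CPU itself, mpct is raised to 100 so its own load is never discounted. The sketch below is only a minimal userspace restatement of that arithmetic; max() and mult_frac() are re-defined locally in simplified form as stand-ins for the kernel macros, and the utilization numbers are made up for illustration.

/* shared_rail_util_demo.c - illustrative only; kernel helpers re-defined locally */
#include <stdio.h>

#define max(a, b)              ((a) > (b) ? (a) : (b))
#define mult_frac(x, num, den) ((x) * (num) / (den))   /* simplified stand-in */

#define ADJUSTED_SHARED_RAIL_UTIL(orig, prime, x) \
	(max(orig, mult_frac(prime, x, 100)))
#define PRIME_FACTOR 90

int main(void)
{
	unsigned long util_other = 480;   /* max util over the non-prime rail siblings */
	unsigned long util_prime = 600;   /* util of the prime CPU */

	/* Vote evaluated for a non-prime sibling: prime load discounted to 90%. */
	printf("non-prime vote: %lu\n",
	       ADJUSTED_SHARED_RAIL_UTIL(util_other, util_prime, PRIME_FACTOR)); /* 540 */

	/* Vote evaluated for the prime CPU itself: mpct bumped to 100, no discount. */
	printf("prime vote    : %lu\n",
	       ADJUSTED_SHARED_RAIL_UTIL(util_other, util_prime, 100));          /* 600 */
	return 0;
}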
@@ -2791,11 +2825,16 @@ static void walt_update_cluster_topology(void)
 	if (num_sched_clusters == 4) {
 		cluster = NULL;
 		cpumask_clear(&asym_cap_sibling_cpus);
+		cpumask_clear(&shared_rail_sibling_cpus);
 		for_each_sched_cluster(cluster) {
 			if (cluster->id != 0 && cluster->id != num_sched_clusters - 1) {
 				cpumask_or(&asym_cap_sibling_cpus,
 					&asym_cap_sibling_cpus, &cluster->cpus);
 			}
+			if (cluster->id == 1 || cluster->id == num_sched_clusters - 1) {
+				cpumask_or(&shared_rail_sibling_cpus,
+					&shared_rail_sibling_cpus, &cluster->cpus);
+			}
 		}
 	}
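On a 4-cluster part the hunk above now maintains two masks: asym_cap_sibling_cpus collects the middle clusters (ids 1 and 2), while shared_rail_sibling_cpus collects cluster 1 together with the last (prime) cluster, the two that share a voltage rail. The sketch below just replays those id tests over a hypothetical CPU-to-cluster layout (CPUs 0-2 silver, 3-4 and 5-6 gold, 7 prime); the CPU numbering is an assumption for illustration, not taken from the patch, and plain bitmasks stand in for cpumask_t.

/* cluster_mask_demo.c - hypothetical 8-CPU, 4-cluster layout for illustration */
#include <stdio.h>

#define NR_CPUS      8
#define NUM_CLUSTERS 4

int main(void)
{
	/* assumed cluster id per CPU: 0-2 silver, 3-4 gold, 5-6 gold+, 7 prime */
	int cluster_of[NR_CPUS] = { 0, 0, 0, 1, 1, 2, 2, 3 };
	unsigned int asym_cap_sibling_cpus = 0, shared_rail_sibling_cpus = 0;

	for (int cpu = 0; cpu < NR_CPUS; cpu++) {
		int id = cluster_of[cpu];

		/* mirrors: id != 0 && id != num_sched_clusters - 1 (middle clusters) */
		if (id != 0 && id != NUM_CLUSTERS - 1)
			asym_cap_sibling_cpus |= 1u << cpu;

		/* mirrors: id == 1 || id == num_sched_clusters - 1 (shared rail) */
		if (id == 1 || id == NUM_CLUSTERS - 1)
			shared_rail_sibling_cpus |= 1u << cpu;
	}

	printf("asym_cap_sibling_cpus    = 0x%02x\n", asym_cap_sibling_cpus);     /* 0x78: CPUs 3-6 */
	printf("shared_rail_sibling_cpus = 0x%02x\n", shared_rail_sibling_cpus);  /* 0x98: CPUs 3,4,7 */
	return 0;
}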
@@ -3675,6 +3714,33 @@ int remove_heavy(struct walt_task_struct *wts)
 cpumask_t cpus_for_pipeline = { CPU_BITS_NONE };
+/* always set boost for max cluster, for pipeline tasks */
+static inline void pipeline_set_boost(bool boost, int flag)
+{
+	static bool isolation_boost;
+	struct walt_sched_cluster *cluster;
+	if (isolation_boost && !boost) {
+		isolation_boost = false;
+		for_each_sched_cluster(cluster) {
+			if (cpumask_intersects(&cpus_for_pipeline, &cluster->cpus) ||
+					is_max_possible_cluster_cpu(cpumask_first(&cluster->cpus)))
+				core_ctl_set_cluster_boost(cluster->id, false);
+		}
+		enable_shared_rail_boost &= ~flag;
+	} else if (!isolation_boost && boost) {
+		isolation_boost = true;
+		for_each_sched_cluster(cluster) {
+			if (cpumask_intersects(&cpus_for_pipeline, &cluster->cpus) ||
+					is_max_possible_cluster_cpu(cpumask_first(&cluster->cpus)))
+				core_ctl_set_cluster_boost(cluster->id, true);
+		}
+		enable_shared_rail_boost |= flag;
+	}
+}
 cpumask_t last_available_big_cpus = CPU_MASK_NONE;
 int have_heavy_list;
 void find_heaviest_topapp(u64 window_start)
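The new pipeline_set_boost() helper above folds the per-caller isolation_boost bookkeeping into one place: a single static latch decides when to toggle the per-cluster core-control boost, and enable_shared_rail_boost doubles as a small bitmask recording which pipeline mode (AUTO_PIPELINE or MANUAL_PIPELINE) is currently asserting the boost. The sketch below keeps only that flag arithmetic; the scheduler cluster walk is stubbed out with a print, so it is a shape-only illustration rather than the kernel code.

/* pipeline_flag_demo.c - flag bookkeeping only; scheduler calls stubbed out */
#include <stdio.h>
#include <stdbool.h>

enum pipeline_types {
	NO_PIPELINE = 0,
	MANUAL_PIPELINE = 1,
	AUTO_PIPELINE = 2,
};

static int enable_shared_rail_boost;

/* stand-in for core_ctl_set_cluster_boost() on the pipeline/prime clusters */
static void set_cluster_boost(bool boost)
{
	printf("cluster boost -> %s\n", boost ? "on" : "off");
}

static void pipeline_set_boost(bool boost, int flag)
{
	static bool isolation_boost;

	if (isolation_boost && !boost) {
		isolation_boost = false;
		set_cluster_boost(false);
		enable_shared_rail_boost &= ~flag;   /* drop this requester's bit */
	} else if (!isolation_boost && boost) {
		isolation_boost = true;
		set_cluster_boost(true);
		enable_shared_rail_boost |= flag;    /* record who asked for the boost */
	}
	printf("enable_shared_rail_boost = %d\n", enable_shared_rail_boost);
}

int main(void)
{
	pipeline_set_boost(true, AUTO_PIPELINE);    /* auto pipeline found heavy tasks */
	pipeline_set_boost(false, AUTO_PIPELINE);   /* heavy tasks gone again */
	pipeline_set_boost(true, MANUAL_PIPELINE);  /* manual pipeline kicks in */
	return 0;
}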
@@ -3683,7 +3749,6 @@ void find_heaviest_topapp(u64 window_start)
 	struct walt_task_struct *wts;
 	unsigned long flags;
 	static u64 last_rearrange_ns;
-	static bool isolation_boost;
 	int i, j;
 	struct walt_task_struct *heavy_wts_to_drop[WALT_NR_CPUS];
 	int sched_heavy_nr = sysctl_sched_heavy_nr;
@@ -3708,10 +3773,8 @@ void find_heaviest_topapp(u64 window_start)
 			}
 			raw_spin_unlock_irqrestore(&heavy_lock, flags);
 			have_heavy_list = 0;
-			if (isolation_boost) {
-				core_ctl_set_boost(false);
-				isolation_boost = false;
-			}
+			pipeline_set_boost(false, AUTO_PIPELINE);
 		}
 		return;
 	}
@@ -3767,10 +3830,7 @@ void find_heaviest_topapp(u64 window_start)
 		}
 	}
-	if (!isolation_boost) {
-		core_ctl_set_boost(true);
-		isolation_boost = true;
-	}
+	pipeline_set_boost(true, AUTO_PIPELINE);
 	/* start with non-prime cpus chosen for this chipset (e.g. golds) */
 	cpumask_and(&last_available_big_cpus, cpu_online_mask, &cpus_for_pipeline);
@@ -4000,7 +4060,7 @@ void rearrange_pipeline_preferred_cpus(u64 window_start)
 out:
 	if (found_pipeline ^ last_found_pipeline) {
-		core_ctl_set_boost(found_pipeline);
+		pipeline_set_boost(found_pipeline, MANUAL_PIPELINE);
 		last_found_pipeline = found_pipeline;
 	}
 }
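In rearrange_pipeline_preferred_cpus() the boost is now routed through pipeline_set_boost() with MANUAL_PIPELINE, still gated by the same edge detector: found_pipeline ^ last_found_pipeline is true only when the pipeline state actually changed since the last window, so the boost call fires once per transition instead of every window. A tiny illustration of that pattern, with made-up per-window states:

/* edge_detect_demo.c - the found ^ last edge-detect pattern in isolation */
#include <stdio.h>
#include <stdbool.h>

static void pipeline_set_boost(bool boost)
{
	printf("  pipeline_set_boost(%s)\n", boost ? "true" : "false");
}

int main(void)
{
	/* pretend per-window results of the pipeline search */
	bool windows[] = { false, true, true, true, false, false, true };
	bool last_found_pipeline = false;

	for (unsigned i = 0; i < sizeof(windows) / sizeof(windows[0]); i++) {
		bool found_pipeline = windows[i];

		printf("window %u: found=%d\n", i, found_pipeline);
		if (found_pipeline ^ last_found_pipeline) {  /* state changed this window */
			pipeline_set_boost(found_pipeline);
			last_found_pipeline = found_pipeline;
		}
	}
	return 0;
}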
@@ -4021,7 +4081,7 @@ void rearrange_pipeline_preferred_cpus(u64 window_start)
  * involved in the migration.
  */
 static inline void __walt_irq_work_locked(bool is_migration, bool is_asym_migration,
-					  struct cpumask *lock_cpus)
+					  bool is_shared_rail_migration, struct cpumask *lock_cpus)
 {
 	struct walt_sched_cluster *cluster;
 	struct rq *rq;
@@ -4093,6 +4153,8 @@ static inline void __walt_irq_work_locked(bool is_migration, bool is_asym_migration,
 		}
 		if (is_asym_migration)
 			wflag |= WALT_CPUFREQ_ASYM_FIXUP;
+		if (is_shared_rail_migration)
+			wflag |= WALT_CPUFREQ_SHARED_RAIL;
 	} else {
 		wflag |= WALT_CPUFREQ_ROLLOVER;
 	}
@@ -4228,7 +4290,7 @@ static void walt_irq_work(struct irq_work *irq_work)
 	struct walt_rq *wrq;
 	int level = 0;
 	int cpu;
-	bool is_migration = false, is_asym_migration = false;
+	bool is_migration = false, is_asym_migration = false, is_shared_rail_migration = false;
 	u32 wakeup_ctr_sum = 0;
 	if (irq_work == &walt_migration_irq_work)
@@ -4247,6 +4309,11 @@ static void walt_irq_work(struct irq_work *irq_work)
 	if (cpumask_empty(&lock_cpus))
 		return;
+	if (cpumask_intersects(&lock_cpus, &shared_rail_sibling_cpus) &&
+			enable_shared_rail_boost) {
+		cpumask_or(&lock_cpus, &lock_cpus, &shared_rail_sibling_cpus);
+		is_shared_rail_migration = true;
+	}
 	if (!cluster_partial_halted() &&
 			cpumask_intersects(&lock_cpus, &asym_cap_sibling_cpus)) {
 		cpumask_or(&lock_cpus, &lock_cpus, &asym_cap_sibling_cpus);
@@ -4262,7 +4329,8 @@ static void walt_irq_work(struct irq_work *irq_work)
 		level++;
 	}
-	__walt_irq_work_locked(is_migration, is_asym_migration, &lock_cpus);
+	__walt_irq_work_locked(is_migration, is_asym_migration,
+			is_shared_rail_migration, &lock_cpus);
 	if (!is_migration) {
 		for_each_cpu(cpu, cpu_online_mask) {
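walt_irq_work() now widens the set of runqueues it locks and updates: if the migration touched any CPU on the shared rail (and the boost is enabled), all shared-rail siblings are pulled into lock_cpus and the eventual cpufreq update is tagged with WALT_CPUFREQ_SHARED_RAIL, so the prime CPU and its rail partners get re-evaluated together. The bitmask sketch below mimics only that widening step, with plain unsigned masks instead of cpumask_t and the same hypothetical CPU layout as the earlier sketch.

/* lock_cpus_demo.c - widening the update set to the shared-rail siblings */
#include <stdio.h>
#include <stdbool.h>

#define WALT_CPUFREQ_SHARED_RAIL 0x80

int main(void)
{
	/* hypothetical layout from before: CPUs 3,4 (gold) and 7 (prime) share the rail */
	unsigned int shared_rail_sibling_cpus = 0x98;
	bool enable_shared_rail_boost = true;    /* non-zero when a pipeline mode asked for boost */

	unsigned int lock_cpus = 1u << 4;        /* migration involved CPU4 only */
	unsigned int wflag = 0;
	bool is_shared_rail_migration = false;

	/* mirrors the cpumask_intersects() + cpumask_or() step in walt_irq_work() */
	if ((lock_cpus & shared_rail_sibling_cpus) && enable_shared_rail_boost) {
		lock_cpus |= shared_rail_sibling_cpus;
		is_shared_rail_migration = true;
	}

	if (is_shared_rail_migration)
		wflag |= WALT_CPUFREQ_SHARED_RAIL;

	printf("lock_cpus = 0x%02x, wflag = 0x%02x\n", lock_cpus, wflag);  /* 0x98, 0x80 */
	return 0;
}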

View File

@@ -73,6 +73,12 @@ enum migrate_types {
 	RQ_TO_GROUP,
 };
+enum pipeline_types {
+	NO_PIPELINE = 0,
+	MANUAL_PIPELINE = 1,
+	AUTO_PIPELINE = 2,
+};
 #define WALT_LOW_LATENCY_PROCFS BIT(0)
 #define WALT_LOW_LATENCY_BINDER BIT(1)
 #define WALT_LOW_LATENCY_PIPELINE BIT(2)
@@ -355,6 +361,7 @@ extern cpumask_t cpus_for_pipeline;
 #define WALT_CPUFREQ_EARLY_DET 0x10
 #define WALT_CPUFREQ_BOOST_UPDATE 0x20
 #define WALT_CPUFREQ_ASYM_FIXUP 0x40
+#define WALT_CPUFREQ_SHARED_RAIL 0x80
 #define CPUFREQ_REASON_LOAD 0
 #define CPUFREQ_REASON_BTR 0x1