diff --git a/kernel/sched/walt/walt.c b/kernel/sched/walt/walt.c
index 586228348daf..6cc28c2a82de 100644
--- a/kernel/sched/walt/walt.c
+++ b/kernel/sched/walt/walt.c
@@ -67,6 +67,7 @@ static struct irq_work walt_cpufreq_irq_work;
 struct irq_work walt_migration_irq_work;
 unsigned int walt_rotation_enabled;
 cpumask_t asym_cap_sibling_cpus = CPU_MASK_NONE;
+cpumask_t shared_rail_sibling_cpus = CPU_MASK_NONE;
 
 unsigned int __read_mostly sched_ravg_window = 20000000;
 int min_possible_cluster_id;
@@ -83,6 +84,8 @@ unsigned int __read_mostly sched_init_task_load_windows;
  */
 unsigned int __read_mostly sched_load_granule;
 
+int enable_shared_rail_boost;
+
 u64 walt_sched_clock(void)
 {
 	if (unlikely(walt_clock_suspended))
@@ -678,14 +681,42 @@ __cpu_util_freq_walt(int cpu, struct walt_cpu_load *walt_load, unsigned int *rea
 	return (util >= capacity) ? capacity : util;
 }
 
+#define ADJUSTED_SHARED_RAIL_UTIL(orig, prime, x) \
+	(max(orig, mult_frac(prime, x, 100)))
+#define PRIME_FACTOR 90
+
 unsigned long
 cpu_util_freq_walt(int cpu, struct walt_cpu_load *walt_load, unsigned int *reason)
 {
 	struct walt_cpu_load wl_other = {0};
+	struct walt_cpu_load wl_prime = {0};
 	unsigned long util = 0, util_other = 0;
 	unsigned long capacity = capacity_orig_of(cpu);
-	int i;
-	unsigned long max_nl = 0, max_pl = 0;
+	int i, mpct = PRIME_FACTOR;
+	unsigned long max_nl_other = 0, max_pl_other = 0;
+	bool shared_rail = false;
+
+	if (cpumask_test_cpu(cpu, &shared_rail_sibling_cpus) &&
+			enable_shared_rail_boost) {
+		for_each_cpu(i, &shared_rail_sibling_cpus) {
+			if (i == (num_possible_cpus() - 1))
+				util = __cpu_util_freq_walt(i, &wl_prime, reason);
+			else {
+				util_other = max(util_other,
+						__cpu_util_freq_walt(i, &wl_other, reason));
+				max_nl_other = max(max_nl_other, wl_other.nl);
+				max_pl_other = max(max_pl_other, wl_other.pl);
+			}
+		}
+
+		if (cpu == (num_possible_cpus() - 1))
+			mpct = 100;
+
+		util = ADJUSTED_SHARED_RAIL_UTIL(util_other, util, mpct);
+		walt_load->nl = ADJUSTED_SHARED_RAIL_UTIL(max_nl_other, wl_prime.nl, mpct);
+		walt_load->pl = ADJUSTED_SHARED_RAIL_UTIL(max_pl_other, wl_prime.pl, mpct);
+		shared_rail = true;
+	}
 
 	if (!cpumask_test_cpu(cpu, &asym_cap_sibling_cpus))
 		goto finish;
@@ -695,19 +726,22 @@ cpu_util_freq_walt(int cpu, struct walt_cpu_load *walt_load, unsigned int *reaso
 	for_each_cpu(i, &asym_cap_sibling_cpus) {
 		if (i == cpu)
-			util = __cpu_util_freq_walt(cpu, walt_load, reason);
+			util = max(util, __cpu_util_freq_walt(cpu, walt_load, reason));
 		else {
 			util_other = max(util_other,
 					__cpu_util_freq_walt(i, &wl_other, reason));
-			max_nl = max(max_nl, wl_other.nl);
-			max_pl = max(max_pl, wl_other.pl);
+			max_nl_other = max(max_nl_other, wl_other.nl);
+			max_pl_other = max(max_pl_other, wl_other.pl);
 		}
 	}
 
 	util = max(util, util_other);
-	walt_load->nl = max(walt_load->nl, max_nl);
-	walt_load->pl = max(walt_load->pl, max_pl);
+	walt_load->nl = max(walt_load->nl, max_nl_other);
+	walt_load->pl = max(walt_load->pl, max_pl_other);
 	return (util >= capacity) ? capacity : util;
 
 finish:
+	if (shared_rail)
+		return (util >= capacity) ? capacity : util;
+
 	return __cpu_util_freq_walt(cpu, walt_load, reason);
 }
@@ -2791,11 +2825,16 @@ static void walt_update_cluster_topology(void)
 	if (num_sched_clusters == 4) {
 		cluster = NULL;
 		cpumask_clear(&asym_cap_sibling_cpus);
+		cpumask_clear(&shared_rail_sibling_cpus);
 		for_each_sched_cluster(cluster) {
 			if (cluster->id != 0 && cluster->id != num_sched_clusters - 1) {
 				cpumask_or(&asym_cap_sibling_cpus,
 					&asym_cap_sibling_cpus, &cluster->cpus);
 			}
+			if (cluster->id == 1 || cluster->id == num_sched_clusters - 1) {
+				cpumask_or(&shared_rail_sibling_cpus,
+					&shared_rail_sibling_cpus, &cluster->cpus);
+			}
 		}
 	}
 
@@ -3675,6 +3714,33 @@ int remove_heavy(struct walt_task_struct *wts)
 
 cpumask_t cpus_for_pipeline = { CPU_BITS_NONE };
 
+/* always set boost for max cluster, for pipeline tasks */
+static inline void pipeline_set_boost(bool boost, int flag)
+{
+	static bool isolation_boost;
+	struct walt_sched_cluster *cluster;
+
+	if (isolation_boost && !boost) {
+		isolation_boost = false;
+
+		for_each_sched_cluster(cluster) {
+			if (cpumask_intersects(&cpus_for_pipeline, &cluster->cpus) ||
+					is_max_possible_cluster_cpu(cpumask_first(&cluster->cpus)))
+				core_ctl_set_cluster_boost(cluster->id, false);
+		}
+		enable_shared_rail_boost &= ~flag;
+	} else if (!isolation_boost && boost) {
+		isolation_boost = true;
+
+		for_each_sched_cluster(cluster) {
+			if (cpumask_intersects(&cpus_for_pipeline, &cluster->cpus) ||
+					is_max_possible_cluster_cpu(cpumask_first(&cluster->cpus)))
+				core_ctl_set_cluster_boost(cluster->id, true);
+		}
+		enable_shared_rail_boost |= flag;
+	}
+}
+
 cpumask_t last_available_big_cpus = CPU_MASK_NONE;
 int have_heavy_list;
 void find_heaviest_topapp(u64 window_start)
@@ -3683,7 +3749,6 @@ void find_heaviest_topapp(u64 window_start)
 	struct walt_task_struct *wts;
 	unsigned long flags;
 	static u64 last_rearrange_ns;
-	static bool isolation_boost;
 	int i, j;
 	struct walt_task_struct *heavy_wts_to_drop[WALT_NR_CPUS];
 	int sched_heavy_nr = sysctl_sched_heavy_nr;
@@ -3708,10 +3773,8 @@ void find_heaviest_topapp(u64 window_start)
 			}
 			raw_spin_unlock_irqrestore(&heavy_lock, flags);
 			have_heavy_list = 0;
-			if (isolation_boost) {
-				core_ctl_set_boost(false);
-				isolation_boost = false;
-			}
+
+			pipeline_set_boost(false, AUTO_PIPELINE);
 		}
 		return;
 	}
@@ -3767,10 +3830,7 @@ void find_heaviest_topapp(u64 window_start)
 		}
 	}
 
-	if (!isolation_boost) {
-		core_ctl_set_boost(true);
-		isolation_boost = true;
-	}
+	pipeline_set_boost(true, AUTO_PIPELINE);
 
 	/* start with non-prime cpus chosen for this chipset (e.g. golds) */
 	cpumask_and(&last_available_big_cpus, cpu_online_mask, &cpus_for_pipeline);
@@ -4000,7 +4060,7 @@ void rearrange_pipeline_preferred_cpus(u64 window_start)
 
 out:
 	if (found_pipeline ^ last_found_pipeline) {
-		core_ctl_set_boost(found_pipeline);
+		pipeline_set_boost(found_pipeline, MANUAL_PIPELINE);
 		last_found_pipeline = found_pipeline;
 	}
 }
@@ -4021,7 +4081,7 @@ void rearrange_pipeline_preferred_cpus(u64 window_start)
  * involved in the migration.
  */
 static inline void __walt_irq_work_locked(bool is_migration, bool is_asym_migration,
-						struct cpumask *lock_cpus)
+						bool is_shared_rail_migration, struct cpumask *lock_cpus)
 {
 	struct walt_sched_cluster *cluster;
 	struct rq *rq;
@@ -4093,6 +4153,8 @@ static inline void __walt_irq_work_locked(bool is_migration, bool is_asym_migrat
 		}
 		if (is_asym_migration)
 			wflag |= WALT_CPUFREQ_ASYM_FIXUP;
+		if (is_shared_rail_migration)
+			wflag |= WALT_CPUFREQ_SHARED_RAIL;
 	} else {
 		wflag |= WALT_CPUFREQ_ROLLOVER;
 	}
@@ -4228,7 +4290,7 @@ static void walt_irq_work(struct irq_work *irq_work)
 	struct walt_rq *wrq;
 	int level = 0;
 	int cpu;
-	bool is_migration = false, is_asym_migration = false;
+	bool is_migration = false, is_asym_migration = false, is_shared_rail_migration = false;
 	u32 wakeup_ctr_sum = 0;
 
 	if (irq_work == &walt_migration_irq_work)
@@ -4247,6 +4309,11 @@
 	if (cpumask_empty(&lock_cpus))
 		return;
 
+	if (cpumask_intersects(&lock_cpus, &shared_rail_sibling_cpus) &&
+			enable_shared_rail_boost) {
+		cpumask_or(&lock_cpus, &lock_cpus, &shared_rail_sibling_cpus);
+		is_shared_rail_migration = true;
+	}
 	if (!cluster_partial_halted() &&
 			cpumask_intersects(&lock_cpus, &asym_cap_sibling_cpus)) {
 		cpumask_or(&lock_cpus, &lock_cpus, &asym_cap_sibling_cpus);
@@ -4262,7 +4329,8 @@
 		level++;
 	}
 
-	__walt_irq_work_locked(is_migration, is_asym_migration, &lock_cpus);
+	__walt_irq_work_locked(is_migration, is_asym_migration,
+			is_shared_rail_migration, &lock_cpus);
 
 	if (!is_migration) {
 		for_each_cpu(cpu, cpu_online_mask) {
diff --git a/kernel/sched/walt/walt.h b/kernel/sched/walt/walt.h
index 9fc0505af37c..458471692455 100644
--- a/kernel/sched/walt/walt.h
+++ b/kernel/sched/walt/walt.h
@@ -73,6 +73,12 @@ enum migrate_types {
 	RQ_TO_GROUP,
 };
 
+enum pipeline_types {
+	NO_PIPELINE = 0,
+	MANUAL_PIPELINE = 1,
+	AUTO_PIPELINE = 2,
+};
+
 #define WALT_LOW_LATENCY_PROCFS		BIT(0)
 #define WALT_LOW_LATENCY_BINDER		BIT(1)
 #define WALT_LOW_LATENCY_PIPELINE	BIT(2)
@@ -355,6 +361,7 @@ extern cpumask_t cpus_for_pipeline;
 #define WALT_CPUFREQ_EARLY_DET		0x10
 #define WALT_CPUFREQ_BOOST_UPDATE	0x20
 #define WALT_CPUFREQ_ASYM_FIXUP		0x40
+#define WALT_CPUFREQ_SHARED_RAIL	0x80
 
 #define CPUFREQ_REASON_LOAD	0
 #define CPUFREQ_REASON_BTR	0x1
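
As a sanity check on the frequency-vote math that ADJUSTED_SHARED_RAIL_UTIL introduces, the snippet below models it in userspace. This is an illustrative sketch, not part of the patch: mult_frac() is re-derived from the kernel macro of the same name, and the utilization inputs are made-up example numbers.

/*
 * Userspace model of the shared-rail vote adjustment (illustrative only;
 * mult_frac() mirrors the kernel macro, the inputs are invented).
 */
#include <stdio.h>

#define mult_frac(x, n, d)	((x) / (d) * (n) + ((x) % (d)) * (n) / (d))
#define max(a, b)		((a) > (b) ? (a) : (b))

#define ADJUSTED_SHARED_RAIL_UTIL(orig, prime, x) \
	(max(orig, mult_frac(prime, x, 100)))
#define PRIME_FACTOR 90

int main(void)
{
	unsigned long prime_util = 800;	/* prime-cluster demand */
	unsigned long gold_util = 500;	/* busiest gold sibling */

	/* golds vote at least PRIME_FACTOR% of the prime load */
	printf("gold vote:  %lu\n",
	       ADJUSTED_SHARED_RAIL_UTIL(gold_util, prime_util, PRIME_FACTOR));

	/* the prime cpu itself keeps the plain max (mpct forced to 100) */
	printf("prime vote: %lu\n",
	       ADJUSTED_SHARED_RAIL_UTIL(gold_util, prime_util, 100));
	return 0;
}

With these inputs the gold siblings vote 720 (90% of prime's 800) instead of their native 500, while the prime CPU votes the plain maximum of 800, so both clusters on the shared rail converge toward the prime cluster's operating point.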