Merge "sched/walt: Introduce shared rail sibling"

commit 826f36ec77
Author: qctecmdr
Committed by: Gerrit - the friendly Code Review server
Date: 2023-07-18 19:06:28 -07:00
2 changed files with 95 additions and 20 deletions

View File

@@ -67,6 +67,7 @@ static struct irq_work walt_cpufreq_irq_work;
 struct irq_work walt_migration_irq_work;
 unsigned int walt_rotation_enabled;
 cpumask_t asym_cap_sibling_cpus = CPU_MASK_NONE;
+cpumask_t shared_rail_sibling_cpus = CPU_MASK_NONE;
 unsigned int __read_mostly sched_ravg_window = 20000000;
 int min_possible_cluster_id;
@@ -83,6 +84,8 @@ unsigned int __read_mostly sched_init_task_load_windows;
  */
 unsigned int __read_mostly sched_load_granule;
+int enable_shared_rail_boost;
 u64 walt_sched_clock(void)
 {
 	if (unlikely(walt_clock_suspended))
@@ -678,14 +681,42 @@ __cpu_util_freq_walt(int cpu, struct walt_cpu_load *walt_load, unsigned int *reason)
 	return (util >= capacity) ? capacity : util;
 }
+#define ADJUSTED_SHARED_RAIL_UTIL(orig, prime, x) \
+	(max(orig, mult_frac(prime, x, 100)))
+#define PRIME_FACTOR 90
 unsigned long
 cpu_util_freq_walt(int cpu, struct walt_cpu_load *walt_load, unsigned int *reason)
 {
 	struct walt_cpu_load wl_other = {0};
+	struct walt_cpu_load wl_prime = {0};
 	unsigned long util = 0, util_other = 0;
 	unsigned long capacity = capacity_orig_of(cpu);
-	int i;
-	unsigned long max_nl = 0, max_pl = 0;
+	int i, mpct = PRIME_FACTOR;
+	unsigned long max_nl_other = 0, max_pl_other = 0;
+	bool shared_rail = false;
+	if (cpumask_test_cpu(cpu, &shared_rail_sibling_cpus) &&
+			enable_shared_rail_boost) {
+		for_each_cpu(i, &shared_rail_sibling_cpus) {
+			if (i == (num_possible_cpus() - 1))
+				util = __cpu_util_freq_walt(i, &wl_prime, reason);
+			else {
+				util_other = max(util_other,
+						__cpu_util_freq_walt(i, &wl_other, reason));
+				max_nl_other = max(max_nl_other, wl_other.nl);
+				max_pl_other = max(max_pl_other, wl_other.pl);
+			}
+		}
+		if (cpu == (num_possible_cpus() - 1))
+			mpct = 100;
+		util = ADJUSTED_SHARED_RAIL_UTIL(util_other, util, mpct);
+		walt_load->nl = ADJUSTED_SHARED_RAIL_UTIL(max_nl_other, wl_prime.nl, mpct);
+		walt_load->pl = ADJUSTED_SHARED_RAIL_UTIL(max_pl_other, wl_prime.pl, mpct);
+		shared_rail = true;
+	}
 	if (!cpumask_test_cpu(cpu, &asym_cap_sibling_cpus))
 		goto finish;
@@ -695,19 +726,22 @@ cpu_util_freq_walt(int cpu, struct walt_cpu_load *walt_load, unsigned int *reason)
 	for_each_cpu(i, &asym_cap_sibling_cpus) {
 		if (i == cpu)
-			util = __cpu_util_freq_walt(cpu, walt_load, reason);
+			util = max(util, __cpu_util_freq_walt(cpu, walt_load, reason));
 		else {
 			util_other = max(util_other, __cpu_util_freq_walt(i, &wl_other, reason));
-			max_nl = max(max_nl, wl_other.nl);
-			max_pl = max(max_pl, wl_other.pl);
+			max_nl_other = max(max_nl_other, wl_other.nl);
+			max_pl_other = max(max_pl_other, wl_other.pl);
 		}
 	}
 	util = max(util, util_other);
-	walt_load->nl = max(walt_load->nl, max_nl);
-	walt_load->pl = max(walt_load->pl, max_pl);
+	walt_load->nl = max(walt_load->nl, max_nl_other);
+	walt_load->pl = max(walt_load->pl, max_pl_other);
 	return (util >= capacity) ? capacity : util;
 finish:
+	if (shared_rail)
+		return (util >= capacity) ? capacity : util;
 	return __cpu_util_freq_walt(cpu, walt_load, reason);
 }
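For readers tracing the math in the hunks above: ADJUSTED_SHARED_RAIL_UTIL() lifts the shared-rail frequency vote to whichever is higher, the non-prime siblings' utilization or PRIME_FACTOR percent (90%) of the prime CPU's utilization, and when the vote is evaluated for the prime CPU itself, mpct is raised to 100 so its own load is never discounted. The sketch below is only a minimal userspace restatement of that arithmetic; max() and mult_frac() are re-defined locally in simplified form as stand-ins for the kernel macros, and the utilization numbers are made up for illustration.

/* shared_rail_util_demo.c - illustrative only; kernel helpers re-defined locally */
#include <stdio.h>

#define max(a, b)              ((a) > (b) ? (a) : (b))
#define mult_frac(x, num, den) ((x) * (num) / (den))   /* simplified stand-in */

#define ADJUSTED_SHARED_RAIL_UTIL(orig, prime, x) \
	(max(orig, mult_frac(prime, x, 100)))
#define PRIME_FACTOR 90

int main(void)
{
	unsigned long util_other = 480;   /* max util over the non-prime rail siblings */
	unsigned long util_prime = 600;   /* util of the prime CPU */

	/* Vote evaluated for a non-prime sibling: prime load discounted to 90%. */
	printf("non-prime vote: %lu\n",
	       ADJUSTED_SHARED_RAIL_UTIL(util_other, util_prime, PRIME_FACTOR)); /* 540 */

	/* Vote evaluated for the prime CPU itself: mpct bumped to 100, no discount. */
	printf("prime vote    : %lu\n",
	       ADJUSTED_SHARED_RAIL_UTIL(util_other, util_prime, 100));          /* 600 */
	return 0;
}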
@@ -2791,11 +2825,16 @@ static void walt_update_cluster_topology(void)
 	if (num_sched_clusters == 4) {
 		cluster = NULL;
 		cpumask_clear(&asym_cap_sibling_cpus);
+		cpumask_clear(&shared_rail_sibling_cpus);
 		for_each_sched_cluster(cluster) {
 			if (cluster->id != 0 && cluster->id != num_sched_clusters - 1) {
 				cpumask_or(&asym_cap_sibling_cpus,
 					&asym_cap_sibling_cpus, &cluster->cpus);
 			}
+			if (cluster->id == 1 || cluster->id == num_sched_clusters - 1) {
+				cpumask_or(&shared_rail_sibling_cpus,
+					&shared_rail_sibling_cpus, &cluster->cpus);
+			}
 		}
 	}
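On a 4-cluster part the hunk above now maintains two masks: asym_cap_sibling_cpus collects the middle clusters (ids 1 and 2), while shared_rail_sibling_cpus collects cluster 1 together with the last (prime) cluster, the two that share a voltage rail. The sketch below just replays those id tests over a hypothetical CPU-to-cluster layout (CPUs 0-2 silver, 3-4 and 5-6 gold, 7 prime); the CPU numbering is an assumption for illustration, not taken from the patch, and plain bitmasks stand in for cpumask_t.

/* cluster_mask_demo.c - hypothetical 8-CPU, 4-cluster layout for illustration */
#include <stdio.h>

#define NR_CPUS      8
#define NUM_CLUSTERS 4

int main(void)
{
	/* assumed cluster id per CPU: 0-2 silver, 3-4 gold, 5-6 gold+, 7 prime */
	int cluster_of[NR_CPUS] = { 0, 0, 0, 1, 1, 2, 2, 3 };
	unsigned int asym_cap_sibling_cpus = 0, shared_rail_sibling_cpus = 0;

	for (int cpu = 0; cpu < NR_CPUS; cpu++) {
		int id = cluster_of[cpu];

		/* mirrors: id != 0 && id != num_sched_clusters - 1 (middle clusters) */
		if (id != 0 && id != NUM_CLUSTERS - 1)
			asym_cap_sibling_cpus |= 1u << cpu;

		/* mirrors: id == 1 || id == num_sched_clusters - 1 (shared rail) */
		if (id == 1 || id == NUM_CLUSTERS - 1)
			shared_rail_sibling_cpus |= 1u << cpu;
	}

	printf("asym_cap_sibling_cpus    = 0x%02x\n", asym_cap_sibling_cpus);     /* 0x78: CPUs 3-6 */
	printf("shared_rail_sibling_cpus = 0x%02x\n", shared_rail_sibling_cpus);  /* 0x98: CPUs 3,4,7 */
	return 0;
}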
@@ -3675,6 +3714,33 @@ int remove_heavy(struct walt_task_struct *wts)
 cpumask_t cpus_for_pipeline = { CPU_BITS_NONE };
+/* always set boost for max cluster, for pipeline tasks */
+static inline void pipeline_set_boost(bool boost, int flag)
+{
+	static bool isolation_boost;
+	struct walt_sched_cluster *cluster;
+	if (isolation_boost && !boost) {
+		isolation_boost = false;
+		for_each_sched_cluster(cluster) {
+			if (cpumask_intersects(&cpus_for_pipeline, &cluster->cpus) ||
+					is_max_possible_cluster_cpu(cpumask_first(&cluster->cpus)))
+				core_ctl_set_cluster_boost(cluster->id, false);
+		}
+		enable_shared_rail_boost &= ~flag;
+	} else if (!isolation_boost && boost) {
+		isolation_boost = true;
+		for_each_sched_cluster(cluster) {
+			if (cpumask_intersects(&cpus_for_pipeline, &cluster->cpus) ||
+					is_max_possible_cluster_cpu(cpumask_first(&cluster->cpus)))
+				core_ctl_set_cluster_boost(cluster->id, true);
+		}
+		enable_shared_rail_boost |= flag;
+	}
+}
 cpumask_t last_available_big_cpus = CPU_MASK_NONE;
 int have_heavy_list;
 void find_heaviest_topapp(u64 window_start)
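The new pipeline_set_boost() helper above folds the per-caller isolation_boost bookkeeping into one place: a single static latch decides when to toggle the per-cluster core-control boost, and enable_shared_rail_boost doubles as a small bitmask recording which pipeline mode (AUTO_PIPELINE or MANUAL_PIPELINE) is currently asserting the boost. The sketch below keeps only that flag arithmetic; the scheduler cluster walk is stubbed out with a print, so it is a shape-only illustration rather than the kernel code.

/* pipeline_flag_demo.c - flag bookkeeping only; scheduler calls stubbed out */
#include <stdio.h>
#include <stdbool.h>

enum pipeline_types {
	NO_PIPELINE = 0,
	MANUAL_PIPELINE = 1,
	AUTO_PIPELINE = 2,
};

static int enable_shared_rail_boost;

/* stand-in for core_ctl_set_cluster_boost() on the pipeline/prime clusters */
static void set_cluster_boost(bool boost)
{
	printf("cluster boost -> %s\n", boost ? "on" : "off");
}

static void pipeline_set_boost(bool boost, int flag)
{
	static bool isolation_boost;

	if (isolation_boost && !boost) {
		isolation_boost = false;
		set_cluster_boost(false);
		enable_shared_rail_boost &= ~flag;   /* drop this requester's bit */
	} else if (!isolation_boost && boost) {
		isolation_boost = true;
		set_cluster_boost(true);
		enable_shared_rail_boost |= flag;    /* record who asked for the boost */
	}
	printf("enable_shared_rail_boost = %d\n", enable_shared_rail_boost);
}

int main(void)
{
	pipeline_set_boost(true, AUTO_PIPELINE);    /* auto pipeline found heavy tasks */
	pipeline_set_boost(false, AUTO_PIPELINE);   /* heavy tasks gone again */
	pipeline_set_boost(true, MANUAL_PIPELINE);  /* manual pipeline kicks in */
	return 0;
}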
@@ -3683,7 +3749,6 @@ void find_heaviest_topapp(u64 window_start)
 	struct walt_task_struct *wts;
 	unsigned long flags;
 	static u64 last_rearrange_ns;
-	static bool isolation_boost;
 	int i, j;
 	struct walt_task_struct *heavy_wts_to_drop[WALT_NR_CPUS];
 	int sched_heavy_nr = sysctl_sched_heavy_nr;
@@ -3708,10 +3773,8 @@ void find_heaviest_topapp(u64 window_start)
 			}
 			raw_spin_unlock_irqrestore(&heavy_lock, flags);
 			have_heavy_list = 0;
-			if (isolation_boost) {
-				core_ctl_set_boost(false);
-				isolation_boost = false;
-			}
+			pipeline_set_boost(false, AUTO_PIPELINE);
 		}
 		return;
 	}
@@ -3767,10 +3830,7 @@ void find_heaviest_topapp(u64 window_start)
 		}
 	}
-	if (!isolation_boost) {
-		core_ctl_set_boost(true);
-		isolation_boost = true;
-	}
+	pipeline_set_boost(true, AUTO_PIPELINE);
 	/* start with non-prime cpus chosen for this chipset (e.g. golds) */
 	cpumask_and(&last_available_big_cpus, cpu_online_mask, &cpus_for_pipeline);
@@ -4000,7 +4060,7 @@ void rearrange_pipeline_preferred_cpus(u64 window_start)
 out:
 	if (found_pipeline ^ last_found_pipeline) {
-		core_ctl_set_boost(found_pipeline);
+		pipeline_set_boost(found_pipeline, MANUAL_PIPELINE);
 		last_found_pipeline = found_pipeline;
 	}
 }
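In rearrange_pipeline_preferred_cpus() the boost is now routed through pipeline_set_boost() with MANUAL_PIPELINE, still gated by the same edge detector: found_pipeline ^ last_found_pipeline is true only when the pipeline state actually changed since the last window, so the boost call fires once per transition instead of every window. A tiny illustration of that pattern, with made-up per-window states:

/* edge_detect_demo.c - the found ^ last edge-detect pattern in isolation */
#include <stdio.h>
#include <stdbool.h>

static void pipeline_set_boost(bool boost)
{
	printf("  pipeline_set_boost(%s)\n", boost ? "true" : "false");
}

int main(void)
{
	/* pretend per-window results of the pipeline search */
	bool windows[] = { false, true, true, true, false, false, true };
	bool last_found_pipeline = false;

	for (unsigned i = 0; i < sizeof(windows) / sizeof(windows[0]); i++) {
		bool found_pipeline = windows[i];

		printf("window %u: found=%d\n", i, found_pipeline);
		if (found_pipeline ^ last_found_pipeline) {  /* state changed this window */
			pipeline_set_boost(found_pipeline);
			last_found_pipeline = found_pipeline;
		}
	}
	return 0;
}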
@@ -4021,7 +4081,7 @@ void rearrange_pipeline_preferred_cpus(u64 window_start)
  * involved in the migration.
  */
 static inline void __walt_irq_work_locked(bool is_migration, bool is_asym_migration,
-					  struct cpumask *lock_cpus)
+					  bool is_shared_rail_migration, struct cpumask *lock_cpus)
 {
 	struct walt_sched_cluster *cluster;
 	struct rq *rq;
@@ -4093,6 +4153,8 @@ static inline void __walt_irq_work_locked(bool is_migration, bool is_asym_migration,
 		}
 		if (is_asym_migration)
 			wflag |= WALT_CPUFREQ_ASYM_FIXUP;
+		if (is_shared_rail_migration)
+			wflag |= WALT_CPUFREQ_SHARED_RAIL;
 	} else {
 		wflag |= WALT_CPUFREQ_ROLLOVER;
 	}
@@ -4228,7 +4290,7 @@ static void walt_irq_work(struct irq_work *irq_work)
 	struct walt_rq *wrq;
 	int level = 0;
 	int cpu;
-	bool is_migration = false, is_asym_migration = false;
+	bool is_migration = false, is_asym_migration = false, is_shared_rail_migration = false;
 	u32 wakeup_ctr_sum = 0;
 	if (irq_work == &walt_migration_irq_work)
@@ -4247,6 +4309,11 @@ static void walt_irq_work(struct irq_work *irq_work)
 	if (cpumask_empty(&lock_cpus))
 		return;
+	if (cpumask_intersects(&lock_cpus, &shared_rail_sibling_cpus) &&
+			enable_shared_rail_boost) {
+		cpumask_or(&lock_cpus, &lock_cpus, &shared_rail_sibling_cpus);
+		is_shared_rail_migration = true;
+	}
 	if (!cluster_partial_halted() &&
 			cpumask_intersects(&lock_cpus, &asym_cap_sibling_cpus)) {
 		cpumask_or(&lock_cpus, &lock_cpus, &asym_cap_sibling_cpus);
@@ -4262,7 +4329,8 @@ static void walt_irq_work(struct irq_work *irq_work)
 		level++;
 	}
-	__walt_irq_work_locked(is_migration, is_asym_migration, &lock_cpus);
+	__walt_irq_work_locked(is_migration, is_asym_migration,
+			is_shared_rail_migration, &lock_cpus);
 	if (!is_migration) {
 		for_each_cpu(cpu, cpu_online_mask) {
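walt_irq_work() now widens the set of runqueues it locks and updates: if the migration touched any CPU on the shared rail (and the boost is enabled), all shared-rail siblings are pulled into lock_cpus and the eventual cpufreq update is tagged with WALT_CPUFREQ_SHARED_RAIL, so the prime CPU and its rail partners get re-evaluated together. The bitmask sketch below mimics only that widening step, with plain unsigned masks instead of cpumask_t and the same hypothetical CPU layout as the earlier sketch.

/* lock_cpus_demo.c - widening the update set to the shared-rail siblings */
#include <stdio.h>
#include <stdbool.h>

#define WALT_CPUFREQ_SHARED_RAIL 0x80

int main(void)
{
	/* hypothetical layout from before: CPUs 3,4 (gold) and 7 (prime) share the rail */
	unsigned int shared_rail_sibling_cpus = 0x98;
	bool enable_shared_rail_boost = true;    /* non-zero when a pipeline mode asked for boost */

	unsigned int lock_cpus = 1u << 4;        /* migration involved CPU4 only */
	unsigned int wflag = 0;
	bool is_shared_rail_migration = false;

	/* mirrors the cpumask_intersects() + cpumask_or() step in walt_irq_work() */
	if ((lock_cpus & shared_rail_sibling_cpus) && enable_shared_rail_boost) {
		lock_cpus |= shared_rail_sibling_cpus;
		is_shared_rail_migration = true;
	}

	if (is_shared_rail_migration)
		wflag |= WALT_CPUFREQ_SHARED_RAIL;

	printf("lock_cpus = 0x%02x, wflag = 0x%02x\n", lock_cpus, wflag);  /* 0x98, 0x80 */
	return 0;
}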

View File

@@ -73,6 +73,12 @@ enum migrate_types {
 	RQ_TO_GROUP,
 };
+enum pipeline_types {
+	NO_PIPELINE = 0,
+	MANUAL_PIPELINE = 1,
+	AUTO_PIPELINE = 2,
+};
 #define WALT_LOW_LATENCY_PROCFS BIT(0)
 #define WALT_LOW_LATENCY_BINDER BIT(1)
 #define WALT_LOW_LATENCY_PIPELINE BIT(2)
@@ -355,6 +361,7 @@ extern cpumask_t cpus_for_pipeline;
 #define WALT_CPUFREQ_EARLY_DET 0x10
 #define WALT_CPUFREQ_BOOST_UPDATE 0x20
 #define WALT_CPUFREQ_ASYM_FIXUP 0x40
+#define WALT_CPUFREQ_SHARED_RAIL 0x80
 #define CPUFREQ_REASON_LOAD 0
 #define CPUFREQ_REASON_BTR 0x1