sched/walt: Move scheduler techpack to kernel

The scheduler techpack sources are moved into the kernel tree to ease
development.

The weak symbol definitions for WALT functions are no longer
required to cover the case of compiling the kernel without syncing
the scheduler techpack, so remove all of the weak symbol references.

Change-Id: Ief85bccd3dceaf60dda44aef9893b4138dc63380
Signed-off-by: Pavankumar Kondeti <pkondeti@codeaurora.org>
Pavankumar Kondeti 2020-09-24 11:19:20 +05:30
parent 73db1046f1
commit 5b3db45955
20 changed files with 7725 additions and 250 deletions
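For context, this is the pattern the commit removes: a __weak stub provides a
benign fallback that a strong definition in the out-of-tree techpack overrides
when it is linked in. A minimal illustrative sketch (the function name below is
made up for illustration, not taken from this commit):

/* Illustrative only: weak fallback used when no strong definition exists. */
int __weak walt_example_hook(int cpu)
{
        return 0;       /* harmless default when the techpack is absent */
}

/*
 * A strong definition elsewhere (e.g. in the techpack) silently replaces the
 * weak one at link time:
 *
 *      int walt_example_hook(int cpu) { return do_real_work(cpu); }
 */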


@@ -34,30 +34,30 @@ extern unsigned int sysctl_sched_force_lb_enable;
extern unsigned int sysctl_hh_suspend_timeout_ms;
#endif
#ifdef CONFIG_SCHED_WALT
extern unsigned int __weak sysctl_sched_capacity_margin_up[MAX_MARGIN_LEVELS];
extern unsigned int __weak sysctl_sched_capacity_margin_down[MAX_MARGIN_LEVELS];
extern unsigned int __weak sysctl_sched_user_hint;
extern const int __weak sched_user_hint_max;
extern unsigned int __weak sysctl_sched_boost;
extern unsigned int __weak sysctl_sched_group_upmigrate_pct;
extern unsigned int __weak sysctl_sched_group_downmigrate_pct;
extern unsigned int __weak sysctl_sched_conservative_pl;
extern unsigned int __weak sysctl_sched_walt_rotate_big_tasks;
extern unsigned int __weak sysctl_sched_min_task_util_for_boost;
extern unsigned int __weak sysctl_sched_min_task_util_for_colocation;
extern unsigned int __weak sysctl_sched_asym_cap_sibling_freq_match_pct;
extern unsigned int __weak sysctl_sched_coloc_downmigrate_ns;
extern unsigned int __weak sysctl_sched_task_unfilter_period;
extern unsigned int __weak sysctl_sched_busy_hyst_enable_cpus;
extern unsigned int __weak sysctl_sched_busy_hyst;
extern unsigned int __weak sysctl_sched_coloc_busy_hyst_enable_cpus;
extern unsigned int __weak sysctl_sched_coloc_busy_hyst_cpu[NR_CPUS];
extern unsigned int __weak sysctl_sched_coloc_busy_hyst_max_ms;
extern unsigned int __weak sysctl_sched_coloc_busy_hyst_cpu_busy_pct[NR_CPUS];
extern unsigned int __weak sysctl_sched_window_stats_policy;
extern unsigned int __weak sysctl_sched_ravg_window_nr_ticks;
extern unsigned int __weak sysctl_sched_many_wakeup_threshold;
extern unsigned int __weak sysctl_sched_dynamic_ravg_window_enable;
extern unsigned int sysctl_sched_capacity_margin_up[MAX_MARGIN_LEVELS];
extern unsigned int sysctl_sched_capacity_margin_down[MAX_MARGIN_LEVELS];
extern unsigned int sysctl_sched_user_hint;
extern const int sched_user_hint_max;
extern unsigned int sysctl_sched_boost;
extern unsigned int sysctl_sched_group_upmigrate_pct;
extern unsigned int sysctl_sched_group_downmigrate_pct;
extern unsigned int sysctl_sched_conservative_pl;
extern unsigned int sysctl_sched_walt_rotate_big_tasks;
extern unsigned int sysctl_sched_min_task_util_for_boost;
extern unsigned int sysctl_sched_min_task_util_for_colocation;
extern unsigned int sysctl_sched_asym_cap_sibling_freq_match_pct;
extern unsigned int sysctl_sched_coloc_downmigrate_ns;
extern unsigned int sysctl_sched_task_unfilter_period;
extern unsigned int sysctl_sched_busy_hyst_enable_cpus;
extern unsigned int sysctl_sched_busy_hyst;
extern unsigned int sysctl_sched_coloc_busy_hyst_enable_cpus;
extern unsigned int sysctl_sched_coloc_busy_hyst_cpu[NR_CPUS];
extern unsigned int sysctl_sched_coloc_busy_hyst_max_ms;
extern unsigned int sysctl_sched_coloc_busy_hyst_cpu_busy_pct[NR_CPUS];
extern unsigned int sysctl_sched_window_stats_policy;
extern unsigned int sysctl_sched_ravg_window_nr_ticks;
extern unsigned int sysctl_sched_many_wakeup_threshold;
extern unsigned int sysctl_sched_dynamic_ravg_window_enable;
extern unsigned int sysctl_sched_prefer_spread;
extern unsigned int sysctl_walt_rtg_cfs_boost_prio;
extern unsigned int sysctl_walt_low_latency_task_threshold;


@@ -20,7 +20,7 @@ obj-y += core.o loadavg.o clock.o cputime.o
obj-y += idle.o fair.o rt.o deadline.o
obj-y += wait.o wait_bit.o swait.o completion.o
obj-$(CONFIG_SCHED_WALT) += walt.o
obj-$(CONFIG_SCHED_WALT) += walt/
obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o
obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o
obj-$(CONFIG_SCHEDSTATS) += stats.o


@@ -21,7 +21,7 @@
#include "../smpboot.h"
#include "pelt.h"
#include "walt.h"
#include "walt/walt.h"
#define CREATE_TRACE_POINTS
#include <trace/events/sched.h>


@@ -4,7 +4,7 @@
*/
#include <linux/cpufreq_times.h>
#include "sched.h"
#include "walt.h"
#include "walt/walt.h"
#ifdef CONFIG_IRQ_TIME_ACCOUNTING


@@ -17,7 +17,7 @@
*/
#include "sched.h"
#include "pelt.h"
#include "walt.h"
#include "walt/walt.h"
struct dl_bandwidth def_dl_bandwidth;


@@ -25,7 +25,7 @@
#include <trace/events/sched.h>
#include <trace/hooks/sched.h>
#include "walt.h"
#include "walt/walt.h"
#ifdef CONFIG_SMP
static inline bool task_fits_max(struct task_struct *p, int cpu);


@@ -11,7 +11,7 @@
#include <trace/events/sched.h>
#include "walt.h"
#include "walt/walt.h"
#include <trace/hooks/sched.h>


@@ -8,7 +8,7 @@
* See kernel/stop_machine.c
*/
#include "sched.h"
#include "walt.h"
#include "walt/walt.h"
#ifdef CONFIG_SMP
static int


@@ -1,218 +0,0 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2016-2020, The Linux Foundation. All rights reserved.
*/
#include "sched.h"
#include "walt.h"
int __weak sched_wake_up_idle_show(struct seq_file *m, void *v)
{
return -EPERM;
}
ssize_t __weak sched_wake_up_idle_write(struct file *file,
const char __user *buf, size_t count, loff_t *offset)
{
return -EPERM;
}
int __weak sched_wake_up_idle_open(struct inode *inode, struct file *filp)
{
return -EPERM;
}
int __weak sched_init_task_load_show(struct seq_file *m, void *v)
{
return -EPERM;
}
ssize_t __weak
sched_init_task_load_write(struct file *file, const char __user *buf,
size_t count, loff_t *offset)
{
return -EPERM;
}
int __weak sched_init_task_load_open(struct inode *inode, struct file *filp)
{
return -EPERM;
}
int __weak sched_group_id_show(struct seq_file *m, void *v)
{
return -EPERM;
}
ssize_t __weak sched_group_id_write(struct file *file, const char __user *buf,
size_t count, loff_t *offset)
{
return -EPERM;
}
int __weak sched_group_id_open(struct inode *inode, struct file *filp)
{
return -EPERM;
}
int __weak sched_isolate_cpu(int cpu) { return 0; }
int __weak sched_unisolate_cpu(int cpu) { return 0; }
int __weak sched_unisolate_cpu_unlocked(int cpu) { return 0; }
int __weak register_cpu_cycle_counter_cb(struct cpu_cycle_counter_cb *cb)
{
return 0;
}
void __weak sched_update_cpu_freq_min_max(const cpumask_t *cpus, u32 fmin,
u32 fmax) { }
void __weak free_task_load_ptrs(struct task_struct *p) { }
int __weak core_ctl_set_boost(bool boost) { return 0; }
void __weak core_ctl_notifier_register(struct notifier_block *n) { }
void __weak core_ctl_notifier_unregister(struct notifier_block *n) { }
void __weak sched_update_nr_prod(int cpu, long delta, bool inc) { }
unsigned int __weak sched_get_cpu_util(int cpu) { return 0; }
void __weak sched_update_hyst_times(void) { }
u64 __weak sched_lpm_disallowed_time(int cpu) { return 0; }
int __weak
walt_proc_group_thresholds_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
return -ENOSYS;
}
int __weak
walt_proc_user_hint_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
return -ENOSYS;
}
int __weak
sched_updown_migrate_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
return -ENOSYS;
}
int __weak
sched_ravg_window_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
return -ENOSYS;
}
int __weak sched_boost_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
return -ENOSYS;
}
int __weak sched_busy_hyst_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
return -ENOSYS;
}
u64 __weak sched_ktime_clock(void) { return 0; }
unsigned long __weak
cpu_util_freq_walt(int cpu, struct walt_cpu_load *walt_load)
{
return cpu_util(cpu);
}
int __weak update_preferred_cluster(struct walt_related_thread_group *grp,
struct task_struct *p, u32 old_load, bool from_tick)
{
return 0;
}
void __weak set_preferred_cluster(struct walt_related_thread_group *grp) { }
void __weak add_new_task_to_grp(struct task_struct *new) { }
int __weak
preferred_cluster(struct walt_sched_cluster *cluster, struct task_struct *p)
{
return -1;
}
int __weak sync_cgroup_colocation(struct task_struct *p, bool insert)
{
return 0;
}
int __weak alloc_related_thread_groups(void) { return 0; }
void __weak check_for_migration(struct rq *rq, struct task_struct *p) { }
unsigned long __weak thermal_cap(int cpu)
{
return cpu_rq(cpu)->cpu_capacity_orig;
}
void __weak clear_walt_request(int cpu) { }
void __weak clear_ed_task(struct task_struct *p, struct rq *rq) { }
bool __weak early_detection_notify(struct rq *rq, u64 wallclock)
{
return 0;
}
void __weak note_task_waking(struct task_struct *p, u64 wallclock) { }
int __weak group_balance_cpu_not_isolated(struct sched_group *sg)
{
return group_balance_cpu(sg);
}
void __weak detach_one_task_core(struct task_struct *p, struct rq *rq,
struct list_head *tasks) { }
void __weak attach_tasks_core(struct list_head *tasks, struct rq *rq) { }
void __weak walt_update_task_ravg(struct task_struct *p, struct rq *rq,
int event, u64 wallclock, u64 irqtime) { }
void __weak fixup_busy_time(struct task_struct *p, int new_cpu) { }
void __weak init_new_task_load(struct task_struct *p) { }
void __weak mark_task_starting(struct task_struct *p) { }
void __weak set_window_start(struct rq *rq) { }
bool __weak do_pl_notif(struct rq *rq) { return false; }
void __weak walt_sched_account_irqstart(int cpu, struct task_struct *curr) { }
void __weak walt_sched_account_irqend(int cpu, struct task_struct *curr,
u64 delta)
{
}
void __weak update_cluster_topology(void) { }
void __weak init_clusters(void) { }
void __weak walt_sched_init_rq(struct rq *rq) { }
void __weak walt_update_cluster_topology(void) { }
void __weak walt_task_dead(struct task_struct *p) { }
#if defined(CONFIG_UCLAMP_TASK_GROUP)
void __weak walt_init_sched_boost(struct task_group *tg) { }
#endif


@@ -0,0 +1,3 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_SCHED_WALT) += walt.o boost.o sched_avg.o qc_vas.o core_ctl.o trace.o
obj-$(CONFIG_CPU_FREQ) += cpu-boost.o

kernel/sched/walt/boost.c (new file, 318 lines)

@@ -0,0 +1,318 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2012-2021, The Linux Foundation. All rights reserved.
*/
#include "qc_vas.h"
#include <linux/of.h>
#include <linux/sched/core_ctl.h>
#include <trace/events/sched.h>
/*
* Scheduler boost is a mechanism to temporarily place tasks on CPUs
* with higher capacity than those where a task would have normally
* ended up with their load characteristics. Any entity enabling
* boost is responsible for disabling it as well.
*/
unsigned int sysctl_sched_boost; /* To/from userspace */
unsigned int sched_boost_type; /* currently activated sched boost */
enum sched_boost_policy boost_policy;
static enum sched_boost_policy boost_policy_dt = SCHED_BOOST_NONE;
static DEFINE_MUTEX(boost_mutex);
#if defined(CONFIG_UCLAMP_TASK_GROUP)
void walt_init_sched_boost(struct task_group *tg)
{
tg->wtg.sched_boost_no_override = false;
tg->wtg.sched_boost_enabled = true;
tg->wtg.colocate = false;
tg->wtg.colocate_update_disabled = false;
}
static void update_cgroup_boost_settings(void)
{
struct task_group *tg;
rcu_read_lock();
list_for_each_entry_rcu(tg, &task_groups, list) {
if (tg->wtg.sched_boost_no_override)
continue;
tg->wtg.sched_boost_enabled = false;
}
rcu_read_unlock();
}
static void restore_cgroup_boost_settings(void)
{
struct task_group *tg;
rcu_read_lock();
list_for_each_entry_rcu(tg, &task_groups, list)
tg->wtg.sched_boost_enabled = true;
rcu_read_unlock();
}
#else
static void update_cgroup_boost_settings(void) { }
static void restore_cgroup_boost_settings(void) { }
#endif
/*
* Scheduler boost type and boost policy might at first seem unrelated,
* however, there exists a connection between them that will allow us
* to use them interchangeably during placement decisions. We'll explain
* the connection here in one possible way so that the implications are
* clear when looking at placement policies.
*
* When policy = SCHED_BOOST_NONE, type is either none or RESTRAINED
* When policy = SCHED_BOOST_ON_ALL or SCHED_BOOST_ON_BIG, type can
* neither be none nor RESTRAINED.
*/
static void set_boost_policy(int type)
{
if (type == NO_BOOST || type == RESTRAINED_BOOST) {
boost_policy = SCHED_BOOST_NONE;
return;
}
if (boost_policy_dt) {
boost_policy = boost_policy_dt;
return;
}
if (hmp_capable()) {
boost_policy = SCHED_BOOST_ON_BIG;
return;
}
boost_policy = SCHED_BOOST_ON_ALL;
}
static bool verify_boost_params(int type)
{
return type >= RESTRAINED_BOOST_DISABLE && type <= RESTRAINED_BOOST;
}
static void sched_no_boost_nop(void)
{
}
static void sched_full_throttle_boost_enter(void)
{
core_ctl_set_boost(true);
walt_enable_frequency_aggregation(true);
}
static void sched_full_throttle_boost_exit(void)
{
core_ctl_set_boost(false);
walt_enable_frequency_aggregation(false);
}
static void sched_conservative_boost_enter(void)
{
update_cgroup_boost_settings();
}
static void sched_conservative_boost_exit(void)
{
restore_cgroup_boost_settings();
}
static void sched_restrained_boost_enter(void)
{
walt_enable_frequency_aggregation(true);
}
static void sched_restrained_boost_exit(void)
{
walt_enable_frequency_aggregation(false);
}
struct sched_boost_data {
int refcount;
void (*enter)(void);
void (*exit)(void);
};
static struct sched_boost_data sched_boosts[] = {
[NO_BOOST] = {
.refcount = 0,
.enter = sched_no_boost_nop,
.exit = sched_no_boost_nop,
},
[FULL_THROTTLE_BOOST] = {
.refcount = 0,
.enter = sched_full_throttle_boost_enter,
.exit = sched_full_throttle_boost_exit,
},
[CONSERVATIVE_BOOST] = {
.refcount = 0,
.enter = sched_conservative_boost_enter,
.exit = sched_conservative_boost_exit,
},
[RESTRAINED_BOOST] = {
.refcount = 0,
.enter = sched_restrained_boost_enter,
.exit = sched_restrained_boost_exit,
},
};
#define SCHED_BOOST_START FULL_THROTTLE_BOOST
#define SCHED_BOOST_END (RESTRAINED_BOOST + 1)
static int sched_effective_boost(void)
{
int i;
/*
* The boosts are sorted in descending order by
* priority.
*/
for (i = SCHED_BOOST_START; i < SCHED_BOOST_END; i++) {
if (sched_boosts[i].refcount >= 1)
return i;
}
return NO_BOOST;
}
static void sched_boost_disable(int type)
{
struct sched_boost_data *sb = &sched_boosts[type];
int next_boost;
if (sb->refcount <= 0)
return;
sb->refcount--;
if (sb->refcount)
return;
/*
* This boost's refcount becomes zero, so it must
* be disabled. Disable it first and then apply
* the next boost.
*/
sb->exit();
next_boost = sched_effective_boost();
sched_boosts[next_boost].enter();
}
static void sched_boost_enable(int type)
{
struct sched_boost_data *sb = &sched_boosts[type];
int next_boost, prev_boost = sched_boost_type;
sb->refcount++;
if (sb->refcount != 1)
return;
/*
* This boost enable request did not come before.
* Take this new request and find the next boost
* by aggregating all the enabled boosts. If there
* is a change, disable the previous boost and enable
* the next boost.
*/
next_boost = sched_effective_boost();
if (next_boost == prev_boost)
return;
sched_boosts[prev_boost].exit();
sched_boosts[next_boost].enter();
}
static void sched_boost_disable_all(void)
{
int i;
for (i = SCHED_BOOST_START; i < SCHED_BOOST_END; i++) {
if (sched_boosts[i].refcount > 0) {
sched_boosts[i].exit();
sched_boosts[i].refcount = 0;
}
}
}
static void _sched_set_boost(int type)
{
if (type == 0)
sched_boost_disable_all();
else if (type > 0)
sched_boost_enable(type);
else
sched_boost_disable(-type);
/*
* sysctl_sched_boost holds the boost request from
* user space which could be different from the
* effectively enabled boost. Update the effective
* boost here.
*/
sched_boost_type = sched_effective_boost();
sysctl_sched_boost = sched_boost_type;
set_boost_policy(sysctl_sched_boost);
trace_sched_set_boost(sysctl_sched_boost);
}
void sched_boost_parse_dt(void)
{
struct device_node *sn;
const char *boost_policy;
sn = of_find_node_by_path("/sched-hmp");
if (!sn)
return;
if (!of_property_read_string(sn, "boost-policy", &boost_policy)) {
if (!strcmp(boost_policy, "boost-on-big"))
boost_policy_dt = SCHED_BOOST_ON_BIG;
else if (!strcmp(boost_policy, "boost-on-all"))
boost_policy_dt = SCHED_BOOST_ON_ALL;
}
}
int sched_set_boost(int type)
{
int ret = 0;
mutex_lock(&boost_mutex);
if (verify_boost_params(type))
_sched_set_boost(type);
else
ret = -EINVAL;
mutex_unlock(&boost_mutex);
return ret;
}
int sched_boost_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)
{
int ret;
unsigned int *data = (unsigned int *)table->data;
mutex_lock(&boost_mutex);
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (ret || !write)
goto done;
if (verify_boost_params(*data))
_sched_set_boost(*data);
else
ret = -EINVAL;
done:
mutex_unlock(&boost_mutex);
return ret;
}
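A caller-side sketch of the refcounted boost interface above: a positive type
takes a reference on that boost and the negated type drops it, so every enable
must be paired with a release (example_burst_work() is a hypothetical
placeholder, not part of this commit):

/* Illustrative only. */
static void example_boosted_section(void)
{
        if (!sched_set_boost(CONSERVATIVE_BOOST)) {     /* refcount 0 -> 1 */
                example_burst_work();                   /* hypothetical work */
                sched_set_boost(-CONSERVATIVE_BOOST);   /* refcount 1 -> 0 */
        }
}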

kernel/sched/walt/core_ctl.c (new file, 1354 lines)

File diff suppressed because it is too large.


@@ -0,0 +1,389 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2013-2015,2017,2019-2021, The Linux Foundation. All rights reserved.
*/
#define pr_fmt(fmt) "cpu-boost: " fmt
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/cpufreq.h>
#include <linux/cpu.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/input.h>
#include <linux/time.h>
#include <linux/sysfs.h>
#include <linux/pm_qos.h>
#include "qc_vas.h"
#define cpu_boost_attr_rw(_name) \
static struct kobj_attribute _name##_attr = \
__ATTR(_name, 0644, show_##_name, store_##_name)
#define show_one(file_name) \
static ssize_t show_##file_name \
(struct kobject *kobj, struct kobj_attribute *attr, char *buf) \
{ \
return scnprintf(buf, PAGE_SIZE, "%u\n", file_name); \
}
#define store_one(file_name) \
static ssize_t store_##file_name \
(struct kobject *kobj, struct kobj_attribute *attr, \
const char *buf, size_t count) \
{ \
\
sscanf(buf, "%u", &file_name); \
return count; \
}
struct cpu_sync {
int cpu;
unsigned int input_boost_min;
unsigned int input_boost_freq;
};
static DEFINE_PER_CPU(struct cpu_sync, sync_info);
static struct workqueue_struct *cpu_boost_wq;
static struct work_struct input_boost_work;
static bool input_boost_enabled;
static unsigned int input_boost_ms = 40;
show_one(input_boost_ms);
store_one(input_boost_ms);
cpu_boost_attr_rw(input_boost_ms);
static unsigned int sched_boost_on_input;
show_one(sched_boost_on_input);
store_one(sched_boost_on_input);
cpu_boost_attr_rw(sched_boost_on_input);
static bool sched_boost_active;
static struct delayed_work input_boost_rem;
static u64 last_input_time;
#define MIN_INPUT_INTERVAL (150 * USEC_PER_MSEC)
static DEFINE_PER_CPU(struct freq_qos_request, qos_req);
static ssize_t store_input_boost_freq(struct kobject *kobj,
struct kobj_attribute *attr,
const char *buf, size_t count)
{
int i, ntokens = 0;
unsigned int val, cpu;
const char *cp = buf;
bool enabled = false;
while ((cp = strpbrk(cp + 1, " :")))
ntokens++;
/* single number: apply to all CPUs */
if (!ntokens) {
if (sscanf(buf, "%u\n", &val) != 1)
return -EINVAL;
for_each_possible_cpu(i)
per_cpu(sync_info, i).input_boost_freq = val;
goto check_enable;
}
/* CPU:value pair */
if (!(ntokens % 2))
return -EINVAL;
cp = buf;
for (i = 0; i < ntokens; i += 2) {
if (sscanf(cp, "%u:%u", &cpu, &val) != 2)
return -EINVAL;
if (cpu >= num_possible_cpus())
return -EINVAL;
per_cpu(sync_info, cpu).input_boost_freq = val;
cp = strnchr(cp, PAGE_SIZE - (cp - buf), ' ');
cp++;
}
check_enable:
for_each_possible_cpu(i) {
if (per_cpu(sync_info, i).input_boost_freq) {
enabled = true;
break;
}
}
input_boost_enabled = enabled;
return count;
}
static ssize_t show_input_boost_freq(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
int cnt = 0, cpu;
struct cpu_sync *s;
for_each_possible_cpu(cpu) {
s = &per_cpu(sync_info, cpu);
cnt += snprintf(buf + cnt, PAGE_SIZE - cnt,
"%d:%u ", cpu, s->input_boost_freq);
}
cnt += snprintf(buf + cnt, PAGE_SIZE - cnt, "\n");
return cnt;
}
cpu_boost_attr_rw(input_boost_freq);
static void boost_adjust_notify(struct cpufreq_policy *policy)
{
unsigned int cpu = policy->cpu;
struct cpu_sync *s = &per_cpu(sync_info, cpu);
unsigned int ib_min = s->input_boost_min;
struct freq_qos_request *req = &per_cpu(qos_req, cpu);
int ret;
pr_debug("CPU%u policy min before boost: %u kHz\n",
cpu, policy->min);
pr_debug("CPU%u boost min: %u kHz\n", cpu, ib_min);
ret = freq_qos_update_request(req, ib_min);
if (ret < 0)
pr_err("Failed to update freq constraint in boost_adjust: %d\n",
ib_min);
pr_debug("CPU%u policy min after boost: %u kHz\n",
cpu, policy->min);
return;
}
static void update_policy_online(void)
{
unsigned int i;
struct cpufreq_policy *policy;
struct cpumask online_cpus;
/* Re-evaluate policy to trigger adjust notifier for online CPUs */
get_online_cpus();
online_cpus = *cpu_online_mask;
for_each_cpu(i, &online_cpus) {
policy = cpufreq_cpu_get(i);
if (!policy) {
pr_err("%s: cpufreq policy not found for cpu%d\n",
__func__, i);
return;
}
cpumask_andnot(&online_cpus, &online_cpus,
policy->related_cpus);
boost_adjust_notify(policy);
}
put_online_cpus();
}
static void do_input_boost_rem(struct work_struct *work)
{
unsigned int i, ret;
struct cpu_sync *i_sync_info;
/* Reset the input_boost_min for all CPUs in the system */
pr_debug("Resetting input boost min for all CPUs\n");
for_each_possible_cpu(i) {
i_sync_info = &per_cpu(sync_info, i);
i_sync_info->input_boost_min = 0;
}
/* Update policies for all online CPUs */
update_policy_online();
if (sched_boost_active) {
ret = sched_set_boost(0);
if (ret)
pr_err("cpu-boost: sched boost disable failed\n");
sched_boost_active = false;
}
}
static void do_input_boost(struct work_struct *work)
{
unsigned int i, ret;
struct cpu_sync *i_sync_info;
cancel_delayed_work_sync(&input_boost_rem);
if (sched_boost_active) {
sched_set_boost(0);
sched_boost_active = false;
}
/* Set the input_boost_min for all CPUs in the system */
pr_debug("Setting input boost min for all CPUs\n");
for_each_possible_cpu(i) {
i_sync_info = &per_cpu(sync_info, i);
i_sync_info->input_boost_min = i_sync_info->input_boost_freq;
}
/* Update policies for all online CPUs */
update_policy_online();
/* Enable scheduler boost to migrate tasks to big cluster */
if (sched_boost_on_input > 0) {
ret = sched_set_boost(sched_boost_on_input);
if (ret)
pr_err("cpu-boost: sched boost enable failed\n");
else
sched_boost_active = true;
}
queue_delayed_work(cpu_boost_wq, &input_boost_rem,
msecs_to_jiffies(input_boost_ms));
}
static void cpuboost_input_event(struct input_handle *handle,
unsigned int type, unsigned int code, int value)
{
u64 now;
if (!input_boost_enabled)
return;
now = ktime_to_us(ktime_get());
if (now - last_input_time < MIN_INPUT_INTERVAL)
return;
if (work_pending(&input_boost_work))
return;
queue_work(cpu_boost_wq, &input_boost_work);
last_input_time = ktime_to_us(ktime_get());
}
static int cpuboost_input_connect(struct input_handler *handler,
struct input_dev *dev, const struct input_device_id *id)
{
struct input_handle *handle;
int error;
handle = kzalloc(sizeof(struct input_handle), GFP_KERNEL);
if (!handle)
return -ENOMEM;
handle->dev = dev;
handle->handler = handler;
handle->name = "cpufreq";
error = input_register_handle(handle);
if (error)
goto err2;
error = input_open_device(handle);
if (error)
goto err1;
return 0;
err1:
input_unregister_handle(handle);
err2:
kfree(handle);
return error;
}
static void cpuboost_input_disconnect(struct input_handle *handle)
{
input_close_device(handle);
input_unregister_handle(handle);
kfree(handle);
}
static const struct input_device_id cpuboost_ids[] = {
/* multi-touch touchscreen */
{
.flags = INPUT_DEVICE_ID_MATCH_EVBIT |
INPUT_DEVICE_ID_MATCH_ABSBIT,
.evbit = { BIT_MASK(EV_ABS) },
.absbit = { [BIT_WORD(ABS_MT_POSITION_X)] =
BIT_MASK(ABS_MT_POSITION_X) |
BIT_MASK(ABS_MT_POSITION_Y) },
},
/* touchpad */
{
.flags = INPUT_DEVICE_ID_MATCH_KEYBIT |
INPUT_DEVICE_ID_MATCH_ABSBIT,
.keybit = { [BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH) },
.absbit = { [BIT_WORD(ABS_X)] =
BIT_MASK(ABS_X) | BIT_MASK(ABS_Y) },
},
/* Keypad */
{
.flags = INPUT_DEVICE_ID_MATCH_EVBIT,
.evbit = { BIT_MASK(EV_KEY) },
},
{ },
};
static struct input_handler cpuboost_input_handler = {
.event = cpuboost_input_event,
.connect = cpuboost_input_connect,
.disconnect = cpuboost_input_disconnect,
.name = "cpu-boost",
.id_table = cpuboost_ids,
};
struct kobject *cpu_boost_kobj;
static int cpu_boost_init(void)
{
int cpu, ret;
struct cpu_sync *s;
struct cpufreq_policy *policy;
struct freq_qos_request *req;
cpu_boost_wq = alloc_workqueue("cpuboost_wq", WQ_HIGHPRI, 0);
if (!cpu_boost_wq)
return -EFAULT;
INIT_WORK(&input_boost_work, do_input_boost);
INIT_DELAYED_WORK(&input_boost_rem, do_input_boost_rem);
for_each_possible_cpu(cpu) {
s = &per_cpu(sync_info, cpu);
s->cpu = cpu;
req = &per_cpu(qos_req, cpu);
policy = cpufreq_cpu_get(cpu);
if (!policy) {
pr_err("%s: cpufreq policy not found for cpu%d\n",
__func__, cpu);
return -ESRCH;
}
ret = freq_qos_add_request(&policy->constraints, req,
FREQ_QOS_MIN, policy->min);
if (ret < 0) {
pr_err("%s: Failed to add freq constraint (%d)\n",
__func__, ret);
return ret;
}
}
cpu_boost_kobj = kobject_create_and_add("cpu_boost",
&cpu_subsys.dev_root->kobj);
if (!cpu_boost_kobj)
pr_err("Failed to initialize sysfs node for cpu_boost.\n");
ret = sysfs_create_file(cpu_boost_kobj, &input_boost_ms_attr.attr);
if (ret)
pr_err("Failed to create input_boost_ms node: %d\n", ret);
ret = sysfs_create_file(cpu_boost_kobj, &input_boost_freq_attr.attr);
if (ret)
pr_err("Failed to create input_boost_freq node: %d\n", ret);
ret = sysfs_create_file(cpu_boost_kobj,
&sched_boost_on_input_attr.attr);
if (ret)
pr_err("Failed to create sched_boost_on_input node: %d\n", ret);
ret = input_register_handler(&cpuboost_input_handler);
return 0;
}
late_initcall(cpu_boost_init);
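Based on store_input_boost_freq() above, the input_boost_freq node accepts
either a single frequency, applied to every CPU, or space-separated cpu:freq
pairs; values are minimum frequencies in kHz that are held for input_boost_ms
after an input event. For example:

        "1209600"               boost all CPUs to 1209600 kHz on input
        "0:1209600 4:1804800"   per-CPU cpu:freq pairs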

kernel/sched/walt/qc_vas.c (new file, 744 lines)

@@ -0,0 +1,744 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2019-2021, The Linux Foundation. All rights reserved.
*/
#include <linux/irq.h>
#include <linux/delay.h>
#include <trace/events/sched.h>
#include "qc_vas.h"
#ifdef CONFIG_SCHED_WALT
/* 1ms default for 20ms window size scaled to 1024 */
unsigned int sysctl_sched_min_task_util_for_boost = 51;
/* 0.68ms default for 20ms window size scaled to 1024 */
unsigned int sysctl_sched_min_task_util_for_colocation = 35;
int
kick_active_balance(struct rq *rq, struct task_struct *p, int new_cpu)
{
unsigned long flags;
int rc = 0;
/* Invoke active balance to force migrate currently running task */
raw_spin_lock_irqsave(&rq->lock, flags);
if (!rq->active_balance) {
rq->active_balance = 1;
rq->push_cpu = new_cpu;
get_task_struct(p);
rq->wrq.push_task = p;
rc = 1;
}
raw_spin_unlock_irqrestore(&rq->lock, flags);
return rc;
}
struct walt_rotate_work {
struct work_struct w;
struct task_struct *src_task;
struct task_struct *dst_task;
int src_cpu;
int dst_cpu;
};
DEFINE_PER_CPU(struct walt_rotate_work, walt_rotate_works);
void walt_rotate_work_func(struct work_struct *work)
{
struct walt_rotate_work *wr = container_of(work,
struct walt_rotate_work, w);
migrate_swap(wr->src_task, wr->dst_task, wr->dst_cpu, wr->src_cpu);
put_task_struct(wr->src_task);
put_task_struct(wr->dst_task);
clear_reserved(wr->src_cpu);
clear_reserved(wr->dst_cpu);
}
void walt_rotate_work_init(void)
{
int i;
for_each_possible_cpu(i) {
struct walt_rotate_work *wr = &per_cpu(walt_rotate_works, i);
INIT_WORK(&wr->w, walt_rotate_work_func);
}
}
#define WALT_ROTATION_THRESHOLD_NS 16000000
void walt_check_for_rotation(struct rq *src_rq)
{
u64 wc, wait, max_wait = 0, run, max_run = 0;
int deserved_cpu = nr_cpu_ids, dst_cpu = nr_cpu_ids;
int i, src_cpu = cpu_of(src_rq);
struct rq *dst_rq;
struct walt_rotate_work *wr = NULL;
if (!walt_rotation_enabled)
return;
if (!is_min_capacity_cpu(src_cpu))
return;
wc = sched_ktime_clock();
for_each_possible_cpu(i) {
struct rq *rq = cpu_rq(i);
if (!is_min_capacity_cpu(i))
break;
if (is_reserved(i))
continue;
if (!rq->misfit_task_load || rq->curr->sched_class !=
&fair_sched_class)
continue;
wait = wc - rq->curr->wts.last_enqueued_ts;
if (wait > max_wait) {
max_wait = wait;
deserved_cpu = i;
}
}
if (deserved_cpu != src_cpu)
return;
for_each_possible_cpu(i) {
struct rq *rq = cpu_rq(i);
if (is_min_capacity_cpu(i))
continue;
if (is_reserved(i))
continue;
if (rq->curr->sched_class != &fair_sched_class)
continue;
if (rq->nr_running > 1)
continue;
run = wc - rq->curr->wts.last_enqueued_ts;
if (run < WALT_ROTATION_THRESHOLD_NS)
continue;
if (run > max_run) {
max_run = run;
dst_cpu = i;
}
}
if (dst_cpu == nr_cpu_ids)
return;
dst_rq = cpu_rq(dst_cpu);
double_rq_lock(src_rq, dst_rq);
if (dst_rq->curr->sched_class == &fair_sched_class) {
get_task_struct(src_rq->curr);
get_task_struct(dst_rq->curr);
mark_reserved(src_cpu);
mark_reserved(dst_cpu);
wr = &per_cpu(walt_rotate_works, src_cpu);
wr->src_task = src_rq->curr;
wr->dst_task = dst_rq->curr;
wr->src_cpu = src_cpu;
wr->dst_cpu = dst_cpu;
}
double_rq_unlock(src_rq, dst_rq);
if (wr)
queue_work_on(src_cpu, system_highpri_wq, &wr->w);
}
DEFINE_RAW_SPINLOCK(migration_lock);
void check_for_migration(struct rq *rq, struct task_struct *p)
{
int active_balance;
int new_cpu = -1;
int prev_cpu = task_cpu(p);
int ret;
if (rq->misfit_task_load) {
if (rq->curr->state != TASK_RUNNING ||
rq->curr->nr_cpus_allowed == 1)
return;
if (walt_rotation_enabled) {
raw_spin_lock(&migration_lock);
walt_check_for_rotation(rq);
raw_spin_unlock(&migration_lock);
return;
}
raw_spin_lock(&migration_lock);
rcu_read_lock();
new_cpu = find_energy_efficient_cpu(p, prev_cpu, 0, 1);
rcu_read_unlock();
if ((new_cpu >= 0) && (new_cpu != prev_cpu) &&
(capacity_orig_of(new_cpu) > capacity_orig_of(prev_cpu))) {
active_balance = kick_active_balance(rq, p, new_cpu);
if (active_balance) {
mark_reserved(new_cpu);
raw_spin_unlock(&migration_lock);
ret = stop_one_cpu_nowait(prev_cpu,
active_load_balance_cpu_stop, rq,
&rq->active_balance_work);
if (!ret)
clear_reserved(new_cpu);
else
wake_up_if_idle(new_cpu);
return;
}
}
raw_spin_unlock(&migration_lock);
}
}
int sched_init_task_load_show(struct seq_file *m, void *v)
{
struct inode *inode = m->private;
struct task_struct *p;
p = get_proc_task(inode);
if (!p)
return -ESRCH;
seq_printf(m, "%d\n", sched_get_init_task_load(p));
put_task_struct(p);
return 0;
}
ssize_t
sched_init_task_load_write(struct file *file, const char __user *buf,
size_t count, loff_t *offset)
{
struct inode *inode = file_inode(file);
struct task_struct *p;
char buffer[PROC_NUMBUF];
int init_task_load, err;
memset(buffer, 0, sizeof(buffer));
if (count > sizeof(buffer) - 1)
count = sizeof(buffer) - 1;
if (copy_from_user(buffer, buf, count)) {
err = -EFAULT;
goto out;
}
err = kstrtoint(strstrip(buffer), 0, &init_task_load);
if (err)
goto out;
p = get_proc_task(inode);
if (!p)
return -ESRCH;
err = sched_set_init_task_load(p, init_task_load);
put_task_struct(p);
out:
return err < 0 ? err : count;
}
int sched_init_task_load_open(struct inode *inode, struct file *filp)
{
return single_open(filp, sched_init_task_load_show, inode);
}
int sched_group_id_show(struct seq_file *m, void *v)
{
struct inode *inode = m->private;
struct task_struct *p;
p = get_proc_task(inode);
if (!p)
return -ESRCH;
seq_printf(m, "%d\n", sched_get_group_id(p));
put_task_struct(p);
return 0;
}
ssize_t
sched_group_id_write(struct file *file, const char __user *buf,
size_t count, loff_t *offset)
{
struct inode *inode = file_inode(file);
struct task_struct *p;
char buffer[PROC_NUMBUF];
int group_id, err;
memset(buffer, 0, sizeof(buffer));
if (count > sizeof(buffer) - 1)
count = sizeof(buffer) - 1;
if (copy_from_user(buffer, buf, count)) {
err = -EFAULT;
goto out;
}
err = kstrtoint(strstrip(buffer), 0, &group_id);
if (err)
goto out;
p = get_proc_task(inode);
if (!p)
return -ESRCH;
err = sched_set_group_id(p, group_id);
put_task_struct(p);
out:
return err < 0 ? err : count;
}
int sched_group_id_open(struct inode *inode, struct file *filp)
{
return single_open(filp, sched_group_id_show, inode);
}
#ifdef CONFIG_SMP
/*
* Print out various scheduling related per-task fields:
*/
int sched_wake_up_idle_show(struct seq_file *m, void *v)
{
struct inode *inode = m->private;
struct task_struct *p;
p = get_proc_task(inode);
if (!p)
return -ESRCH;
seq_printf(m, "%d\n", sched_get_wake_up_idle(p));
put_task_struct(p);
return 0;
}
ssize_t
sched_wake_up_idle_write(struct file *file, const char __user *buf,
size_t count, loff_t *offset)
{
struct inode *inode = file_inode(file);
struct task_struct *p;
char buffer[PROC_NUMBUF];
int wake_up_idle, err;
memset(buffer, 0, sizeof(buffer));
if (count > sizeof(buffer) - 1)
count = sizeof(buffer) - 1;
if (copy_from_user(buffer, buf, count)) {
err = -EFAULT;
goto out;
}
err = kstrtoint(strstrip(buffer), 0, &wake_up_idle);
if (err)
goto out;
p = get_proc_task(inode);
if (!p)
return -ESRCH;
err = sched_set_wake_up_idle(p, wake_up_idle);
put_task_struct(p);
out:
return err < 0 ? err : count;
}
int sched_wake_up_idle_open(struct inode *inode, struct file *filp)
{
return single_open(filp, sched_wake_up_idle_show, inode);
}
int group_balance_cpu_not_isolated(struct sched_group *sg)
{
cpumask_t cpus;
cpumask_and(&cpus, sched_group_span(sg), group_balance_mask(sg));
cpumask_andnot(&cpus, &cpus, cpu_isolated_mask);
return cpumask_first(&cpus);
}
#endif /* CONFIG_SMP */
#ifdef CONFIG_PROC_SYSCTL
static void sched_update_updown_migrate_values(bool up)
{
int i = 0, cpu;
struct walt_sched_cluster *cluster;
int cap_margin_levels = num_sched_clusters - 1;
if (cap_margin_levels > 1) {
/*
* No need to worry about CPUs in last cluster
* if there are more than 2 clusters in the system
*/
for_each_sched_cluster(cluster) {
for_each_cpu(cpu, &cluster->cpus) {
if (up)
sched_capacity_margin_up[cpu] =
sysctl_sched_capacity_margin_up[i];
else
sched_capacity_margin_down[cpu] =
sysctl_sched_capacity_margin_down[i];
}
if (++i >= cap_margin_levels)
break;
}
} else {
for_each_possible_cpu(cpu) {
if (up)
sched_capacity_margin_up[cpu] =
sysctl_sched_capacity_margin_up[0];
else
sched_capacity_margin_down[cpu] =
sysctl_sched_capacity_margin_down[0];
}
}
}
int sched_updown_migrate_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)
{
int ret, i;
unsigned int *data = (unsigned int *)table->data;
unsigned int *old_val;
static DEFINE_MUTEX(mutex);
int cap_margin_levels = num_sched_clusters ? num_sched_clusters - 1 : 0;
if (cap_margin_levels <= 0)
return -EINVAL;
mutex_lock(&mutex);
if (table->maxlen != (sizeof(unsigned int) * cap_margin_levels))
table->maxlen = sizeof(unsigned int) * cap_margin_levels;
if (!write) {
ret = proc_douintvec_capacity(table, write, buffer, lenp, ppos);
goto unlock_mutex;
}
/*
* Cache the old values so that they can be restored
* if either the write fails (for example out of range values)
* or the downmigrate and upmigrate are not in sync.
*/
old_val = kzalloc(table->maxlen, GFP_KERNEL);
if (!old_val) {
ret = -ENOMEM;
goto unlock_mutex;
}
memcpy(old_val, data, table->maxlen);
ret = proc_douintvec_capacity(table, write, buffer, lenp, ppos);
if (ret) {
memcpy(data, old_val, table->maxlen);
goto free_old_val;
}
for (i = 0; i < cap_margin_levels; i++) {
if (sysctl_sched_capacity_margin_up[i] >
sysctl_sched_capacity_margin_down[i]) {
memcpy(data, old_val, table->maxlen);
ret = -EINVAL;
goto free_old_val;
}
}
sched_update_updown_migrate_values(data ==
&sysctl_sched_capacity_margin_up[0]);
free_old_val:
kfree(old_val);
unlock_mutex:
mutex_unlock(&mutex);
return ret;
}
#endif /* CONFIG_PROC_SYSCTL */
int sched_isolate_count(const cpumask_t *mask, bool include_offline)
{
cpumask_t count_mask = CPU_MASK_NONE;
if (include_offline) {
cpumask_complement(&count_mask, cpu_online_mask);
cpumask_or(&count_mask, &count_mask, cpu_isolated_mask);
cpumask_and(&count_mask, &count_mask, mask);
} else {
cpumask_and(&count_mask, mask, cpu_isolated_mask);
}
return cpumask_weight(&count_mask);
}
#ifdef CONFIG_HOTPLUG_CPU
static int do_isolation_work_cpu_stop(void *data)
{
unsigned int cpu = smp_processor_id();
struct rq *rq = cpu_rq(cpu);
struct rq_flags rf;
local_irq_disable();
irq_migrate_all_off_this_cpu();
sched_ttwu_pending();
/* Update our root-domain */
rq_lock(rq, &rf);
/*
* Temporarily mark the rq as offline. This will allow us to
* move tasks off the CPU.
*/
if (rq->rd) {
BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
set_rq_offline(rq);
}
migrate_tasks(rq, &rf, false);
if (rq->rd)
set_rq_online(rq);
rq_unlock(rq, &rf);
clear_walt_request(cpu);
local_irq_enable();
return 0;
}
static int do_unisolation_work_cpu_stop(void *data)
{
watchdog_enable(smp_processor_id());
return 0;
}
static void sched_update_group_capacities(int cpu)
{
struct sched_domain *sd;
mutex_lock(&sched_domains_mutex);
rcu_read_lock();
for_each_domain(cpu, sd) {
int balance_cpu = group_balance_cpu(sd->groups);
init_sched_groups_capacity(cpu, sd);
/*
* Need to ensure this is also called with balancing
* cpu.
*/
if (cpu != balance_cpu)
init_sched_groups_capacity(balance_cpu, sd);
}
rcu_read_unlock();
mutex_unlock(&sched_domains_mutex);
}
static unsigned int cpu_isolation_vote[NR_CPUS];
/*
* 1) CPU is isolated and cpu is offlined:
* Unisolate the core.
* 2) CPU is not isolated and CPU is offlined:
* No action taken.
* 3) CPU is offline and request to isolate
* Request ignored.
* 4) CPU is offline and isolated:
* Not a possible state.
* 5) CPU is online and request to isolate
* Normal case: Isolate the CPU
* 6) CPU is not isolated and comes back online
* Nothing to do
*
* Note: The client calling sched_isolate_cpu() is responsible for ONLY
* calling sched_unisolate_cpu() on a CPU that the client previously isolated.
* Client is also responsible for unisolating when a core goes offline
* (after CPU is marked offline).
*/
int sched_isolate_cpu(int cpu)
{
struct rq *rq;
cpumask_t avail_cpus;
int ret_code = 0;
u64 start_time = 0;
if (trace_sched_isolate_enabled())
start_time = sched_clock();
cpu_maps_update_begin();
cpumask_andnot(&avail_cpus, cpu_online_mask, cpu_isolated_mask);
if (cpu < 0 || cpu >= nr_cpu_ids || !cpu_possible(cpu) ||
!cpu_online(cpu) || cpu >= NR_CPUS) {
ret_code = -EINVAL;
goto out;
}
rq = cpu_rq(cpu);
if (++cpu_isolation_vote[cpu] > 1)
goto out;
/* We cannot isolate ALL cpus in the system */
if (cpumask_weight(&avail_cpus) == 1) {
--cpu_isolation_vote[cpu];
ret_code = -EINVAL;
goto out;
}
/*
* There is a race between watchdog being enabled by hotplug and
* core isolation disabling the watchdog. When a CPU is hotplugged in
* and the hotplug lock has been released the watchdog thread might
* not have run yet to enable the watchdog.
* We have to wait for the watchdog to be enabled before proceeding.
*/
if (!watchdog_configured(cpu)) {
msleep(20);
if (!watchdog_configured(cpu)) {
--cpu_isolation_vote[cpu];
ret_code = -EBUSY;
goto out;
}
}
set_cpu_isolated(cpu, true);
cpumask_clear_cpu(cpu, &avail_cpus);
/* Migrate timers */
smp_call_function_any(&avail_cpus, hrtimer_quiesce_cpu, &cpu, 1);
smp_call_function_any(&avail_cpus, timer_quiesce_cpu, &cpu, 1);
watchdog_disable(cpu);
irq_lock_sparse();
stop_cpus(cpumask_of(cpu), do_isolation_work_cpu_stop, 0);
irq_unlock_sparse();
calc_load_migrate(rq);
update_max_interval();
sched_update_group_capacities(cpu);
out:
cpu_maps_update_done();
trace_sched_isolate(cpu, cpumask_bits(cpu_isolated_mask)[0],
start_time, 1);
return ret_code;
}
/*
* Note: The client calling sched_isolate_cpu() is responsible for ONLY
* calling sched_unisolate_cpu() on a CPU that the client previously isolated.
* Client is also responsible for unisolating when a core goes offline
* (after CPU is marked offline).
*/
int sched_unisolate_cpu_unlocked(int cpu)
{
int ret_code = 0;
u64 start_time = 0;
if (cpu < 0 || cpu >= nr_cpu_ids || !cpu_possible(cpu)
|| cpu >= NR_CPUS) {
ret_code = -EINVAL;
goto out;
}
if (trace_sched_isolate_enabled())
start_time = sched_clock();
if (!cpu_isolation_vote[cpu]) {
ret_code = -EINVAL;
goto out;
}
if (--cpu_isolation_vote[cpu])
goto out;
set_cpu_isolated(cpu, false);
update_max_interval();
sched_update_group_capacities(cpu);
if (cpu_online(cpu)) {
stop_cpus(cpumask_of(cpu), do_unisolation_work_cpu_stop, 0);
/* Kick CPU to immediately do load balancing */
if (!atomic_fetch_or(NOHZ_KICK_MASK, nohz_flags(cpu)))
smp_send_reschedule(cpu);
}
out:
trace_sched_isolate(cpu, cpumask_bits(cpu_isolated_mask)[0],
start_time, 0);
return ret_code;
}
int sched_unisolate_cpu(int cpu)
{
int ret_code;
cpu_maps_update_begin();
ret_code = sched_unisolate_cpu_unlocked(cpu);
cpu_maps_update_done();
return ret_code;
}
/*
* Remove a task from the runqueue and pretend that it's migrating. This
* should prevent migrations for the detached task and disallow further
* changes to tsk_cpus_allowed.
*/
void
detach_one_task_core(struct task_struct *p, struct rq *rq,
struct list_head *tasks)
{
lockdep_assert_held(&rq->lock);
p->on_rq = TASK_ON_RQ_MIGRATING;
deactivate_task(rq, p, 0);
list_add(&p->se.group_node, tasks);
}
void attach_tasks_core(struct list_head *tasks, struct rq *rq)
{
struct task_struct *p;
lockdep_assert_held(&rq->lock);
while (!list_empty(tasks)) {
p = list_first_entry(tasks, struct task_struct, se.group_node);
list_del_init(&p->se.group_node);
BUG_ON(task_rq(p) != rq);
activate_task(rq, p, 0);
p->on_rq = TASK_ON_RQ_QUEUED;
}
}
#endif /* CONFIG_HOTPLUG_CPU */
#endif /* CONFIG_SCHED_WALT */
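A minimal caller-side sketch of the per-CPU isolation voting implemented above:
only the first vote actually isolates the CPU, and (as the comments above note)
each client that isolates a CPU is responsible for unisolating it again:

/* Illustrative only. */
static void example_exclusive_window(int cpu)
{
        if (!sched_isolate_cpu(cpu)) {          /* vote 0 -> 1: CPU isolated */
                /* ... run exclusive work on the remaining CPUs ... */
                sched_unisolate_cpu(cpu);       /* vote 1 -> 0: CPU restored */
        }
}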


@@ -0,0 +1,81 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2019-2021, The Linux Foundation. All rights reserved.
*/
#include "../sched.h"
#include "../../../fs/proc/internal.h"
#include "walt.h"
#include "trace.h"
#ifdef CONFIG_SCHED_WALT
#ifdef CONFIG_HZ_300
/*
* Tick interval becomes 3333333 due to
* rounding error when HZ=300.
*/
#define DEFAULT_SCHED_RAVG_WINDOW (3333333 * 5)
#else
/* Min window size (in ns) = 16ms */
#define DEFAULT_SCHED_RAVG_WINDOW 16000000
#endif
/* Max window size (in ns) = 1s */
#define MAX_SCHED_RAVG_WINDOW 1000000000
#define NR_WINDOWS_PER_SEC (NSEC_PER_SEC / DEFAULT_SCHED_RAVG_WINDOW)
extern int num_sched_clusters;
extern unsigned int walt_big_tasks(int cpu);
extern void reset_task_stats(struct task_struct *p);
extern void walt_rotate_work_init(void);
extern void walt_rotation_checkpoint(int nr_big);
extern void walt_fill_ta_data(struct core_ctl_notif_data *data);
extern int sched_set_group_id(struct task_struct *p, unsigned int group_id);
extern unsigned int sched_get_group_id(struct task_struct *p);
extern int sched_set_init_task_load(struct task_struct *p, int init_load_pct);
extern u32 sched_get_init_task_load(struct task_struct *p);
extern void core_ctl_check(u64 wallclock);
extern int sched_set_boost(int enable);
extern int sched_isolate_count(const cpumask_t *mask, bool include_offline);
extern struct list_head cluster_head;
#define for_each_sched_cluster(cluster) \
list_for_each_entry_rcu(cluster, &cluster_head, list)
static inline u32 cpu_cycles_to_freq(u64 cycles, u64 period)
{
return div64_u64(cycles, period);
}
static inline unsigned int sched_cpu_legacy_freq(int cpu)
{
unsigned long curr_cap = arch_scale_freq_capacity(cpu);
return (curr_cap * (u64) cpu_rq(cpu)->wrq.cluster->max_possible_freq) >>
SCHED_CAPACITY_SHIFT;
}
extern __read_mostly bool sched_freq_aggr_en;
static inline void walt_enable_frequency_aggregation(bool enable)
{
sched_freq_aggr_en = enable;
}
#ifndef CONFIG_IRQ_TIME_ACCOUNTING
static inline u64 irq_time_read(int cpu) { return 0; }
#endif
#else
static inline unsigned int walt_big_tasks(int cpu)
{
return 0;
}
static inline int sched_set_boost(int enable)
{
return -EINVAL;
}
#endif
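A quick worked note on the window constants above: with the default 16 ms
window, NR_WINDOWS_PER_SEC = NSEC_PER_SEC / DEFAULT_SCHED_RAVG_WINDOW =
1000000000 / 16000000 = 62 (integer division), while the HZ=300 default of
3333333 * 5 ns works out to roughly 16.67 ms per window.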


@@ -0,0 +1,260 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2012, 2015-2021, The Linux Foundation. All rights reserved.
*/
/*
* Scheduler hook for average runqueue determination
*/
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/hrtimer.h>
#include <linux/sched.h>
#include <linux/math64.h>
#include "qc_vas.h"
#include <trace/events/sched.h>
static DEFINE_PER_CPU(u64, nr_prod_sum);
static DEFINE_PER_CPU(u64, last_time);
static DEFINE_PER_CPU(u64, nr_big_prod_sum);
static DEFINE_PER_CPU(u64, nr);
static DEFINE_PER_CPU(u64, nr_max);
static DEFINE_PER_CPU(spinlock_t, nr_lock) = __SPIN_LOCK_UNLOCKED(nr_lock);
static s64 last_get_time;
unsigned int sysctl_sched_busy_hyst_enable_cpus;
unsigned int sysctl_sched_busy_hyst;
unsigned int sysctl_sched_coloc_busy_hyst_enable_cpus = 112;
unsigned int sysctl_sched_coloc_busy_hyst_cpu[NR_CPUS] = {
[0 ... NR_CPUS-1] = 39000000 };
unsigned int sysctl_sched_coloc_busy_hyst_max_ms = 5000;
unsigned int sysctl_sched_coloc_busy_hyst_cpu_busy_pct[NR_CPUS] = {
[0 ... NR_CPUS-1] = 10 };
static DEFINE_PER_CPU(atomic64_t, busy_hyst_end_time) = ATOMIC64_INIT(0);
static DEFINE_PER_CPU(u64, hyst_time);
static DEFINE_PER_CPU(u64, coloc_hyst_busy);
static DEFINE_PER_CPU(u64, coloc_hyst_time);
#define NR_THRESHOLD_PCT 15
#define MAX_RTGB_TIME (sysctl_sched_coloc_busy_hyst_max_ms * NSEC_PER_MSEC)
/**
* sched_get_nr_running_avg
* @return: Average nr_running, iowait and nr_big_tasks value since last poll.
* Returns the avg * 100 to return up to two decimal points
* of accuracy.
*
* Obtains the average nr_running value since the last poll.
* This function may not be called concurrently with itself
*/
void sched_get_nr_running_avg(struct sched_avg_stats *stats)
{
int cpu;
u64 curr_time = sched_clock();
u64 period = curr_time - last_get_time;
u64 tmp_nr, tmp_misfit;
bool any_hyst_time = false;
if (!period)
return;
/* read and reset nr_running counts */
for_each_possible_cpu(cpu) {
unsigned long flags;
u64 diff;
spin_lock_irqsave(&per_cpu(nr_lock, cpu), flags);
curr_time = sched_clock();
diff = curr_time - per_cpu(last_time, cpu);
BUG_ON((s64)diff < 0);
tmp_nr = per_cpu(nr_prod_sum, cpu);
tmp_nr += per_cpu(nr, cpu) * diff;
tmp_nr = div64_u64((tmp_nr * 100), period);
tmp_misfit = per_cpu(nr_big_prod_sum, cpu);
tmp_misfit += walt_big_tasks(cpu) * diff;
tmp_misfit = div64_u64((tmp_misfit * 100), period);
/*
* NR_THRESHOLD_PCT is to make sure that the task ran
* at least 85% in the last window to compensate any
* over estimating being done.
*/
stats[cpu].nr = (int)div64_u64((tmp_nr + NR_THRESHOLD_PCT),
100);
stats[cpu].nr_misfit = (int)div64_u64((tmp_misfit +
NR_THRESHOLD_PCT), 100);
stats[cpu].nr_max = per_cpu(nr_max, cpu);
stats[cpu].nr_scaled = tmp_nr;
trace_sched_get_nr_running_avg(cpu, stats[cpu].nr,
stats[cpu].nr_misfit, stats[cpu].nr_max,
stats[cpu].nr_scaled);
per_cpu(last_time, cpu) = curr_time;
per_cpu(nr_prod_sum, cpu) = 0;
per_cpu(nr_big_prod_sum, cpu) = 0;
per_cpu(nr_max, cpu) = per_cpu(nr, cpu);
spin_unlock_irqrestore(&per_cpu(nr_lock, cpu), flags);
}
for_each_possible_cpu(cpu) {
if (per_cpu(coloc_hyst_time, cpu)) {
any_hyst_time = true;
break;
}
}
if (any_hyst_time && get_rtgb_active_time() >= MAX_RTGB_TIME)
sched_update_hyst_times();
last_get_time = curr_time;
}
EXPORT_SYMBOL(sched_get_nr_running_avg);
void sched_update_hyst_times(void)
{
bool rtgb_active;
int cpu;
unsigned long cpu_cap, coloc_busy_pct;
rtgb_active = is_rtgb_active() && (sched_boost() != CONSERVATIVE_BOOST)
&& (get_rtgb_active_time() < MAX_RTGB_TIME);
for_each_possible_cpu(cpu) {
cpu_cap = arch_scale_cpu_capacity(cpu);
coloc_busy_pct = sysctl_sched_coloc_busy_hyst_cpu_busy_pct[cpu];
per_cpu(hyst_time, cpu) = (BIT(cpu)
& sysctl_sched_busy_hyst_enable_cpus) ?
sysctl_sched_busy_hyst : 0;
per_cpu(coloc_hyst_time, cpu) = ((BIT(cpu)
& sysctl_sched_coloc_busy_hyst_enable_cpus)
&& rtgb_active) ?
sysctl_sched_coloc_busy_hyst_cpu[cpu] : 0;
per_cpu(coloc_hyst_busy, cpu) = mult_frac(cpu_cap,
coloc_busy_pct, 100);
}
}
#define BUSY_NR_RUN 3
#define BUSY_LOAD_FACTOR 10
static inline void update_busy_hyst_end_time(int cpu, bool dequeue,
unsigned long prev_nr_run, u64 curr_time)
{
bool nr_run_trigger = false;
bool load_trigger = false, coloc_load_trigger = false;
u64 agg_hyst_time;
if (!per_cpu(hyst_time, cpu) && !per_cpu(coloc_hyst_time, cpu))
return;
if (prev_nr_run >= BUSY_NR_RUN && per_cpu(nr, cpu) < BUSY_NR_RUN)
nr_run_trigger = true;
if (dequeue && (cpu_util(cpu) * BUSY_LOAD_FACTOR) >
capacity_orig_of(cpu))
load_trigger = true;
if (dequeue && cpu_util(cpu) > per_cpu(coloc_hyst_busy, cpu))
coloc_load_trigger = true;
agg_hyst_time = max((nr_run_trigger || load_trigger) ?
per_cpu(hyst_time, cpu) : 0,
(nr_run_trigger || coloc_load_trigger) ?
per_cpu(coloc_hyst_time, cpu) : 0);
if (agg_hyst_time)
atomic64_set(&per_cpu(busy_hyst_end_time, cpu),
curr_time + agg_hyst_time);
}
int sched_busy_hyst_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int ret;
if (table->maxlen > (sizeof(unsigned int) * num_possible_cpus()))
table->maxlen = sizeof(unsigned int) * num_possible_cpus();
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (!ret && write)
sched_update_hyst_times();
return ret;
}
/**
* sched_update_nr_prod
* @cpu: The core id of the nr running driver.
* @delta: Adjust nr by 'delta' amount
* @inc: Whether we are increasing or decreasing the count
* @return: N/A
*
* Update average with latest nr_running value for CPU
*/
void sched_update_nr_prod(int cpu, long delta, bool inc)
{
u64 diff;
u64 curr_time;
unsigned long flags, nr_running;
spin_lock_irqsave(&per_cpu(nr_lock, cpu), flags);
nr_running = per_cpu(nr, cpu);
curr_time = sched_clock();
diff = curr_time - per_cpu(last_time, cpu);
BUG_ON((s64)diff < 0);
per_cpu(last_time, cpu) = curr_time;
per_cpu(nr, cpu) = nr_running + (inc ? delta : -delta);
BUG_ON((s64)per_cpu(nr, cpu) < 0);
if (per_cpu(nr, cpu) > per_cpu(nr_max, cpu))
per_cpu(nr_max, cpu) = per_cpu(nr, cpu);
update_busy_hyst_end_time(cpu, !inc, nr_running, curr_time);
per_cpu(nr_prod_sum, cpu) += nr_running * diff;
per_cpu(nr_big_prod_sum, cpu) += walt_big_tasks(cpu) * diff;
spin_unlock_irqrestore(&per_cpu(nr_lock, cpu), flags);
}
EXPORT_SYMBOL(sched_update_nr_prod);
/*
* Returns the CPU utilization % in the last window.
*
*/
unsigned int sched_get_cpu_util(int cpu)
{
struct rq *rq = cpu_rq(cpu);
u64 util;
unsigned long capacity, flags;
unsigned int busy;
raw_spin_lock_irqsave(&rq->lock, flags);
capacity = capacity_orig_of(cpu);
util = rq->wrq.prev_runnable_sum + rq->wrq.grp_time.prev_runnable_sum;
util = div64_u64(util, sched_ravg_window >> SCHED_CAPACITY_SHIFT);
raw_spin_unlock_irqrestore(&rq->lock, flags);
util = (util >= capacity) ? capacity : util;
busy = div64_ul((util * 100), capacity);
return busy;
}
u64 sched_lpm_disallowed_time(int cpu)
{
u64 now = sched_clock();
u64 bias_end_time = atomic64_read(&per_cpu(busy_hyst_end_time, cpu));
if (now < bias_end_time)
return bias_end_time - now;
return 0;
}
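To make the scaling in sched_get_nr_running_avg() concrete: tmp_nr is the
average number of runnable tasks multiplied by 100, and NR_THRESHOLD_PCT (15)
is added before scaling back down, so the average only rounds up once the
extra task has run at least 85% of the window. A small sketch of that
arithmetic (helper name is illustrative):

/* Illustrative only: mirrors the stats[cpu].nr computation above. */
static inline int example_round_nr(u64 tmp_nr)
{
        return (int)div64_u64(tmp_nr + NR_THRESHOLD_PCT, 100);
}

/* example_round_nr(285) == 3, example_round_nr(280) == 2 */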

kernel/sched/walt/trace.c (new file, 82 lines)

@@ -0,0 +1,82 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2019-2021, The Linux Foundation. All rights reserved.
*/
#include "qc_vas.h"
#ifdef CONFIG_SCHED_WALT
static inline void __window_data(u32 *dst, u32 *src)
{
if (src)
memcpy(dst, src, nr_cpu_ids * sizeof(u32));
else
memset(dst, 0, nr_cpu_ids * sizeof(u32));
}
struct trace_seq;
const char *__window_print(struct trace_seq *p, const u32 *buf, int buf_len)
{
int i;
const char *ret = p->buffer + seq_buf_used(&p->seq);
for (i = 0; i < buf_len; i++)
trace_seq_printf(p, "%u ", buf[i]);
trace_seq_putc(p, 0);
return ret;
}
static inline s64 __rq_update_sum(struct rq *rq, bool curr, bool new)
{
if (curr)
if (new)
return rq->wrq.nt_curr_runnable_sum;
else
return rq->wrq.curr_runnable_sum;
else
if (new)
return rq->wrq.nt_prev_runnable_sum;
else
return rq->wrq.prev_runnable_sum;
}
static inline s64 __grp_update_sum(struct rq *rq, bool curr, bool new)
{
if (curr)
if (new)
return rq->wrq.grp_time.nt_curr_runnable_sum;
else
return rq->wrq.grp_time.curr_runnable_sum;
else
if (new)
return rq->wrq.grp_time.nt_prev_runnable_sum;
else
return rq->wrq.grp_time.prev_runnable_sum;
}
static inline s64
__get_update_sum(struct rq *rq, enum migrate_types migrate_type,
bool src, bool new, bool curr)
{
switch (migrate_type) {
case RQ_TO_GROUP:
if (src)
return __rq_update_sum(rq, curr, new);
else
return __grp_update_sum(rq, curr, new);
case GROUP_TO_RQ:
if (src)
return __grp_update_sum(rq, curr, new);
else
return __rq_update_sum(rq, curr, new);
default:
WARN_ON_ONCE(1);
return -1;
}
}
#endif
#define CREATE_TRACE_POINTS
#include "trace.h"

kernel/sched/walt/trace.h (new file, 669 lines)

@@ -0,0 +1,669 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2019-2021, The Linux Foundation. All rights reserved.
*/
#undef TRACE_SYSTEM
#define TRACE_SYSTEM sched
#if !defined(_TRACE_WALT_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_WALT_H
#include <linux/tracepoint.h>
#ifdef CONFIG_SCHED_WALT
struct rq;
struct group_cpu_time;
extern const char __weak *task_event_names[];
TRACE_EVENT(sched_update_pred_demand,
TP_PROTO(struct task_struct *p, u32 runtime, int pct,
unsigned int pred_demand),
TP_ARGS(p, runtime, pct, pred_demand),
TP_STRUCT__entry(
__array(char, comm, TASK_COMM_LEN)
__field(pid_t, pid)
__field(unsigned int, runtime)
__field(int, pct)
__field(unsigned int, pred_demand)
__array(u8, bucket, NUM_BUSY_BUCKETS)
__field(int, cpu)
),
TP_fast_assign(
memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
__entry->pid = p->pid;
__entry->runtime = runtime;
__entry->pct = pct;
__entry->pred_demand = pred_demand;
memcpy(__entry->bucket, p->wts.busy_buckets,
NUM_BUSY_BUCKETS * sizeof(u8));
__entry->cpu = task_cpu(p);
),
TP_printk("%d (%s): runtime %u pct %d cpu %d pred_demand %u (buckets: %u %u %u %u %u %u %u %u %u %u)",
__entry->pid, __entry->comm,
__entry->runtime, __entry->pct, __entry->cpu,
__entry->pred_demand, __entry->bucket[0], __entry->bucket[1],
__entry->bucket[2], __entry->bucket[3], __entry->bucket[4],
__entry->bucket[5], __entry->bucket[6], __entry->bucket[7],
__entry->bucket[8], __entry->bucket[9])
);
TRACE_EVENT(sched_update_history,
TP_PROTO(struct rq *rq, struct task_struct *p, u32 runtime, int samples,
enum task_event evt),
TP_ARGS(rq, p, runtime, samples, evt),
TP_STRUCT__entry(
__array(char, comm, TASK_COMM_LEN)
__field(pid_t, pid)
__field(unsigned int, runtime)
__field(int, samples)
__field(enum task_event, evt)
__field(unsigned int, demand)
__field(unsigned int, coloc_demand)
__field(unsigned int, pred_demand)
__array(u32, hist, RAVG_HIST_SIZE_MAX)
__field(unsigned int, nr_big_tasks)
__field(int, cpu)
),
TP_fast_assign(
memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
__entry->pid = p->pid;
__entry->runtime = runtime;
__entry->samples = samples;
__entry->evt = evt;
__entry->demand = p->wts.demand;
__entry->coloc_demand = p->wts.coloc_demand;
__entry->pred_demand = p->wts.pred_demand;
memcpy(__entry->hist, p->wts.sum_history,
RAVG_HIST_SIZE_MAX * sizeof(u32));
__entry->nr_big_tasks = rq->wrq.walt_stats.nr_big_tasks;
__entry->cpu = rq->cpu;
),
TP_printk("%d (%s): runtime %u samples %d event %s demand %u coloc_demand %u pred_demand %u (hist: %u %u %u %u %u) cpu %d nr_big %u",
__entry->pid, __entry->comm,
__entry->runtime, __entry->samples,
task_event_names[__entry->evt],
__entry->demand, __entry->coloc_demand, __entry->pred_demand,
__entry->hist[0], __entry->hist[1],
__entry->hist[2], __entry->hist[3],
__entry->hist[4], __entry->cpu, __entry->nr_big_tasks)
);
TRACE_EVENT(sched_get_task_cpu_cycles,
TP_PROTO(int cpu, int event, u64 cycles,
u64 exec_time, struct task_struct *p),
TP_ARGS(cpu, event, cycles, exec_time, p),
TP_STRUCT__entry(
__field(int, cpu)
__field(int, event)
__field(u64, cycles)
__field(u64, exec_time)
__field(u32, freq)
__field(u32, legacy_freq)
__field(u32, max_freq)
__field(pid_t, pid)
__array(char, comm, TASK_COMM_LEN)
),
TP_fast_assign(
__entry->cpu = cpu;
__entry->event = event;
__entry->cycles = cycles;
__entry->exec_time = exec_time;
__entry->freq = cpu_cycles_to_freq(cycles, exec_time);
__entry->legacy_freq = sched_cpu_legacy_freq(cpu);
__entry->max_freq = cpu_max_freq(cpu);
__entry->pid = p->pid;
memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
),
TP_printk("cpu=%d event=%d cycles=%llu exec_time=%llu freq=%u legacy_freq=%u max_freq=%u task=%d (%s)",
__entry->cpu, __entry->event, __entry->cycles,
__entry->exec_time, __entry->freq, __entry->legacy_freq,
__entry->max_freq, __entry->pid, __entry->comm)
);
TRACE_EVENT(sched_update_task_ravg,
TP_PROTO(struct task_struct *p, struct rq *rq, enum task_event evt,
u64 wallclock, u64 irqtime,
struct group_cpu_time *cpu_time),
TP_ARGS(p, rq, evt, wallclock, irqtime, cpu_time),
TP_STRUCT__entry(
__array(char, comm, TASK_COMM_LEN)
__field(pid_t, pid)
__field(pid_t, cur_pid)
__field(unsigned int, cur_freq)
__field(u64, wallclock)
__field(u64, mark_start)
__field(u64, delta_m)
__field(u64, win_start)
__field(u64, delta)
__field(u64, irqtime)
__field(enum task_event, evt)
__field(unsigned int, demand)
__field(unsigned int, coloc_demand)
__field(unsigned int, sum)
__field(int, cpu)
__field(unsigned int, pred_demand)
__field(u64, rq_cs)
__field(u64, rq_ps)
__field(u64, grp_cs)
__field(u64, grp_ps)
__field(u64, grp_nt_cs)
__field(u64, grp_nt_ps)
__field(u32, curr_window)
__field(u32, prev_window)
__dynamic_array(u32, curr_sum, nr_cpu_ids)
__dynamic_array(u32, prev_sum, nr_cpu_ids)
__field(u64, nt_cs)
__field(u64, nt_ps)
__field(u64, active_time)
__field(u32, curr_top)
__field(u32, prev_top)
),
TP_fast_assign(
__entry->wallclock = wallclock;
__entry->win_start = rq->wrq.window_start;
__entry->delta = (wallclock - rq->wrq.window_start);
__entry->evt = evt;
__entry->cpu = rq->cpu;
__entry->cur_pid = rq->curr->pid;
__entry->cur_freq = rq->wrq.task_exec_scale;
memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
__entry->pid = p->pid;
__entry->mark_start = p->wts.mark_start;
__entry->delta_m = (wallclock - p->wts.mark_start);
__entry->demand = p->wts.demand;
__entry->coloc_demand = p->wts.coloc_demand;
__entry->sum = p->wts.sum;
__entry->irqtime = irqtime;
__entry->pred_demand = p->wts.pred_demand;
__entry->rq_cs = rq->wrq.curr_runnable_sum;
__entry->rq_ps = rq->wrq.prev_runnable_sum;
__entry->grp_cs = cpu_time ? cpu_time->curr_runnable_sum : 0;
__entry->grp_ps = cpu_time ? cpu_time->prev_runnable_sum : 0;
__entry->grp_nt_cs = cpu_time ?
cpu_time->nt_curr_runnable_sum : 0;
__entry->grp_nt_ps = cpu_time ?
cpu_time->nt_prev_runnable_sum : 0;
__entry->curr_window = p->wts.curr_window;
__entry->prev_window = p->wts.prev_window;
__window_data(__get_dynamic_array(curr_sum),
p->wts.curr_window_cpu);
__window_data(__get_dynamic_array(prev_sum),
p->wts.prev_window_cpu);
__entry->nt_cs = rq->wrq.nt_curr_runnable_sum;
__entry->nt_ps = rq->wrq.nt_prev_runnable_sum;
__entry->active_time = p->wts.active_time;
__entry->curr_top = rq->wrq.curr_top;
__entry->prev_top = rq->wrq.prev_top;
),
TP_printk("wc %llu ws %llu delta %llu event %s cpu %d cur_freq %u cur_pid %d task %d (%s) ms %llu delta %llu demand %u coloc_demand: %u sum %u irqtime %llu pred_demand %u rq_cs %llu rq_ps %llu cur_window %u (%s) prev_window %u (%s) nt_cs %llu nt_ps %llu active_time %u grp_cs %lld grp_ps %lld, grp_nt_cs %llu, grp_nt_ps: %llu curr_top %u prev_top %u",
__entry->wallclock, __entry->win_start, __entry->delta,
task_event_names[__entry->evt], __entry->cpu,
__entry->cur_freq, __entry->cur_pid,
__entry->pid, __entry->comm, __entry->mark_start,
__entry->delta_m, __entry->demand, __entry->coloc_demand,
__entry->sum, __entry->irqtime, __entry->pred_demand,
__entry->rq_cs, __entry->rq_ps, __entry->curr_window,
__window_print(p, __get_dynamic_array(curr_sum), nr_cpu_ids),
__entry->prev_window,
__window_print(p, __get_dynamic_array(prev_sum), nr_cpu_ids),
__entry->nt_cs, __entry->nt_ps,
__entry->active_time, __entry->grp_cs,
__entry->grp_ps, __entry->grp_nt_cs, __entry->grp_nt_ps,
__entry->curr_top, __entry->prev_top)
);
TRACE_EVENT(sched_update_task_ravg_mini,
TP_PROTO(struct task_struct *p, struct rq *rq, enum task_event evt,
u64 wallclock, u64 irqtime,
struct group_cpu_time *cpu_time),
TP_ARGS(p, rq, evt, wallclock, irqtime, cpu_time),
TP_STRUCT__entry(
__array(char, comm, TASK_COMM_LEN)
__field(pid_t, pid)
__field(u64, wallclock)
__field(u64, mark_start)
__field(u64, delta_m)
__field(u64, win_start)
__field(u64, delta)
__field(enum task_event, evt)
__field(unsigned int, demand)
__field(int, cpu)
__field(u64, rq_cs)
__field(u64, rq_ps)
__field(u64, grp_cs)
__field(u64, grp_ps)
__field(u32, curr_window)
__field(u32, prev_window)
),
TP_fast_assign(
__entry->wallclock = wallclock;
__entry->win_start = rq->wrq.window_start;
__entry->delta = (wallclock - rq->wrq.window_start);
__entry->evt = evt;
__entry->cpu = rq->cpu;
memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
__entry->pid = p->pid;
__entry->mark_start = p->wts.mark_start;
__entry->delta_m = (wallclock - p->wts.mark_start);
__entry->demand = p->wts.demand;
__entry->rq_cs = rq->wrq.curr_runnable_sum;
__entry->rq_ps = rq->wrq.prev_runnable_sum;
__entry->grp_cs = cpu_time ? cpu_time->curr_runnable_sum : 0;
__entry->grp_ps = cpu_time ? cpu_time->prev_runnable_sum : 0;
__entry->curr_window = p->wts.curr_window;
__entry->prev_window = p->wts.prev_window;
),
TP_printk("wc %llu ws %llu delta %llu event %s cpu %d task %d (%s) ms %llu delta %llu demand %u rq_cs %llu rq_ps %llu cur_window %u prev_window %u grp_cs %lld grp_ps %lld",
__entry->wallclock, __entry->win_start, __entry->delta,
task_event_names[__entry->evt], __entry->cpu,
__entry->pid, __entry->comm, __entry->mark_start,
__entry->delta_m, __entry->demand,
__entry->rq_cs, __entry->rq_ps, __entry->curr_window,
__entry->prev_window, __entry->grp_cs, __entry->grp_ps)
);
struct migration_sum_data;
extern const char *migrate_type_names[];
TRACE_EVENT(sched_set_preferred_cluster,
TP_PROTO(struct walt_related_thread_group *grp, u64 total_demand),
TP_ARGS(grp, total_demand),
TP_STRUCT__entry(
__field(int, id)
__field(u64, total_demand)
__field(bool, skip_min)
),
TP_fast_assign(
__entry->id = grp->id;
__entry->total_demand = total_demand;
__entry->skip_min = grp->skip_min;
),
TP_printk("group_id %d total_demand %llu skip_min %d",
__entry->id, __entry->total_demand,
__entry->skip_min)
);
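/*
 * sched_migration_update_sum samples the runnable sums affected by a
 * migration via __get_update_sum(); judging by the assignments below, the
 * three boolean arguments select source vs. destination, new-task vs.
 * total, and current vs. previous window, respectively.
 */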
TRACE_EVENT(sched_migration_update_sum,
TP_PROTO(struct task_struct *p, enum migrate_types migrate_type,
struct rq *rq),
TP_ARGS(p, migrate_type, rq),
TP_STRUCT__entry(
__field(int, tcpu)
__field(int, pid)
__field(enum migrate_types, migrate_type)
__field(s64, src_cs)
__field(s64, src_ps)
__field(s64, dst_cs)
__field(s64, dst_ps)
__field(s64, src_nt_cs)
__field(s64, src_nt_ps)
__field(s64, dst_nt_cs)
__field(s64, dst_nt_ps)
),
TP_fast_assign(
__entry->tcpu = task_cpu(p);
__entry->pid = p->pid;
__entry->migrate_type = migrate_type;
__entry->src_cs = __get_update_sum(rq, migrate_type,
true, false, true);
__entry->src_ps = __get_update_sum(rq, migrate_type,
true, false, false);
__entry->dst_cs = __get_update_sum(rq, migrate_type,
false, false, true);
__entry->dst_ps = __get_update_sum(rq, migrate_type,
false, false, false);
__entry->src_nt_cs = __get_update_sum(rq, migrate_type,
true, true, true);
__entry->src_nt_ps = __get_update_sum(rq, migrate_type,
true, true, false);
__entry->dst_nt_cs = __get_update_sum(rq, migrate_type,
false, true, true);
__entry->dst_nt_ps = __get_update_sum(rq, migrate_type,
false, true, false);
),
TP_printk("pid %d task_cpu %d migrate_type %s src_cs %llu src_ps %llu dst_cs %lld dst_ps %lld src_nt_cs %llu src_nt_ps %llu dst_nt_cs %lld dst_nt_ps %lld",
__entry->pid, __entry->tcpu,
migrate_type_names[__entry->migrate_type],
__entry->src_cs, __entry->src_ps, __entry->dst_cs,
__entry->dst_ps, __entry->src_nt_cs, __entry->src_nt_ps,
__entry->dst_nt_cs, __entry->dst_nt_ps)
);
TRACE_EVENT(sched_set_boost,
TP_PROTO(int type),
TP_ARGS(type),
TP_STRUCT__entry(
__field(int, type)
),
TP_fast_assign(
__entry->type = type;
),
TP_printk("type %d", __entry->type)
);
TRACE_EVENT(sched_load_to_gov,
TP_PROTO(struct rq *rq, u64 aggr_grp_load, u32 tt_load,
int freq_aggr, u64 load, int policy,
int big_task_rotation,
unsigned int user_hint),
TP_ARGS(rq, aggr_grp_load, tt_load, freq_aggr, load, policy,
big_task_rotation, user_hint),
TP_STRUCT__entry(
__field(int, cpu)
__field(int, policy)
__field(int, ed_task_pid)
__field(u64, aggr_grp_load)
__field(int, freq_aggr)
__field(u64, tt_load)
__field(u64, rq_ps)
__field(u64, grp_rq_ps)
__field(u64, nt_ps)
__field(u64, grp_nt_ps)
__field(u64, pl)
__field(u64, load)
__field(int, big_task_rotation)
__field(unsigned int, user_hint)
),
TP_fast_assign(
__entry->cpu = cpu_of(rq);
__entry->policy = policy;
__entry->ed_task_pid =
rq->wrq.ed_task ? rq->wrq.ed_task->pid : -1;
__entry->aggr_grp_load = aggr_grp_load;
__entry->freq_aggr = freq_aggr;
__entry->tt_load = tt_load;
__entry->rq_ps = rq->wrq.prev_runnable_sum;
__entry->grp_rq_ps = rq->wrq.grp_time.prev_runnable_sum;
__entry->nt_ps = rq->wrq.nt_prev_runnable_sum;
__entry->grp_nt_ps = rq->wrq.grp_time.nt_prev_runnable_sum;
__entry->pl =
rq->wrq.walt_stats.pred_demands_sum_scaled;
__entry->load = load;
__entry->big_task_rotation = big_task_rotation;
__entry->user_hint = user_hint;
),
TP_printk("cpu=%d policy=%d ed_task_pid=%d aggr_grp_load=%llu freq_aggr=%d tt_load=%llu rq_ps=%llu grp_rq_ps=%llu nt_ps=%llu grp_nt_ps=%llu pl=%llu load=%llu big_task_rotation=%d user_hint=%u",
__entry->cpu, __entry->policy, __entry->ed_task_pid,
__entry->aggr_grp_load, __entry->freq_aggr,
__entry->tt_load, __entry->rq_ps, __entry->grp_rq_ps,
__entry->nt_ps, __entry->grp_nt_ps, __entry->pl, __entry->load,
__entry->big_task_rotation, __entry->user_hint)
);
TRACE_EVENT(core_ctl_eval_need,
TP_PROTO(unsigned int cpu, unsigned int old_need,
unsigned int new_need, unsigned int updated),
TP_ARGS(cpu, old_need, new_need, updated),
TP_STRUCT__entry(
__field(u32, cpu)
__field(u32, old_need)
__field(u32, new_need)
__field(u32, updated)
),
TP_fast_assign(
__entry->cpu = cpu;
__entry->old_need = old_need;
__entry->new_need = new_need;
__entry->updated = updated;
),
TP_printk("cpu=%u, old_need=%u, new_need=%u, updated=%u", __entry->cpu,
__entry->old_need, __entry->new_need, __entry->updated)
);
TRACE_EVENT(core_ctl_set_busy,
TP_PROTO(unsigned int cpu, unsigned int busy,
unsigned int old_is_busy, unsigned int is_busy),
TP_ARGS(cpu, busy, old_is_busy, is_busy),
TP_STRUCT__entry(
__field(u32, cpu)
__field(u32, busy)
__field(u32, old_is_busy)
__field(u32, is_busy)
__field(bool, high_irqload)
),
TP_fast_assign(
__entry->cpu = cpu;
__entry->busy = busy;
__entry->old_is_busy = old_is_busy;
__entry->is_busy = is_busy;
__entry->high_irqload = sched_cpu_high_irqload(cpu);
),
TP_printk("cpu=%u, busy=%u, old_is_busy=%u, new_is_busy=%u high_irqload=%d",
__entry->cpu, __entry->busy, __entry->old_is_busy,
__entry->is_busy, __entry->high_irqload)
);
TRACE_EVENT(core_ctl_set_boost,
TP_PROTO(u32 refcount, s32 ret),
TP_ARGS(refcount, ret),
TP_STRUCT__entry(
__field(u32, refcount)
__field(s32, ret)
),
TP_fast_assign(
__entry->refcount = refcount;
__entry->ret = ret;
),
TP_printk("refcount=%u, ret=%d", __entry->refcount, __entry->ret)
);
TRACE_EVENT(core_ctl_update_nr_need,
TP_PROTO(int cpu, int nr_need, int prev_misfit_need,
int nrrun, int max_nr, int nr_prev_assist),
TP_ARGS(cpu, nr_need, prev_misfit_need, nrrun, max_nr, nr_prev_assist),
TP_STRUCT__entry(
__field(int, cpu)
__field(int, nr_need)
__field(int, prev_misfit_need)
__field(int, nrrun)
__field(int, max_nr)
__field(int, nr_prev_assist)
),
TP_fast_assign(
__entry->cpu = cpu;
__entry->nr_need = nr_need;
__entry->prev_misfit_need = prev_misfit_need;
__entry->nrrun = nrrun;
__entry->max_nr = max_nr;
__entry->nr_prev_assist = nr_prev_assist;
),
TP_printk("cpu=%d nr_need=%d prev_misfit_need=%d nrrun=%d max_nr=%d nr_prev_assist=%d",
__entry->cpu, __entry->nr_need, __entry->prev_misfit_need,
__entry->nrrun, __entry->max_nr, __entry->nr_prev_assist)
);
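/*
 * core_ctl_notif_data reports the big-task count, the top-app load and the
 * per-cluster top-app utilization/current capacity. The printk below emits
 * exactly three clusters, so it implicitly assumes MAX_CLUSTERS >= 3.
 */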
TRACE_EVENT(core_ctl_notif_data,
TP_PROTO(u32 nr_big, u32 ta_load, u32 *ta_util, u32 *cur_cap),
TP_ARGS(nr_big, ta_load, ta_util, cur_cap),
TP_STRUCT__entry(
__field(u32, nr_big)
__field(u32, ta_load)
__array(u32, ta_util, MAX_CLUSTERS)
__array(u32, cur_cap, MAX_CLUSTERS)
),
TP_fast_assign(
__entry->nr_big = nr_big;
__entry->ta_load = ta_load;
memcpy(__entry->ta_util, ta_util, MAX_CLUSTERS * sizeof(u32));
memcpy(__entry->cur_cap, cur_cap, MAX_CLUSTERS * sizeof(u32));
),
TP_printk("nr_big=%u ta_load=%u ta_util=(%u %u %u) cur_cap=(%u %u %u)",
__entry->nr_big, __entry->ta_load,
__entry->ta_util[0], __entry->ta_util[1],
__entry->ta_util[2], __entry->cur_cap[0],
__entry->cur_cap[1], __entry->cur_cap[2])
);
/*
* Tracepoint for sched_get_nr_running_avg
*/
TRACE_EVENT(sched_get_nr_running_avg,
TP_PROTO(int cpu, int nr, int nr_misfit, int nr_max, int nr_scaled),
TP_ARGS(cpu, nr, nr_misfit, nr_max, nr_scaled),
TP_STRUCT__entry(
__field(int, cpu)
__field(int, nr)
__field(int, nr_misfit)
__field(int, nr_max)
__field(int, nr_scaled)
),
TP_fast_assign(
__entry->cpu = cpu;
__entry->nr = nr;
__entry->nr_misfit = nr_misfit;
__entry->nr_max = nr_max;
__entry->nr_scaled = nr_scaled;
),
TP_printk("cpu=%d nr=%d nr_misfit=%d nr_max=%d nr_scaled=%d",
__entry->cpu, __entry->nr, __entry->nr_misfit, __entry->nr_max,
__entry->nr_scaled)
);
/*
* sched_isolate - called when cores are isolated/unisolated
*
* @requested_cpu: cpu requested to be isolated/unisolated
* @isolated_cpus: bitmask of the cpus currently isolated
* @start_time: sched_clock() timestamp when the operation started;
*              the trace reports the elapsed time in us
* @isolate: 1 if isolating, 0 if unisolating
*/
TRACE_EVENT(sched_isolate,
TP_PROTO(unsigned int requested_cpu, unsigned int isolated_cpus,
u64 start_time, unsigned char isolate),
TP_ARGS(requested_cpu, isolated_cpus, start_time, isolate),
TP_STRUCT__entry(
__field(u32, requested_cpu)
__field(u32, isolated_cpus)
__field(u32, time)
__field(unsigned char, isolate)
),
TP_fast_assign(
__entry->requested_cpu = requested_cpu;
__entry->isolated_cpus = isolated_cpus;
__entry->time = div64_u64(sched_clock() - start_time, 1000);
__entry->isolate = isolate;
),
TP_printk("iso cpu=%u cpus=0x%x time=%u us isolated=%d",
__entry->requested_cpu, __entry->isolated_cpus,
__entry->time, __entry->isolate)
);
TRACE_EVENT(sched_ravg_window_change,
TP_PROTO(unsigned int sched_ravg_window,
unsigned int new_sched_ravg_window, u64 change_time),
TP_ARGS(sched_ravg_window, new_sched_ravg_window, change_time),
TP_STRUCT__entry(
__field(unsigned int, sched_ravg_window)
__field(unsigned int, new_sched_ravg_window)
__field(u64, change_time)
),
TP_fast_assign(
__entry->sched_ravg_window = sched_ravg_window;
__entry->new_sched_ravg_window = new_sched_ravg_window;
__entry->change_time = change_time;
),
TP_printk("from=%u to=%u at=%lu",
__entry->sched_ravg_window, __entry->new_sched_ravg_window,
__entry->change_time)
);
TRACE_EVENT(walt_window_rollover,
TP_PROTO(u64 window_start),
TP_ARGS(window_start),
TP_STRUCT__entry(
__field(u64, window_start)
),
TP_fast_assign(
__entry->window_start = window_start;
),
TP_printk("window_start=%llu", __entry->window_start)
);
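/*
 * Each TRACE_EVENT() above generates a trace_<name>() helper that the WALT
 * code calls at the matching point. For example, walt.c would be expected
 * to emit the rollover event roughly as follows (illustrative sketch only;
 * the exact call site lives in walt.c):
 *
 *	trace_walt_window_rollover(rq->wrq.window_start);
 */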
#endif /* CONFIG_SCHED_WALT */
#endif /* _TRACE_WALT_H */
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH .
#define TRACE_INCLUDE_FILE trace
#include <trace/define_trace.h>

3793
kernel/sched/walt/walt.c Normal file

File diff suppressed because it is too large

View File

@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2016-2020, The Linux Foundation. All rights reserved.
* Copyright (c) 2016-2021, The Linux Foundation. All rights reserved.
*/
#ifndef __WALT_H