PM / devfreq: memlat: Add writeback percentage as a lat condition

Certain L3 workloads are latency-sensitive to cache snoop traffic, and
this traffic is not directly quantified by the existing memlat scheme.

Use the L2 writeback percentage (writebacks as a percentage of cache
accesses) as a metric to identify snoop traffic, adding it as a latency
condition to improve memlat.

Change-Id: I9d43375d96de5a199c6a87c55e5c1079549b23ce
Signed-off-by: Santosh Mardi <gsantosh@codeaurora.org>
This commit is contained in:
Santosh Mardi 2020-07-14 08:50:33 +05:30
parent bd0cb90137
commit a4ff536215
4 changed files with 104 additions and 7 deletions

View File

@ -68,8 +68,20 @@ struct cpu_data {
* defaults to using all of @cpu_grp's CPUs. * defaults to using all of @cpu_grp's CPUs.
* @miss_ev_id: The event code corresponding to the @miss_ev * @miss_ev_id: The event code corresponding to the @miss_ev
* perf event. Will be 0 for compute. * perf event. Will be 0 for compute.
* @access_ev_id: The event code corresponding to the @access_ev
* perf event. Optional - only needed for writeback
* percent.
* @wb_ev_id: The event code corresponding to the @wb_ev perf
* event. Optional - only needed for writeback
* percent.
* @miss_ev: The cache miss perf event exclusive to this * @miss_ev: The cache miss perf event exclusive to this
* mon. Will be NULL for compute. * mon. Will be NULL for compute.
* @access_ev: The cache access perf event exclusive to this
* mon. Optional - only needed for writeback
* percent.
* @wb_ev: The cache writeback perf event exclusive to this
* mon. Optional - only needed for writeback
* percent.
* @requested_update_ms: The mon's desired polling rate. The lowest * @requested_update_ms: The mon's desired polling rate. The lowest
* @requested_update_ms of all mons determines * @requested_update_ms of all mons determines
* @cpu_grp's update_ms. * @cpu_grp's update_ms.
@ -81,8 +93,12 @@ struct memlat_mon {
bool is_active; bool is_active;
cpumask_t cpus; cpumask_t cpus;
unsigned int miss_ev_id; unsigned int miss_ev_id;
unsigned int access_ev_id;
unsigned int wb_ev_id;
unsigned int requested_update_ms; unsigned int requested_update_ms;
struct event_data *miss_ev; struct event_data *miss_ev;
struct event_data *access_ev;
struct event_data *wb_ev;
struct memlat_hwmon hw; struct memlat_hwmon hw;
struct memlat_cpu_grp *cpu_grp; struct memlat_cpu_grp *cpu_grp;
@ -208,6 +224,10 @@ static void update_counts(struct memlat_cpu_grp *cpu_grp)
cpu - cpumask_first(&mon->cpus); cpu - cpumask_first(&mon->cpus);
cpu_grp->read_event_cpu = cpu; cpu_grp->read_event_cpu = cpu;
read_event(&mon->miss_ev[mon_idx]); read_event(&mon->miss_ev[mon_idx]);
if (mon->wb_ev_id && mon->access_ev_id) {
read_event(&mon->wb_ev[mon_idx]);
read_event(&mon->access_ev[mon_idx]);
}
cpu_grp->read_event_cpu = -1; cpu_grp->read_event_cpu = -1;
} }
} }
@ -238,6 +258,13 @@ static unsigned long get_cnt(struct memlat_hwmon *hw)
devstats->inst_count = 0; devstats->inst_count = 0;
devstats->mem_count = 1; devstats->mem_count = 1;
} }
if (mon->access_ev_id && mon->wb_ev_id)
devstats->wb_pct =
mult_frac(100, mon->wb_ev[mon_idx].last_delta,
mon->access_ev[mon_idx].last_delta);
else
devstats->wb_pct = 0;
} }
return 0; return 0;
@ -581,6 +608,18 @@ static int start_hwmon(struct memlat_hwmon *hw)
mon->miss_ev_id, attr); mon->miss_ev_id, attr);
if (ret < 0) if (ret < 0)
goto unlock_out; goto unlock_out;
if (mon->access_ev_id && mon->wb_ev_id) {
ret = set_event(&mon->access_ev[idx], cpu,
mon->access_ev_id, attr);
if (ret)
goto unlock_out;
ret = set_event(&mon->wb_ev[idx], cpu,
mon->wb_ev_id, attr);
if (ret)
goto unlock_out;
}
} }
} }
@ -624,6 +663,7 @@ static void stop_hwmon(struct memlat_hwmon *hw)
devstats->mem_count = 0; devstats->mem_count = 0;
devstats->freq = 0; devstats->freq = 0;
devstats->stall_pct = 0; devstats->stall_pct = 0;
devstats->wb_pct = 0;
} }
if (!cpu_grp->num_active_mons) { if (!cpu_grp->num_active_mons) {
@ -894,6 +934,8 @@ static int memlat_mon_probe(struct platform_device *pdev, bool is_compute)
*/ */
if (is_compute) { if (is_compute) {
mon->miss_ev_id = 0; mon->miss_ev_id = 0;
mon->access_ev_id = 0;
mon->wb_ev_id = 0;
ret = register_compute(dev, hw); ret = register_compute(dev, hw);
} else { } else {
mon->miss_ev = mon->miss_ev =
@ -914,6 +956,39 @@ static int memlat_mon_probe(struct platform_device *pdev, bool is_compute)
} }
mon->miss_ev_id = event_id; mon->miss_ev_id = event_id;
ret = of_property_read_u32(dev->of_node, "qcom,access-ev",
&event_id);
if (ret)
dev_dbg(dev, "Access event not specified. Skipping.\n");
else
mon->access_ev_id = event_id;
ret = of_property_read_u32(dev->of_node, "qcom,wb-ev",
&event_id);
if (ret)
dev_dbg(dev, "WB event not specified. Skipping.\n");
else
mon->wb_ev_id = event_id;
if (mon->wb_ev_id && mon->access_ev_id) {
mon->access_ev =
devm_kzalloc(dev, num_cpus *
sizeof(*mon->access_ev),
GFP_KERNEL);
if (!mon->access_ev) {
ret = -ENOMEM;
goto unlock_out;
}
mon->wb_ev =
devm_kzalloc(dev, num_cpus *
sizeof(*mon->wb_ev), GFP_KERNEL);
if (!mon->wb_ev) {
ret = -ENOMEM;
goto unlock_out;
}
}
ret = register_memlat(dev, hw); ret = register_memlat(dev, hw);
} }

View File

@ -29,6 +29,8 @@
struct memlat_node { struct memlat_node {
unsigned int ratio_ceil; unsigned int ratio_ceil;
unsigned int stall_floor; unsigned int stall_floor;
unsigned int wb_pct_thres;
unsigned int wb_filter_ratio;
bool mon_started; bool mon_started;
bool already_zero; bool already_zero;
struct list_head list; struct list_head list;
@ -296,11 +298,14 @@ static int devfreq_memlat_get_freq(struct devfreq *df,
hw->core_stats[i].inst_count, hw->core_stats[i].inst_count,
hw->core_stats[i].mem_count, hw->core_stats[i].mem_count,
hw->core_stats[i].freq, hw->core_stats[i].freq,
hw->core_stats[i].stall_pct, ratio); hw->core_stats[i].stall_pct,
hw->core_stats[i].wb_pct, ratio);
if (ratio <= node->ratio_ceil if (((ratio <= node->ratio_ceil
&& hw->core_stats[i].stall_pct >= node->stall_floor && hw->core_stats[i].stall_pct >= node->stall_floor) ||
&& hw->core_stats[i].freq > max_freq) { (hw->core_stats[i].wb_pct >= node->wb_pct_thres
&& ratio <= node->wb_filter_ratio))
&& (hw->core_stats[i].freq > max_freq)) {
lat_dev = i; lat_dev = i;
max_freq = hw->core_stats[i].freq; max_freq = hw->core_stats[i].freq;
} }
@ -330,11 +335,19 @@ static DEVICE_ATTR_RW(ratio_ceil);
show_attr(stall_floor); show_attr(stall_floor);
store_attr(stall_floor, 0U, 100U); store_attr(stall_floor, 0U, 100U);
static DEVICE_ATTR_RW(stall_floor); static DEVICE_ATTR_RW(stall_floor);
show_attr(wb_pct_thres);
store_attr(wb_pct_thres, 0U, 100U);
static DEVICE_ATTR_RW(wb_pct_thres);
show_attr(wb_filter_ratio);
store_attr(wb_filter_ratio, 0U, 50000U);
static DEVICE_ATTR_RW(wb_filter_ratio);
static struct attribute *memlat_dev_attr[] = { static struct attribute *memlat_dev_attr[] = {
&dev_attr_ratio_ceil.attr, &dev_attr_ratio_ceil.attr,
&dev_attr_stall_floor.attr, &dev_attr_stall_floor.attr,
&dev_attr_freq_map.attr, &dev_attr_freq_map.attr,
&dev_attr_wb_pct_thres.attr,
&dev_attr_wb_filter_ratio.attr,
NULL, NULL,
}; };
@ -495,6 +508,8 @@ static struct memlat_node *register_common(struct device *dev,
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
node->ratio_ceil = 10; node->ratio_ceil = 10;
node->wb_pct_thres = 100;
node->wb_filter_ratio = 25000;
node->hw = hw; node->hw = hw;
if (hw->get_child_of_node) if (hw->get_child_of_node)

View File

@ -15,6 +15,9 @@
* @mem_count: Number of memory accesses made. * @mem_count: Number of memory accesses made.
* @freq: Effective frequency of the device in the * @freq: Effective frequency of the device in the
* last interval. * last interval.
* @wb_pct: Writebacks as a percentage of cache accesses. Used as an
* indirect way to identify memory latency due to
* snoop activity.
*/ */
struct dev_stats { struct dev_stats {
int id; int id;
@ -22,6 +25,7 @@ struct dev_stats {
unsigned long mem_count; unsigned long mem_count;
unsigned long freq; unsigned long freq;
unsigned long stall_pct; unsigned long stall_pct;
unsigned long wb_pct;
}; };
struct core_dev_map { struct core_dev_map {

View File

@ -670,9 +670,9 @@ TRACE_EVENT(memlat_dev_meas,
TP_PROTO(const char *name, unsigned int dev_id, unsigned long inst, TP_PROTO(const char *name, unsigned int dev_id, unsigned long inst,
unsigned long mem, unsigned long freq, unsigned int stall, unsigned long mem, unsigned long freq, unsigned int stall,
unsigned int ratio), unsigned int wb, unsigned int ratio),
TP_ARGS(name, dev_id, inst, mem, freq, stall, ratio), TP_ARGS(name, dev_id, inst, mem, freq, stall, wb, ratio),
TP_STRUCT__entry( TP_STRUCT__entry(
__string(name, name) __string(name, name)
@ -681,6 +681,7 @@ TRACE_EVENT(memlat_dev_meas,
__field(unsigned long, mem) __field(unsigned long, mem)
__field(unsigned long, freq) __field(unsigned long, freq)
__field(unsigned int, stall) __field(unsigned int, stall)
__field(unsigned int, wb)
__field(unsigned int, ratio) __field(unsigned int, ratio)
), ),
@ -691,16 +692,18 @@ TRACE_EVENT(memlat_dev_meas,
__entry->mem = mem; __entry->mem = mem;
__entry->freq = freq; __entry->freq = freq;
__entry->stall = stall; __entry->stall = stall;
__entry->wb = wb;
__entry->ratio = ratio; __entry->ratio = ratio;
), ),
TP_printk("dev: %s, id=%u, inst=%lu, mem=%lu, freq=%lu, stall=%u, ratio=%u", TP_printk("dev: %s, id=%u, inst=%lu, mem=%lu, freq=%lu, stall=%u, wb=%u, ratio=%u",
__get_str(name), __get_str(name),
__entry->dev_id, __entry->dev_id,
__entry->inst, __entry->inst,
__entry->mem, __entry->mem,
__entry->freq, __entry->freq,
__entry->stall, __entry->stall,
__entry->wb,
__entry->ratio) __entry->ratio)
); );