PM / devfreq: memlat: Add writeback percentage as a lat condition
Certain L3 workloads are latency-sensitive to cache snoop traffic, and this traffic is not directly quantified in the existing memlat scheme. Use the L2 writeback percentage as a metric to identify snoop traffic and improve memlat. Change-Id: I9d43375d96de5a199c6a87c55e5c1079549b23ce Signed-off-by: Santosh Mardi <gsantosh@codeaurora.org>
This commit is contained in:
parent
bd0cb90137
commit
a4ff536215
@ -68,8 +68,20 @@ struct cpu_data {
|
|||||||
* defaults to using all of @cpu_grp's CPUs.
|
* defaults to using all of @cpu_grp's CPUs.
|
||||||
* @miss_ev_id: The event code corresponding to the @miss_ev
|
* @miss_ev_id: The event code corresponding to the @miss_ev
|
||||||
* perf event. Will be 0 for compute.
|
* perf event. Will be 0 for compute.
|
||||||
|
* @access_ev_id: The event code corresponding to the @access_ev
|
||||||
|
* perf event. Optional - only needed for writeback
|
||||||
|
* percent.
|
||||||
|
* @wb_ev_id: The event code corresponding to the @wb_ev perf
|
||||||
|
* event. Optional - only needed for writeback
|
||||||
|
* percent.
|
||||||
* @miss_ev: The cache miss perf event exclusive to this
|
* @miss_ev: The cache miss perf event exclusive to this
|
||||||
* mon. Will be NULL for compute.
|
* mon. Will be NULL for compute.
|
||||||
|
* @access_ev: The cache access perf event exclusive to this
|
||||||
|
* mon. Optional - only needed for writeback
|
||||||
|
* percent.
|
||||||
|
* @wb_ev: The cache writeback perf event exclusive to this
|
||||||
|
* mon. Optional - only needed for writeback
|
||||||
|
* percent.
|
||||||
* @requested_update_ms: The mon's desired polling rate. The lowest
|
* @requested_update_ms: The mon's desired polling rate. The lowest
|
||||||
* @requested_update_ms of all mons determines
|
* @requested_update_ms of all mons determines
|
||||||
* @cpu_grp's update_ms.
|
* @cpu_grp's update_ms.
|
||||||
@ -81,8 +93,12 @@ struct memlat_mon {
|
|||||||
bool is_active;
|
bool is_active;
|
||||||
cpumask_t cpus;
|
cpumask_t cpus;
|
||||||
unsigned int miss_ev_id;
|
unsigned int miss_ev_id;
|
||||||
|
unsigned int access_ev_id;
|
||||||
|
unsigned int wb_ev_id;
|
||||||
unsigned int requested_update_ms;
|
unsigned int requested_update_ms;
|
||||||
struct event_data *miss_ev;
|
struct event_data *miss_ev;
|
||||||
|
struct event_data *access_ev;
|
||||||
|
struct event_data *wb_ev;
|
||||||
struct memlat_hwmon hw;
|
struct memlat_hwmon hw;
|
||||||
|
|
||||||
struct memlat_cpu_grp *cpu_grp;
|
struct memlat_cpu_grp *cpu_grp;
|
||||||
@ -208,6 +224,10 @@ static void update_counts(struct memlat_cpu_grp *cpu_grp)
|
|||||||
cpu - cpumask_first(&mon->cpus);
|
cpu - cpumask_first(&mon->cpus);
|
||||||
cpu_grp->read_event_cpu = cpu;
|
cpu_grp->read_event_cpu = cpu;
|
||||||
read_event(&mon->miss_ev[mon_idx]);
|
read_event(&mon->miss_ev[mon_idx]);
|
||||||
|
if (mon->wb_ev_id && mon->access_ev_id) {
|
||||||
|
read_event(&mon->wb_ev[mon_idx]);
|
||||||
|
read_event(&mon->access_ev[mon_idx]);
|
||||||
|
}
|
||||||
cpu_grp->read_event_cpu = -1;
|
cpu_grp->read_event_cpu = -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -238,6 +258,13 @@ static unsigned long get_cnt(struct memlat_hwmon *hw)
|
|||||||
devstats->inst_count = 0;
|
devstats->inst_count = 0;
|
||||||
devstats->mem_count = 1;
|
devstats->mem_count = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (mon->access_ev_id && mon->wb_ev_id)
|
||||||
|
devstats->wb_pct =
|
||||||
|
mult_frac(100, mon->wb_ev[mon_idx].last_delta,
|
||||||
|
mon->access_ev[mon_idx].last_delta);
|
||||||
|
else
|
||||||
|
devstats->wb_pct = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
@ -581,6 +608,18 @@ static int start_hwmon(struct memlat_hwmon *hw)
|
|||||||
mon->miss_ev_id, attr);
|
mon->miss_ev_id, attr);
|
||||||
if (ret < 0)
|
if (ret < 0)
|
||||||
goto unlock_out;
|
goto unlock_out;
|
||||||
|
|
||||||
|
if (mon->access_ev_id && mon->wb_ev_id) {
|
||||||
|
ret = set_event(&mon->access_ev[idx], cpu,
|
||||||
|
mon->access_ev_id, attr);
|
||||||
|
if (ret)
|
||||||
|
goto unlock_out;
|
||||||
|
|
||||||
|
ret = set_event(&mon->wb_ev[idx], cpu,
|
||||||
|
mon->wb_ev_id, attr);
|
||||||
|
if (ret)
|
||||||
|
goto unlock_out;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -624,6 +663,7 @@ static void stop_hwmon(struct memlat_hwmon *hw)
|
|||||||
devstats->mem_count = 0;
|
devstats->mem_count = 0;
|
||||||
devstats->freq = 0;
|
devstats->freq = 0;
|
||||||
devstats->stall_pct = 0;
|
devstats->stall_pct = 0;
|
||||||
|
devstats->wb_pct = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!cpu_grp->num_active_mons) {
|
if (!cpu_grp->num_active_mons) {
|
||||||
@ -894,6 +934,8 @@ static int memlat_mon_probe(struct platform_device *pdev, bool is_compute)
|
|||||||
*/
|
*/
|
||||||
if (is_compute) {
|
if (is_compute) {
|
||||||
mon->miss_ev_id = 0;
|
mon->miss_ev_id = 0;
|
||||||
|
mon->access_ev_id = 0;
|
||||||
|
mon->wb_ev_id = 0;
|
||||||
ret = register_compute(dev, hw);
|
ret = register_compute(dev, hw);
|
||||||
} else {
|
} else {
|
||||||
mon->miss_ev =
|
mon->miss_ev =
|
||||||
@ -914,6 +956,39 @@ static int memlat_mon_probe(struct platform_device *pdev, bool is_compute)
|
|||||||
}
|
}
|
||||||
mon->miss_ev_id = event_id;
|
mon->miss_ev_id = event_id;
|
||||||
|
|
||||||
|
ret = of_property_read_u32(dev->of_node, "qcom,access-ev",
|
||||||
|
&event_id);
|
||||||
|
if (ret)
|
||||||
|
dev_dbg(dev, "Access event not specified. Skipping.\n");
|
||||||
|
else
|
||||||
|
mon->access_ev_id = event_id;
|
||||||
|
|
||||||
|
ret = of_property_read_u32(dev->of_node, "qcom,wb-ev",
|
||||||
|
&event_id);
|
||||||
|
if (ret)
|
||||||
|
dev_dbg(dev, "WB event not specified. Skipping.\n");
|
||||||
|
else
|
||||||
|
mon->wb_ev_id = event_id;
|
||||||
|
|
||||||
|
if (mon->wb_ev_id && mon->access_ev_id) {
|
||||||
|
mon->access_ev =
|
||||||
|
devm_kzalloc(dev, num_cpus *
|
||||||
|
sizeof(*mon->access_ev),
|
||||||
|
GFP_KERNEL);
|
||||||
|
if (!mon->access_ev) {
|
||||||
|
ret = -ENOMEM;
|
||||||
|
goto unlock_out;
|
||||||
|
}
|
||||||
|
|
||||||
|
mon->wb_ev =
|
||||||
|
devm_kzalloc(dev, num_cpus *
|
||||||
|
sizeof(*mon->wb_ev), GFP_KERNEL);
|
||||||
|
if (!mon->wb_ev) {
|
||||||
|
ret = -ENOMEM;
|
||||||
|
goto unlock_out;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
ret = register_memlat(dev, hw);
|
ret = register_memlat(dev, hw);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -29,6 +29,8 @@
|
|||||||
struct memlat_node {
|
struct memlat_node {
|
||||||
unsigned int ratio_ceil;
|
unsigned int ratio_ceil;
|
||||||
unsigned int stall_floor;
|
unsigned int stall_floor;
|
||||||
|
unsigned int wb_pct_thres;
|
||||||
|
unsigned int wb_filter_ratio;
|
||||||
bool mon_started;
|
bool mon_started;
|
||||||
bool already_zero;
|
bool already_zero;
|
||||||
struct list_head list;
|
struct list_head list;
|
||||||
@ -296,11 +298,14 @@ static int devfreq_memlat_get_freq(struct devfreq *df,
|
|||||||
hw->core_stats[i].inst_count,
|
hw->core_stats[i].inst_count,
|
||||||
hw->core_stats[i].mem_count,
|
hw->core_stats[i].mem_count,
|
||||||
hw->core_stats[i].freq,
|
hw->core_stats[i].freq,
|
||||||
hw->core_stats[i].stall_pct, ratio);
|
hw->core_stats[i].stall_pct,
|
||||||
|
hw->core_stats[i].wb_pct, ratio);
|
||||||
|
|
||||||
if (ratio <= node->ratio_ceil
|
if (((ratio <= node->ratio_ceil
|
||||||
&& hw->core_stats[i].stall_pct >= node->stall_floor
|
&& hw->core_stats[i].stall_pct >= node->stall_floor) ||
|
||||||
&& hw->core_stats[i].freq > max_freq) {
|
(hw->core_stats[i].wb_pct >= node->wb_pct_thres
|
||||||
|
&& ratio <= node->wb_filter_ratio))
|
||||||
|
&& (hw->core_stats[i].freq > max_freq)) {
|
||||||
lat_dev = i;
|
lat_dev = i;
|
||||||
max_freq = hw->core_stats[i].freq;
|
max_freq = hw->core_stats[i].freq;
|
||||||
}
|
}
|
||||||
@ -330,11 +335,19 @@ static DEVICE_ATTR_RW(ratio_ceil);
|
|||||||
show_attr(stall_floor);
|
show_attr(stall_floor);
|
||||||
store_attr(stall_floor, 0U, 100U);
|
store_attr(stall_floor, 0U, 100U);
|
||||||
static DEVICE_ATTR_RW(stall_floor);
|
static DEVICE_ATTR_RW(stall_floor);
|
||||||
|
show_attr(wb_pct_thres);
|
||||||
|
store_attr(wb_pct_thres, 0U, 100U);
|
||||||
|
static DEVICE_ATTR_RW(wb_pct_thres);
|
||||||
|
show_attr(wb_filter_ratio);
|
||||||
|
store_attr(wb_filter_ratio, 0U, 50000U);
|
||||||
|
static DEVICE_ATTR_RW(wb_filter_ratio);
|
||||||
|
|
||||||
static struct attribute *memlat_dev_attr[] = {
|
static struct attribute *memlat_dev_attr[] = {
|
||||||
&dev_attr_ratio_ceil.attr,
|
&dev_attr_ratio_ceil.attr,
|
||||||
&dev_attr_stall_floor.attr,
|
&dev_attr_stall_floor.attr,
|
||||||
&dev_attr_freq_map.attr,
|
&dev_attr_freq_map.attr,
|
||||||
|
&dev_attr_wb_pct_thres.attr,
|
||||||
|
&dev_attr_wb_filter_ratio.attr,
|
||||||
NULL,
|
NULL,
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -495,6 +508,8 @@ static struct memlat_node *register_common(struct device *dev,
|
|||||||
return ERR_PTR(-ENOMEM);
|
return ERR_PTR(-ENOMEM);
|
||||||
|
|
||||||
node->ratio_ceil = 10;
|
node->ratio_ceil = 10;
|
||||||
|
node->wb_pct_thres = 100;
|
||||||
|
node->wb_filter_ratio = 25000;
|
||||||
node->hw = hw;
|
node->hw = hw;
|
||||||
|
|
||||||
if (hw->get_child_of_node)
|
if (hw->get_child_of_node)
|
||||||
|
@ -15,6 +15,9 @@
|
|||||||
* @mem_count: Number of memory accesses made.
|
* @mem_count: Number of memory accesses made.
|
||||||
* @freq: Effective frequency of the device in the
|
* @freq: Effective frequency of the device in the
|
||||||
* last interval.
|
* last interval.
|
||||||
|
* @wb_pct: The percentage of writebacks relative to accesses. Used as an
|
||||||
|
* indirect way to identify memory latency due to
|
||||||
|
* snoop activity.
|
||||||
*/
|
*/
|
||||||
struct dev_stats {
|
struct dev_stats {
|
||||||
int id;
|
int id;
|
||||||
@ -22,6 +25,7 @@ struct dev_stats {
|
|||||||
unsigned long mem_count;
|
unsigned long mem_count;
|
||||||
unsigned long freq;
|
unsigned long freq;
|
||||||
unsigned long stall_pct;
|
unsigned long stall_pct;
|
||||||
|
unsigned long wb_pct;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct core_dev_map {
|
struct core_dev_map {
|
||||||
|
@ -670,9 +670,9 @@ TRACE_EVENT(memlat_dev_meas,
|
|||||||
|
|
||||||
TP_PROTO(const char *name, unsigned int dev_id, unsigned long inst,
|
TP_PROTO(const char *name, unsigned int dev_id, unsigned long inst,
|
||||||
unsigned long mem, unsigned long freq, unsigned int stall,
|
unsigned long mem, unsigned long freq, unsigned int stall,
|
||||||
unsigned int ratio),
|
unsigned int wb, unsigned int ratio),
|
||||||
|
|
||||||
TP_ARGS(name, dev_id, inst, mem, freq, stall, ratio),
|
TP_ARGS(name, dev_id, inst, mem, freq, stall, wb, ratio),
|
||||||
|
|
||||||
TP_STRUCT__entry(
|
TP_STRUCT__entry(
|
||||||
__string(name, name)
|
__string(name, name)
|
||||||
@ -681,6 +681,7 @@ TRACE_EVENT(memlat_dev_meas,
|
|||||||
__field(unsigned long, mem)
|
__field(unsigned long, mem)
|
||||||
__field(unsigned long, freq)
|
__field(unsigned long, freq)
|
||||||
__field(unsigned int, stall)
|
__field(unsigned int, stall)
|
||||||
|
__field(unsigned int, wb)
|
||||||
__field(unsigned int, ratio)
|
__field(unsigned int, ratio)
|
||||||
),
|
),
|
||||||
|
|
||||||
@ -691,16 +692,18 @@ TRACE_EVENT(memlat_dev_meas,
|
|||||||
__entry->mem = mem;
|
__entry->mem = mem;
|
||||||
__entry->freq = freq;
|
__entry->freq = freq;
|
||||||
__entry->stall = stall;
|
__entry->stall = stall;
|
||||||
|
__entry->wb = wb;
|
||||||
__entry->ratio = ratio;
|
__entry->ratio = ratio;
|
||||||
),
|
),
|
||||||
|
|
||||||
TP_printk("dev: %s, id=%u, inst=%lu, mem=%lu, freq=%lu, stall=%u, ratio=%u",
|
TP_printk("dev: %s, id=%u, inst=%lu, mem=%lu, freq=%lu, stall=%u, wb=%u, ratio=%u",
|
||||||
__get_str(name),
|
__get_str(name),
|
||||||
__entry->dev_id,
|
__entry->dev_id,
|
||||||
__entry->inst,
|
__entry->inst,
|
||||||
__entry->mem,
|
__entry->mem,
|
||||||
__entry->freq,
|
__entry->freq,
|
||||||
__entry->stall,
|
__entry->stall,
|
||||||
|
__entry->wb,
|
||||||
__entry->ratio)
|
__entry->ratio)
|
||||||
);
|
);
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user