Revert "perf: Fix perf_event_read_local() time"
This reverts commit91b04e83c7
which is commit 09f5e7dc7ad705289e1b1ec065439aa3c42951c4 upstream It breaks the kernel abi and is not needed for Android systems, so revert it. Bug: 161946584 Fixes:91b04e83c7
("perf: Fix perf_event_read_local() time") Signed-off-by: Greg Kroah-Hartman <gregkh@google.com> Change-Id: Ic93864b39858bea47cc735e83c294fc4063b9dcd
parent 0b4470b56e
commit e33a5b611c
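For orientation before the diff: the reverted upstream change replaced the per-event shadow_ctx_time snapshot with a per-context (and per-cgroup) timeoffset that a reader can combine with perf_clock() using a single READ_ONCE(), which is what made the time values in perf_event_read_local() safe to compute from NMI context. The userspace sketch below is illustrative only; every name in it is invented for the example and nothing here is kernel API. It shows the arithmetic the two schemes rely on: time' = time + (now - timestamp) rearranged into now + (time - timestamp).

/*
 * Illustrative userspace sketch (not kernel code) of the two
 * timekeeping schemes this revert toggles between.  All identifiers
 * are made up; only the arithmetic mirrors the kernel logic.
 */
#include <stdint.h>
#include <stdio.h>
#include <time.h>

static uint64_t fake_perf_clock(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
}

/* Scheme restored by the revert: keep an absolute timestamp and fold
 * elapsed time forward under the context lock. */
struct ctx_old {
	uint64_t time;      /* accumulated context time */
	uint64_t timestamp; /* when 'time' was last folded forward */
};

static uint64_t ctx_time_old(struct ctx_old *c)
{
	/* Reads two fields; in the kernel this needs ctx->lock. */
	return c->time + (fake_perf_clock() - c->timestamp);
}

/* Scheme removed by the revert: publish a single derived offset so a
 * lockless reader only needs one load plus the clock. */
struct ctx_new {
	uint64_t time;
	uint64_t timestamp;
	uint64_t timeoffset; /* time - timestamp, updated together */
};

static void ctx_update_new(struct ctx_new *c)
{
	uint64_t now = fake_perf_clock();

	c->time += now - c->timestamp;
	c->timestamp = now;
	c->timeoffset = c->time - c->timestamp; /* single published value */
}

static uint64_t ctx_time_new(struct ctx_new *c)
{
	/* time' = time + (now - timestamp) == now + timeoffset */
	return fake_perf_clock() + c->timeoffset;
}

int main(void)
{
	struct ctx_old o = { .time = 0, .timestamp = fake_perf_clock() };
	struct ctx_new n = { .time = 0, .timestamp = fake_perf_clock() };

	ctx_update_new(&n);
	printf("old scheme: %llu ns\n", (unsigned long long)ctx_time_old(&o));
	printf("new scheme: %llu ns\n", (unsigned long long)ctx_time_new(&n));
	return 0;
}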
@@ -678,6 +678,18 @@ struct perf_event {
	u64 total_time_running;
	u64 tstamp;

	/*
	 * timestamp shadows the actual context timing but it can
	 * be safely used in NMI interrupt context. It reflects the
	 * context time as it was when the event was last scheduled in,
	 * or when ctx_sched_in failed to schedule the event because we
	 * run out of PMC.
	 *
	 * ctx_time already accounts for ctx->timestamp. Therefore to
	 * compute ctx_time for a sample, simply add perf_clock().
	 */
	u64 shadow_ctx_time;

	struct perf_event_attr attr;
	u16 header_size;
	u16 id_header_size;
@@ -822,7 +834,6 @@ struct perf_event_context {
	 */
	u64 time;
	u64 timestamp;
	u64 timeoffset;

	/*
	 * These fields let us detect when two contexts have both
@@ -905,8 +916,6 @@ struct bpf_perf_event_data_kern {
struct perf_cgroup_info {
	u64 time;
	u64 timestamp;
	u64 timeoffset;
	int active;
};

struct perf_cgroup {
@@ -673,23 +673,6 @@ perf_event_set_state(struct perf_event *event, enum perf_event_state state)
	WRITE_ONCE(event->state, state);
}

/*
 * UP store-release, load-acquire
 */

#define __store_release(ptr, val) \
do { \
	barrier(); \
	WRITE_ONCE(*(ptr), (val)); \
} while (0)

#define __load_acquire(ptr) \
({ \
	__unqual_scalar_typeof(*(ptr)) ___p = READ_ONCE(*(ptr)); \
	barrier(); \
	___p; \
})

#ifdef CONFIG_CGROUP_PERF

static inline bool
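The hunk above removes the compiler-barrier based store-release/load-acquire helpers the upstream fix used to publish the active/timeoffset pair to an NMI-like reader on the same CPU (hence "UP": the barriers order the compiler, not other CPUs). A hedged userspace approximation of that pairing follows; the names and the demo around them are invented for illustration and are not the kernel macros.

/* Illustrative only: a UP-style release/acquire pairing built from
 * compiler barriers, modelled on the helpers removed above. */
#include <stdint.h>

#define demo_barrier() __asm__ __volatile__("" ::: "memory")

/* barrier first, then the store: prior writes are visible before 'val' */
#define demo_store_release(ptr, val) \
do { \
	demo_barrier(); \
	*(volatile __typeof__(*(ptr)) *)(ptr) = (val); \
} while (0)

/* load first, then barrier: later reads are not hoisted above the load */
#define demo_load_acquire(ptr) \
({ \
	__typeof__(*(ptr)) ___p = *(volatile __typeof__(*(ptr)) *)(ptr); \
	demo_barrier(); \
	___p; \
})

struct demo_info {
	uint64_t timeoffset;
	int active;
};

/* Writer side: publish timeoffset before marking the info active. */
void demo_publish(struct demo_info *info, uint64_t off)
{
	info->timeoffset = off;
	demo_store_release(&info->active, 1);
}

/* NMI-like reader on the same CPU: only trust timeoffset if active. */
int demo_read(struct demo_info *info, uint64_t *off)
{
	if (!demo_load_acquire(&info->active))
		return -1;
	*off = info->timeoffset;
	return 0;
}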
@@ -735,51 +718,34 @@ static inline u64 perf_cgroup_event_time(struct perf_event *event)
	return t->time;
}

static inline u64 perf_cgroup_event_time_now(struct perf_event *event, u64 now)
static inline void __update_cgrp_time(struct perf_cgroup *cgrp)
{
	struct perf_cgroup_info *t;
	struct perf_cgroup_info *info;
	u64 now;

	t = per_cpu_ptr(event->cgrp->info, event->cpu);
	if (!__load_acquire(&t->active))
		return t->time;
	now += READ_ONCE(t->timeoffset);
	return now;
}
	now = perf_clock();

static inline void __update_cgrp_time(struct perf_cgroup_info *info, u64 now, bool adv)
{
	if (adv)
		info->time += now - info->timestamp;
	info = this_cpu_ptr(cgrp->info);

	info->time += now - info->timestamp;
	info->timestamp = now;
	/*
	 * see update_context_time()
	 */
	WRITE_ONCE(info->timeoffset, info->time - info->timestamp);
}

static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx, bool final)
static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx)
{
	struct perf_cgroup *cgrp = cpuctx->cgrp;
	struct cgroup_subsys_state *css;
	struct perf_cgroup_info *info;

	if (cgrp) {
		u64 now = perf_clock();

		for (css = &cgrp->css; css; css = css->parent) {
			cgrp = container_of(css, struct perf_cgroup, css);
			info = this_cpu_ptr(cgrp->info);

			__update_cgrp_time(info, now, true);
			if (final)
				__store_release(&info->active, 0);
			__update_cgrp_time(cgrp);
		}
	}
}

static inline void update_cgrp_time_from_event(struct perf_event *event)
{
	struct perf_cgroup_info *info;
	struct perf_cgroup *cgrp;

	/*
@@ -793,10 +759,8 @@ static inline void update_cgrp_time_from_event(struct perf_event *event)
	/*
	 * Do not update time when cgroup is not active
	 */
	if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup)) {
		info = this_cpu_ptr(event->cgrp->info);
		__update_cgrp_time(info, perf_clock(), true);
	}
	if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup))
		__update_cgrp_time(event->cgrp);
}

static inline void
@@ -820,8 +784,7 @@ perf_cgroup_set_timestamp(struct task_struct *task,
	for (css = &cgrp->css; css; css = css->parent) {
		cgrp = container_of(css, struct perf_cgroup, css);
		info = this_cpu_ptr(cgrp->info);
		__update_cgrp_time(info, ctx->timestamp, false);
		__store_release(&info->active, 1);
		info->timestamp = ctx->timestamp;
	}
}

@@ -1017,6 +980,14 @@ static inline int perf_cgroup_connect(int fd, struct perf_event *event,
	return ret;
}

static inline void
perf_cgroup_set_shadow_time(struct perf_event *event, u64 now)
{
	struct perf_cgroup_info *t;
	t = per_cpu_ptr(event->cgrp->info, event->cpu);
	event->shadow_ctx_time = now - t->timestamp;
}

static inline void
perf_cgroup_event_enable(struct perf_event *event, struct perf_event_context *ctx)
{
@@ -1094,8 +1065,7 @@ static inline void update_cgrp_time_from_event(struct perf_event *event)
{
}

static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx,
						bool final)
static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx)
{
}

@@ -1127,12 +1097,12 @@ perf_cgroup_switch(struct task_struct *task, struct task_struct *next)
{
}

static inline u64 perf_cgroup_event_time(struct perf_event *event)
static inline void
perf_cgroup_set_shadow_time(struct perf_event *event, u64 now)
{
	return 0;
}

static inline u64 perf_cgroup_event_time_now(struct perf_event *event, u64 now)
static inline u64 perf_cgroup_event_time(struct perf_event *event)
{
	return 0;
}
@@ -1554,59 +1524,22 @@ static void perf_unpin_context(struct perf_event_context *ctx)
/*
 * Update the record of the current time in a context.
 */
static void __update_context_time(struct perf_event_context *ctx, bool adv)
static void update_context_time(struct perf_event_context *ctx)
{
	u64 now = perf_clock();

	if (adv)
		ctx->time += now - ctx->timestamp;
	ctx->time += now - ctx->timestamp;
	ctx->timestamp = now;

	/*
	 * The above: time' = time + (now - timestamp), can be re-arranged
	 * into: time` = now + (time - timestamp), which gives a single value
	 * offset to compute future time without locks on.
	 *
	 * See perf_event_time_now(), which can be used from NMI context where
	 * it's (obviously) not possible to acquire ctx->lock in order to read
	 * both the above values in a consistent manner.
	 */
	WRITE_ONCE(ctx->timeoffset, ctx->time - ctx->timestamp);
}

static void update_context_time(struct perf_event_context *ctx)
{
	__update_context_time(ctx, true);
}

static u64 perf_event_time(struct perf_event *event)
{
	struct perf_event_context *ctx = event->ctx;

	if (unlikely(!ctx))
		return 0;

	if (is_cgroup_event(event))
		return perf_cgroup_event_time(event);

	return ctx->time;
}

static u64 perf_event_time_now(struct perf_event *event, u64 now)
{
	struct perf_event_context *ctx = event->ctx;

	if (unlikely(!ctx))
		return 0;

	if (is_cgroup_event(event))
		return perf_cgroup_event_time_now(event, now);

	if (!(__load_acquire(&ctx->is_active) & EVENT_TIME))
		return ctx->time;

	now += READ_ONCE(ctx->timeoffset);
	return now;
	return ctx ? ctx->time : 0;
}

static enum event_type_t get_event_type(struct perf_event *event)
@@ -2400,7 +2333,7 @@ __perf_remove_from_context(struct perf_event *event,

	if (ctx->is_active & EVENT_TIME) {
		update_context_time(ctx);
		update_cgrp_time_from_cpuctx(cpuctx, false);
		update_cgrp_time_from_cpuctx(cpuctx);
	}

	event_sched_out(event, cpuctx, ctx);
@@ -2409,9 +2342,6 @@ __perf_remove_from_context(struct perf_event *event,
	list_del_event(event, ctx);

	if (!ctx->nr_events && ctx->is_active) {
		if (ctx == &cpuctx->ctx)
			update_cgrp_time_from_cpuctx(cpuctx, true);

		ctx->is_active = 0;
		ctx->rotate_necessary = 0;
		if (ctx->task) {
@@ -2537,6 +2467,40 @@ void perf_event_disable_inatomic(struct perf_event *event)
	irq_work_queue(&event->pending);
}

static void perf_set_shadow_time(struct perf_event *event,
				 struct perf_event_context *ctx)
{
	/*
	 * use the correct time source for the time snapshot
	 *
	 * We could get by without this by leveraging the
	 * fact that to get to this function, the caller
	 * has most likely already called update_context_time()
	 * and update_cgrp_time_xx() and thus both timestamp
	 * are identical (or very close). Given that tstamp is,
	 * already adjusted for cgroup, we could say that:
	 * tstamp - ctx->timestamp
	 * is equivalent to
	 * tstamp - cgrp->timestamp.
	 *
	 * Then, in perf_output_read(), the calculation would
	 * work with no changes because:
	 * - event is guaranteed scheduled in
	 * - no scheduled out in between
	 * - thus the timestamp would be the same
	 *
	 * But this is a bit hairy.
	 *
	 * So instead, we have an explicit cgroup call to remain
	 * within the time source all along. We believe it
	 * is cleaner and simpler to understand.
	 */
	if (is_cgroup_event(event))
		perf_cgroup_set_shadow_time(event, event->tstamp);
	else
		event->shadow_ctx_time = event->tstamp - ctx->timestamp;
}

#define MAX_INTERRUPTS (~0ULL)

static void perf_log_throttle(struct perf_event *event, int enable);
@@ -2577,6 +2541,8 @@ event_sched_in(struct perf_event *event,

	perf_pmu_disable(event->pmu);

	perf_set_shadow_time(event, ctx);

	perf_log_itrace_start(event);

	if (event->pmu->add(event, PERF_EF_START)) {
@@ -3250,6 +3216,16 @@ static void ctx_sched_out(struct perf_event_context *ctx,
		return;
	}

	ctx->is_active &= ~event_type;
	if (!(ctx->is_active & EVENT_ALL))
		ctx->is_active = 0;

	if (ctx->task) {
		WARN_ON_ONCE(cpuctx->task_ctx != ctx);
		if (!ctx->is_active)
			cpuctx->task_ctx = NULL;
	}

	/*
	 * Always update time if it was set; not only when it changes.
	 * Otherwise we can 'forget' to update time for any but the last
@@ -3263,22 +3239,7 @@ static void ctx_sched_out(struct perf_event_context *ctx,
	if (is_active & EVENT_TIME) {
		/* update (and stop) ctx time */
		update_context_time(ctx);
		update_cgrp_time_from_cpuctx(cpuctx, ctx == &cpuctx->ctx);
		/*
		 * CPU-release for the below ->is_active store,
		 * see __load_acquire() in perf_event_time_now()
		 */
		barrier();
	}

	ctx->is_active &= ~event_type;
	if (!(ctx->is_active & EVENT_ALL))
		ctx->is_active = 0;

	if (ctx->task) {
		WARN_ON_ONCE(cpuctx->task_ctx != ctx);
		if (!ctx->is_active)
			cpuctx->task_ctx = NULL;
		update_cgrp_time_from_cpuctx(cpuctx);
	}

	is_active ^= ctx->is_active; /* changed bits */
@@ -3715,19 +3676,13 @@ static noinline int visit_groups_merge(struct perf_cpu_context *cpuctx,
	return 0;
}

/*
 * Because the userpage is strictly per-event (there is no concept of context,
 * so there cannot be a context indirection), every userpage must be updated
 * when context time starts :-(
 *
 * IOW, we must not miss EVENT_TIME edges.
 */
static inline bool event_update_userpage(struct perf_event *event)
{
	if (likely(!atomic_read(&event->mmap_count)))
		return false;

	perf_event_update_time(event);
	perf_set_shadow_time(event, event->ctx);
	perf_event_update_userpage(event);

	return true;
@@ -3811,23 +3766,13 @@ ctx_sched_in(struct perf_event_context *ctx,
	     struct task_struct *task)
{
	int is_active = ctx->is_active;
	u64 now;

	lockdep_assert_held(&ctx->lock);

	if (likely(!ctx->nr_events))
		return;

	if (is_active ^ EVENT_TIME) {
		/* start ctx time */
		__update_context_time(ctx, false);
		perf_cgroup_set_timestamp(task, ctx);
		/*
		 * CPU-release for the below ->is_active store,
		 * see __load_acquire() in perf_event_time_now()
		 */
		barrier();
	}

	ctx->is_active |= (event_type | EVENT_TIME);
	if (ctx->task) {
		if (!is_active)
@@ -3838,6 +3783,13 @@ ctx_sched_in(struct perf_event_context *ctx,

	is_active ^= ctx->is_active; /* changed bits */

	if (is_active & EVENT_TIME) {
		/* start ctx time */
		now = perf_clock();
		ctx->timestamp = now;
		perf_cgroup_set_timestamp(task, ctx);
	}

	/*
	 * First go through the list and put on any pinned groups
	 * in order to give them the best chance of going on.
@@ -4373,18 +4325,6 @@ static inline u64 perf_event_count(struct perf_event *event)
	return local64_read(&event->count) + atomic64_read(&event->child_count);
}

static void calc_timer_values(struct perf_event *event,
			      u64 *now,
			      u64 *enabled,
			      u64 *running)
{
	u64 ctx_time;

	*now = perf_clock();
	ctx_time = perf_event_time_now(event, *now);
	__perf_update_times(event, ctx_time, enabled, running);
}

/*
 * NMI-safe method to read a local event, that is an event that
 * is:
@@ -4444,9 +4384,10 @@ int perf_event_read_local(struct perf_event *event, u64 *value,

	*value = local64_read(&event->count);
	if (enabled || running) {
		u64 __enabled, __running, __now;;
		u64 now = event->shadow_ctx_time + perf_clock();
		u64 __enabled, __running;

		calc_timer_values(event, &__now, &__enabled, &__running);
		__perf_update_times(event, now, &__enabled, &__running);
		if (enabled)
			*enabled = __enabled;
		if (running)
@@ -5754,6 +5695,18 @@ static int perf_event_index(struct perf_event *event)
	return event->pmu->event_idx(event);
}

static void calc_timer_values(struct perf_event *event,
			      u64 *now,
			      u64 *enabled,
			      u64 *running)
{
	u64 ctx_time;

	*now = perf_clock();
	ctx_time = event->shadow_ctx_time + *now;
	__perf_update_times(event, ctx_time, enabled, running);
}

static void perf_event_init_userpage(struct perf_event *event)
{
	struct perf_event_mmap_page *userpg;
@@ -6293,6 +6246,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
		ring_buffer_attach(event, rb);

		perf_event_update_time(event);
		perf_set_shadow_time(event, event->ctx);
		perf_event_init_userpage(event);
		perf_event_update_userpage(event);
	} else {