perf_counter: revamp syscall input ABI

Impact: modify ABI

The hardware/software classification in hw_event->type became a little
strained due to the addition of tracepoint tracing.

Instead, split up the field and provide a type field to explicitly specify
the counter type, while using the event_id field to specify which event to
use.

Raw counters still work as before; only the raw config now goes into
raw_event_id.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Orig-LKML-Reference: <20090319194233.836807573@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
Peter Zijlstra 2009-03-19 20:26:18 +01:00 committed by Ingo Molnar
parent e077df4f43
commit b8e83514b6
4 changed files with 117 additions and 75 deletions

View file

@ -602,7 +602,7 @@ hw_perf_counter_init(struct perf_counter *counter)
return NULL; return NULL;
if ((s64)counter->hw_event.irq_period < 0) if ((s64)counter->hw_event.irq_period < 0)
return NULL; return NULL;
ev = counter->hw_event.type; ev = counter->hw_event.event_id;
if (!counter->hw_event.raw) { if (!counter->hw_event.raw) {
if (ev >= ppmu->n_generic || if (ev >= ppmu->n_generic ||
ppmu->generic_events[ev] == 0) ppmu->generic_events[ev] == 0)
@ -692,7 +692,7 @@ static void perf_handle_group(struct perf_counter *counter)
list_for_each_entry(sub, &leader->sibling_list, list_entry) { list_for_each_entry(sub, &leader->sibling_list, list_entry) {
if (sub != counter) if (sub != counter)
sub->hw_ops->read(sub); sub->hw_ops->read(sub);
perf_store_irq_data(counter, sub->hw_event.type); perf_store_irq_data(counter, sub->hw_event.event_config);
perf_store_irq_data(counter, atomic64_read(&sub->count)); perf_store_irq_data(counter, atomic64_read(&sub->count));
} }
} }

View file

@ -217,15 +217,15 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
/* /*
* Raw event type provide the config in the event structure * Raw event type provide the config in the event structure
*/ */
if (hw_event->raw) { if (hw_event->raw_type) {
hwc->config |= pmc_ops->raw_event(hw_event->type); hwc->config |= pmc_ops->raw_event(hw_event->raw_event_id);
} else { } else {
if (hw_event->type >= pmc_ops->max_events) if (hw_event->event_id >= pmc_ops->max_events)
return -EINVAL; return -EINVAL;
/* /*
* The generic map: * The generic map:
*/ */
hwc->config |= pmc_ops->event_map(hw_event->type); hwc->config |= pmc_ops->event_map(hw_event->event_id);
} }
counter->wakeup_pending = 0; counter->wakeup_pending = 0;
@ -715,7 +715,7 @@ perf_handle_group(struct perf_counter *sibling, u64 *status, u64 *overflown)
list_for_each_entry(counter, &group_leader->sibling_list, list_entry) { list_for_each_entry(counter, &group_leader->sibling_list, list_entry) {
x86_perf_counter_update(counter, &counter->hw, counter->hw.idx); x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
perf_store_irq_data(sibling, counter->hw_event.type); perf_store_irq_data(sibling, counter->hw_event.event_config);
perf_store_irq_data(sibling, atomic64_read(&counter->count)); perf_store_irq_data(sibling, atomic64_read(&counter->count));
} }
} }

View file

@ -21,56 +21,81 @@
*/ */
/* /*
* Generalized performance counter event types, used by the hw_event.type * hw_event.type
*/
enum perf_event_types {
PERF_TYPE_HARDWARE = 0,
PERF_TYPE_SOFTWARE = 1,
PERF_TYPE_TRACEPOINT = 2,
/*
* available TYPE space, raw is the max value.
*/
PERF_TYPE_RAW = 128,
};
/*
* Generalized performance counter event types, used by the hw_event.event_id
* parameter of the sys_perf_counter_open() syscall: * parameter of the sys_perf_counter_open() syscall:
*/ */
enum hw_event_types { enum hw_event_ids {
/* /*
* Common hardware events, generalized by the kernel: * Common hardware events, generalized by the kernel:
*/ */
PERF_COUNT_CPU_CYCLES = 0, PERF_COUNT_CPU_CYCLES = 0,
PERF_COUNT_INSTRUCTIONS = 1, PERF_COUNT_INSTRUCTIONS = 1,
PERF_COUNT_CACHE_REFERENCES = 2, PERF_COUNT_CACHE_REFERENCES = 2,
PERF_COUNT_CACHE_MISSES = 3, PERF_COUNT_CACHE_MISSES = 3,
PERF_COUNT_BRANCH_INSTRUCTIONS = 4, PERF_COUNT_BRANCH_INSTRUCTIONS = 4,
PERF_COUNT_BRANCH_MISSES = 5, PERF_COUNT_BRANCH_MISSES = 5,
PERF_COUNT_BUS_CYCLES = 6, PERF_COUNT_BUS_CYCLES = 6,
PERF_HW_EVENTS_MAX = 7, PERF_HW_EVENTS_MAX = 7,
};
/* /*
* Special "software" counters provided by the kernel, even if * Special "software" counters provided by the kernel, even if the hardware
* the hardware does not support performance counters. These * does not support performance counters. These counters measure various
* counters measure various physical and sw events of the * physical and sw events of the kernel (and allow the profiling of them as
* kernel (and allow the profiling of them as well): * well):
*/ */
PERF_COUNT_CPU_CLOCK = -1, enum sw_event_ids {
PERF_COUNT_TASK_CLOCK = -2, PERF_COUNT_CPU_CLOCK = 0,
PERF_COUNT_PAGE_FAULTS = -3, PERF_COUNT_TASK_CLOCK = 1,
PERF_COUNT_CONTEXT_SWITCHES = -4, PERF_COUNT_PAGE_FAULTS = 2,
PERF_COUNT_CPU_MIGRATIONS = -5, PERF_COUNT_CONTEXT_SWITCHES = 3,
PERF_COUNT_PAGE_FAULTS_MIN = -6, PERF_COUNT_CPU_MIGRATIONS = 4,
PERF_COUNT_PAGE_FAULTS_MAJ = -7, PERF_COUNT_PAGE_FAULTS_MIN = 5,
PERF_COUNT_PAGE_FAULTS_MAJ = 6,
PERF_SW_EVENTS_MIN = -8, PERF_SW_EVENTS_MAX = 7,
PERF_TP_EVENTS_MIN = -65536
}; };
/* /*
* IRQ-notification data record type: * IRQ-notification data record type:
*/ */
enum perf_counter_record_type { enum perf_counter_record_type {
PERF_RECORD_SIMPLE = 0, PERF_RECORD_SIMPLE = 0,
PERF_RECORD_IRQ = 1, PERF_RECORD_IRQ = 1,
PERF_RECORD_GROUP = 2, PERF_RECORD_GROUP = 2,
}; };
/* /*
* Hardware event to monitor via a performance monitoring counter: * Hardware event to monitor via a performance monitoring counter:
*/ */
struct perf_counter_hw_event { struct perf_counter_hw_event {
__s64 type; union {
struct {
__u64 event_id : 56,
type : 8;
};
struct {
__u64 raw_event_id : 63,
raw_type : 1;
};
__u64 event_config;
};
__u64 irq_period; __u64 irq_period;
__u64 record_type; __u64 record_type;
@ -78,7 +103,6 @@ struct perf_counter_hw_event {
__u64 disabled : 1, /* off by default */ __u64 disabled : 1, /* off by default */
nmi : 1, /* NMI sampling */ nmi : 1, /* NMI sampling */
raw : 1, /* raw event type */
inherit : 1, /* children inherit it */ inherit : 1, /* children inherit it */
pinned : 1, /* must always be on PMU */ pinned : 1, /* must always be on PMU */
exclusive : 1, /* only group on PMU */ exclusive : 1, /* only group on PMU */
@ -87,7 +111,7 @@ struct perf_counter_hw_event {
exclude_hv : 1, /* ditto hypervisor */ exclude_hv : 1, /* ditto hypervisor */
exclude_idle : 1, /* don't count when idle */ exclude_idle : 1, /* don't count when idle */
__reserved_1 : 54; __reserved_1 : 55;
__u32 extra_config_len; __u32 extra_config_len;
__u32 __reserved_4; __u32 __reserved_4;
@ -298,10 +322,11 @@ extern int hw_perf_group_sched_in(struct perf_counter *group_leader,
*/ */
static inline int is_software_counter(struct perf_counter *counter) static inline int is_software_counter(struct perf_counter *counter)
{ {
return !counter->hw_event.raw && counter->hw_event.type < 0; return !counter->hw_event.raw_type &&
counter->hw_event.type != PERF_TYPE_HARDWARE;
} }
extern void perf_swcounter_event(enum hw_event_types, u64, int, struct pt_regs *); extern void perf_swcounter_event(u32, u64, int, struct pt_regs *);
#else #else
static inline void static inline void
@ -320,7 +345,7 @@ static inline u64 hw_perf_save_disable(void) { return 0; }
static inline int perf_counter_task_disable(void) { return -EINVAL; } static inline int perf_counter_task_disable(void) { return -EINVAL; }
static inline int perf_counter_task_enable(void) { return -EINVAL; } static inline int perf_counter_task_enable(void) { return -EINVAL; }
static inline void perf_swcounter_event(enum hw_event_types event, u64 nr, static inline void perf_swcounter_event(u32 event, u64 nr,
int nmi, struct pt_regs *regs) { } int nmi, struct pt_regs *regs) { }
#endif #endif

View file

@ -1395,12 +1395,6 @@ static void perf_swcounter_set_period(struct perf_counter *counter)
atomic64_set(&hwc->count, -left); atomic64_set(&hwc->count, -left);
} }
static void perf_swcounter_save_and_restart(struct perf_counter *counter)
{
perf_swcounter_update(counter);
perf_swcounter_set_period(counter);
}
static void perf_swcounter_store_irq(struct perf_counter *counter, u64 data) static void perf_swcounter_store_irq(struct perf_counter *counter, u64 data)
{ {
struct perf_data *irqdata = counter->irqdata; struct perf_data *irqdata = counter->irqdata;
@ -1421,7 +1415,7 @@ static void perf_swcounter_handle_group(struct perf_counter *sibling)
list_for_each_entry(counter, &group_leader->sibling_list, list_entry) { list_for_each_entry(counter, &group_leader->sibling_list, list_entry) {
counter->hw_ops->read(counter); counter->hw_ops->read(counter);
perf_swcounter_store_irq(sibling, counter->hw_event.type); perf_swcounter_store_irq(sibling, counter->hw_event.event_config);
perf_swcounter_store_irq(sibling, atomic64_read(&counter->count)); perf_swcounter_store_irq(sibling, atomic64_read(&counter->count));
} }
} }
@ -1477,21 +1471,25 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
static void perf_swcounter_overflow(struct perf_counter *counter, static void perf_swcounter_overflow(struct perf_counter *counter,
int nmi, struct pt_regs *regs) int nmi, struct pt_regs *regs)
{ {
perf_swcounter_save_and_restart(counter); perf_swcounter_update(counter);
perf_swcounter_set_period(counter);
perf_swcounter_interrupt(counter, nmi, regs); perf_swcounter_interrupt(counter, nmi, regs);
} }
static int perf_swcounter_match(struct perf_counter *counter, static int perf_swcounter_match(struct perf_counter *counter,
enum hw_event_types event, enum perf_event_types type,
struct pt_regs *regs) u32 event, struct pt_regs *regs)
{ {
if (counter->state != PERF_COUNTER_STATE_ACTIVE) if (counter->state != PERF_COUNTER_STATE_ACTIVE)
return 0; return 0;
if (counter->hw_event.raw) if (counter->hw_event.raw_type)
return 0; return 0;
if (counter->hw_event.type != event) if (counter->hw_event.type != type)
return 0;
if (counter->hw_event.event_id != event)
return 0; return 0;
if (counter->hw_event.exclude_user && user_mode(regs)) if (counter->hw_event.exclude_user && user_mode(regs))
@ -1512,8 +1510,8 @@ static void perf_swcounter_add(struct perf_counter *counter, u64 nr,
} }
static void perf_swcounter_ctx_event(struct perf_counter_context *ctx, static void perf_swcounter_ctx_event(struct perf_counter_context *ctx,
enum hw_event_types event, u64 nr, enum perf_event_types type, u32 event,
int nmi, struct pt_regs *regs) u64 nr, int nmi, struct pt_regs *regs)
{ {
struct perf_counter *counter; struct perf_counter *counter;
@ -1522,24 +1520,31 @@ static void perf_swcounter_ctx_event(struct perf_counter_context *ctx,
rcu_read_lock(); rcu_read_lock();
list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) { list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) {
if (perf_swcounter_match(counter, event, regs)) if (perf_swcounter_match(counter, type, event, regs))
perf_swcounter_add(counter, nr, nmi, regs); perf_swcounter_add(counter, nr, nmi, regs);
} }
rcu_read_unlock(); rcu_read_unlock();
} }
void perf_swcounter_event(enum hw_event_types event, u64 nr, static void __perf_swcounter_event(enum perf_event_types type, u32 event,
int nmi, struct pt_regs *regs) u64 nr, int nmi, struct pt_regs *regs)
{ {
struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context); struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context);
perf_swcounter_ctx_event(&cpuctx->ctx, event, nr, nmi, regs); perf_swcounter_ctx_event(&cpuctx->ctx, type, event, nr, nmi, regs);
if (cpuctx->task_ctx) if (cpuctx->task_ctx) {
perf_swcounter_ctx_event(cpuctx->task_ctx, event, nr, nmi, regs); perf_swcounter_ctx_event(cpuctx->task_ctx, type, event,
nr, nmi, regs);
}
put_cpu_var(perf_cpu_context); put_cpu_var(perf_cpu_context);
} }
void perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs)
{
__perf_swcounter_event(PERF_TYPE_SOFTWARE, event, nr, nmi, regs);
}
static void perf_swcounter_read(struct perf_counter *counter) static void perf_swcounter_read(struct perf_counter *counter)
{ {
perf_swcounter_update(counter); perf_swcounter_update(counter);
@ -1733,8 +1738,12 @@ static const struct hw_perf_counter_ops perf_ops_cpu_migrations = {
#ifdef CONFIG_EVENT_PROFILE #ifdef CONFIG_EVENT_PROFILE
void perf_tpcounter_event(int event_id) void perf_tpcounter_event(int event_id)
{ {
perf_swcounter_event(PERF_TP_EVENTS_MIN + event_id, 1, 1, struct pt_regs *regs = get_irq_regs();
task_pt_regs(current));
if (!regs)
regs = task_pt_regs(current);
__perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, 1, 1, regs);
} }
extern int ftrace_profile_enable(int); extern int ftrace_profile_enable(int);
@ -1742,15 +1751,13 @@ extern void ftrace_profile_disable(int);
static void tp_perf_counter_destroy(struct perf_counter *counter) static void tp_perf_counter_destroy(struct perf_counter *counter)
{ {
int event_id = counter->hw_event.type - PERF_TP_EVENTS_MIN; ftrace_profile_disable(counter->hw_event.event_id);
ftrace_profile_disable(event_id);
} }
static const struct hw_perf_counter_ops * static const struct hw_perf_counter_ops *
tp_perf_counter_init(struct perf_counter *counter) tp_perf_counter_init(struct perf_counter *counter)
{ {
int event_id = counter->hw_event.type - PERF_TP_EVENTS_MIN; int event_id = counter->hw_event.event_id;
int ret; int ret;
ret = ftrace_profile_enable(event_id); ret = ftrace_profile_enable(event_id);
@ -1758,6 +1765,7 @@ tp_perf_counter_init(struct perf_counter *counter)
return NULL; return NULL;
counter->destroy = tp_perf_counter_destroy; counter->destroy = tp_perf_counter_destroy;
counter->hw.irq_period = counter->hw_event.irq_period;
return &perf_ops_generic; return &perf_ops_generic;
} }
@ -1783,7 +1791,7 @@ sw_perf_counter_init(struct perf_counter *counter)
* to be kernel events, and page faults are never hypervisor * to be kernel events, and page faults are never hypervisor
* events. * events.
*/ */
switch (counter->hw_event.type) { switch (counter->hw_event.event_id) {
case PERF_COUNT_CPU_CLOCK: case PERF_COUNT_CPU_CLOCK:
hw_ops = &perf_ops_cpu_clock; hw_ops = &perf_ops_cpu_clock;
@ -1813,9 +1821,6 @@ sw_perf_counter_init(struct perf_counter *counter)
if (!counter->hw_event.exclude_kernel) if (!counter->hw_event.exclude_kernel)
hw_ops = &perf_ops_cpu_migrations; hw_ops = &perf_ops_cpu_migrations;
break; break;
default:
hw_ops = tp_perf_counter_init(counter);
break;
} }
if (hw_ops) if (hw_ops)
@ -1870,10 +1875,22 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
counter->state = PERF_COUNTER_STATE_OFF; counter->state = PERF_COUNTER_STATE_OFF;
hw_ops = NULL; hw_ops = NULL;
if (!hw_event->raw && hw_event->type < 0)
hw_ops = sw_perf_counter_init(counter); if (hw_event->raw_type)
else
hw_ops = hw_perf_counter_init(counter); hw_ops = hw_perf_counter_init(counter);
else switch (hw_event->type) {
case PERF_TYPE_HARDWARE:
hw_ops = hw_perf_counter_init(counter);
break;
case PERF_TYPE_SOFTWARE:
hw_ops = sw_perf_counter_init(counter);
break;
case PERF_TYPE_TRACEPOINT:
hw_ops = tp_perf_counter_init(counter);
break;
}
if (!hw_ops) { if (!hw_ops) {
kfree(counter); kfree(counter);