Hi, This patch adds Oprofile support on ARMv7, using the PMNC unit. Tested on OMAP3430 SDP. Feedback and comments are welcome. The patch to user space components is attached for reference. It i applies against version 0.9.3 of oprofile source (http://prdownloads.sourceforge.net/oprofile/oprofile-0.9.3.tar.gz). Regards, Jean. --- From: Jean Pihet Date: Tue, 6 May 2008 17:21:44 +0200 Subject: [PATCH] ARM: Add ARMv7 oprofile support Add ARMv7 Oprofile support to kernel Signed-off-by: Jean Pihet --- diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index c60a27d..60b50a0 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -161,6 +161,11 @@ config OPROFILE_MPCORE config OPROFILE_ARM11_CORE bool +config OPROFILE_ARMV7 + def_bool y + depends on CPU_V7 && !SMP + bool + endif config VECTORS_BASE diff --git a/arch/arm/oprofile/Makefile b/arch/arm/oprofile/Makefile index e61d0cc..88e31f5 100644 --- a/arch/arm/oprofile/Makefile +++ b/arch/arm/oprofile/Makefile @@ -11,3 +11,4 @@ oprofile-$(CONFIG_CPU_XSCALE) += op_model_xscale.o oprofile-$(CONFIG_OPROFILE_ARM11_CORE) += op_model_arm11_core.o oprofile-$(CONFIG_OPROFILE_ARMV6) += op_model_v6.o oprofile-$(CONFIG_OPROFILE_MPCORE) += op_model_mpcore.o +oprofile-$(CONFIG_OPROFILE_ARMV7) += op_model_v7.o diff --git a/arch/arm/oprofile/common.c b/arch/arm/oprofile/common.c index 0a5cf3a..3fcd752 100644 --- a/arch/arm/oprofile/common.c +++ b/arch/arm/oprofile/common.c @@ -145,6 +145,10 @@ int __init oprofile_arch_init(struct oprofile_operations *ops) spec = &op_mpcore_spec; #endif +#ifdef CONFIG_OPROFILE_ARMV7 + spec = &op_armv7_spec; +#endif + if (spec) { ret = spec->init(); if (ret < 0) diff --git a/arch/arm/oprofile/op_arm_model.h b/arch/arm/oprofile/op_arm_model.h index 4899c62..8c4e4f6 100644 --- a/arch/arm/oprofile/op_arm_model.h +++ b/arch/arm/oprofile/op_arm_model.h @@ -26,6 +26,7 @@ extern struct op_arm_model_spec op_xscale_spec; extern struct op_arm_model_spec op_armv6_spec; extern struct op_arm_model_spec op_mpcore_spec; +extern struct op_arm_model_spec op_armv7_spec; extern void arm_backtrace(struct pt_regs * const regs, unsigned int depth); diff --git a/arch/arm/oprofile/op_model_v7.c b/arch/arm/oprofile/op_model_v7.c new file mode 100644 index 0000000..a159bc1 --- /dev/null +++ b/arch/arm/oprofile/op_model_v7.c @@ -0,0 +1,407 @@ +/** + * @file op_model_v7.c + * ARM V7 (Cortex A8) Event Monitor Driver + * + * @remark Copyright 2008 Jean Pihet + * @remark Copyright 2004 ARM SMP Development Team + */ +#include +#include +#include +#include +#include +#include + +#include "op_counter.h" +#include "op_arm_model.h" +#include "op_model_v7.h" + +/* #define DEBUG */ + + +/* + * ARM V7 PMNC support + */ + +static u32 cnt_en[CNTMAX]; + +static inline void armv7_pmnc_write(u32 val) +{ + val &= PMNC_MASK; + asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r" (val)); +} + +static inline u32 armv7_pmnc_read(void) +{ + u32 val; + + asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val)); + return val; +} + +static inline u32 armv7_pmnc_enable_counter(unsigned int cnt) +{ + u32 val; + + if (cnt >= CNTMAX) { + printk(KERN_ERR "oprofile: CPU%u enabling wrong PMNC counter" + " %d\n", smp_processor_id(), cnt); + return -1; + } + + if (cnt == CCNT) + val = CNTENS_C; + else + val = (1 << (cnt - CNT0)); + + val &= CNTENS_MASK; + asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (val)); + + return cnt; +} + +static inline u32 armv7_pmnc_disable_counter(unsigned int cnt) +{ + u32 val; + + if (cnt >= CNTMAX) { + printk(KERN_ERR "oprofile: CPU%u disabling wrong PMNC counter" + " %d\n", smp_processor_id(), cnt); + return -1; + } + + if (cnt == CCNT) + val = CNTENC_C; + else + val = (1 << (cnt - CNT0)); + + val &= CNTENC_MASK; + asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (val)); + + return cnt; +} + +static inline u32 armv7_pmnc_enable_intens(unsigned int cnt) +{ + u32 val; + + if (cnt >= CNTMAX) { + printk(KERN_ERR "oprofile: CPU%u enabling wrong PMNC counter" + " interrupt enable %d\n", smp_processor_id(), cnt); + return -1; + } + + if (cnt == CCNT) + val = INTENS_C; + else + val = (1 << (cnt - CNT0)); + + val &= INTENS_MASK; + asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (val)); + + return cnt; +} + +static inline u32 armv7_pmnc_getreset_flags(void) +{ + u32 val; + + /* Read */ + asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val)); + + /* Write to clear flags */ + val &= FLAG_MASK; + asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (val)); + + return val; +} + +static inline int armv7_pmnc_select_counter(unsigned int cnt) +{ + u32 val; + + if ((cnt == CCNT) || (cnt >= CNTMAX)) { + printk(KERN_ERR "oprofile: CPU%u selecting wrong PMNC counteri" + " %d\n", smp_processor_id(), cnt); + return -1; + } + + val = (cnt - CNT0) & SELECT_MASK; + asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (val)); + + return cnt; +} + +static inline void armv7_pmnc_write_evtsel(unsigned int cnt, u32 val) +{ + if (armv7_pmnc_select_counter(cnt) == cnt) { + val &= EVTSEL_MASK; + asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val)); + } +} + +static void armv7_pmnc_reset_counter(unsigned int cnt) +{ + u32 cpu_cnt = CPU_COUNTER(smp_processor_id(), cnt); + u32 val = -(u32)counter_config[cpu_cnt].count; + + switch (cnt) { + case CCNT: + armv7_pmnc_disable_counter(cnt); + + asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (val)); + + if (cnt_en[cnt] != 0) + armv7_pmnc_enable_counter(cnt); + + break; + + case CNT0: + case CNT1: + case CNT2: + case CNT3: + armv7_pmnc_disable_counter(cnt); + + if (armv7_pmnc_select_counter(cnt) == cnt) + asm volatile("mcr p15, 0, %0, c9, c13, 2" : : "r" (val)); + + if (cnt_en[cnt] != 0) + armv7_pmnc_enable_counter(cnt); + + break; + + default: + printk(KERN_ERR "oprofile: CPU%u resetting wrong PMNC counter" + " %d\n", smp_processor_id(), cnt); + break; + } +} + +int armv7_setup_pmnc(void) +{ + unsigned int cnt; + + if (armv7_pmnc_read() & PMNC_E) { + printk(KERN_ERR "oprofile: CPU%u PMNC still enabled when setup" + " new event counter.\n", smp_processor_id()); + return -EBUSY; + } + + /* + * Initialize & Reset PMNC: C bit, D bit and P bit. + * Note: Using a slower count for CCNT (D bit: divide by 64) results + * in a more stable system + */ + armv7_pmnc_write(PMNC_P | PMNC_C | PMNC_D); + + + for (cnt = CCNT; cnt < CNTMAX; cnt++) { + unsigned long event; + u32 cpu_cnt = CPU_COUNTER(smp_processor_id(), cnt); + + /* + * Disable counter + */ + armv7_pmnc_disable_counter(cnt); + cnt_en[cnt] = 0; + + if (!counter_config[cpu_cnt].enabled) + continue; + + event = counter_config[cpu_cnt].event & 255; + + /* + * Set event (if destined for PMNx counters) + * We don't need to set the event if it's a cycle count + */ + if (cnt != CCNT) + armv7_pmnc_write_evtsel(cnt, event); + + /* + * Enable interrupt for this counter + */ + armv7_pmnc_enable_intens(cnt); + + /* + * Reset counter + */ + armv7_pmnc_reset_counter(cnt); + + /* + * Enable counter + */ + armv7_pmnc_enable_counter(cnt); + cnt_en[cnt] = 1; + } + + return 0; +} + +static inline void armv7_start_pmnc(void) +{ + armv7_pmnc_write(armv7_pmnc_read() | PMNC_E); +} + +static inline void armv7_stop_pmnc(void) +{ + armv7_pmnc_write(armv7_pmnc_read() & ~PMNC_E); +} + +/* + * CPU counters' IRQ handler (one IRQ per CPU) + */ +static irqreturn_t armv7_pmnc_interrupt(int irq, void *arg) +{ + struct pt_regs *regs = get_irq_regs(); + unsigned int cnt; + u32 flags; + + + /* + * Stop IRQ generation + */ + armv7_stop_pmnc(); + + /* + * Get and reset overflow status flags + */ + flags = armv7_pmnc_getreset_flags(); + + /* + * Cycle counter + */ + if (flags & FLAG_C) { + u32 cpu_cnt = CPU_COUNTER(smp_processor_id(), CCNT); + armv7_pmnc_reset_counter(CCNT); + oprofile_add_sample(regs, cpu_cnt); + } + + /* + * PMNC counters 0:3 + */ + for (cnt = CNT0; cnt < CNTMAX; cnt++) { + if (flags & (1 << (cnt - CNT0))) { + u32 cpu_cnt = CPU_COUNTER(smp_processor_id(), cnt); + armv7_pmnc_reset_counter(cnt); + oprofile_add_sample(regs, cpu_cnt); + } + } + + /* + * Allow IRQ generation + */ + armv7_start_pmnc(); + + return IRQ_HANDLED; +} + +int armv7_request_interrupts(int *irqs, int nr) +{ + unsigned int i; + int ret = 0; + + for (i = 0; i < nr; i++) { + ret = request_irq(irqs[i], armv7_pmnc_interrupt, + IRQF_DISABLED, "CP15 PMNC", NULL); + if (ret != 0) { + printk(KERN_ERR "oprofile: unable to request IRQ%u" + " for ARMv7\n", + irqs[i]); + break; + } + } + + if (i != nr) + while (i-- != 0) + free_irq(irqs[i], NULL); + + return ret; +} + +void armv7_release_interrupts(int *irqs, int nr) +{ + unsigned int i; + + for (i = 0; i < nr; i++) + free_irq(irqs[i], NULL); +} + +#ifdef DEBUG +static void armv7_pmnc_dump_regs(void) +{ + u32 val; + unsigned int cnt; + + printk(KERN_INFO "PMNC registers dump:\n"); + + asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val)); + printk(KERN_INFO "PMNC =0x%08x\n", val); + + asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (val)); + printk(KERN_INFO "CNTENS=0x%08x\n", val); + + asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (val)); + printk(KERN_INFO "INTENS=0x%08x\n", val); + + asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val)); + printk(KERN_INFO "FLAGS =0x%08x\n", val); + + asm volatile("mrc p15, 0, %0, c9, c12, 5" : "=r" (val)); + printk(KERN_INFO "SELECT=0x%08x\n", val); + + asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val)); + printk(KERN_INFO "CCNT =0x%08x\n", val); + + for (cnt = CNT0; cnt < CNTMAX; cnt++) { + armv7_pmnc_select_counter(cnt); + asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val)); + printk(KERN_INFO "CNT[%d] count =0x%08x\n", cnt-CNT0, val); + asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val)); + printk(KERN_INFO "CNT[%d] evtsel=0x%08x\n", cnt-CNT0, val); + } +} +#endif + + +static int irqs[] = { +#ifdef CONFIG_ARCH_OMAP3 + INT_34XX_BENCH_MPU_EMUL, +#endif +}; + +static void armv7_pmnc_stop(void) +{ +#ifdef DEBUG + armv7_pmnc_dump_regs(); +#endif + armv7_stop_pmnc(); + armv7_release_interrupts(irqs, ARRAY_SIZE(irqs)); +} + +static int armv7_pmnc_start(void) +{ + int ret; + +#ifdef DEBUG + armv7_pmnc_dump_regs(); +#endif + ret = armv7_request_interrupts(irqs, ARRAY_SIZE(irqs)); + if (ret >= 0) + armv7_start_pmnc(); + + return ret; +} + +static int armv7_detect_pmnc(void) +{ + return 0; +} + +struct op_arm_model_spec op_armv7_spec = { + .init = armv7_detect_pmnc, + .num_counters = 5, + .setup_ctrs = armv7_setup_pmnc, + .start = armv7_pmnc_start, + .stop = armv7_pmnc_stop, + .name = "arm/armv7", +}; diff --git a/arch/arm/oprofile/op_model_v7.h b/arch/arm/oprofile/op_model_v7.h new file mode 100644 index 0000000..08f40ea --- /dev/null +++ b/arch/arm/oprofile/op_model_v7.h @@ -0,0 +1,101 @@ +/** + * @file op_model_v7.h + * ARM v7 (Cortex A8) Event Monitor Driver + * + * @remark Copyright 2008 Jean Pihet + * @remark Copyright 2004 ARM SMP Development Team + * @remark Copyright 2000-2004 Deepak Saxena + * @remark Copyright 2000-2004 MontaVista Software Inc + * @remark Copyright 2004 Dave Jiang + * @remark Copyright 2004 Intel Corporation + * @remark Copyright 2004 Zwane Mwaikambo + * @remark Copyright 2004 Oprofile Authors + * + * @remark Read the file COPYING + * + * @author Zwane Mwaikambo + */ +#ifndef OP_MODEL_V7_H +#define OP_MODEL_V7_H + +/* + * Per-CPU PMNC: config reg + */ +#define PMNC_E (1 << 0) /* Enable all counters */ +#define PMNC_P (1 << 1) /* Reset all counters */ +#define PMNC_C (1 << 2) /* Cycle counter reset */ +#define PMNC_D (1 << 3) /* CCNT counts every 64th cpu cycle */ +#define PMNC_X (1 << 4) /* Export to ETM */ +#define PMNC_DP (1 << 5) /* Disable CCNT if non-invasive debug*/ +#define PMNC_MASK 0x3f /* Mask for writable bits */ + +/* + * Available counters + */ +#define CCNT 0 +#define CNT0 1 +#define CNT1 2 +#define CNT2 3 +#define CNT3 4 +#define CNTMAX 5 + +#define CPU_COUNTER(cpu, counter) ((cpu) * CNTMAX + (counter)) + +/* + * CNTENS: counters enable reg + */ +#define CNTENS_P0 (1 << 0) +#define CNTENS_P1 (1 << 1) +#define CNTENS_P2 (1 << 2) +#define CNTENS_P3 (1 << 3) +#define CNTENS_C (1 << 31) +#define CNTENS_MASK 0x8000000f /* Mask for writable bits */ + +/* + * CNTENC: counters disable reg + */ +#define CNTENC_P0 (1 << 0) +#define CNTENC_P1 (1 << 1) +#define CNTENC_P2 (1 << 2) +#define CNTENC_P3 (1 << 3) +#define CNTENC_C (1 << 31) +#define CNTENC_MASK 0x8000000f /* Mask for writable bits */ + +/* + * INTENS: counters overflow interrupt enable reg + */ +#define INTENS_P0 (1 << 0) +#define INTENS_P1 (1 << 1) +#define INTENS_P2 (1 << 2) +#define INTENS_P3 (1 << 3) +#define INTENS_C (1 << 31) +#define INTENS_MASK 0x8000000f /* Mask for writable bits */ + +/* + * EVTSEL: Event selection reg + */ +#define EVTSEL_MASK 0x7f /* Mask for writable bits */ + +/* + * SELECT: Counter selection reg + */ +#define SELECT_MASK 0x1f /* Mask for writable bits */ + +/* + * FLAG: counters overflow flag status reg + */ +#define FLAG_P0 (1 << 0) +#define FLAG_P1 (1 << 1) +#define FLAG_P2 (1 << 2) +#define FLAG_P3 (1 << 3) +#define FLAG_C (1 << 31) +#define FLAG_MASK 0x8000000f /* Mask for writable bits */ + + +int armv7_setup_pmu(void); +int armv7_start_pmu(void); +int armv7_stop_pmu(void); +int armv7_request_interrupts(int *, int); +void armv7_release_interrupts(int *, int); + +#endif