perf: add hotplug support

The change is a squash of the following two commits:

1) enable perf to continue across hotplug:
Currently perf hardware, software and tracepoint events are
deleted when a CPU is hotplugged out. This change restarts the
events once the CPU comes back online. In arm_pmu.c, most of the
code for handling power collapse is reused for hotplug (see the
first sketch below). This change supersedes commit 1f0f95c5fe9e
("perf: add hotplug support so that perf continues after hotplug")
and uses the new hotplug notification method.

2) disable perf_event_read during hotplug:
core.c should not allow perf_event_read access while a CPU is
being hotplugged. DCVS may try to read events during hotplug
startup or shutdown, so set a per-CPU flag that blocks such reads
for the duration of the transition (see the second sketch below).
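The "new hotplug notification method" mentioned in 1) is the cpuhp
multi-instance API. A minimal sketch of that registration pattern
follows; the state constant and the "perf/arm/pmu:starting" string
mirror the arm_pmu hunks further down, but the callback bodies and
the pmu_hp_* helper names are illustrative placeholders, and the
add_instance call is assumed from the generic multi-instance API
rather than taken from this diff:

#include <linux/cpuhotplug.h>
#include <linux/perf/arm_pmu.h>

/* Both callbacks run on the hotplugged CPU itself, with the
 * per-PMU hlist node handed back by the cpuhp core. */
static int pmu_cpu_starting(unsigned int cpu, struct hlist_node *node)
{
	/* CPU coming online: reset the PMU and re-arm its per-CPU IRQ. */
	return 0;
}

static int pmu_cpu_stopping(unsigned int cpu, struct hlist_node *node)
{
	/* CPU going offline: stop active events and disarm the IRQ. */
	return 0;
}

/* Registered once for the driver... */
static int pmu_hp_init(void)
{
	return cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_STARTING,
				       "perf/arm/pmu:starting",
				       pmu_cpu_starting, pmu_cpu_stopping);
}

/* ...then each PMU adds its own instance; _nocalls skips invoking
 * the callbacks for CPUs that are already online. */
static int pmu_hp_add(struct arm_pmu *pmu)
{
	return cpuhp_state_add_instance_nocalls(CPUHP_AP_PERF_ARM_STARTING,
						&pmu->node);
}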
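And a sketch of the core-side gating added in 2), simplified from
the kernel/events hunks below; the perf_cpu_* helper names and the
"perf/core:online" string are made up for illustration, while
CPUHP_AP_PERF_ONLINE and the per-CPU flag match the diff:

#include <linux/cpuhotplug.h>
#include <linux/percpu.h>

static DEFINE_PER_CPU(bool, is_hotplugging);

static int perf_cpu_coming_up(unsigned int cpu)
{
	per_cpu(is_hotplugging, cpu) = false;	/* reads allowed again */
	return 0;
}

static int perf_cpu_going_down(unsigned int cpu)
{
	per_cpu(is_hotplugging, cpu) = true;	/* reject reads from here on */
	return 0;
}

static int perf_hotplug_gate_init(void)
{
	/* _nocalls: do not run the callbacks for already-online CPUs */
	return cpuhp_setup_state_nocalls(CPUHP_AP_PERF_ONLINE,
					 "perf/core:online",
					 perf_cpu_coming_up,
					 perf_cpu_going_down);
}

/* Shape of the check added to the cross-CPU read path: the IPI
 * handler simply bails out while the target CPU is mid-hotplug. */
static void __perf_event_read(void *info)
{
	if (__this_cpu_read(is_hotplugging))
		return;
	/* ... normal read path ... */
}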

Change-Id: I3c5f1f532d451a096d2d3ee976e0a15fca826e8b
Signed-off-by: Patrick Fay <pfay@codeaurora.org>
[rananta@codeaurora.org: resolved trivial conflicts]
Signed-off-by: Raghavendra Rao Ananta <rananta@codeaurora.org>
Raghavendra Rao Ananta, 2017-10-30 11:17:37 -07:00
commit a7db37ad8d (parent e0a2db2172)
5 changed files with 277 additions and 79 deletions

@ -29,6 +29,8 @@
#include <linux/perf/arm_pmu.h>
#include <linux/platform_device.h>
static DEFINE_PER_CPU(bool, is_hotplugging);
/*
* ARMv8 PMUv3 Performance Events handling code.
* Common event types (some are defined in asm/perf_event.h).
@ -942,6 +944,9 @@ static void armv8pmu_idle_update(struct arm_pmu *cpu_pmu)
if (!cpu_pmu)
return;
if (__this_cpu_read(is_hotplugging))
return;
hw_events = this_cpu_ptr(cpu_pmu->hw_events);
if (!hw_events)
@ -995,7 +1000,6 @@ static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu)
pmu_idle_nb->cpu_pmu = cpu_pmu;
pmu_idle_nb->perf_cpu_idle_nb.notifier_call = perf_cpu_idle_notifier;
idle_notifier_register(&pmu_idle_nb->perf_cpu_idle_nb);
ret = smp_call_function_any(&cpu_pmu->supported_cpus,
__armv8pmu_probe_pmu,
@ -1176,6 +1180,37 @@ static const struct of_device_id armv8_pmu_of_device_ids[] = {
{},
};
#ifdef CONFIG_HOTPLUG_CPU
static int perf_event_hotplug_coming_up(unsigned int cpu)
{
per_cpu(is_hotplugging, cpu) = false;
return 0;
}
static int perf_event_hotplug_going_down(unsigned int cpu)
{
per_cpu(is_hotplugging, cpu) = true;
return 0;
}
static int perf_event_cpu_hp_init(void)
{
int ret;
ret = cpuhp_setup_state_nocalls(CPUHP_AP_PERF_ONLINE,
"PERF_EVENT/CPUHP_AP_PERF_ONLINE",
perf_event_hotplug_coming_up,
perf_event_hotplug_going_down);
if (ret)
pr_err("CPU hotplug notifier for perf_event.c could not be registered: %d\n",
ret);
return ret;
}
#else
static int perf_event_cpu_hp_init(void) { return 0; }
#endif
/*
* Non DT systems have their micro/arch events probed at run-time.
* A fairly complete list of generic events are provided and ones that
@ -1188,7 +1223,14 @@ static const struct pmu_probe_info armv8_pmu_probe_table[] = {
static int armv8_pmu_device_probe(struct platform_device *pdev)
{
int ret;
int ret, cpu;
for_each_possible_cpu(cpu)
per_cpu(is_hotplugging, cpu) = false;
ret = perf_event_cpu_hp_init();
if (ret)
return ret;
/* set to true so armv8pmu_idle_update doesn't try to load
* hw_events before arm_pmu_device_probe has initialized it.

@ -26,6 +26,9 @@
#include <asm/irq_regs.h>
#define USE_CPUHP_STATE CPUHP_AP_PERF_ARM_STARTING
#define USE_CPUHP_STR "AP_PERF_ARM_STARTING"
static int
armpmu_map_cache_event(const unsigned (*cache_map)
[PERF_COUNT_HW_CACHE_MAX]
@ -539,13 +542,18 @@ void armpmu_free_irq(struct arm_pmu *armpmu, int cpu)
if (!cpumask_test_and_clear_cpu(cpu, &armpmu->active_irqs))
return;
armpmu->pmu_state = ARM_PMU_STATE_GOING_DOWN;
if (irq_is_percpu(irq)) {
free_percpu_irq(irq, &hw_events->percpu_pmu);
cpumask_clear(&armpmu->active_irqs);
armpmu->percpu_irq = -1;
armpmu->pmu_state = ARM_PMU_STATE_OFF;
return;
}
free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, cpu));
armpmu->pmu_state = ARM_PMU_STATE_OFF;
}
void armpmu_free_irqs(struct arm_pmu *armpmu)
@ -568,6 +576,7 @@ int armpmu_request_irq(struct arm_pmu *armpmu, int cpu)
if (irq_is_percpu(irq) && cpumask_empty(&armpmu->active_irqs)) {
err = request_percpu_irq(irq, handler, "arm-pmu",
&hw_events->percpu_pmu);
armpmu->percpu_irq = irq;
} else if (irq_is_percpu(irq)) {
int other_cpu = cpumask_first(&armpmu->active_irqs);
int other_irq = per_cpu(hw_events->irq, other_cpu);
@ -604,6 +613,8 @@ int armpmu_request_irq(struct arm_pmu *armpmu, int cpu)
if (err)
goto err_out;
armpmu->pmu_state = ARM_PMU_STATE_RUNNING;
cpumask_set_cpu(cpu, &armpmu->active_irqs);
return 0;
@ -625,53 +636,12 @@ int armpmu_request_irqs(struct arm_pmu *armpmu)
return err;
}
static int armpmu_get_cpu_irq(struct arm_pmu *pmu, int cpu)
{
struct pmu_hw_events __percpu *hw_events = pmu->hw_events;
return per_cpu(hw_events->irq, cpu);
}
/*
* PMU hardware loses all context when a CPU goes offline.
* When a CPU is hotplugged back in, since some hardware registers are
* UNKNOWN at reset, the PMU must be explicitly reset to avoid reading
* junk values out of them.
*/
static int arm_perf_starting_cpu(unsigned int cpu, struct hlist_node *node)
{
struct arm_pmu *pmu = hlist_entry_safe(node, struct arm_pmu, node);
int irq;
if (!cpumask_test_cpu(cpu, &pmu->supported_cpus))
return 0;
if (pmu->reset)
pmu->reset(pmu);
irq = armpmu_get_cpu_irq(pmu, cpu);
if (irq) {
if (irq_is_percpu(irq)) {
enable_percpu_irq(irq, IRQ_TYPE_NONE);
return 0;
}
}
return 0;
}
static int arm_perf_teardown_cpu(unsigned int cpu, struct hlist_node *node)
{
struct arm_pmu *pmu = hlist_entry_safe(node, struct arm_pmu, node);
int irq;
if (!cpumask_test_cpu(cpu, &pmu->supported_cpus))
return 0;
irq = armpmu_get_cpu_irq(pmu, cpu);
if (irq && irq_is_percpu(irq))
disable_percpu_irq(irq);
return 0;
}
struct cpu_pm_pmu_args {
struct arm_pmu *armpmu;
unsigned long cmd;
int cpu;
int ret;
};
#ifdef CONFIG_CPU_PM
static void cpu_pm_pmu_setup(struct arm_pmu *armpmu, unsigned long cmd)
@ -719,15 +689,19 @@ static void cpu_pm_pmu_setup(struct arm_pmu *armpmu, unsigned long cmd)
}
}
static int cpu_pm_pmu_notify(struct notifier_block *b, unsigned long cmd,
void *v)
static void cpu_pm_pmu_common(void *info)
{
struct arm_pmu *armpmu = container_of(b, struct arm_pmu, cpu_pm_nb);
struct cpu_pm_pmu_args *data = info;
struct arm_pmu *armpmu = data->armpmu;
unsigned long cmd = data->cmd;
int cpu = data->cpu;
struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events);
if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
return NOTIFY_DONE;
if (!cpumask_test_cpu(cpu, &armpmu->supported_cpus)) {
data->ret = NOTIFY_DONE;
return;
}
/*
* Always reset the PMU registers on power-up even if
@ -736,8 +710,12 @@ static int cpu_pm_pmu_notify(struct notifier_block *b, unsigned long cmd,
if (cmd == CPU_PM_EXIT && armpmu->reset)
armpmu->reset(armpmu);
if (!enabled)
return NOTIFY_OK;
if (!enabled) {
data->ret = NOTIFY_OK;
return;
}
data->ret = NOTIFY_OK;
switch (cmd) {
case CPU_PM_ENTER:
@ -745,15 +723,29 @@ static int cpu_pm_pmu_notify(struct notifier_block *b, unsigned long cmd,
cpu_pm_pmu_setup(armpmu, cmd);
break;
case CPU_PM_EXIT:
cpu_pm_pmu_setup(armpmu, cmd);
case CPU_PM_ENTER_FAILED:
cpu_pm_pmu_setup(armpmu, cmd);
armpmu->start(armpmu);
break;
default:
return NOTIFY_DONE;
data->ret = NOTIFY_DONE;
break;
}
return NOTIFY_OK;
return;
}
static int cpu_pm_pmu_notify(struct notifier_block *b, unsigned long cmd,
void *v)
{
struct cpu_pm_pmu_args data = {
.armpmu = container_of(b, struct arm_pmu, cpu_pm_nb),
.cmd = cmd,
.cpu = smp_processor_id(),
};
cpu_pm_pmu_common(&data);
return data.ret;
}
static int cpu_pm_pmu_register(struct arm_pmu *cpu_pmu)
@ -766,11 +758,75 @@ static void cpu_pm_pmu_unregister(struct arm_pmu *cpu_pmu)
{
cpu_pm_unregister_notifier(&cpu_pmu->cpu_pm_nb);
}
#else
static inline int cpu_pm_pmu_register(struct arm_pmu *cpu_pmu) { return 0; }
static inline void cpu_pm_pmu_unregister(struct arm_pmu *cpu_pmu) { }
static void cpu_pm_pmu_common(void *info) { }
#endif
/*
* PMU hardware loses all context when a CPU goes offline.
* When a CPU is hotplugged back in, since some hardware registers are
* UNKNOWN at reset, the PMU must be explicitly reset to avoid reading
* junk values out of them.
*/
static int arm_perf_starting_cpu(unsigned int cpu, struct hlist_node *node)
{
struct arm_pmu *pmu = hlist_entry_safe(node, struct arm_pmu, node);
struct cpu_pm_pmu_args data = {
.armpmu = pmu,
.cpu = (int)cpu,
};
if (!pmu || !cpumask_test_cpu(cpu, &pmu->supported_cpus))
return 0;
data.cmd = CPU_PM_EXIT;
cpu_pm_pmu_common(&data);
if (data.ret == NOTIFY_DONE)
return 0;
if (data.armpmu->pmu_state != ARM_PMU_STATE_OFF &&
data.armpmu->plat_device) {
int irq = data.armpmu->percpu_irq;
if (irq > 0 && irq_is_percpu(irq))
enable_percpu_irq(irq, IRQ_TYPE_NONE);
}
return 0;
}
static int arm_perf_stopping_cpu(unsigned int cpu, struct hlist_node *node)
{
struct arm_pmu *pmu = hlist_entry_safe(node, struct arm_pmu, node);
struct cpu_pm_pmu_args data = {
.armpmu = pmu,
.cpu = (int)cpu,
};
if (!pmu || !cpumask_test_cpu(cpu, &pmu->supported_cpus))
return 0;
data.cmd = CPU_PM_ENTER;
cpu_pm_pmu_common(&data);
/* Disarm the PMU IRQ before disappearing. */
if (data.armpmu->pmu_state == ARM_PMU_STATE_RUNNING &&
data.armpmu->plat_device) {
int irq = data.armpmu->percpu_irq;
if (irq > 0 && irq_is_percpu(irq))
disable_percpu_irq(irq);
}
return 0;
}
static int cpu_pmu_init(struct arm_pmu *cpu_pmu)
{
int err;
@ -782,12 +838,12 @@ static int cpu_pmu_init(struct arm_pmu *cpu_pmu)
err = cpu_pm_pmu_register(cpu_pmu);
if (err)
goto out_unregister;
goto out_unreg_perf_starting;
return 0;
out_unregister:
cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_STARTING,
out_unreg_perf_starting:
cpuhp_state_remove_instance_nocalls(USE_CPUHP_STATE,
&cpu_pmu->node);
out:
return err;
@ -796,7 +852,7 @@ out:
static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu)
{
cpu_pm_pmu_unregister(cpu_pmu);
cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_STARTING,
cpuhp_state_remove_instance_nocalls(USE_CPUHP_STATE,
&cpu_pmu->node);
}
@ -818,16 +874,16 @@ struct arm_pmu *armpmu_alloc(void)
}
pmu->pmu = (struct pmu) {
.pmu_enable = armpmu_enable,
.pmu_disable = armpmu_disable,
.event_init = armpmu_event_init,
.add = armpmu_add,
.del = armpmu_del,
.start = armpmu_start,
.stop = armpmu_stop,
.read = armpmu_read,
.filter_match = armpmu_filter_match,
.attr_groups = pmu->attr_groups,
.pmu_enable = armpmu_enable,
.pmu_disable = armpmu_disable,
.event_init = armpmu_event_init,
.add = armpmu_add,
.del = armpmu_del,
.start = armpmu_start,
.stop = armpmu_stop,
.read = armpmu_read,
.filter_match = armpmu_filter_match,
.attr_groups = pmu->attr_groups,
/*
* This is a CPU PMU potentially in a heterogeneous
* configuration (e.g. big.LITTLE). This is not an uncore PMU,
@ -835,7 +891,8 @@ struct arm_pmu *armpmu_alloc(void)
* pmu::filter_match callback and pmu::event_init group
* validation).
*/
.capabilities = PERF_PMU_CAP_HETEROGENEOUS_CPUS,
.capabilities = PERF_PMU_CAP_HETEROGENEOUS_CPUS,
.events_across_hotplug = 1,
};
pmu->attr_groups[ARMPMU_ATTR_GROUP_COMMON] =
@ -878,6 +935,9 @@ int armpmu_register(struct arm_pmu *pmu)
if (!__oprofile_cpu_pmu)
__oprofile_cpu_pmu = pmu;
pmu->pmu_state = ARM_PMU_STATE_OFF;
pmu->percpu_irq = -1;
pr_info("enabled with %s PMU driver, %d counters available\n",
pmu->name, pmu->num_events);
@ -895,9 +955,9 @@ static int arm_pmu_hp_init(void)
ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_STARTING,
"perf/arm/pmu:starting",
arm_perf_starting_cpu,
arm_perf_teardown_cpu);
arm_perf_stopping_cpu);
if (ret)
pr_err("CPU hotplug notifier for ARM PMU could not be registered: %d\n",
pr_err("CPU hotplug ARM PMU STOPPING registering failed: %d\n",
ret);
return ret;
}

@ -90,6 +90,12 @@ enum armpmu_attr_groups {
ARMPMU_NR_ATTR_GROUPS
};
enum armpmu_pmu_states {
ARM_PMU_STATE_OFF,
ARM_PMU_STATE_RUNNING,
ARM_PMU_STATE_GOING_DOWN,
};
struct arm_pmu {
struct pmu pmu;
cpumask_t active_irqs;
@ -111,6 +117,8 @@ struct arm_pmu {
void (*reset)(void *);
int (*map_event)(struct perf_event *event);
int num_events;
int pmu_state;
int percpu_irq;
u64 max_period;
bool secure_access; /* 32-bit ARM only */
#define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40

@ -266,6 +266,8 @@ struct pmu {
atomic_t exclusive_cnt; /* < 0: cpu; > 0: tsk */
int task_ctx_nr;
int hrtimer_interval_ms;
u32 events_across_hotplug:1,
reserved:31;
/* number of address filters this PMU can do */
unsigned int nr_addr_filters;

@ -379,6 +379,7 @@ static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
static DEFINE_PER_CPU(int, perf_sched_cb_usages);
static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);
static DEFINE_PER_CPU(bool, is_idle);
static DEFINE_PER_CPU(bool, is_hotplugging);
static atomic_t nr_mmap_events __read_mostly;
static atomic_t nr_comm_events __read_mostly;
@ -3632,6 +3633,9 @@ static void __perf_event_read(void *info)
struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
struct pmu *pmu = event->pmu;
if (__this_cpu_read(is_hotplugging))
return;
/*
* If this is a task context, we need to check whether it is
* the current task context of this cpu. If not it has been
@ -3757,7 +3761,8 @@ static int perf_event_read(struct perf_event *event, bool group)
return 0;
if (cpu_isolated(event_cpu) ||
(event->attr.exclude_idle &&
per_cpu(is_idle, event_cpu)))
per_cpu(is_idle, event_cpu)) ||
per_cpu(is_hotplugging, event_cpu))
active_event_skip_read = true;
}
@ -3787,7 +3792,8 @@ static int perf_event_read(struct perf_event *event, bool group)
preempt_enable();
ret = data.ret;
} else if (event->state == PERF_EVENT_STATE_INACTIVE ||
active_event_skip_read) {
(active_event_skip_read &&
!per_cpu(is_hotplugging, event_cpu))) {
struct perf_event_context *ctx = event->ctx;
unsigned long flags;
@ -7923,6 +7929,7 @@ static struct pmu perf_swevent = {
.start = perf_swevent_start,
.stop = perf_swevent_stop,
.read = perf_swevent_read,
.events_across_hotplug = 1,
};
#ifdef CONFIG_EVENT_TRACING
@ -8072,6 +8079,7 @@ static struct pmu perf_tracepoint = {
.start = perf_swevent_start,
.stop = perf_swevent_stop,
.read = perf_swevent_read,
.events_across_hotplug = 1,
};
static inline void perf_tp_register(void)
@ -8816,6 +8824,7 @@ static struct pmu perf_cpu_clock = {
.start = cpu_clock_event_start,
.stop = cpu_clock_event_stop,
.read = cpu_clock_event_read,
.events_across_hotplug = 1,
};
/*
@ -8897,6 +8906,7 @@ static struct pmu perf_task_clock = {
.start = task_clock_event_start,
.stop = task_clock_event_stop,
.read = task_clock_event_read,
.events_across_hotplug = 1,
};
static void perf_pmu_nop_void(struct pmu *pmu)
@ -11098,6 +11108,8 @@ static void __init perf_event_init_all_cpus(void)
INIT_LIST_HEAD(&per_cpu(cgrp_cpuctx_list, cpu));
#endif
INIT_LIST_HEAD(&per_cpu(sched_cb_list, cpu));
per_cpu(is_hotplugging, cpu) = false;
per_cpu(is_idle, cpu) = false;
}
}
@ -11117,6 +11129,59 @@ void perf_swevent_init_cpu(unsigned int cpu)
}
#if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE
static void
check_hotplug_start_event(struct perf_event *event)
{
if (event->pmu->events_across_hotplug &&
event->attr.type == PERF_TYPE_SOFTWARE &&
event->pmu->start)
event->pmu->start(event, 0);
}
static int perf_event_start_swevents(unsigned int cpu)
{
struct perf_event_context *ctx;
struct pmu *pmu;
struct perf_event *event;
int idx;
idx = srcu_read_lock(&pmus_srcu);
list_for_each_entry_rcu(pmu, &pmus, entry) {
ctx = &per_cpu_ptr(pmu->pmu_cpu_context, cpu)->ctx;
mutex_lock(&ctx->mutex);
raw_spin_lock(&ctx->lock);
list_for_each_entry(event, &ctx->event_list, event_entry)
check_hotplug_start_event(event);
raw_spin_unlock(&ctx->lock);
mutex_unlock(&ctx->mutex);
}
srcu_read_unlock(&pmus_srcu, idx);
per_cpu(is_hotplugging, cpu) = false;
return 0;
}
/*
* If keeping events across hotplugging is supported, do not
* remove the event list so event lives beyond CPU hotplug.
* The context is exited via an fd close path when userspace
* is done and the target CPU is online. If software clock
* event is active, then stop hrtimer associated with it.
* Start the timer when the CPU comes back online.
*/
static void
check_hotplug_remove_from_context(struct perf_event *event,
struct perf_cpu_context *cpuctx,
struct perf_event_context *ctx)
{
if (event->pmu->events_across_hotplug &&
event->attr.type == PERF_TYPE_SOFTWARE &&
event->pmu->stop)
event->pmu->stop(event, PERF_EF_UPDATE);
else if (!event->pmu->events_across_hotplug)
__perf_remove_from_context(event, cpuctx,
ctx, (void *)DETACH_GROUP);
}
static void __perf_event_exit_context(void *__info)
{
struct perf_event_context *ctx = __info;
@ -11125,7 +11190,7 @@ static void __perf_event_exit_context(void *__info)
raw_spin_lock(&ctx->lock);
list_for_each_entry(event, &ctx->event_list, event_entry)
__perf_remove_from_context(event, cpuctx, ctx, (void *)DETACH_GROUP);
check_hotplug_remove_from_context(event, cpuctx, ctx);
raw_spin_unlock(&ctx->lock);
}
@ -11179,6 +11244,7 @@ int perf_event_init_cpu(unsigned int cpu)
int perf_event_exit_cpu(unsigned int cpu)
{
per_cpu(is_hotplugging, cpu) = true;
perf_event_exit_cpu_context(cpu);
return 0;
}
@ -11222,6 +11288,24 @@ static struct notifier_block perf_event_idle_nb = {
.notifier_call = event_idle_notif,
};
#ifdef CONFIG_HOTPLUG_CPU
static int perf_cpu_hp_init(void)
{
int ret;
ret = cpuhp_setup_state_nocalls(CPUHP_AP_PERF_ONLINE,
"PERF/CORE/CPUHP_AP_PERF_ONLINE",
perf_event_start_swevents,
perf_event_exit_cpu);
if (ret)
pr_err("CPU hotplug notifier for perf core could not be registered: %d\n",
ret);
return ret;
}
#else
static int perf_cpu_hp_init(void) { return 0; }
#endif
void __init perf_event_init(void)
{
@ -11238,6 +11322,8 @@ void __init perf_event_init(void)
perf_event_init_cpu(smp_processor_id());
idle_notifier_register(&perf_event_idle_nb);
register_reboot_notifier(&perf_reboot_notifier);
ret = perf_cpu_hp_init();
WARN(ret, "core perf_cpu_hp_init() failed with: %d", ret);
ret = init_hw_breakpoint();
WARN(ret, "hw_breakpoint initialization failed with: %d", ret);