qos: Replace cpumasks with atomic bitwise operations

Using cpumasks here is expensive and unneeded, especially since the qos
functions are hot code paths. Most of the cpumask functions use the bitmap
API, which is also more expensive than just doing some simple operations on
a word.

Since we're operating with a CPU count that can fit within a word,
replace the expensive cpumask operations with raw bitwise operations
wherever possible to make the pm_qos framework more efficient.
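
As a rough illustration of the approach (a standalone userspace sketch, not
kernel code: C11 atomics stand in for the kernel's atomic_t, and the names
and the small NR_CPUS value are assumptions for the example), a word-sized
atomic mask can replace the cpumask/bitmap calls with single bitwise
operations:

  #include <stdatomic.h>
  #include <stdio.h>

  #define NR_CPUS     8u                      /* assumed small CPU count */
  #define CPUMASK_ALL ((1u << NR_CPUS) - 1u)  /* every CPU bit set */

  static _Atomic unsigned int cpus_affine;    /* word-sized "cpumask" */

  /* Replaces cpumask_clear() plus a cpumask_set_cpu() loop. */
  static void set_affinity(unsigned int mask)
  {
          atomic_store(&cpus_affine, mask & CPUMASK_ALL);
  }

  /* Replaces cpumask_test_cpu(). */
  static int cpu_is_affine(unsigned int cpu)
  {
          return !!(atomic_load(&cpus_affine) & (1u << cpu));
  }

  int main(void)
  {
          set_affinity(0x0fu);                /* affine to CPUs 0-3 */
          printf("cpu 2 affine: %d\n", cpu_is_affine(2));
          printf("cpu 6 affine: %d\n", cpu_is_affine(6));
          return 0;
  }

The CPUMASK_ALL macro in the sketch mirrors the one this patch adds,
BIT(NR_CPUS) - 1, which relies on NR_CPUS fitting within a single word.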

Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
Signed-off-by: azrim <mirzaspc@gmail.com>
Sultan Alsawaf 2020-12-26 12:40:57 -08:00 committed by azrim
parent 6cbd8dba73
commit 7978c43d40
4 changed files with 26 additions and 37 deletions


@@ -304,7 +304,6 @@ static void _sde_encoder_pm_qos_add_request(struct drm_encoder *drm_enc,
struct pm_qos_request *req;
u32 cpu_mask;
u32 cpu_dma_latency;
int cpu;
if (!sde_kms->catalog || !sde_kms->catalog->perf.cpu_mask)
return;
@@ -314,11 +313,7 @@ static void _sde_encoder_pm_qos_add_request(struct drm_encoder *drm_enc,
req = &sde_enc->pm_qos_cpu_req;
req->type = PM_QOS_REQ_AFFINE_CORES;
cpumask_empty(&req->cpus_affine);
for_each_possible_cpu(cpu) {
if ((1 << cpu) & cpu_mask)
cpumask_set_cpu(cpu, &req->cpus_affine);
}
atomic_set(&req->cpus_affine, cpu_mask);
pm_qos_add_request(req, PM_QOS_CPU_DMA_LATENCY, cpu_dma_latency);
SDE_EVT32_VERBOSE(DRMID(drm_enc), cpu_mask, cpu_dma_latency);


@@ -4162,8 +4162,8 @@ void sdhci_msm_pm_qos_irq_init(struct sdhci_host *host)
(msm_host->pm_qos_irq.req.type != PM_QOS_REQ_ALL_CORES))
set_affine_irq(msm_host, host);
else
cpumask_copy(&msm_host->pm_qos_irq.req.cpus_affine,
cpumask_of(msm_host->pdata->pm_qos_data.irq_cpu));
atomic_set(&msm_host->pm_qos_irq.req.cpus_affine,
msm_host->pdata->pm_qos_data.irq_cpu);
sdhci_msm_pm_qos_wq_init(msm_host);
@@ -4217,8 +4217,8 @@ static ssize_t sdhci_msm_pm_qos_group_show(struct device *dev,
for (i = 0; i < nr_groups; i++) {
group = &msm_host->pm_qos[i];
offset += snprintf(&buf[offset], PAGE_SIZE,
"Group #%d (mask=0x%lx) PM QoS: enabled=%d, counter=%d, latency=%d\n",
i, group->req.cpus_affine.bits[0],
"Group #%d (mask=0x%d) PM QoS: enabled=%d, counter=%d, latency=%d\n",
i, atomic_read(&group->req.cpus_affine),
msm_host->pm_qos_group_enable,
atomic_read(&group->counter),
group->latency);
@@ -4377,15 +4377,15 @@ void sdhci_msm_pm_qos_cpu_init(struct sdhci_host *host,
sdhci_msm_pm_qos_cpu_unvote_work);
atomic_set(&group->counter, 0);
group->req.type = PM_QOS_REQ_AFFINE_CORES;
cpumask_copy(&group->req.cpus_affine,
&msm_host->pdata->pm_qos_data.cpu_group_map.mask[i]);
atomic_set(&group->req.cpus_affine,
*cpumask_bits(&msm_host->pdata->pm_qos_data.cpu_group_map.mask[i]));
/* We set default latency here for all pm_qos cpu groups. */
group->latency = PM_QOS_DEFAULT_VALUE;
pm_qos_add_request(&group->req, PM_QOS_CPU_DMA_LATENCY,
group->latency);
pr_info("%s (): voted for group #%d (mask=0x%lx) latency=%d\n",
pr_info("%s (): voted for group #%d (mask=0x%d) latency=%d\n",
__func__, i,
group->req.cpus_affine.bits[0],
atomic_read(&group->req.cpus_affine),
group->latency);
}
msm_host->pm_qos_prev_cpu = -1;


@@ -54,7 +54,7 @@ enum pm_qos_req_type {
struct pm_qos_request {
enum pm_qos_req_type type;
struct cpumask cpus_affine;
atomic_t cpus_affine;
#ifdef CONFIG_SMP
uint32_t irq;
/* Internal structure members */


@@ -51,6 +51,8 @@
#include <linux/export.h>
#include <trace/events/power.h>
#define CPUMASK_ALL (BIT(NR_CPUS) - 1)
/*
* locking rule: all changes to constraints or notifiers lists
* or pm_qos_object list and pm_qos_objects need to happen with pm_qos_lock
@@ -267,7 +269,7 @@ static const struct file_operations pm_qos_debug_fops = {
};
static inline int pm_qos_set_value_for_cpus(struct pm_qos_constraints *c,
struct cpumask *cpus)
unsigned long *cpus)
{
struct pm_qos_request *req = NULL;
int cpu;
@@ -282,7 +284,9 @@ static inline int pm_qos_set_value_for_cpus(struct pm_qos_constraints *c,
return -EINVAL;
plist_for_each_entry(req, &c->list, node) {
for_each_cpu(cpu, &req->cpus_affine) {
unsigned long affined_cpus = atomic_read(&req->cpus_affine);
for_each_cpu(cpu, to_cpumask(&affined_cpus)) {
switch (c->type) {
case PM_QOS_MIN:
if (qos_val[cpu] > req->node.prio)
@@ -303,7 +307,7 @@ static inline int pm_qos_set_value_for_cpus(struct pm_qos_constraints *c,
for_each_possible_cpu(cpu) {
if (c->target_per_cpu[cpu] != qos_val[cpu])
cpumask_set_cpu(cpu, cpus);
*cpus |= BIT(cpu);
c->target_per_cpu[cpu] = qos_val[cpu];
}
@@ -325,7 +329,7 @@ int pm_qos_update_target(struct pm_qos_constraints *c, struct plist_node *node,
enum pm_qos_req_action action, int value)
{
int prev_value, curr_value, new_value;
struct cpumask cpus;
unsigned long cpus = 0;
int ret;
spin_lock(&pm_qos_lock);
@@ -356,7 +360,6 @@ int pm_qos_update_target(struct pm_qos_constraints *c, struct plist_node *node,
}
curr_value = pm_qos_get_value(c);
cpumask_clear(&cpus);
pm_qos_set_value(c, curr_value);
ret = pm_qos_set_value_for_cpus(c, &cpus);
@@ -369,7 +372,7 @@ int pm_qos_update_target(struct pm_qos_constraints *c, struct plist_node *node,
* to update the new qos restriction for the cores
*/
if (!cpumask_empty(&cpus) ||
if (cpus ||
(ret && prev_value != curr_value)) {
ret = 1;
if (c->notifiers)
@@ -531,7 +534,6 @@ static void pm_qos_work_fn(struct work_struct *work)
#ifdef CONFIG_SMP
static void pm_qos_irq_release(struct kref *ref)
{
unsigned long flags;
struct irq_affinity_notify *notify = container_of(ref,
struct irq_affinity_notify, kref);
struct pm_qos_request *req = container_of(notify,
@@ -539,10 +541,7 @@ static void pm_qos_irq_release(struct kref *ref)
struct pm_qos_constraints *c =
pm_qos_array[req->pm_qos_class]->constraints;
spin_lock_irqsave(&pm_qos_lock, flags);
cpumask_setall(&req->cpus_affine);
spin_unlock_irqrestore(&pm_qos_lock, flags);
atomic_set(&req->cpus_affine, CPUMASK_ALL);
pm_qos_update_target(c, &req->node, PM_QOS_UPDATE_REQ,
c->default_value);
}
@@ -550,16 +549,12 @@ static void pm_qos_irq_release(struct kref *ref)
static void pm_qos_irq_notify(struct irq_affinity_notify *notify,
const cpumask_t *mask)
{
unsigned long flags;
struct pm_qos_request *req = container_of(notify,
struct pm_qos_request, irq_notify);
struct pm_qos_constraints *c =
pm_qos_array[req->pm_qos_class]->constraints;
spin_lock_irqsave(&pm_qos_lock, flags);
cpumask_copy(&req->cpus_affine, mask);
spin_unlock_irqrestore(&pm_qos_lock, flags);
atomic_set(&req->cpus_affine, *cpumask_bits(mask));
pm_qos_update_target(c, &req->node, PM_QOS_UPDATE_REQ, req->node.prio);
}
#endif
@@ -590,9 +585,8 @@ void pm_qos_add_request(struct pm_qos_request *req,
switch (req->type) {
case PM_QOS_REQ_AFFINE_CORES:
if (cpumask_empty(&req->cpus_affine)) {
if (!atomic_cmpxchg_relaxed(&req->cpus_affine, 0, CPUMASK_ALL)) {
req->type = PM_QOS_REQ_ALL_CORES;
cpumask_setall(&req->cpus_affine);
WARN(1, "Affine cores not set for request with affinity flag\n");
}
break;
@@ -608,14 +602,14 @@ void pm_qos_add_request(struct pm_qos_request *req,
mask = desc->irq_data.common->affinity;
/* Get the current affinity */
cpumask_copy(&req->cpus_affine, mask);
atomic_set(&req->cpus_affine, *cpumask_bits(mask));
req->irq_notify.irq = req->irq;
req->irq_notify.notify = pm_qos_irq_notify;
req->irq_notify.release = pm_qos_irq_release;
} else {
req->type = PM_QOS_REQ_ALL_CORES;
cpumask_setall(&req->cpus_affine);
atomic_set(&req->cpus_affine, CPUMASK_ALL);
WARN(1, "IRQ-%d not set for request with affinity flag\n",
req->irq);
}
@@ -625,7 +619,7 @@ void pm_qos_add_request(struct pm_qos_request *req,
WARN(1, "Unknown request type %d\n", req->type);
/* fall through */
case PM_QOS_REQ_ALL_CORES:
cpumask_setall(&req->cpus_affine);
atomic_set(&req->cpus_affine, CPUMASK_ALL);
break;
}
@@ -645,7 +639,7 @@ void pm_qos_add_request(struct pm_qos_request *req,
if (ret) {
WARN(1, "IRQ affinity notify set failed\n");
req->type = PM_QOS_REQ_ALL_CORES;
cpumask_setall(&req->cpus_affine);
atomic_set(&req->cpus_affine, CPUMASK_ALL);
pm_qos_update_target(
pm_qos_array[pm_qos_class]->constraints,
&req->node, PM_QOS_UPDATE_REQ, value);