qos: Replace cpumasks with atomic bitwise operations

Using cpumasks here is expensive and unneeded, especially since the qos
functions are hot code paths. Most of the cpumask functions use the bitmap
API, which is also more expensive than just doing some simple operations on
a word.

Since we're operating with a CPU count that can fit within a word,
replace the expensive cpumask operations with raw bitwise operations
wherever possible to make the pm_qos framework more efficient.
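
As a rough illustration of the approach (a standalone userspace sketch, not
kernel code: C11 atomics stand in for the kernel's atomic_t, and the names
and the small NR_CPUS value are assumptions for the example), a word-sized
atomic mask can replace the cpumask/bitmap calls with single bitwise
operations:

  #include <stdatomic.h>
  #include <stdio.h>

  #define NR_CPUS     8u                      /* assumed small CPU count */
  #define CPUMASK_ALL ((1u << NR_CPUS) - 1u)  /* every CPU bit set */

  static _Atomic unsigned int cpus_affine;    /* word-sized "cpumask" */

  /* Replaces cpumask_clear() plus a cpumask_set_cpu() loop. */
  static void set_affinity(unsigned int mask)
  {
          atomic_store(&cpus_affine, mask & CPUMASK_ALL);
  }

  /* Replaces cpumask_test_cpu(). */
  static int cpu_is_affine(unsigned int cpu)
  {
          return !!(atomic_load(&cpus_affine) & (1u << cpu));
  }

  int main(void)
  {
          set_affinity(0x0fu);                /* affine to CPUs 0-3 */
          printf("cpu 2 affine: %d\n", cpu_is_affine(2));
          printf("cpu 6 affine: %d\n", cpu_is_affine(6));
          return 0;
  }

The CPUMASK_ALL macro in the sketch mirrors the one this patch adds,
BIT(NR_CPUS) - 1, which relies on NR_CPUS fitting within a single word.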

Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
Signed-off-by: azrim <mirzaspc@gmail.com>
Sultan Alsawaf 2020-12-26 12:40:57 -08:00 committed by azrim
parent 6cbd8dba73
commit 7978c43d40
4 changed files with 26 additions and 37 deletions


@@ -304,7 +304,6 @@ static void _sde_encoder_pm_qos_add_request(struct drm_encoder *drm_enc,
struct pm_qos_request *req;
u32 cpu_mask;
u32 cpu_dma_latency;
int cpu;
if (!sde_kms->catalog || !sde_kms->catalog->perf.cpu_mask)
return;
@@ -314,11 +313,7 @@ static void _sde_encoder_pm_qos_add_request(struct drm_encoder *drm_enc,
req = &sde_enc->pm_qos_cpu_req;
req->type = PM_QOS_REQ_AFFINE_CORES;
cpumask_empty(&req->cpus_affine);
for_each_possible_cpu(cpu) {
if ((1 << cpu) & cpu_mask)
cpumask_set_cpu(cpu, &req->cpus_affine);
}
atomic_set(&req->cpus_affine, cpu_mask);
pm_qos_add_request(req, PM_QOS_CPU_DMA_LATENCY, cpu_dma_latency);
SDE_EVT32_VERBOSE(DRMID(drm_enc), cpu_mask, cpu_dma_latency);


@@ -4162,8 +4162,8 @@ void sdhci_msm_pm_qos_irq_init(struct sdhci_host *host)
(msm_host->pm_qos_irq.req.type != PM_QOS_REQ_ALL_CORES))
set_affine_irq(msm_host, host);
else
cpumask_copy(&msm_host->pm_qos_irq.req.cpus_affine,
cpumask_of(msm_host->pdata->pm_qos_data.irq_cpu));
atomic_set(&msm_host->pm_qos_irq.req.cpus_affine,
msm_host->pdata->pm_qos_data.irq_cpu);
sdhci_msm_pm_qos_wq_init(msm_host);
@@ -4217,8 +4217,8 @@ static ssize_t sdhci_msm_pm_qos_group_show(struct device *dev,
for (i = 0; i < nr_groups; i++) {
group = &msm_host->pm_qos[i];
offset += snprintf(&buf[offset], PAGE_SIZE,
"Group #%d (mask=0x%lx) PM QoS: enabled=%d, counter=%d, latency=%d\n",
i, group->req.cpus_affine.bits[0],
"Group #%d (mask=0x%d) PM QoS: enabled=%d, counter=%d, latency=%d\n",
i, atomic_read(&group->req.cpus_affine),
msm_host->pm_qos_group_enable,
atomic_read(&group->counter),
group->latency);
@@ -4377,15 +4377,15 @@ void sdhci_msm_pm_qos_cpu_init(struct sdhci_host *host,
sdhci_msm_pm_qos_cpu_unvote_work);
atomic_set(&group->counter, 0);
group->req.type = PM_QOS_REQ_AFFINE_CORES;
cpumask_copy(&group->req.cpus_affine,
&msm_host->pdata->pm_qos_data.cpu_group_map.mask[i]);
atomic_set(&group->req.cpus_affine,
*cpumask_bits(&msm_host->pdata->pm_qos_data.cpu_group_map.mask[i]));
/* We set default latency here for all pm_qos cpu groups. */
group->latency = PM_QOS_DEFAULT_VALUE;
pm_qos_add_request(&group->req, PM_QOS_CPU_DMA_LATENCY,
group->latency);
pr_info("%s (): voted for group #%d (mask=0x%lx) latency=%d\n",
pr_info("%s (): voted for group #%d (mask=0x%d) latency=%d\n",
__func__, i,
group->req.cpus_affine.bits[0],
atomic_read(&group->req.cpus_affine),
group->latency);
}
msm_host->pm_qos_prev_cpu = -1;


@@ -54,7 +54,7 @@ enum pm_qos_req_type {
struct pm_qos_request {
enum pm_qos_req_type type;
struct cpumask cpus_affine;
atomic_t cpus_affine;
#ifdef CONFIG_SMP
uint32_t irq;
/* Internal structure members */


@@ -51,6 +51,8 @@
#include <linux/export.h>
#include <trace/events/power.h>
#define CPUMASK_ALL (BIT(NR_CPUS) - 1)
/*
* locking rule: all changes to constraints or notifiers lists
* or pm_qos_object list and pm_qos_objects need to happen with pm_qos_lock
@@ -267,7 +269,7 @@ static const struct file_operations pm_qos_debug_fops = {
};
static inline int pm_qos_set_value_for_cpus(struct pm_qos_constraints *c,
struct cpumask *cpus)
unsigned long *cpus)
{
struct pm_qos_request *req = NULL;
int cpu;
@@ -282,7 +284,9 @@ static inline int pm_qos_set_value_for_cpus(struct pm_qos_constraints *c,
return -EINVAL;
plist_for_each_entry(req, &c->list, node) {
for_each_cpu(cpu, &req->cpus_affine) {
unsigned long affined_cpus = atomic_read(&req->cpus_affine);
for_each_cpu(cpu, to_cpumask(&affined_cpus)) {
switch (c->type) {
case PM_QOS_MIN:
if (qos_val[cpu] > req->node.prio)
@@ -303,7 +307,7 @@ static inline int pm_qos_set_value_for_cpus(struct pm_qos_constraints *c,
for_each_possible_cpu(cpu) {
if (c->target_per_cpu[cpu] != qos_val[cpu])
cpumask_set_cpu(cpu, cpus);
*cpus |= BIT(cpu);
c->target_per_cpu[cpu] = qos_val[cpu];
}
@@ -325,7 +329,7 @@ int pm_qos_update_target(struct pm_qos_constraints *c, struct plist_node *node,
enum pm_qos_req_action action, int value)
{
int prev_value, curr_value, new_value;
struct cpumask cpus;
unsigned long cpus = 0;
int ret;
spin_lock(&pm_qos_lock);
@@ -356,7 +360,6 @@ int pm_qos_update_target(struct pm_qos_constraints *c, struct plist_node *node,
}
curr_value = pm_qos_get_value(c);
cpumask_clear(&cpus);
pm_qos_set_value(c, curr_value);
ret = pm_qos_set_value_for_cpus(c, &cpus);
@@ -369,7 +372,7 @@ int pm_qos_update_target(struct pm_qos_constraints *c, struct plist_node *node,
* to update the new qos restriction for the cores
*/
if (!cpumask_empty(&cpus) ||
if (cpus ||
(ret && prev_value != curr_value)) {
ret = 1;
if (c->notifiers)
@@ -531,7 +534,6 @@ static void pm_qos_work_fn(struct work_struct *work)
#ifdef CONFIG_SMP
static void pm_qos_irq_release(struct kref *ref)
{
unsigned long flags;
struct irq_affinity_notify *notify = container_of(ref,
struct irq_affinity_notify, kref);
struct pm_qos_request *req = container_of(notify,
@@ -539,10 +541,7 @@ static void pm_qos_irq_release(struct kref *ref)
struct pm_qos_constraints *c =
pm_qos_array[req->pm_qos_class]->constraints;
spin_lock_irqsave(&pm_qos_lock, flags);
cpumask_setall(&req->cpus_affine);
spin_unlock_irqrestore(&pm_qos_lock, flags);
atomic_set(&req->cpus_affine, CPUMASK_ALL);
pm_qos_update_target(c, &req->node, PM_QOS_UPDATE_REQ,
c->default_value);
}
@@ -550,16 +549,12 @@ static void pm_qos_irq_release(struct kref *ref)
static void pm_qos_irq_notify(struct irq_affinity_notify *notify,
const cpumask_t *mask)
{
unsigned long flags;
struct pm_qos_request *req = container_of(notify,
struct pm_qos_request, irq_notify);
struct pm_qos_constraints *c =
pm_qos_array[req->pm_qos_class]->constraints;
spin_lock_irqsave(&pm_qos_lock, flags);
cpumask_copy(&req->cpus_affine, mask);
spin_unlock_irqrestore(&pm_qos_lock, flags);
atomic_set(&req->cpus_affine, *cpumask_bits(mask));
pm_qos_update_target(c, &req->node, PM_QOS_UPDATE_REQ, req->node.prio);
}
#endif
@@ -590,9 +585,8 @@ void pm_qos_add_request(struct pm_qos_request *req,
switch (req->type) {
case PM_QOS_REQ_AFFINE_CORES:
if (cpumask_empty(&req->cpus_affine)) {
if (!atomic_cmpxchg_relaxed(&req->cpus_affine, 0, CPUMASK_ALL)) {
req->type = PM_QOS_REQ_ALL_CORES;
cpumask_setall(&req->cpus_affine);
WARN(1, "Affine cores not set for request with affinity flag\n");
}
break;
@@ -608,14 +602,14 @@ void pm_qos_add_request(struct pm_qos_request *req,
mask = desc->irq_data.common->affinity;
/* Get the current affinity */
cpumask_copy(&req->cpus_affine, mask);
atomic_set(&req->cpus_affine, *cpumask_bits(mask));
req->irq_notify.irq = req->irq;
req->irq_notify.notify = pm_qos_irq_notify;
req->irq_notify.release = pm_qos_irq_release;
} else {
req->type = PM_QOS_REQ_ALL_CORES;
cpumask_setall(&req->cpus_affine);
atomic_set(&req->cpus_affine, CPUMASK_ALL);
WARN(1, "IRQ-%d not set for request with affinity flag\n",
req->irq);
}
@@ -625,7 +619,7 @@ void pm_qos_add_request(struct pm_qos_request *req,
WARN(1, "Unknown request type %d\n", req->type);
/* fall through */
case PM_QOS_REQ_ALL_CORES:
cpumask_setall(&req->cpus_affine);
atomic_set(&req->cpus_affine, CPUMASK_ALL);
break;
}
@@ -645,7 +639,7 @@ void pm_qos_add_request(struct pm_qos_request *req,
if (ret) {
WARN(1, "IRQ affinity notify set failed\n");
req->type = PM_QOS_REQ_ALL_CORES;
cpumask_setall(&req->cpus_affine);
atomic_set(&req->cpus_affine, CPUMASK_ALL);
pm_qos_update_target(
pm_qos_array[pm_qos_class]->constraints,
&req->node, PM_QOS_UPDATE_REQ, value);