mirror of
https://github.com/rd-stuffs/msm-4.14.git
synced 2025-02-20 11:45:48 +08:00
sched/cass: Fix suboptimal task placement when uclamp is used
Uclamp is designed to specify a process' CPU performance requirement scaled as a CPU capacity value. It simply denotes the process' requirement for the CPU's raw performance and thus P-state. CASS currently treats uclamp as a CPU load value, however, producing wildly suboptimal CPU placement decisions for tasks which use uclamp. This hurts performance and, even worse, massively hurts energy efficiency, with CASS sometimes yielding power consumption that is a few times higher than EAS. Since uclamp inherently throws a wrench into CASS's goal of keeping relative P-states as low as possible across all CPUs, making it cooperate with CASS requires a multipronged approach. Make the following three changes to fix the uclamp task placement issue: 1. Treat uclamp as a CPU performance value rather than a CPU load value. 2. Clamp a CPU's utilization to the task's uclamp floor in order to keep relative P-states as low as possible across all CPUs. 3. Consider preferring a non-idle CPU for uclamped tasks to avoid pushing up the P-state of more than one CPU when there are multiple concurrent uclamped tasks. This fixes CASS's massive energy efficiency and performance issues when uclamp is used. Change-Id: Ib274ceecfbbe9c2eeb1738f97029e1f4cbc68ec0 Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com> Signed-off-by: Richard Raya <rdxzv.dev@gmail.com>
This commit is contained in:
parent
226fafc0b1
commit
6a8fed2d40
@ -29,6 +29,7 @@ struct cass_cpu_cand {
|
||||
int cpu;
|
||||
unsigned int exit_lat;
|
||||
unsigned long cap;
|
||||
unsigned long cap_max;
|
||||
unsigned long util;
|
||||
};
|
||||
|
||||
@ -50,17 +51,14 @@ void cass_cpu_util(struct cass_cpu_cand *c, bool sync)
|
||||
}
|
||||
}
|
||||
|
||||
/* Get the capacity of this CPU adjusted for thermal pressure */
|
||||
c->cap = arch_scale_cpu_capacity(c->cpu) - thermal_load_avg(rq);
|
||||
|
||||
/*
|
||||
* Account for lost capacity due to time spent in RT/DL tasks and IRQs.
|
||||
* Capacity is considered lost to RT tasks even when @p is an RT task in
|
||||
* order to produce consistently balanced task placement results between
|
||||
* CFS and RT tasks when CASS selects a CPU for them.
|
||||
*/
|
||||
c->cap -= min(cpu_util_rt(rq) + cpu_util_dl(rq) + cpu_util_irq(rq),
|
||||
c->cap - 1);
|
||||
c->cap = c->cap_max - min(cpu_util_rt(rq) + cpu_util_dl(rq) +
|
||||
cpu_util_irq(rq), c->cap_max - 1);
|
||||
|
||||
/*
|
||||
* Deduct @current's util from this CPU if this is a sync wake, unless
|
||||
@ -84,6 +82,10 @@ bool cass_cpu_better(const struct cass_cpu_cand *a,
|
||||
if (cass_cmp(b->util, a->util))
|
||||
goto done;
|
||||
|
||||
/* Prefer the CPU that is idle (only relevant for uclamped tasks) */
|
||||
if (cass_cmp(!!a->exit_lat, !!b->exit_lat))
|
||||
goto done;
|
||||
|
||||
/* Prefer the current CPU for sync wakes */
|
||||
if (sync && (cass_eq(a->cpu, smp_processor_id()) ||
|
||||
!cass_cmp(b->cpu, smp_processor_id())))
|
||||
@ -116,17 +118,16 @@ static int cass_best_cpu(struct task_struct *p, int prev_cpu, bool sync, bool rt
|
||||
{
|
||||
/* Initialize @best such that @best always has a valid CPU at the end */
|
||||
struct cass_cpu_cand cands[2], *best = cands;
|
||||
unsigned long p_util, uc_min;
|
||||
bool has_idle = false;
|
||||
unsigned long p_util;
|
||||
int cidx = 0, cpu;
|
||||
|
||||
/*
|
||||
* Get the utilization for this task. Note that RT tasks don't have
|
||||
* per-entity load tracking.
|
||||
* Get the utilization and uclamp minimum threshold for this task. Note
|
||||
* that RT tasks don't have per-entity load tracking.
|
||||
*/
|
||||
p_util = clamp(rt ? 0 : task_util_est(p),
|
||||
uclamp_eff_value(p, UCLAMP_MIN),
|
||||
uclamp_eff_value(p, UCLAMP_MAX));
|
||||
p_util = rt ? 0 : task_util_est(p);
|
||||
uc_min = uclamp_eff_value(p, UCLAMP_MIN);
|
||||
|
||||
/*
|
||||
* Find the best CPU to wake @p on. Although idle_get_state() requires
|
||||
@ -143,6 +144,15 @@ static int cass_best_cpu(struct task_struct *p, int prev_cpu, bool sync, bool rt
|
||||
/* Use the free candidate slot for @curr */
|
||||
struct cass_cpu_cand *curr = &cands[cidx];
|
||||
struct cpuidle_state *idle_state;
|
||||
struct rq *rq = cpu_rq(cpu);
|
||||
|
||||
/* Get the capacity of this CPU adjusted for thermal pressure */
|
||||
curr->cap_max = arch_scale_cpu_capacity(cpu) -
|
||||
thermal_load_avg(rq);
|
||||
|
||||
/* Prefer the CPU that meets the uclamp minimum requirement */
|
||||
if (curr->cap_max < uc_min && best->cap_max >= uc_min)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* Check if this CPU is idle or only has SCHED_IDLE tasks. For
|
||||
@ -150,16 +160,22 @@ static int cass_best_cpu(struct task_struct *p, int prev_cpu, bool sync, bool rt
|
||||
*/
|
||||
if ((sync && cpu == smp_processor_id()) ||
|
||||
idle_cpu(cpu) || sched_idle_cpu(cpu)) {
|
||||
/* Discard any previous non-idle candidate */
|
||||
if (!has_idle)
|
||||
best = curr;
|
||||
has_idle = true;
|
||||
/*
|
||||
* A non-idle candidate may be better when @p is uclamp
|
||||
* boosted. Otherwise, always prefer idle candidates.
|
||||
*/
|
||||
if (!uc_min) {
|
||||
/* Discard any previous non-idle candidate */
|
||||
if (!has_idle)
|
||||
best = curr;
|
||||
has_idle = true;
|
||||
}
|
||||
|
||||
/* Nonzero exit latency indicates this CPU is idle */
|
||||
curr->exit_lat = 1;
|
||||
|
||||
/* Add on the actual idle exit latency, if any */
|
||||
idle_state = idle_get_state(cpu_rq(cpu));
|
||||
idle_state = idle_get_state(rq);
|
||||
if (idle_state)
|
||||
curr->exit_lat += idle_state->exit_latency;
|
||||
} else {
|
||||
@ -183,6 +199,10 @@ static int cass_best_cpu(struct task_struct *p, int prev_cpu, bool sync, bool rt
|
||||
if (cpu != task_cpu(p))
|
||||
curr->util += p_util;
|
||||
|
||||
/* Clamp the utilization to the minimum performance threshold */
|
||||
if (curr->util < uc_min)
|
||||
curr->util = uc_min;
|
||||
|
||||
/* Calculate the relative utilization for this CPU candidate */
|
||||
curr->util = curr->util * SCHED_CAPACITY_SCALE / curr->cap;
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user