Mirror of https://github.com/rd-stuffs/msm-4.14.git (synced 2025-02-20 11:45:48 +08:00)
sched: Add support to spread tasks
If sysctl_sched_prefer_spread is enabled, tasks are freely migrated to
idle CPUs within the same cluster to reduce the number of runnable
tasks per CPU. By default, the feature is disabled. It can be enabled
with:

echo 1 > /proc/sys/kernel/sched_prefer_spread
    Aggressively spread tasks within the little cluster.

echo 2 > /proc/sys/kernel/sched_prefer_spread
    Aggressively spread tasks within the little cluster as well as
    within the big cluster, but not between big and little.

Change-Id: I0a4d87bd17de3525548765472e6f388a9970f13c
Signed-off-by: Lingutla Chandrasekhar <clingutla@codeaurora.org>
[render: minor fixups]
Signed-off-by: Zachariah Kennedy <zkennedy87@gmail.com>
parent f8cd7097e7
commit 8cf155167d
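For reference, a minimal userspace sketch (not part of this patch) that sets the knob described in the commit message above; it simply writes to the new proc entry, equivalent to the echo commands shown there. It assumes a kernel built with this change and root privileges, otherwise the open or the flush on close fails.

/*
 * Hypothetical helper: write 0, 1 or 2 to /proc/sys/kernel/sched_prefer_spread.
 * 0 = disabled, 1 = spread within the little cluster,
 * 2 = also spread within the big cluster (never between big and little).
 */
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
	const char *val = (argc > 1) ? argv[1] : "1";
	FILE *f = fopen("/proc/sys/kernel/sched_prefer_spread", "w");

	if (!f) {
		perror("sched_prefer_spread");
		return EXIT_FAILURE;
	}
	fprintf(f, "%s\n", val);
	if (fclose(f)) {	/* the write is flushed on close; check it */
		perror("sched_prefer_spread");
		return EXIT_FAILURE;
	}
	return EXIT_SUCCESS;
}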
@@ -55,6 +55,7 @@ extern unsigned int sysctl_sched_coloc_busy_hyst_max_ms;
 extern unsigned int sysctl_sched_window_stats_policy;
 extern unsigned int sysctl_sched_ravg_window_nr_ticks;
 extern unsigned int sysctl_sched_dynamic_ravg_window_enable;
+extern unsigned int sysctl_sched_prefer_spread;
 
 extern int
 walt_proc_group_thresholds_handler(struct ctl_table *table, int write,
@@ -273,10 +273,11 @@ TRACE_EVENT(sched_load_balance,
 	TP_PROTO(int cpu, enum cpu_idle_type idle, int balance,
 		unsigned long group_mask, int busiest_nr_running,
 		unsigned long imbalance, unsigned int env_flags, int ld_moved,
-		unsigned int balance_interval, int active_balance),
+		unsigned int balance_interval, int active_balance, int prefer_spread),
 
 	TP_ARGS(cpu, idle, balance, group_mask, busiest_nr_running,
-		imbalance, env_flags, ld_moved, balance_interval, active_balance),
+		imbalance, env_flags, ld_moved, balance_interval, active_balance,
+		prefer_spread),
 
 	TP_STRUCT__entry(
 		__field(int, cpu)
@@ -289,6 +290,7 @@ TRACE_EVENT(sched_load_balance,
 		__field(int, ld_moved)
 		__field(unsigned int, balance_interval)
 		__field(int, active_balance)
+		__field(int, prefer_spread)
 	),
 
 	TP_fast_assign(
@@ -302,16 +304,18 @@ TRACE_EVENT(sched_load_balance,
 		__entry->ld_moved = ld_moved;
 		__entry->balance_interval = balance_interval;
 		__entry->active_balance = active_balance;
+		__entry->prefer_spread = prefer_spread;
 	),
 
-	TP_printk("cpu=%d state=%s balance=%d group=%#lx busy_nr=%d imbalance=%ld flags=%#x ld_moved=%d bal_int=%d active_balance=%d",
+	TP_printk("cpu=%d state=%s balance=%d group=%#lx busy_nr=%d imbalance=%ld flags=%#x ld_moved=%d bal_int=%d active_balance=%d prefer_spread=%d",
 		__entry->cpu,
 		__entry->idle == CPU_IDLE ? "idle" :
 		(__entry->idle == CPU_NEWLY_IDLE ? "newly_idle" : "busy"),
 		__entry->balance,
 		__entry->group_mask, __entry->busiest_nr_running,
 		__entry->imbalance, __entry->env_flags, __entry->ld_moved,
-		__entry->balance_interval, __entry->active_balance)
+		__entry->balance_interval, __entry->active_balance,
+		__entry->prefer_spread)
 	);
 
 TRACE_EVENT(sched_load_balance_nohz_kick,
@@ -1464,7 +1468,7 @@ TRACE_EVENT(sched_energy_diff,
 TRACE_EVENT(sched_task_util,
 
 	TP_PROTO(struct task_struct *p, int next_cpu, int backup_cpu,
-		 int target_cpu, bool sync, bool need_idle, int fastpath,
+		 int target_cpu, bool sync, int need_idle, int fastpath,
 		 bool placement_boost, u64 start_t,
 		 bool stune_boosted, bool is_rtg, bool rtg_skip_min,
 		 int start_cpu),
@@ -1482,7 +1486,7 @@ TRACE_EVENT(sched_task_util,
 		__field(int, backup_cpu)
 		__field(int, target_cpu)
 		__field(bool, sync)
-		__field(bool, need_idle)
+		__field(int, need_idle)
 		__field(int, fastpath)
 		__field(int, placement_boost)
 		__field(int, rtg_cpu)
@@ -191,6 +191,7 @@ unsigned int sysctl_sched_min_task_util_for_boost = 51;
 /* 0.68ms default for 20ms window size scaled to 1024 */
 unsigned int sysctl_sched_min_task_util_for_colocation = 35;
 unsigned int sched_task_filter_util = 35;
+__read_mostly unsigned int sysctl_sched_prefer_spread;
 #endif
 static unsigned int __maybe_unused sched_small_task_threshold = 102;
 
@@ -7378,6 +7379,17 @@ struct find_best_target_env {
 	int start_cpu;
 };
 
+static inline bool prefer_spread_on_idle(int cpu)
+{
+	if (likely(!sysctl_sched_prefer_spread))
+		return false;
+
+	if (is_min_capacity_cpu(cpu))
+		return sysctl_sched_prefer_spread >= 1;
+
+	return sysctl_sched_prefer_spread > 1;
+}
+
 static inline void adjust_cpus_for_packing(struct task_struct *p,
 			int *target_cpu, int *best_idle_cpu,
 			int shallowest_idle_cstate,
@@ -7389,8 +7401,11 @@ static inline void adjust_cpus_for_packing(struct task_struct *p,
 	if (*best_idle_cpu == -1 || *target_cpu == -1)
 		return;
 
-	if (task_placement_boost_enabled(p) || fbt_env->need_idle || boosted ||
-		shallowest_idle_cstate <= 0) {
+	if (prefer_spread_on_idle(*best_idle_cpu))
+		fbt_env->need_idle |= 2;
+
+	if (fbt_env->need_idle || task_placement_boost_enabled(p) || boosted ||
+		shallowest_idle_cstate <= 0) {
 		*target_cpu = -1;
 		return;
 	}
@@ -8127,7 +8142,7 @@ static int find_energy_efficient_cpu(struct sched_domain *sd,
 	curr_is_rtg = task_in_related_thread_group(cpu_rq(cpu)->curr);
 
 	fbt_env.fastpath = 0;
-	fbt_env.need_idle = 0;
+	fbt_env.need_idle = need_idle;
 
 	if (trace_sched_task_util_enabled())
 		start_t = sched_clock();
@@ -8201,7 +8216,6 @@ static int find_energy_efficient_cpu(struct sched_domain *sd,
 
 	fbt_env.is_rtg = is_rtg;
 	fbt_env.placement_boost = placement_boost;
-	fbt_env.need_idle = need_idle;
 	fbt_env.start_cpu = start_cpu;
 	fbt_env.boosted = boosted;
 	fbt_env.skip_cpu = is_many_wakeup(sibling_count_hint) ?
@@ -8221,7 +8235,7 @@ static int find_energy_efficient_cpu(struct sched_domain *sd,
 	if (p->state == TASK_WAKING)
 		delta = task_util(p);
 #endif
-	if (task_placement_boost_enabled(p) || need_idle || boosted ||
+	if (task_placement_boost_enabled(p) || fbt_env.need_idle || boosted ||
 	    is_rtg || __cpu_overutilized(prev_cpu, delta) ||
 	    !task_fits_max(p, prev_cpu) || cpu_isolated(prev_cpu))
 		goto out;
@@ -8255,7 +8269,7 @@ out:
 		target_cpu = prev_cpu;
 
 	trace_sched_task_util(p, next_cpu, backup_cpu, target_cpu, sync,
-			need_idle, fbt_env.fastpath, placement_boost,
+			fbt_env.need_idle, fbt_env.fastpath, placement_boost,
 			start_t, boosted, is_rtg, get_rtg_status(p),
 			start_cpu);
 	return target_cpu;
@@ -9019,6 +9033,7 @@ struct lb_env {
 	unsigned int loop;
 	unsigned int loop_break;
 	unsigned int loop_max;
+	bool prefer_spread;
 
 	enum fbq_type fbq_type;
 	enum group_type src_grp_type;
@@ -9171,7 +9186,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
 		env->flags &= ~LBF_ALL_PINNED;
 
 		if (energy_aware() && !sd_overutilized(env->sd) &&
-		    env->idle == CPU_NEWLY_IDLE &&
+		    env->idle == CPU_NEWLY_IDLE && !env->prefer_spread &&
 		    !task_in_related_thread_group(p)) {
 			long util_cum_dst, util_cum_src;
 			unsigned long demand;
@@ -9349,8 +9364,13 @@ redo:
 		 * So only when there is other tasks can be balanced or
 		 * there is situation to ignore big task, it is needed
 		 * to skip the task load bigger than 2*imbalance.
+		 *
+		 * And load based checks are skipped for prefer_spread in
+		 * finding busiest group, ignore the task's h_load.
 		 */
-		if (((cpu_rq(env->src_cpu)->nr_running > 2) ||
+		if (!env->prefer_spread &&
+		    ((cpu_rq(env->src_cpu)->nr_running > 2) ||
 		     (env->flags & LBF_IGNORE_BIG_TASKS)) &&
 		    ((load / 2) > env->imbalance))
 			goto next;
@@ -10076,6 +10096,11 @@ static bool update_sd_pick_busiest(struct lb_env *env,
 	if (sgs->group_type < busiest->group_type)
 		return false;
 
+	if (env->prefer_spread && env->idle != CPU_NOT_IDLE &&
+	    (sgs->sum_nr_running > busiest->sum_nr_running) &&
+	    (sgs->group_util > busiest->group_util))
+		return true;
+
 	if (sgs->avg_load <= busiest->avg_load)
 		return false;
 
@@ -10109,6 +10134,11 @@ static bool update_sd_pick_busiest(struct lb_env *env,
 		return false;
 
 asym_packing:
+
+	if (env->prefer_spread &&
+	    (sgs->sum_nr_running < busiest->sum_nr_running))
+		return false;
+
 	/* This is the busiest node in its class. */
 	if (!(env->sd->flags & SD_ASYM_PACKING))
 		return true;
@@ -10591,6 +10621,15 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
 
 		return fix_small_imbalance(env, sds);
 	}
+
+	/*
+	 * If we couldn't find any imbalance, then boost the imbalance
+	 * with the group util.
+	 */
+	if (env->prefer_spread && !env->imbalance &&
+	    env->idle != CPU_NOT_IDLE &&
+	    busiest->sum_nr_running > busiest->group_weight)
+		env->imbalance = busiest->group_util;
 }
 
 /******* find_busiest_group() helpers end here *********************/
@@ -10971,6 +11010,11 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 		.loop = 0,
 	};
 
+	env.prefer_spread = (prefer_spread_on_idle(this_cpu) &&
+			     !((sd->flags & SD_ASYM_CPUCAPACITY) &&
+			       !cpumask_test_cpu(this_cpu,
+						 &asym_cap_sibling_cpus)));
+
 	cpumask_and(cpus, sched_domain_span(sd), cpu_active_mask);
 
 	schedstat_inc(sd->lb_count[idle]);
@@ -11255,7 +11299,8 @@ out:
 				 group ? group->cpumask[0] : 0,
 				 busiest ? busiest->nr_running : 0,
 				 env.imbalance, env.flags, ld_moved,
-				 sd->balance_interval, active_balance);
+				 sd->balance_interval, active_balance,
+				 env.prefer_spread);
 	return ld_moved;
 }
 
@@ -11332,6 +11377,10 @@ static int idle_balance(struct rq *this_rq, struct rq_flags *rf)
 	int pulled_task = 0;
 	u64 curr_cost = 0;
 	u64 avg_idle = this_rq->avg_idle;
+	bool prefer_spread = prefer_spread_on_idle(this_cpu);
+	bool force_lb = (!is_min_capacity_cpu(this_cpu) &&
+			 silver_has_big_tasks() &&
+			 (atomic_read(&this_rq->nr_iowait) == 0));
 
 	if (cpu_isolated(this_cpu))
 		return 0;
@@ -11348,8 +11397,7 @@ static int idle_balance(struct rq *this_rq, struct rq_flags *rf)
 	if (!cpu_active(this_cpu))
 		return 0;
 
-	if (!is_min_capacity_cpu(this_cpu) && silver_has_big_tasks()
-	    && (atomic_read(&this_rq->nr_iowait) == 0))
+	if (force_lb || prefer_spread)
 		avg_idle = ULLONG_MAX;
 
 	/*
@@ -11386,6 +11434,11 @@ static int idle_balance(struct rq *this_rq, struct rq_flags *rf)
 			continue;
 		}
 
+		if (prefer_spread && !force_lb &&
+		    (sd->flags & SD_ASYM_CPUCAPACITY) &&
+		    !(cpumask_test_cpu(this_cpu, &asym_cap_sibling_cpus)))
+			avg_idle = this_rq->avg_idle;
+
 		if (avg_idle < curr_cost + sd->max_newidle_lb_cost) {
 			update_next_balance(sd, &next_balance);
 			break;
@@ -11792,7 +11845,7 @@ static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle)
 		}
 		max_cost += sd->max_newidle_lb_cost;
 
-		if (energy_aware() && !sd_overutilized(sd))
+		if (energy_aware() && !sd_overutilized(sd) && !prefer_spread_on_idle(cpu))
 			continue;
 
 		if (!(sd->flags & SD_LOAD_BALANCE)) {
@@ -12020,7 +12073,8 @@ static inline bool nohz_kick_needed(struct rq *rq, bool only_update)
 	 * at least 2 tasks and cpu is overutilized
 	 */
 	if (rq->nr_running >= 2 &&
-	    (!energy_aware() || cpu_overutilized(cpu)))
+	    (!energy_aware() || (cpu_overutilized(cpu) ||
+				 prefer_spread_on_idle(cpu))))
 		return true;
 
 	if (energy_aware())
@@ -542,6 +542,15 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= sched_updown_migrate_handler,
 	},
+	{
+		.procname	= "sched_prefer_spread",
+		.data		= &sysctl_sched_prefer_spread,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &two,
+	},
 #endif
 #ifdef CONFIG_SCHED_DEBUG
 	{