Revert "sched: Remove sched_ktime_clock()"

This reverts commit 24c18127e9ba ("sched: Remove sched_ktime_clock()").

WALT accounting uses ktime_get() as its time source to keep windows
aligned with the tick. The ktime_get() API must not be called while the
timekeeping subsystem is suspended during system suspend. Before the
reverted patch, the code had a wrapper around ktime_get() that avoided
calling it while the timekeeping subsystem was suspended.

The reverted patch removed this wrapper on the assumption that there
would be no scheduler activity while the timekeeping subsystem is
suspended. The timekeeping subsystem is indeed resumed very early, even
before non-boot CPUs are brought online. However, tasks can still be
woken up from the idle notifiers, which are called before the
timekeeping subsystem is resumed.

When this happens, the time read from ktime_get() is not consistent: it
jumps relative to the values that will be returned once the timekeeping
subsystem is resumed. The rq->window_start update then happens with an
incorrect time, leaving that rq->window_start inconsistent with the other
CPUs' rq->window_start and with the wallclock time after the timekeeping
subsystem is resumed. This results in WALT accounting bugs.

Change-Id: I9c3b2fb9ffbf1103d1bd78778882450560dac09f
Signed-off-by: Pavankumar Kondeti <pkondeti@codeaurora.org>
[clingutla@codeaurora.org: Resolved trivial merge conflicts.]
Signed-off-by: Lingutla Chandrasekhar <clingutla@codeaurora.org>
6 changed files with 65 additions and 22 deletions


@@ -785,7 +785,7 @@ static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
 	p->sched_class->dequeue_task(rq, p, flags);
 #ifdef CONFIG_SCHED_WALT
 	if (p == rq->ed_task)
-		early_detection_notify(rq, ktime_get_ns());
+		early_detection_notify(rq, sched_ktime_clock());
 #endif
 	trace_sched_enq_deq_task(p, 0, cpumask_bits(&p->cpus_allowed)[0]);
 }
@@ -2041,7 +2041,7 @@ static inline void walt_try_to_wake_up(struct task_struct *p)
 	rq_lock_irqsave(rq, &rf);
 	old_load = task_load(p);
-	wallclock = ktime_get_ns();
+	wallclock = sched_ktime_clock();
 	update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
 	update_task_ravg(p, rq, TASK_WAKE, wallclock, 0);
 	rq_unlock_irqrestore(rq, &rf);
@@ -2175,7 +2175,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags,
 		set_task_cpu(p, cpu);
 	}
-	wallclock = ktime_get_ns();
+	wallclock = sched_ktime_clock();
 	note_task_waking(p, wallclock);
 #else /* CONFIG_SMP */
@@ -2240,7 +2240,7 @@ static void try_to_wake_up_local(struct task_struct *p, struct rq_flags *rf)
 	trace_sched_waking(p);
 	if (!task_on_rq_queued(p)) {
-		u64 wallclock = ktime_get_ns();
+		u64 wallclock = sched_ktime_clock();
 		update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
 		update_task_ravg(p, rq, TASK_WAKE, wallclock, 0);
@@ -3163,7 +3163,7 @@ void scheduler_tick(void)
 	old_load = task_load(curr);
 	set_window_start(rq);
-	wallclock = ktime_get_ns();
+	wallclock = sched_ktime_clock();
 	update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
 	update_rq_clock(rq);
 	curr->sched_class->task_tick(rq, curr, 0);
@@ -3547,7 +3547,7 @@ static void __sched notrace __schedule(bool preempt)
 	clear_tsk_need_resched(prev);
 	clear_preempt_need_resched();
-	wallclock = ktime_get_ns();
+	wallclock = sched_ktime_clock();
 	if (likely(prev != next)) {
 		if (!prev->on_rq)
 			prev->last_sleep_ts = wallclock;
@@ -7426,7 +7426,7 @@ void sched_exit(struct task_struct *p)
 	rq = task_rq_lock(p, &rf);
 	/* rq->curr == p */
-	wallclock = ktime_get_ns();
+	wallclock = sched_ktime_clock();
 	update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
 	dequeue_task(rq, p, 0);
 	/*


@@ -540,7 +540,7 @@ static void sugov_work(struct kthread_work *work)
 	mutex_lock(&sg_policy->work_lock);
 	raw_spin_lock_irqsave(&sg_policy->update_lock, flags);
 	sugov_track_cycles(sg_policy, sg_policy->policy->cur,
-			   ktime_get_ns());
+			   sched_ktime_clock());
 	raw_spin_unlock_irqrestore(&sg_policy->update_lock, flags);
 	__cpufreq_driver_target(sg_policy->policy, sg_policy->next_freq,
 				CPUFREQ_RELATION_L);
@@ -993,7 +993,7 @@ static void sugov_limits(struct cpufreq_policy *policy)
 	mutex_lock(&sg_policy->work_lock);
 	raw_spin_lock_irqsave(&sg_policy->update_lock, flags);
 	sugov_track_cycles(sg_policy, sg_policy->policy->cur,
-			   ktime_get_ns());
+			   sched_ktime_clock());
 	raw_spin_unlock_irqrestore(&sg_policy->update_lock, flags);
 	cpufreq_policy_apply_limits(policy);
 	mutex_unlock(&sg_policy->work_lock);


@@ -12750,7 +12750,7 @@ static void walt_check_for_rotation(struct rq *src_rq)
 	if (!is_min_capacity_cpu(src_cpu))
 		return;
-	wc = ktime_get_ns();
+	wc = sched_ktime_clock();
 	for_each_possible_cpu(i) {
 		struct rq *rq = cpu_rq(i);

@@ -1841,6 +1841,15 @@ static inline int hrtick_enabled(struct rq *rq)
 #endif /* CONFIG_SCHED_HRTICK */
 
+#ifdef CONFIG_SCHED_WALT
+u64 sched_ktime_clock(void);
+#else
+static inline u64 sched_ktime_clock(void)
+{
+	return 0;
+}
+#endif
+
 #ifdef CONFIG_SMP
 extern void sched_avg_update(struct rq *rq);
 extern unsigned long sched_get_rt_rq_util(int cpu);
@@ -2479,7 +2488,7 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags)
 	data = rcu_dereference_sched(*per_cpu_ptr(&cpufreq_update_util_data,
 						  cpu_of(rq)));
 	if (data)
-		data->func(data, ktime_get_ns(), flags);
+		data->func(data, sched_ktime_clock(), flags);
 }
 #else
 static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {}


@@ -19,6 +19,7 @@
  * and Todd Kjos
  */
 
+#include <linux/syscore_ops.h>
 #include <linux/cpufreq.h>
 #include <linux/list_sort.h>
 #include <linux/jiffies.h>
@@ -42,6 +43,8 @@ const char *migrate_type_names[] = {"GROUP_TO_RQ", "RQ_TO_GROUP",
 #define EARLY_DETECTION_DURATION 9500000
 
+static ktime_t ktime_last;
+static bool sched_ktime_suspended;
 static struct cpu_cycle_counter_cb cpu_cycle_counter_cb;
 static bool use_cycle_counter;
 DEFINE_MUTEX(cluster_lock);
@@ -51,6 +54,37 @@ u64 walt_load_reported_window;
 static struct irq_work walt_cpufreq_irq_work;
 static struct irq_work walt_migration_irq_work;
 
+u64 sched_ktime_clock(void)
+{
+	if (unlikely(sched_ktime_suspended))
+		return ktime_to_ns(ktime_last);
+	return ktime_get_ns();
+}
+
+static void sched_resume(void)
+{
+	sched_ktime_suspended = false;
+}
+
+static int sched_suspend(void)
+{
+	ktime_last = ktime_get();
+	sched_ktime_suspended = true;
+	return 0;
+}
+
+static struct syscore_ops sched_syscore_ops = {
+	.resume = sched_resume,
+	.suspend = sched_suspend
+};
+
+static int __init sched_init_ops(void)
+{
+	register_syscore_ops(&sched_syscore_ops);
+	return 0;
+}
+late_initcall(sched_init_ops);
+
 static void acquire_rq_locks_irqsave(const cpumask_t *cpus,
 				     unsigned long *flags)
 {
@@ -361,7 +395,7 @@ void sched_account_irqstart(int cpu, struct task_struct *curr, u64 wallclock)
 	if (is_idle_task(curr)) {
 		/* We're here without rq->lock held, IRQ disabled */
 		raw_spin_lock(&rq->lock);
-		update_task_cpu_cycles(curr, cpu, ktime_get_ns());
+		update_task_cpu_cycles(curr, cpu, sched_ktime_clock());
 		raw_spin_unlock(&rq->lock);
 	}
 }
@@ -416,7 +450,7 @@ void sched_account_irqtime(int cpu, struct task_struct *curr,
 	cur_jiffies_ts = get_jiffies_64();
 	if (is_idle_task(curr))
-		update_task_ravg(curr, rq, IRQ_UPDATE, ktime_get_ns(),
+		update_task_ravg(curr, rq, IRQ_UPDATE, sched_ktime_clock(),
 				 delta);
 	nr_windows = cur_jiffies_ts - rq->irqload_ts;
@@ -756,7 +790,7 @@ void fixup_busy_time(struct task_struct *p, int new_cpu)
 	if (sched_disable_window_stats)
 		goto done;
-	wallclock = ktime_get_ns();
+	wallclock = sched_ktime_clock();
 	update_task_ravg(task_rq(p)->curr, task_rq(p),
 			 TASK_UPDATE,
@@ -2055,7 +2089,7 @@ void mark_task_starting(struct task_struct *p)
 		return;
 	}
-	wallclock = ktime_get_ns();
+	wallclock = sched_ktime_clock();
 	p->ravg.mark_start = p->last_wake_ts = wallclock;
 	p->last_enqueued_ts = wallclock;
 	update_task_cpu_cycles(p, cpu_of(rq), wallclock);
@@ -2401,7 +2435,7 @@ static int cpufreq_notifier_trans(struct notifier_block *nb,
 		raw_spin_lock_irqsave(&rq->lock, flags);
 		update_task_ravg(rq->curr, rq, TASK_UPDATE,
-				 ktime_get_ns(), 0);
+				 sched_ktime_clock(), 0);
 		raw_spin_unlock_irqrestore(&rq->lock, flags);
 	}
@@ -2561,7 +2595,7 @@ static void _set_preferred_cluster(struct related_thread_group *grp)
 	if (list_empty(&grp->tasks))
 		return;
-	wallclock = ktime_get_ns();
+	wallclock = sched_ktime_clock();
 	/*
 	 * wakeup of two or more related tasks could race with each other and
@@ -2587,7 +2621,7 @@ static void _set_preferred_cluster(struct related_thread_group *grp)
 	grp->preferred_cluster = best_cluster(grp,
 			combined_demand, group_boost);
-	grp->last_update = ktime_get_ns();
+	grp->last_update = sched_ktime_clock();
 	trace_sched_set_preferred_cluster(grp, combined_demand);
 }
@@ -2611,7 +2645,7 @@ int update_preferred_cluster(struct related_thread_group *grp,
 	 * has passed since we last updated preference
 	 */
 	if (abs(new_load - old_load) > sched_ravg_window / 4 ||
-			ktime_get_ns() - grp->last_update > sched_ravg_window)
+			sched_ktime_clock() - grp->last_update > sched_ravg_window)
 		return 1;
 	return 0;
@@ -2994,7 +3028,7 @@ static void transfer_busy_time(struct rq *rq, struct related_thread_group *grp,
 	bool new_task;
 	int i;
-	wallclock = ktime_get_ns();
+	wallclock = sched_ktime_clock();
 	update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
 	update_task_ravg(p, rq, TASK_UPDATE, wallclock, 0);
@@ -3166,7 +3200,7 @@ void walt_irq_work(struct irq_work *irq_work)
 	for_each_cpu(cpu, cpu_possible_mask)
 		raw_spin_lock(&cpu_rq(cpu)->lock);
-	wc = ktime_get_ns();
+	wc = sched_ktime_clock();
 	walt_load_reported_window = atomic64_read(&walt_irq_work_lastq_ws);
 	for_each_sched_cluster(cluster) {
 		u64 aggr_grp_load = 0;


@@ -299,7 +299,7 @@ void walt_sched_init_rq(struct rq *rq);
 static inline void walt_update_last_enqueue(struct task_struct *p)
 {
-	p->last_enqueued_ts = ktime_get_ns();
+	p->last_enqueued_ts = sched_ktime_clock();
 }
 extern void walt_rotate_work_init(void);
 extern void walt_rotation_checkpoint(int nr_big);