cpuidle: Fix frequent cpu exits from low power mode

The following upstream changes are reverted due to regressions in
cpu low power mode use cases.

'commit 55e591cc1879 ("BACKPORT: time: tick-sched: Reorganize idle tick management code")'
'commit f6d3093dfc66 ("BACKPORT: sched: idle: Do not stop the tick upfront in the idle loop")'
'commit 27e8616e4282 ("UPSTREAM: sched: idle: Do not stop the tick before cpuidle_idle_call()")'
'commit 8b468535dfdc ("UPSTREAM: jiffies: Introduce USER_TICK_USEC and redefine TICK_USEC")'
'commit 3a25735bd7ec ("UPSTREAM: cpuidle: Return nohz hint from cpuidle_select()")'
'commit f69cfc8ef98a ("BACKPORT: time: tick-sched: Split tick_nohz_stop_sched_tick()")'
'commit 6277dd586f11 ("BACKPORT: time: hrtimer: Introduce hrtimer_next_event_without()")'
'commit 8c71f69fb440 ("UPSTREAM: sched: idle: Select idle state before stopping the tick")'
'commit 30693a4f0909 ("UPSTREAM: cpuidle: menu: Refine idle state selection for running tick")'
'commit e32966ced86f ("UPSTREAM: cpuidle: menu: Avoid selecting shallow states with stopped tick")'

Also fix the compilation errors that result from the change in the
tick_nohz_get_sleep_length() function signature.
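
For reference, a minimal sketch of that signature change, taken from the
hunks below (the menu governor call site is shown purely as an example):

    /* Reverted prototype: the delta_next out-parameter is gone again. */
    extern ktime_t tick_nohz_get_sleep_length(void);

    /* Callers drop the &delta_next argument accordingly, e.g.: */
    data->next_timer_us = ktime_to_us(tick_nohz_get_sleep_length());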

Change-Id: I21488a5a91f1eac11d4e139fd44968d52563717c
Signed-off-by: Prasad Sodagudi <psodagud@codeaurora.org>
Author: Prasad Sodagudi, 2018-09-14 14:54:31 -07:00
Commit: ae04811d41 (parent 1e465bc92a)
15 changed files with 114 additions and 387 deletions

@@ -430,7 +430,6 @@ static void xen_pv_play_dead(void) /* used only with HOTPLUG_CPU */
* data back is to call:
*/
tick_nohz_idle_enter();
tick_nohz_idle_stop_tick_protected();
cpuhp_online_idle(CPUHP_AP_ONLINE_IDLE);
}

@@ -264,18 +264,12 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
*
* @drv: the cpuidle driver
* @dev: the cpuidle device
* @stop_tick: indication on whether or not to stop the tick
*
* Returns the index of the idle state. The return value must not be negative.
*
* The memory location pointed to by @stop_tick is expected to be written the
* 'false' boolean value if the scheduler tick should not be stopped before
* entering the returned state.
*/
int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
bool *stop_tick)
int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
{
return cpuidle_curr_governor->select(drv, dev, stop_tick);
return cpuidle_curr_governor->select(drv, dev);
}
/**

@@ -62,10 +62,9 @@ static inline void ladder_do_selection(struct ladder_device *ldev,
* ladder_select_state - selects the next state to enter
* @drv: cpuidle driver
* @dev: the CPU
* @dummy: not used
*/
static int ladder_select_state(struct cpuidle_driver *drv,
struct cpuidle_device *dev, bool *dummy)
struct cpuidle_device *dev)
{
struct ladder_device *ldev = this_cpu_ptr(&ladder_devices);
struct ladder_device_state *last_state;

@@ -123,7 +123,6 @@
struct menu_device {
int last_state_idx;
int needs_update;
int tick_wakeup;
unsigned int next_timer_us;
unsigned int predicted_us;
@@ -285,10 +284,8 @@ again:
* menu_select - selects the next idle state to enter
* @drv: cpuidle driver containing state data
* @dev: the CPU
* @stop_tick: indication on whether or not to stop the tick
*/
static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
bool *stop_tick)
static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
{
struct menu_device *data = this_cpu_ptr(&menu_devices);
struct device *device = get_cpu_device(dev->cpu);
@@ -300,7 +297,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
unsigned int expected_interval;
unsigned long nr_iowaiters, cpu_load;
int resume_latency = dev_pm_qos_raw_read_value(device);
ktime_t delta_next;
if (data->needs_update) {
menu_update(drv, dev);
@@ -312,13 +308,11 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
latency_req = resume_latency;
/* Special case when user has set very strict latency requirement */
if (unlikely(latency_req == 0)) {
*stop_tick = false;
if (unlikely(latency_req == 0))
return 0;
}
/* determine the expected residency time, round up */
data->next_timer_us = ktime_to_us(tick_nohz_get_sleep_length(&delta_next));
data->next_timer_us = ktime_to_us(tick_nohz_get_sleep_length());
get_iowait_load(&nr_iowaiters, &cpu_load);
data->bucket = which_bucket(data->next_timer_us, nr_iowaiters);
@@ -357,30 +351,14 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
*/
data->predicted_us = min(data->predicted_us, expected_interval);
if (tick_nohz_tick_stopped()) {
/*
* If the tick is already stopped, the cost of possible short
* idle duration misprediction is much higher, because the CPU
* may be stuck in a shallow idle state for a long time as a
* result of it. In that case say we might mispredict and try
* to force the CPU into a state for which we would have stopped
* the tick, unless a timer is going to expire really soon
* anyway.
*/
if (data->predicted_us < TICK_USEC)
data->predicted_us = min_t(unsigned int, TICK_USEC,
ktime_to_us(delta_next));
} else {
/*
* Use the performance multiplier and the user-configurable
* latency_req to determine the maximum exit latency.
*/
interactivity_req = data->predicted_us / performance_multiplier(nr_iowaiters, cpu_load);
if (latency_req > interactivity_req)
latency_req = interactivity_req;
}
/*
* Use the performance multiplier and the user-configurable
* latency_req to determine the maximum exit latency.
*/
interactivity_req = data->predicted_us / performance_multiplier(nr_iowaiters, cpu_load);
if (latency_req > interactivity_req)
latency_req = interactivity_req;
expected_interval = data->predicted_us;
/*
* Find the idle state with the lowest power while satisfying
* our constraints.
@@ -396,52 +374,15 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
idx = i; /* first enabled state */
if (s->target_residency > data->predicted_us)
break;
if (s->exit_latency > latency_req) {
/*
* If we break out of the loop for latency reasons, use
* the target residency of the selected state as the
* expected idle duration so that the tick is retained
* as long as that target residency is low enough.
*/
expected_interval = drv->states[idx].target_residency;
if (s->exit_latency > latency_req)
break;
}
idx = i;
}
if (idx == -1)
idx = 0; /* No states enabled. Must use 0. */
/*
* Don't stop the tick if the selected state is a polling one or if the
* expected idle duration is shorter than the tick period length.
*/
if ((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) ||
expected_interval < TICK_USEC) {
unsigned int delta_next_us = ktime_to_us(delta_next);
*stop_tick = false;
if (!tick_nohz_tick_stopped() && idx > 0 &&
drv->states[idx].target_residency > delta_next_us) {
/*
* The tick is not going to be stopped and the target
* residency of the state to be returned is not within
* the time until the next timer event including the
* tick, so try to correct that.
*/
for (i = idx - 1; i >= 0; i--) {
if (drv->states[i].disabled ||
dev->states_usage[i].disable)
continue;
idx = i;
if (drv->states[i].target_residency <= delta_next_us)
break;
}
}
}
data->last_state_idx = idx;
return data->last_state_idx;
@@ -461,7 +402,6 @@ static void menu_reflect(struct cpuidle_device *dev, int index)
data->last_state_idx = index;
data->needs_update = 1;
data->tick_wakeup = tick_nohz_idle_got_tick();
}
/**
@@ -492,27 +432,14 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
* assume the state was never reached and the exit latency is 0.
*/
if (data->tick_wakeup && data->next_timer_us > TICK_USEC) {
/*
* The nohz code said that there wouldn't be any events within
* the tick boundary (if the tick was stopped), but the idle
* duration predictor had a differing opinion. Since the CPU
* was woken up by a tick (that wasn't stopped after all), the
* predictor was not quite right, so assume that the CPU could
* have been idle long (but not forever) to help the idle
* duration predictor do a better job next time.
*/
measured_us = 9 * MAX_INTERESTING / 10;
} else {
/* measured value */
measured_us = cpuidle_get_last_residency(dev);
/* measured value */
measured_us = cpuidle_get_last_residency(dev);
/* Deduct exit latency */
if (measured_us > 2 * target->exit_latency)
measured_us -= target->exit_latency;
else
measured_us /= 2;
}
/* Deduct exit latency */
if (measured_us > 2 * target->exit_latency)
measured_us -= target->exit_latency;
else
measured_us /= 2;
/* Make sure our coefficients do not exceed unity */
if (measured_us > data->next_timer_us)

@@ -596,8 +596,7 @@ static int cpu_power_select(struct cpuidle_device *dev,
int best_level = 0;
uint32_t latency_us = pm_qos_request_for_cpu(PM_QOS_CPU_DMA_LATENCY,
dev->cpu);
ktime_t delta_next;
s64 sleep_us = ktime_to_us(tick_nohz_get_sleep_length(&delta_next));
s64 sleep_us = ktime_to_us(tick_nohz_get_sleep_length());
uint32_t modified_time_us = 0;
uint32_t next_event_us = 0;
int i, idx_restrict;
@@ -1327,7 +1326,7 @@ static bool psci_enter_sleep(struct lpm_cpu *cpu, int idx, bool from_idle)
}
static int lpm_cpuidle_select(struct cpuidle_driver *drv,
struct cpuidle_device *dev, bool *stop_tick)
struct cpuidle_device *dev)
{
struct lpm_cpu *cpu = per_cpu(cpu_lpm, dev->cpu);

@@ -376,7 +376,7 @@ static int efx_mcdi_poll(struct efx_nic *efx)
* because generally mcdi responses are fast. After that, back off
* and poll once a jiffy (approximately)
*/
spins = USER_TICK_USEC;
spins = TICK_USEC;
finish = jiffies + MCDI_RPC_TIMEOUT;
while (1) {

@@ -131,8 +131,7 @@ extern bool cpuidle_not_available(struct cpuidle_driver *drv,
struct cpuidle_device *dev);
extern int cpuidle_select(struct cpuidle_driver *drv,
struct cpuidle_device *dev,
bool *stop_tick);
struct cpuidle_device *dev);
extern int cpuidle_enter(struct cpuidle_driver *drv,
struct cpuidle_device *dev, int index);
extern void cpuidle_reflect(struct cpuidle_device *dev, int index);
@@ -164,7 +163,7 @@ static inline bool cpuidle_not_available(struct cpuidle_driver *drv,
struct cpuidle_device *dev)
{return true; }
static inline int cpuidle_select(struct cpuidle_driver *drv,
struct cpuidle_device *dev, bool *stop_tick)
struct cpuidle_device *dev)
{return -ENODEV; }
static inline int cpuidle_enter(struct cpuidle_driver *drv,
struct cpuidle_device *dev, int index)
@@ -247,8 +246,7 @@ struct cpuidle_governor {
struct cpuidle_device *dev);
int (*select) (struct cpuidle_driver *drv,
struct cpuidle_device *dev,
bool *stop_tick);
struct cpuidle_device *dev);
void (*reflect) (struct cpuidle_device *dev, int index);
};

@@ -411,7 +411,6 @@ static inline ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
}
extern u64 hrtimer_get_next_event(void);
extern u64 hrtimer_next_event_without(const struct hrtimer *exclude);
extern bool hrtimer_active(const struct hrtimer *timer);

@@ -62,11 +62,8 @@ extern int register_refined_jiffies(long clock_tick_rate);
/* TICK_NSEC is the time between ticks in nsec assuming SHIFTED_HZ */
#define TICK_NSEC ((NSEC_PER_SEC+HZ/2)/HZ)
/* TICK_USEC is the time between ticks in usec assuming SHIFTED_HZ */
#define TICK_USEC ((USEC_PER_SEC + HZ/2) / HZ)
/* USER_TICK_USEC is the time between ticks in usec assuming fake USER_HZ */
#define USER_TICK_USEC ((1000000UL + USER_HZ/2) / USER_HZ)
/* TICK_USEC is the time between ticks in usec assuming fake USER_HZ */
#define TICK_USEC ((1000000UL + USER_HZ/2) / USER_HZ)
#ifndef __jiffy_arch_data
#define __jiffy_arch_data

@@ -114,45 +114,26 @@ enum tick_dep_bits {
#ifdef CONFIG_NO_HZ_COMMON
extern bool tick_nohz_enabled;
extern int tick_nohz_tick_stopped(void);
extern void tick_nohz_idle_stop_tick(void);
extern void tick_nohz_idle_retain_tick(void);
extern void tick_nohz_idle_restart_tick(void);
extern void tick_nohz_idle_enter(void);
extern void tick_nohz_idle_exit(void);
extern void tick_nohz_irq_exit(void);
extern bool tick_nohz_idle_got_tick(void);
extern ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next);
extern ktime_t tick_nohz_get_sleep_length(void);
extern unsigned long tick_nohz_get_idle_calls(void);
extern unsigned long tick_nohz_get_idle_calls_cpu(int cpu);
extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time);
static inline void tick_nohz_idle_stop_tick_protected(void)
{
local_irq_disable();
tick_nohz_idle_stop_tick();
local_irq_enable();
}
#else /* !CONFIG_NO_HZ_COMMON */
#define tick_nohz_enabled (0)
static inline int tick_nohz_tick_stopped(void) { return 0; }
static inline void tick_nohz_idle_stop_tick(void) { }
static inline void tick_nohz_idle_retain_tick(void) { }
static inline void tick_nohz_idle_restart_tick(void) { }
static inline void tick_nohz_idle_enter(void) { }
static inline void tick_nohz_idle_exit(void) { }
static inline bool tick_nohz_idle_got_tick(void) { return false; }
static inline ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next)
static inline ktime_t tick_nohz_get_sleep_length(void)
{
*delta_next = TICK_NSEC;
return *delta_next;
return NSEC_PER_SEC / HZ;
}
static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; }
static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; }
static inline void tick_nohz_idle_stop_tick_protected(void) { }
#endif /* !CONFIG_NO_HZ_COMMON */
#ifdef CONFIG_NO_HZ_FULL

@@ -147,15 +147,13 @@ static void cpuidle_idle_call(void)
}
/*
* The RCU framework needs to be told that we are entering an idle
* section, so no more rcu read side critical sections and one more
* Tell the RCU framework we are entering an idle section,
* so no more rcu read side critical sections and one more
* step to the grace period
*/
rcu_idle_enter();
if (cpuidle_not_available(drv, dev)) {
tick_nohz_idle_stop_tick();
rcu_idle_enter();
default_idle_call();
goto exit_idle;
}
@@ -172,37 +170,20 @@ static void cpuidle_idle_call(void)
if (idle_should_enter_s2idle() || dev->use_deepest_state) {
if (idle_should_enter_s2idle()) {
rcu_idle_enter();
entered_state = cpuidle_enter_s2idle(drv, dev);
if (entered_state > 0) {
local_irq_enable();
goto exit_idle;
}
rcu_idle_exit();
}
tick_nohz_idle_stop_tick();
rcu_idle_enter();
next_state = cpuidle_find_deepest_state(drv, dev);
call_cpuidle(drv, dev, next_state);
} else {
bool stop_tick = true;
/*
* Ask the cpuidle framework to choose a convenient idle state.
*/
next_state = cpuidle_select(drv, dev, &stop_tick);
if (stop_tick)
tick_nohz_idle_stop_tick();
else
tick_nohz_idle_retain_tick();
rcu_idle_enter();
next_state = cpuidle_select(drv, dev);
entered_state = call_cpuidle(drv, dev, next_state);
/*
* Give the governor an opportunity to reflect on the outcome
@@ -247,7 +228,6 @@ static void do_idle(void)
rmb();
if (cpu_is_offline(smp_processor_id())) {
tick_nohz_idle_stop_tick_protected();
cpuhp_report_idle_dead();
arch_cpu_idle_dead();
}
@@ -261,12 +241,10 @@ static void do_idle(void)
* broadcast device expired for us, we don't want to go deep
* idle as we know that the IPI is going to arrive right away.
*/
if (cpu_idle_force_poll || tick_check_broadcast_expired()) {
tick_nohz_idle_restart_tick();
if (cpu_idle_force_poll || tick_check_broadcast_expired())
cpu_idle_poll();
} else {
else
cpuidle_idle_call();
}
arch_cpu_idle_exit();
}

@@ -463,8 +463,7 @@ static inline void hrtimer_update_next_timer(struct hrtimer_cpu_base *cpu_base,
#endif
}
static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base,
const struct hrtimer *exclude)
static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base)
{
struct hrtimer_clock_base *base = cpu_base->clock_base;
unsigned int active = cpu_base->active_bases;
@@ -480,24 +479,9 @@ static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base,
next = timerqueue_getnext(&base->active);
timer = container_of(next, struct hrtimer, node);
if (timer == exclude) {
/* Get to the next timer in the queue. */
struct rb_node *rbn = rb_next(&next->node);
next = rb_entry_safe(rbn, struct timerqueue_node, node);
if (!next)
continue;
timer = container_of(next, struct hrtimer, node);
}
expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
if (expires < expires_next) {
expires_next = expires;
/* Skip cpu_base update if a timer is being excluded. */
if (exclude)
continue;
hrtimer_update_next_timer(cpu_base, timer);
}
}
@@ -576,7 +560,7 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal)
if (!cpu_base->hres_active)
return;
expires_next = __hrtimer_get_next_event(cpu_base, NULL);
expires_next = __hrtimer_get_next_event(cpu_base);
if (skip_equal && expires_next == cpu_base->expires_next)
return;
@@ -1102,30 +1086,7 @@ u64 hrtimer_get_next_event(void)
raw_spin_lock_irqsave(&cpu_base->lock, flags);
if (!__hrtimer_hres_active(cpu_base))
expires = __hrtimer_get_next_event(cpu_base, NULL);
raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
return expires;
}
/**
* hrtimer_next_event_without - time until next expiry event w/o one timer
* @exclude: timer to exclude
*
* Returns the next expiry time over all timers except for the @exclude one or
* KTIME_MAX if none of them is pending.
*/
u64 hrtimer_next_event_without(const struct hrtimer *exclude)
{
struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
u64 expires = KTIME_MAX;
unsigned long flags;
raw_spin_lock_irqsave(&cpu_base->lock, flags);
if (__hrtimer_hres_active(cpu_base))
expires = __hrtimer_get_next_event(cpu_base, exclude);
expires = __hrtimer_get_next_event(cpu_base);
raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
@@ -1367,7 +1328,7 @@ retry:
__hrtimer_run_queues(cpu_base, now);
/* Reevaluate the clock bases for the next expiry */
expires_next = __hrtimer_get_next_event(cpu_base, NULL);
expires_next = __hrtimer_get_next_event(cpu_base);
/*
* Store the new expiry value so the migration code can verify
* against it.

@@ -31,7 +31,7 @@
/* USER_HZ period (usecs): */
unsigned long tick_usec = USER_TICK_USEC;
unsigned long tick_usec = TICK_USEC;
/* SHIFTED_HZ period (nsecs): */
unsigned long tick_nsec;

@@ -569,11 +569,14 @@ static void tick_nohz_stop_idle(struct tick_sched *ts, ktime_t now)
sched_clock_idle_wakeup_event();
}
static void tick_nohz_start_idle(struct tick_sched *ts)
static ktime_t tick_nohz_start_idle(struct tick_sched *ts)
{
ts->idle_entrytime = ktime_get();
ktime_t now = ktime_get();
ts->idle_entrytime = now;
ts->idle_active = 1;
sched_clock_idle_sleep_event();
return now;
}
/**
@@ -682,10 +685,13 @@ static inline bool local_timer_softirq_pending(void)
return local_softirq_pending() & BIT(TIMER_SOFTIRQ);
}
static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
ktime_t now, int cpu)
{
struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
u64 basemono, next_tick, next_tmr, next_rcu, delta, expires;
unsigned long seq, basejiff;
ktime_t tick;
/* Read jiffies and the time when jiffies were updated last */
do {
@@ -694,7 +700,6 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
basejiff = jiffies;
} while (read_seqretry(&jiffies_lock, seq));
ts->last_jiffies = basejiff;
ts->timer_expires_base = basemono;
/*
* Keep the periodic tick, when RCU, architecture or irq_work
@@ -739,20 +744,32 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
* next period, so no point in stopping it either, bail.
*/
if (!ts->tick_stopped) {
ts->timer_expires = 0;
tick = 0;
goto out;
}
}
/*
* If this CPU is the one which had the do_timer() duty last, we limit
* the sleep time to the timekeeping max_deferment value.
* If this CPU is the one which updates jiffies, then give up
* the assignment and let it be taken by the CPU which runs
* the tick timer next, which might be this CPU as well. If we
* don't drop this here the jiffies might be stale and
* do_timer() never invoked. Keep track of the fact that it
* was the one which had the do_timer() duty last. If this CPU
* is the one which had the do_timer() duty last, we limit the
* sleep time to the timekeeping max_deferment value.
* Otherwise we can sleep as long as we want.
*/
delta = timekeeping_max_deferment();
if (cpu != tick_do_timer_cpu &&
(tick_do_timer_cpu != TICK_DO_TIMER_NONE || !ts->do_timer_last))
if (cpu == tick_do_timer_cpu) {
tick_do_timer_cpu = TICK_DO_TIMER_NONE;
ts->do_timer_last = 1;
} else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
delta = KTIME_MAX;
ts->do_timer_last = 0;
} else if (!ts->do_timer_last) {
delta = KTIME_MAX;
}
#ifdef CONFIG_NO_HZ_FULL
/* Limit the tick delta to the maximum scheduler deferment */
@@ -766,42 +783,14 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
else
expires = KTIME_MAX;
ts->timer_expires = min_t(u64, expires, next_tick);
out:
return ts->timer_expires;
}
static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
{
struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
u64 basemono = ts->timer_expires_base;
u64 expires = ts->timer_expires;
ktime_t tick = expires;
/* Make sure we won't be trying to stop it twice in a row. */
ts->timer_expires_base = 0;
/*
* If this CPU is the one which updates jiffies, then give up
* the assignment and let it be taken by the CPU which runs
* the tick timer next, which might be this CPU as well. If we
* don't drop this here the jiffies might be stale and
* do_timer() never invoked. Keep track of the fact that it
* was the one which had the do_timer() duty last.
*/
if (cpu == tick_do_timer_cpu) {
tick_do_timer_cpu = TICK_DO_TIMER_NONE;
ts->do_timer_last = 1;
} else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
ts->do_timer_last = 0;
}
expires = min_t(u64, expires, next_tick);
tick = expires;
/* Skip reprogram of event if its not changed */
if (ts->tick_stopped && (expires == ts->next_tick)) {
/* Sanity check: make sure clockevent is actually programmed */
if (tick == KTIME_MAX || ts->next_tick == hrtimer_get_expires(&ts->sched_timer))
return;
goto out;
WARN_ON_ONCE(1);
printk_once("basemono: %llu ts->next_tick: %llu dev->next_event: %llu timer->active: %d timer->expires: %llu\n",
@@ -834,7 +823,7 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
if (unlikely(expires == KTIME_MAX)) {
if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
hrtimer_cancel(&ts->sched_timer);
return;
goto out;
}
if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
@@ -843,23 +832,16 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
hrtimer_set_expires(&ts->sched_timer, tick);
tick_program_event(tick, 1);
}
}
static void tick_nohz_retain_tick(struct tick_sched *ts)
{
ts->timer_expires_base = 0;
out:
/*
* Update the estimated sleep length until the next timer
* (not only the tick).
*/
ts->sleep_length = ktime_sub(dev->next_event, now);
return tick;
}
#ifdef CONFIG_NO_HZ_FULL
static void tick_nohz_stop_sched_tick(struct tick_sched *ts, int cpu)
{
if (tick_nohz_next_event(ts, cpu))
tick_nohz_stop_tick(ts, cpu);
else
tick_nohz_retain_tick(ts);
}
#endif /* CONFIG_NO_HZ_FULL */
static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
{
/* Update jiffies first */
@@ -895,7 +877,7 @@ static void tick_nohz_full_update_tick(struct tick_sched *ts)
return;
if (can_stop_full_tick(cpu, ts))
tick_nohz_stop_sched_tick(ts, cpu);
tick_nohz_stop_sched_tick(ts, ktime_get(), cpu);
else if (ts->tick_stopped)
tick_nohz_restart_sched_tick(ts, ktime_get());
#endif
@@ -921,8 +903,10 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
return false;
}
if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) {
ts->sleep_length = NSEC_PER_SEC / HZ;
return false;
}
if (need_resched())
return false;
@@ -957,70 +941,46 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
return true;
}
static void __tick_nohz_idle_stop_tick(struct tick_sched *ts)
static void __tick_nohz_idle_enter(struct tick_sched *ts)
{
ktime_t expires;
ktime_t now, expires;
int cpu = smp_processor_id();
#ifdef CONFIG_SMP
if (check_pending_deferrable_timers(cpu))
raise_softirq_irqoff(TIMER_SOFTIRQ);
#endif
now = tick_nohz_start_idle(ts);
/*
* If tick_nohz_get_sleep_length() ran tick_nohz_next_event(), the
* tick timer expiration time is known already.
*/
if (ts->timer_expires_base)
expires = ts->timer_expires;
else if (can_stop_idle_tick(cpu, ts))
expires = tick_nohz_next_event(ts, cpu);
else
return;
ts->idle_calls++;
if (expires > 0LL) {
if (can_stop_idle_tick(cpu, ts)) {
int was_stopped = ts->tick_stopped;
tick_nohz_stop_tick(ts, cpu);
ts->idle_calls++;
ts->idle_sleeps++;
ts->idle_expires = expires;
expires = tick_nohz_stop_sched_tick(ts, now, cpu);
if (expires > 0LL) {
ts->idle_sleeps++;
ts->idle_expires = expires;
}
if (!was_stopped && ts->tick_stopped) {
ts->idle_jiffies = ts->last_jiffies;
nohz_balance_enter_idle(cpu);
}
} else {
tick_nohz_retain_tick(ts);
}
}
/**
* tick_nohz_idle_stop_tick - stop the idle tick from the idle task
* tick_nohz_idle_enter - stop the idle tick from the idle task
*
* When the next event is more than a tick into the future, stop the idle tick
*/
void tick_nohz_idle_stop_tick(void)
{
__tick_nohz_idle_stop_tick(this_cpu_ptr(&tick_cpu_sched));
}
void tick_nohz_idle_retain_tick(void)
{
tick_nohz_retain_tick(this_cpu_ptr(&tick_cpu_sched));
/*
* Undo the effect of get_next_timer_interrupt() called from
* tick_nohz_next_event().
*/
timer_clear_idle();
}
/**
* tick_nohz_idle_enter - prepare for entering idle on the current CPU
*
* Called when we start the idle loop.
*
* The arch is responsible of calling:
*
* - rcu_idle_enter() after its last use of RCU before the CPU is put
* to sleep.
* - rcu_idle_exit() before the first use of RCU after the CPU is woken up.
*/
void tick_nohz_idle_enter(void)
{
@@ -1030,7 +990,7 @@ void tick_nohz_idle_enter(void)
/*
* Update the idle state in the scheduler domain hierarchy
* when tick_nohz_stop_tick() is called from the idle loop.
* when tick_nohz_stop_sched_tick() is called from the idle loop.
* State will be updated to busy during the first busy tick after
* exiting idle.
*/
@@ -1039,11 +999,8 @@ void tick_nohz_idle_enter(void)
local_irq_disable();
ts = this_cpu_ptr(&tick_cpu_sched);
WARN_ON_ONCE(ts->timer_expires_base);
ts->inidle = 1;
tick_nohz_start_idle(ts);
__tick_nohz_idle_enter(ts);
local_irq_enable();
}
@@ -1061,62 +1018,21 @@ void tick_nohz_irq_exit(void)
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
if (ts->inidle)
tick_nohz_start_idle(ts);
__tick_nohz_idle_enter(ts);
else
tick_nohz_full_update_tick(ts);
}
/**
* tick_nohz_idle_got_tick - Check whether or not the tick handler has run
*/
bool tick_nohz_idle_got_tick(void)
{
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
if (ts->inidle > 1) {
ts->inidle = 1;
return true;
}
return false;
}
/**
* tick_nohz_get_sleep_length - return the expected length of the current sleep
* @delta_next: duration until the next event if the tick cannot be stopped
* tick_nohz_get_sleep_length - return the length of the current sleep
*
* Called from power state control code with interrupts disabled
*/
ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next)
ktime_t tick_nohz_get_sleep_length(void)
{
struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
int cpu = smp_processor_id();
/*
* The idle entry time is expected to be a sufficient approximation of
* the current time at this point.
*/
ktime_t now = ts->idle_entrytime;
ktime_t next_event;
WARN_ON_ONCE(!ts->inidle);
*delta_next = ktime_sub(dev->next_event, now);
if (!can_stop_idle_tick(cpu, ts))
return *delta_next;
next_event = tick_nohz_next_event(ts, cpu);
if (!next_event)
return *delta_next;
/*
* If the next highres timer to expire is earlier than next_event, the
* idle governor needs to know that.
*/
next_event = min_t(u64, next_event,
hrtimer_next_event_without(&ts->sched_timer));
return ktime_sub(next_event, now);
return ts->sleep_length;
}
/**
@@ -1165,20 +1081,6 @@ static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
#endif
}
static void __tick_nohz_idle_restart_tick(struct tick_sched *ts, ktime_t now)
{
tick_nohz_restart_sched_tick(ts, now);
tick_nohz_account_idle_ticks(ts);
}
void tick_nohz_idle_restart_tick(void)
{
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
if (ts->tick_stopped)
__tick_nohz_idle_restart_tick(ts, ktime_get());
}
/**
* tick_nohz_idle_exit - restart the idle tick from the idle task
*
@@ -1194,7 +1096,6 @@ void tick_nohz_idle_exit(void)
local_irq_disable();
WARN_ON_ONCE(!ts->inidle);
WARN_ON_ONCE(ts->timer_expires_base);
ts->inidle = 0;
@@ -1204,8 +1105,10 @@ void tick_nohz_idle_exit(void)
if (ts->idle_active)
tick_nohz_stop_idle(ts, now);
if (ts->tick_stopped)
__tick_nohz_idle_restart_tick(ts, now);
if (ts->tick_stopped) {
tick_nohz_restart_sched_tick(ts, now);
tick_nohz_account_idle_ticks(ts);
}
local_irq_enable();
}
@@ -1219,9 +1122,6 @@ static void tick_nohz_handler(struct clock_event_device *dev)
struct pt_regs *regs = get_irq_regs();
ktime_t now = ktime_get();
if (ts->inidle)
ts->inidle = 2;
dev->next_event = KTIME_MAX;
tick_sched_do_timer(now);
@@ -1330,9 +1230,6 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
struct pt_regs *regs = get_irq_regs();
ktime_t now = ktime_get();
if (ts->inidle)
ts->inidle = 2;
tick_sched_do_timer(now);
/*

@@ -38,8 +38,7 @@ enum tick_nohz_mode {
* @idle_exittime: Time when the idle state was left
* @idle_sleeptime: Sum of the time slept in idle with sched tick stopped
* @iowait_sleeptime: Sum of the time slept in idle with sched tick stopped, with IO outstanding
* @timer_expires: Anticipated timer expiration time (in case sched tick is stopped)
* @timer_expires_base: Base time clock monotonic for @timer_expires
* @sleep_length: Duration of the current idle sleep
* @do_timer_lst: CPU was the last one doing do_timer before going idle
*/
struct tick_sched {
@@ -59,9 +58,8 @@ struct tick_sched {
ktime_t idle_exittime;
ktime_t idle_sleeptime;
ktime_t iowait_sleeptime;
ktime_t sleep_length;
unsigned long last_jiffies;
u64 timer_expires;
u64 timer_expires_base;
u64 next_timer;
ktime_t idle_expires;
int do_timer_last;