hrtimer: Use and report correct timerslack values for realtime tasks

The timerslack_ns setting is used to specify how much the hardware
timers should be delayed, to potentially dispatch multiple timers in a
single interrupt. This is a performance optimization. Timers of
realtime tasks (having a realtime scheduling policy) should not be
delayed.

This logic was inconsistently applied to the hrtimers, leading to delays
of realtime tasks which used timed waits for events (e.g. condition
variables). Due to the downstream override of the slack for rt tasks,
the procfs reported incorrect (non-zero) timerslack_ns values.

This is changed by setting the timer_slack_ns task attribute to 0 for
all tasks with a rt policy. By that, downstream users do not need to
specially handle rt tasks (w.r.t. the slack), and the procfs entry
shows the correct value of "0". Setting non-zero slack values (either
via procfs or PR_SET_TIMERSLACK) on tasks with a rt policy is ignored,
as stated in "man 2 PR_SET_TIMERSLACK":

  Timer slack is not applied to threads that are scheduled under a
  real-time scheduling policy (see sched_setscheduler(2)).

The special handling of timerslack on rt tasks in downstream users
is removed as well.

Signed-off-by: Felix Moessbauer <felix.moessbauer@siemens.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/20240814121032.368444-2-felix.moessbauer@siemens.com
[Sultan Alsawaf: backport to 6.1]
Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
Change-Id: I8b2c81ffdeea181ab729935de71d1b0131c16ffc
Signed-off-by: Richard Raya <rdxzv.dev@gmail.com>
This commit is contained in:
Felix Moessbauer 2024-08-14 14:10:32 +02:00 committed by Richard Raya
parent 84a3e4b166
commit d3cbc3f787
5 changed files with 20 additions and 17 deletions

View File

@ -2540,9 +2540,10 @@ static ssize_t timerslack_ns_write(struct file *file, const char __user *buf,
}
task_lock(p);
if (slack_ns == 0)
p->timer_slack_ns = p->default_timer_slack_ns;
else
if (task_is_realtime(p))
slack_ns = 0;
else if (slack_ns == 0)
slack_ns = p->default_timer_slack_ns;
p->timer_slack_ns = slack_ns;
task_unlock(p);

View File

@ -76,19 +76,16 @@ u64 select_estimate_accuracy(struct timespec64 *tv)
{
u64 ret;
struct timespec64 now;
u64 slack = current->timer_slack_ns;
/*
* Realtime tasks get a slack of 0 for obvious reasons.
*/
if (rt_task(current))
if (slack == 0)
return 0;
ktime_get_ts64(&now);
now = timespec64_sub(*tv, now);
ret = __estimate_accuracy(&now);
if (ret < current->timer_slack_ns)
return current->timer_slack_ns;
if (ret < slack)
return slack;
return ret;
}

View File

@ -5195,6 +5195,14 @@ static void __setscheduler_params(struct task_struct *p,
else if (fair_policy(policy))
p->static_prio = NICE_TO_PRIO(attr->sched_nice);
/* rt-policy tasks do not have a timerslack */
if (task_is_realtime(p)) {
p->timer_slack_ns = 0;
} else if (p->timer_slack_ns == 0) {
/* when switching back to non-rt policy, restore timerslack */
p->timer_slack_ns = p->default_timer_slack_ns;
}
/*
* __sched_setscheduler() ensures attr->sched_priority == 0 when
* !rt_policy. Always setting this ensures that things like

View File

@ -2475,6 +2475,8 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
error = current->timer_slack_ns;
break;
case PR_SET_TIMERSLACK:
if (task_is_realtime(current))
break;
if (arg2 <= 0)
current->timer_slack_ns =
current->default_timer_slack_ns;

View File

@ -1965,14 +1965,9 @@ long hrtimer_nanosleep(const struct timespec64 *rqtp,
struct restart_block *restart;
struct hrtimer_sleeper t;
int ret = 0;
u64 slack;
slack = current->timer_slack_ns;
if (dl_task(current) || rt_task(current))
slack = 0;
hrtimer_init_sleeper_on_stack(&t, clockid, mode);
hrtimer_set_expires_range_ns(&t.timer, timespec64_to_ktime(*rqtp), slack);
hrtimer_set_expires_range_ns(&t.timer, timespec64_to_ktime(*rqtp), current->timer_slack_ns);
ret = do_nanosleep(&t, mode);
if (ret != -ERESTART_RESTARTBLOCK)
goto out;