sched/fair: Don’t push cfs_bandwith slack timers forward [Linux 5.3]

This Linux kernel change "sched/fair: Don’t push cfs_bandwith slack timers forward" is included in the Linux 5.3 release. This change is authored by Ben Segall <bsegall [at] google.com> on Thu Jun 6 10:21:01 2019 -0700. The commit for this change in Linux stable tree is 66567fc (patch).

sched/fair: Don't push cfs_bandwith slack timers forward

When a cfs_rq sleeps and returns its quota, we delay for 5ms before
waking any throttled cfs_rqs to coalesce with other cfs_rqs going to
sleep, as this has to be done outside of the rq lock we hold.

The current code waits for 5ms without any sleeps, instead of waiting
for 5ms from the first sleep, which can delay the unthrottle more than
we want. Switch this around so that we can't push this forward forever.

This requires an extra flag rather than using hrtimer_active, since we
need to start a new timer if the current one is in the process of
being cancelled.

Signed-off-by: Ben Segall <>
Signed-off-by: Peter Zijlstra (Intel) <>
Reviewed-by: Xunlei Pang <>
Acked-by: Phil Auld <>
Cc: Linus Torvalds <>
Cc: Peter Zijlstra <>
Cc: Thomas Gleixner <>
Signed-off-by: Ingo Molnar <>

There are 15 lines of Linux source code added/deleted in this change. Code changes to Linux kernel are as follows.

 kernel/sched/fair.c  | 7 +++++++
 kernel/sched/sched.h | 8 ++++----
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 4c8f45e..3c11dcd 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4729,6 +4729,11 @@ static void start_cfs_slack_bandwidth(struct cfs_bandwidth *cfs_b)
    if (runtime_refresh_within(cfs_b, min_left))
        return;

+   /* don't push forwards an existing deferred unthrottle */
+   if (cfs_b->slack_started)
+       return;
+   cfs_b->slack_started = true;
@@ -4782,6 +4787,7 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)

    /* confirm we're still not at a refresh boundary */
    raw_spin_lock_irqsave(&cfs_b->lock, flags);
+   cfs_b->slack_started = false;
    if (cfs_b->distribute_running) {
        raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
        return;
    }
@@ -4945,6 +4951,7 @@ void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
    hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
    cfs_b->slack_timer.function = sched_cfs_slack_timer;
    cfs_b->distribute_running = 0;
+   cfs_b->slack_started = false;
 }

 static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 607859a..b08dee2 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -338,8 +338,10 @@ struct cfs_bandwidth {
    u64         runtime_expires;
    int         expires_seq;

-   short           idle;
-   short           period_active;
+   u8          idle;
+   u8          period_active;
+   u8          distribute_running;
+   u8          slack_started;
    struct hrtimer      period_timer;
    struct hrtimer      slack_timer;
    struct list_head    throttled_cfs_rq;
@@ -348,8 +350,6 @@ struct cfs_bandwidth {
    int         nr_periods;
    int         nr_throttled;
    u64         throttled_time;
-   bool                    distribute_running;

Leave a Reply

Your email address will not be published. Required fields are marked *