 include/linux/sched.h   |  1
 kernel/sched/core.c     | 18
 kernel/sched/deadline.c | 36
 kernel/sched/ext.c      |  1
 kernel/sched/sched.h    | 27
 kernel/sched/syscalls.c | 32
 6 files changed, 59 insertions(+), 56 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d395f2810fac..da0133524d08 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1874,7 +1874,6 @@ static inline int task_nice(const struct task_struct *p)
 extern int can_nice(const struct task_struct *p, const int nice);
 extern int task_curr(const struct task_struct *p);
 extern int idle_cpu(int cpu);
-extern int available_idle_cpu(int cpu);
 extern int sched_setscheduler(struct task_struct *, int, const struct sched_param *);
 extern int sched_setscheduler_nocheck(struct task_struct *, int, const struct sched_param *);
 extern void sched_set_fifo(struct task_struct *p);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 60afadb6eede..045f83ad261e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4950,9 +4950,13 @@ struct balance_callback *splice_balance_callbacks(struct rq *rq)
 	return __splice_balance_callbacks(rq, true);
 }
 
-static void __balance_callbacks(struct rq *rq)
+void __balance_callbacks(struct rq *rq, struct rq_flags *rf)
 {
+	if (rf)
+		rq_unpin_lock(rq, rf);
 	do_balance_callbacks(rq, __splice_balance_callbacks(rq, false));
+	if (rf)
+		rq_repin_lock(rq, rf);
 }
 
 void balance_callbacks(struct rq *rq, struct balance_callback *head)
@@ -4991,7 +4995,7 @@ static inline void finish_lock_switch(struct rq *rq)
 	 * prev into current:
 	 */
 	spin_acquire(&__rq_lockp(rq)->dep_map, 0, 0, _THIS_IP_);
-	__balance_callbacks(rq);
+	__balance_callbacks(rq, NULL);
 	raw_spin_rq_unlock_irq(rq);
 }
 
@@ -6867,7 +6871,7 @@ keep_resched:
 		proxy_tag_curr(rq, next);
 
 		rq_unpin_lock(rq, &rf);
-		__balance_callbacks(rq);
+		__balance_callbacks(rq, NULL);
 		raw_spin_rq_unlock_irq(rq);
 	}
 	trace_sched_exit_tp(is_switch);
@@ -7316,7 +7320,7 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
 	trace_sched_pi_setprio(p, pi_task);
 	oldprio = p->prio;
 
-	if (oldprio == prio)
+	if (oldprio == prio && !dl_prio(prio))
 		queue_flag &= ~DEQUEUE_MOVE;
 
 	prev_class = p->sched_class;
@@ -7362,9 +7366,7 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
 out_unlock:
 	/* Caller holds task_struct::pi_lock, IRQs are still disabled */
-	rq_unpin_lock(rq, &rf);
-	__balance_callbacks(rq);
-	rq_repin_lock(rq, &rf);
+	__balance_callbacks(rq, &rf);
 
 	__task_rq_unlock(rq, p, &rf);
 }
 #endif /* CONFIG_RT_MUTEXES */
@@ -9124,6 +9126,8 @@ void sched_move_task(struct task_struct *tsk, bool for_autogroup)
 
 	if (resched)
 		resched_curr(rq);
+
+	__balance_callbacks(rq, &rq_guard.rf);
 }
 
 static struct cgroup_subsys_state *
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 319439fe1870..c509f2e7d69d 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -752,8 +752,6 @@ static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se)
 	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
 	struct rq *rq = rq_of_dl_rq(dl_rq);
 
-	update_rq_clock(rq);
-
 	WARN_ON(is_dl_boosted(dl_se));
 	WARN_ON(dl_time_before(rq_clock(rq), dl_se->deadline));
 
@@ -1420,7 +1418,7 @@ update_stats_dequeue_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se, int
 
 static void update_curr_dl_se(struct rq *rq, struct sched_dl_entity *dl_se, s64 delta_exec)
 {
-	bool idle = rq->curr == rq->idle;
+	bool idle = idle_rq(rq);
 	s64 scaled_delta_exec;
 
 	if (unlikely(delta_exec <= 0)) {
@@ -1603,8 +1601,8 @@ void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec)
  *  |    8 | B:zero_laxity-wait |      |                       |
  *  |      |         | <---+                                   |
  *  |      +--------------------------------+                  |
- *  |        |              ^   ^ 2         |                  |
- *  |      7 |              | 2 +--------------------+         |
+ *  |        |              ^   ^ 2         |                  |
+ *  |      7 |              | 2, 1 +----------------+          |
  *  |        v                                       |         |
  *  |   +-------------+                              |         |
  *  +-- | C:idle-wait | -+
@@ -1649,8 +1647,11 @@ void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec)
  *        dl_defer_idle = 0
  *
  *
- * [1] A->B, A->D
+ * [1] A->B, A->D, C->B
  *       dl_server_start()
+ *         dl_defer_idle = 0;
+ *         if (dl_server_active)
+ *           return; // [B]
  *         dl_server_active = 1;
  *         enqueue_dl_entity()
  *           update_dl_entity(WAKEUP)
@@ -1759,6 +1760,7 @@ void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec)
  *   "B:zero_laxity-wait" -> "C:idle-wait" [label="7:dl_server_update_idle"]
  *   "B:zero_laxity-wait" -> "D:running" [label="3:dl_server_timer"]
  *   "C:idle-wait" -> "A:init" [label="8:dl_server_timer"]
+ *   "C:idle-wait" -> "B:zero_laxity-wait" [label="1:dl_server_start"]
  *   "C:idle-wait" -> "B:zero_laxity-wait" [label="2:dl_server_update"]
  *   "C:idle-wait" -> "C:idle-wait" [label="7:dl_server_update_idle"]
  *   "D:running" -> "A:init" [label="4:pick_task_dl"]
@@ -1784,6 +1786,7 @@ void dl_server_start(struct sched_dl_entity *dl_se)
 {
 	struct rq *rq = dl_se->rq;
 
+	dl_se->dl_defer_idle = 0;
 	if (!dl_server(dl_se) || dl_se->dl_server_active)
 		return;
 
@@ -1834,6 +1837,7 @@ void sched_init_dl_servers(void)
 		rq = cpu_rq(cpu);
 
 		guard(rq_lock_irq)(rq);
+		update_rq_clock(rq);
 
 		dl_se = &rq->fair_server;
 
@@ -2210,7 +2214,7 @@ enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags)
 		update_dl_entity(dl_se);
 	} else if (flags & ENQUEUE_REPLENISH) {
 		replenish_dl_entity(dl_se);
-	} else if ((flags & ENQUEUE_RESTORE) &&
+	} else if ((flags & ENQUEUE_MOVE) &&
 		   !is_dl_boosted(dl_se) &&
 		   dl_time_before(dl_se->deadline, rq_clock(rq_of_dl_se(dl_se)))) {
 		setup_new_dl_entity(dl_se);
@@ -3154,7 +3158,7 @@ void dl_add_task_root_domain(struct task_struct *p)
 	struct rq *rq;
 	struct dl_bw *dl_b;
 	unsigned int cpu;
-	struct cpumask *msk = this_cpu_cpumask_var_ptr(local_cpu_mask_dl);
+	struct cpumask *msk;
 
 	raw_spin_lock_irqsave(&p->pi_lock, rf.flags);
 	if (!dl_task(p) || dl_entity_is_special(&p->dl)) {
@@ -3162,20 +3166,12 @@ void dl_add_task_root_domain(struct task_struct *p)
 		return;
 	}
 
-	/*
-	 * Get an active rq, whose rq->rd traces the correct root
-	 * domain.
-	 * Ideally this would be under cpuset reader lock until rq->rd is
-	 * fetched. However, sleepable locks cannot nest inside pi_lock, so we
-	 * rely on the caller of dl_add_task_root_domain() holds 'cpuset_mutex'
-	 * to guarantee the CPU stays in the cpuset.
-	 */
+	msk = this_cpu_cpumask_var_ptr(local_cpu_mask_dl);
 	dl_get_task_effective_cpus(p, msk);
 	cpu = cpumask_first_and(cpu_active_mask, msk);
 	BUG_ON(cpu >= nr_cpu_ids);
 	rq = cpu_rq(cpu);
 	dl_b = &rq->rd->dl_bw;
-	/* End of fetching rd */
 
 	raw_spin_lock(&dl_b->lock);
 	__dl_add(dl_b, p->dl.dl_bw, cpumask_weight(rq->rd->span));
@@ -3299,6 +3295,12 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p)
 
 static u64 get_prio_dl(struct rq *rq, struct task_struct *p)
 {
+	/*
+	 * Make sure to update current so we don't return a stale value.
+	 */
+	if (task_current_donor(rq, p))
+		update_curr_dl(rq);
+
 	return p->dl.deadline;
 }
 
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 8f6d8d7f895c..afe28c04d5aa 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -545,6 +545,7 @@ static void scx_task_iter_start(struct scx_task_iter *iter)
 static void __scx_task_iter_rq_unlock(struct scx_task_iter *iter)
 {
 	if (iter->locked_task) {
+		__balance_callbacks(iter->rq, &iter->rf);
 		task_rq_unlock(iter->rq, iter->locked_task, &iter->rf);
 		iter->locked_task = NULL;
 	}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index d30cca6870f5..93fce4bbff5e 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1364,6 +1364,28 @@ static inline u32 sched_rng(void)
 #define cpu_curr(cpu)		(cpu_rq(cpu)->curr)
 #define raw_rq()		raw_cpu_ptr(&runqueues)
 
+static inline bool idle_rq(struct rq *rq)
+{
+	return rq->curr == rq->idle && !rq->nr_running && !rq->ttwu_pending;
+}
+
+/**
+ * available_idle_cpu - is a given CPU idle for enqueuing work.
+ * @cpu: the CPU in question.
+ *
+ * Return: 1 if the CPU is currently idle. 0 otherwise.
+ */
+static inline bool available_idle_cpu(int cpu)
+{
+	if (!idle_rq(cpu_rq(cpu)))
+		return 0;
+
+	if (vcpu_is_preempted(cpu))
+		return 0;
+
+	return 1;
+}
+
 #ifdef CONFIG_SCHED_PROXY_EXEC
 static inline void rq_set_donor(struct rq *rq, struct task_struct *t)
 {
@@ -2366,7 +2388,8 @@ extern const u32 sched_prio_to_wmult[40];
  *        should preserve as much state as possible.
  *
  * MOVE - paired with SAVE/RESTORE, explicitly does not preserve the location
- *        in the runqueue.
+ *        in the runqueue. IOW the priority is allowed to change. Callers
+ *        must expect to deal with balance callbacks.
  *
  * NOCLOCK - skip the update_rq_clock() (avoids double updates)
  *
@@ -3947,6 +3970,8 @@ extern void enqueue_task(struct rq *rq, struct task_struct *p, int flags);
 extern bool dequeue_task(struct rq *rq, struct task_struct *p, int flags);
 
 extern struct balance_callback *splice_balance_callbacks(struct rq *rq);
+
+extern void __balance_callbacks(struct rq *rq, struct rq_flags *rf);
 extern void balance_callbacks(struct rq *rq, struct balance_callback *head);
 
 /*
diff --git a/kernel/sched/syscalls.c b/kernel/sched/syscalls.c
index 0496dc29ed0f..6f10db3646e7 100644
--- a/kernel/sched/syscalls.c
+++ b/kernel/sched/syscalls.c
@@ -180,35 +180,7 @@ int task_prio(const struct task_struct *p)
  */
 int idle_cpu(int cpu)
 {
-	struct rq *rq = cpu_rq(cpu);
-
-	if (rq->curr != rq->idle)
-		return 0;
-
-	if (rq->nr_running)
-		return 0;
-
-	if (rq->ttwu_pending)
-		return 0;
-
-	return 1;
-}
-
-/**
- * available_idle_cpu - is a given CPU idle for enqueuing work.
- * @cpu: the CPU in question.
- *
- * Return: 1 if the CPU is currently idle. 0 otherwise.
- */
-int available_idle_cpu(int cpu)
-{
-	if (!idle_cpu(cpu))
-		return 0;
-
-	if (vcpu_is_preempted(cpu))
-		return 0;
-
-	return 1;
+	return idle_rq(cpu_rq(cpu));
 }
 
 /**
@@ -667,7 +639,7 @@ change:
 		 * itself.
 		 */
 		newprio = rt_effective_prio(p, newprio);
-		if (newprio == oldprio)
+		if (newprio == oldprio && !dl_prio(newprio))
 			queue_flags &= ~DEQUEUE_MOVE;
 	}
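Note on the new calling convention: the hunks above turn __balance_callbacks() from a core.c-local helper into a shared one that takes an optional struct rq_flags pointer; when a caller's rq lock is pinned, the helper now performs the rq_unpin_lock()/rq_repin_lock() dance itself, which is what the rt_mutex_setprio(), sched_move_task() and sched_ext iterator hunks rely on. The sketch below shows a caller of the new interface. It is illustrative only and not part of the patch: example_requeue_task() is a hypothetical name, and the snippet assumes a kernel tree carrying this change (it is not compilable standalone).

/*
 * Illustrative sketch, not part of the patch: a caller that holds a pinned
 * rq lock via task_rq_lock() can hand its rq_flags to __balance_callbacks()
 * instead of open-coding the unpin/repin pair around it.
 */
static void example_requeue_task(struct task_struct *p)
{
	struct rq_flags rf;
	struct rq *rq;

	rq = task_rq_lock(p, &rf);

	/* ... dequeue/enqueue p here; this may queue a balance callback ... */

	/*
	 * Previously:
	 *	rq_unpin_lock(rq, &rf);
	 *	__balance_callbacks(rq);
	 *	rq_repin_lock(rq, &rf);
	 *
	 * Now the helper unpins/repins when an rq_flags pointer is passed;
	 * passing NULL keeps the old behaviour for unpinned contexts.
	 */
	__balance_callbacks(rq, &rf);

	task_rq_unlock(rq, p, &rf);
}

This mirrors the rt_mutex_setprio() hunk above, where the open-coded unpin/__balance_callbacks/repin sequence collapses into a single call, and the finish_lock_switch()/__schedule() call sites, which pass NULL because the lock is not pinned there.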
