summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorAndrea Righi <arighi@nvidia.com>2026-03-20 18:28:31 +0100
committerTejun Heo <tj@kernel.org>2026-03-21 08:31:16 -1000
commit2197cecdb02c57b08340059452540fcf101fa30d (patch)
tree31201f4f244cc5ade37b682691d484710e1269e4 /kernel
parentf6689792ffc4bc226636a513f8b0ac7bd45c5091 (diff)
sched_ext: idle: Prioritize idle SMT sibling
In the default built-in idle CPU selection policy, when @prev_cpu is busy and no fully idle core is available, try to place the task on its SMT sibling if that sibling is idle, before searching any other idle CPU in the same LLC. Migration to the sibling is cheap and keeps the task on the same core, preserving L1 cache and reducing wakeup latency. On large SMT systems this appears to consistently boost throughput by roughly 2-3% on CPU-bound workloads (running a number of tasks equal to the number of SMT cores). Cc: Cheng-Yang Chou <yphbchou0911@gmail.com> Signed-off-by: Andrea Righi <arighi@nvidia.com> Signed-off-by: Tejun Heo <tj@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/sched/ext_idle.c26
1 files changed, 22 insertions, 4 deletions
diff --git a/kernel/sched/ext_idle.c b/kernel/sched/ext_idle.c
index c7e405262697..d9596427b5aa 100644
--- a/kernel/sched/ext_idle.c
+++ b/kernel/sched/ext_idle.c
@@ -424,18 +424,24 @@ static inline bool task_affinity_all(const struct task_struct *p)
* - prefer the last used CPU to take advantage of cached data (L1, L2) and
* branch prediction optimizations.
*
- * 3. Pick a CPU within the same LLC (Last-Level Cache):
+ * 3. Prefer @prev_cpu's SMT sibling:
+ * - if @prev_cpu is busy and no fully idle core is available, try to
+ * place the task on an idle SMT sibling of @prev_cpu; keeping the
+ * task on the same core makes migration cheaper, preserves L1 cache
+ * locality and reduces wakeup latency.
+ *
+ * 4. Pick a CPU within the same LLC (Last-Level Cache):
* - if the above conditions aren't met, pick a CPU that shares the same
* LLC, if the LLC domain is a subset of @cpus_allowed, to maintain
* cache locality.
*
- * 4. Pick a CPU within the same NUMA node, if enabled:
+ * 5. Pick a CPU within the same NUMA node, if enabled:
* - choose a CPU from the same NUMA node, if the node cpumask is a
* subset of @cpus_allowed, to reduce memory access latency.
*
- * 5. Pick any idle CPU within the @cpus_allowed domain.
+ * 6. Pick any idle CPU within the @cpus_allowed domain.
*
- * Step 3 and 4 are performed only if the system has, respectively,
+ * Step 4 and 5 are performed only if the system has, respectively,
* multiple LLCs / multiple NUMA nodes (see scx_selcpu_topo_llc and
* scx_selcpu_topo_numa) and they don't contain the same subset of CPUs.
*
@@ -617,6 +623,18 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
}
/*
+ * Use @prev_cpu's sibling if it's idle.
+ */
+ if (sched_smt_active()) {
+ for_each_cpu_and(cpu, cpu_smt_mask(prev_cpu), allowed) {
+ if (cpu == prev_cpu)
+ continue;
+ if (scx_idle_test_and_clear_cpu(cpu))
+ goto out_unlock;
+ }
+ }
+
+ /*
* Search for any idle CPU in the same LLC domain.
*/
if (llc_cpus) {