From ea7c716a24aebe887e0990649ab697bd698cc325 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Fri, 24 Apr 2026 14:31:36 -1000
Subject: sched_ext: Refuse cross-task select_cpu_from_kfunc calls

select_cpu_from_kfunc() skipped pi_lock for @p when called from
ops.select_cpu() or another rq-locked SCX op, assuming the held lock
protects @p. scx_bpf_select_cpu_dfl() / __scx_bpf_select_cpu_and() accept
an arbitrary KF_RCU task_struct, so a caller in e.g. ops.select_cpu(p1)
or ops.enqueue(p1) can pass some other p2 - the held pi_lock / rq lock is
p1's, not p2's - and reading p2->cpus_ptr / nr_cpus_allowed races with
set_cpus_allowed_ptr() and migrate_disable_switch() on another CPU.

Abort the scheduler on cross-task calls in both branches: for
ops.select_cpu() use scx_kf_arg_task_ok() to verify @p is the wake-up
task recorded in current->scx.kf_tasks[] by SCX_CALL_OP_TASK_RET(); for
other rq-locked SCX ops compare task_rq(p) against scx_locked_rq().

v2: Switch the in_select_cpu cross-task check from direct_dispatch_task
    comparison to scx_kf_arg_task_ok(). The former spuriously rejects
    when ops.select_cpu() calls scx_bpf_dsq_insert() first, then calls
    scx_bpf_select_cpu_*() on the same task. (Andrea Righi)

Fixes: 0022b328504d ("sched_ext: Decouple kfunc unlocked-context check from kf_mask")
Reported-by: Chris Mason
Signed-off-by: Tejun Heo
Cc: Andrea Righi
---
 kernel/sched/ext_idle.c | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched/ext_idle.c b/kernel/sched/ext_idle.c
index c43d62d90e40..7468560a6d80 100644
--- a/kernel/sched/ext_idle.c
+++ b/kernel/sched/ext_idle.c
@@ -927,14 +927,24 @@ static s32 select_cpu_from_kfunc(struct scx_sched *sch, struct task_struct *p,
 	 * Accessing p->cpus_ptr / p->nr_cpus_allowed needs either @p's rq
 	 * lock or @p's pi_lock. Three cases:
 	 *
-	 * - inside ops.select_cpu(): try_to_wake_up() holds @p's pi_lock.
+	 * - inside ops.select_cpu(): try_to_wake_up() holds the wake-up
+	 *   task's pi_lock; the wake-up task is recorded in kf_tasks[0]
+	 *   by SCX_CALL_OP_TASK_RET().
 	 * - other rq-locked SCX op: scx_locked_rq() points at the held rq.
 	 * - truly unlocked (UNLOCKED ops, SYSCALL, non-SCX struct_ops):
 	 *   nothing held, take pi_lock ourselves.
+	 *
+	 * In the first two cases, BPF schedulers may pass an arbitrary task
+	 * that the held lock doesn't cover. Refuse those.
 	 */
 	if (this_rq()->scx.in_select_cpu) {
+		if (!scx_kf_arg_task_ok(sch, p))
+			return -EINVAL;
 		lockdep_assert_held(&p->pi_lock);
-	} else if (!scx_locked_rq()) {
+	} else if (scx_locked_rq()) {
+		if (task_rq(p) != scx_locked_rq())
+			goto cross_task;
+	} else {
 		raw_spin_lock_irqsave(&p->pi_lock, irq_flags);
 		we_locked = true;
 	}
@@ -960,6 +970,11 @@ static s32 select_cpu_from_kfunc(struct scx_sched *sch, struct task_struct *p,
 		raw_spin_unlock_irqrestore(&p->pi_lock, irq_flags);
 
 	return cpu;
+
+cross_task:
+	scx_error(sch, "select_cpu kfunc called cross-task on %s[%d]",
+		  p->comm, p->pid);
+	return -EINVAL;
 }
 
 /**
-- 
cgit v1.2.3