diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2026-04-13 09:36:45 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2026-04-13 09:36:45 -0700 |
| commit | 28483203f7d7fe4f123ed08266c381fac96b0701 (patch) | |
| tree | e7577f4be920dc08895245d0c3e3cf4d69049f01 /kernel | |
| parent | 599bbba5a36f6de57ab14c373c25881e2b5273f5 (diff) | |
| parent | 95c7d025cc8c3c6c41206e2a18332eb04878b7ef (diff) | |
Merge tag 'rcu.2026.03.31a' of git://git.kernel.org/pub/scm/linux/kernel/git/rcu/linux
Pull RCU updates from Joel Fernandes:
"NOCB CPU management:
- Consolidate rcu_nocb_cpu_offload() and rcu_nocb_cpu_deoffload() to
reduce code duplication
- Extract nocb_bypass_needs_flush() helper to reduce duplication in
NOCB bypass path
rcutorture/torture infrastructure:
- Add NOCB01 config for RCU_LAZY torture testing
- Add NOCB02 config for NOCB poll mode testing
- Add TRIVIAL-PREEMPT config for textbook-style preemptible RCU
torture
- Test call_srcu() with preemption both disabled and enabled
- Remove kvm-check-branches.sh in favor of kvm-series.sh
- Make hangs more visible in torture.sh output
- Add informative message for tests without a recheck file
- Fix numeric test comparison in srcu_lockdep.sh
- Use torture_shutdown_init() in refscale and rcuscale instead of
open-coded shutdown functions
- Fix modulo-zero error in torture_hrtimeout_ns()
SRCU:
- Fix SRCU read flavor macro comments
- Fix s/they disables/they disable/ typo in srcu_read_unlock_fast()
RCU Tasks:
- Document that RCU Tasks Trace grace periods now imply RCU grace
periods
- Remove unnecessary smp_store_release() in cblist_init_generic()"
* tag 'rcu.2026.03.31a' of git://git.kernel.org/pub/scm/linux/kernel/git/rcu/linux:
rcutorture: Test call_srcu() with preemption disabled and not
rcu: Add BOOTPARAM_RCU_STALL_PANIC Kconfig option
torture: Avoid modulo-zero error in torture_hrtimeout_ns()
rcu/nocb: Extract nocb_bypass_needs_flush() to reduce duplication
rcu/nocb: Consolidate rcu_nocb_cpu_offload/deoffload functions
rcu-tasks: Remove unnecessary smp_store_release() in cblist_init_generic()
rcutorture: Add NOCB02 config for nocb poll mode testing
rcutorture: Add NOCB01 config for RCU_LAZY torture testing
rcu-tasks: Document that RCU Tasks Trace grace periods now imply RCU grace periods
srcu: Fix s/they disables/they disable/ typo in srcu_read_unlock_fast()
srcu: Fix SRCU read flavor macro comments
rcuscale: Ditch rcu_scale_shutdown in favor of torture_shutdown_init()
refscale: Ditch ref_scale_shutdown in favor of torture_shutdown_init()
rcutorture: Fix numeric "test" comparison in srcu_lockdep.sh
torture: Print informative message for test without recheck file
torture: Make hangs more visible in torture.sh output
kvm-check-branches.sh: Remove in favor of kvm-series.sh
rcutorture: Add a textbook-style trivial preemptible RCU
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/rcu/Kconfig.debug | 11 | ||||
| -rw-r--r-- | kernel/rcu/rcu.h | 4 | ||||
| -rw-r--r-- | kernel/rcu/rcuscale.c | 78 | ||||
| -rw-r--r-- | kernel/rcu/rcutorture.c | 64 | ||||
| -rw-r--r-- | kernel/rcu/refscale.c | 51 | ||||
| -rw-r--r-- | kernel/rcu/tasks.h | 6 | ||||
| -rw-r--r-- | kernel/rcu/tree_nocb.h | 121 | ||||
| -rw-r--r-- | kernel/rcu/update.c | 22 | ||||
| -rw-r--r-- | kernel/torture.c | 2 |
9 files changed, 211 insertions, 148 deletions
diff --git a/kernel/rcu/Kconfig.debug b/kernel/rcu/Kconfig.debug index 625d75392647..e078e988773d 100644 --- a/kernel/rcu/Kconfig.debug +++ b/kernel/rcu/Kconfig.debug @@ -228,4 +228,15 @@ config RCU_DYNTICKS_TORTURE This has no value for production and is only for testing. +config TRIVIAL_PREEMPT_RCU + bool "Textbook trivial preemptible RCU in rcutorture" + depends on RCU_EXPERT && RCU_TORTURE_TEST + default n + help + This option enables a textbook preemptible RCU that is + implemented in rcutorture. Its sole purpose is to validate + code used in books, papers, and presentations. + + This has no value for production and is only for testing. + endmenu # "RCU Debugging" diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 9b10b57b79ad..fa6d30ce73d1 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -691,4 +691,8 @@ int rcu_stall_notifier_call_chain(unsigned long val, void *v); static inline int rcu_stall_notifier_call_chain(unsigned long val, void *v) { return NOTIFY_DONE; } #endif // #else // #if defined(CONFIG_RCU_STALL_COMMON) && defined(CONFIG_RCU_CPU_STALL_NOTIFIER) +#ifdef CONFIG_TRIVIAL_PREEMPT_RCU +void synchronize_rcu_trivial_preempt(void); +#endif // #ifdef CONFIG_TRIVIAL_PREEMPT_RCU + #endif /* __LINUX_RCU_H */ diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c index 4ac2b134a983..ac0b1c6b7dae 100644 --- a/kernel/rcu/rcuscale.c +++ b/kernel/rcu/rcuscale.c @@ -79,12 +79,6 @@ MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.ibm.com>"); * test-end checks, and the pair of calls through pointers. 
*/ -#ifdef MODULE -# define RCUSCALE_SHUTDOWN 0 -#else -# define RCUSCALE_SHUTDOWN 1 -#endif - torture_param(bool, gp_async, false, "Use asynchronous GP wait primitives"); torture_param(int, gp_async_max, 1000, "Max # outstanding waits per writer"); torture_param(bool, gp_exp, false, "Use expedited GP wait primitives"); @@ -92,8 +86,8 @@ torture_param(int, holdoff, 10, "Holdoff time before test start (s)"); torture_param(int, minruntime, 0, "Minimum run time (s)"); torture_param(int, nreaders, -1, "Number of RCU reader threads"); torture_param(int, nwriters, -1, "Number of RCU updater threads"); -torture_param(bool, shutdown, RCUSCALE_SHUTDOWN, - "Shutdown at end of scalability tests."); +torture_param(int, shutdown_secs, !IS_MODULE(CONFIG_RCU_SCALE_TEST) * 300, + "Shutdown at end of scalability tests or at specified timeout (s)."); torture_param(int, verbose, 1, "Enable verbose debugging printk()s"); torture_param(int, writer_holdoff, 0, "Holdoff (us) between GPs, zero to disable"); torture_param(int, writer_holdoff_jiffies, 0, "Holdoff (jiffies) between GPs, zero to disable"); @@ -123,7 +117,6 @@ static int nrealreaders; static int nrealwriters; static struct task_struct **writer_tasks; static struct task_struct **reader_tasks; -static struct task_struct *shutdown_task; static u64 **writer_durations; static bool *writer_done; @@ -132,7 +125,6 @@ static int *writer_n_durations; static atomic_t n_rcu_scale_reader_started; static atomic_t n_rcu_scale_writer_started; static atomic_t n_rcu_scale_writer_finished; -static wait_queue_head_t shutdown_wq; static u64 t_rcu_scale_writer_started; static u64 t_rcu_scale_writer_finished; static unsigned long b_rcu_gp_test_started; @@ -519,6 +511,8 @@ static void rcu_scale_async_cb(struct rcu_head *rhp) rcu_scale_free(wmbp); } +static void rcu_scale_cleanup(void); + /* * RCU scale writer kthread. Repeatedly does a grace period. 
*/ @@ -622,9 +616,11 @@ rcu_scale_writer(void *arg) b_rcu_gp_test_finished = cur_ops->get_gp_seq(); } - if (shutdown) { + if (shutdown_secs) { + writer_tasks[me] = NULL; smp_mb(); /* Assign before wake. */ - wake_up(&shutdown_wq); + rcu_scale_cleanup(); + kernel_power_off(); } } } @@ -668,8 +664,8 @@ static void rcu_scale_print_module_parms(struct rcu_scale_ops *cur_ops, const char *tag) { pr_alert("%s" SCALE_FLAG - "--- %s: gp_async=%d gp_async_max=%d gp_exp=%d holdoff=%d minruntime=%d nreaders=%d nwriters=%d writer_holdoff=%d writer_holdoff_jiffies=%d verbose=%d shutdown=%d\n", - scale_type, tag, gp_async, gp_async_max, gp_exp, holdoff, minruntime, nrealreaders, nrealwriters, writer_holdoff, writer_holdoff_jiffies, verbose, shutdown); + "--- %s: gp_async=%d gp_async_max=%d gp_exp=%d holdoff=%d minruntime=%d nreaders=%d nwriters=%d writer_holdoff=%d writer_holdoff_jiffies=%d verbose=%d shutdown_secs=%d\n", + scale_type, tag, gp_async, gp_async_max, gp_exp, holdoff, minruntime, nrealreaders, nrealwriters, writer_holdoff, writer_holdoff_jiffies, verbose, shutdown_secs); } /* @@ -722,6 +718,8 @@ static void kfree_call_rcu(struct rcu_head *rh) kfree(obj); } +static void kfree_scale_cleanup(void); + static int kfree_scale_thread(void *arg) { @@ -791,9 +789,11 @@ kfree_scale_thread(void *arg) rcuscale_seq_diff(b_rcu_gp_test_finished, b_rcu_gp_test_started), PAGES_TO_MB(mem_begin - mem_during)); - if (shutdown) { + if (shutdown_secs) { + kfree_reader_tasks[me] = NULL; smp_mb(); /* Assign before wake. */ - wake_up(&shutdown_wq); + kfree_scale_cleanup(); + kernel_power_off(); } } @@ -820,22 +820,6 @@ kfree_scale_cleanup(void) torture_cleanup_end(); } -/* - * shutdown kthread. Just waits to be awakened, then shuts down system. - */ -static int -kfree_scale_shutdown(void *arg) -{ - wait_event_idle(shutdown_wq, - atomic_read(&n_kfree_scale_thread_ended) >= kfree_nrealthreads); - - smp_mb(); /* Wake before output. 
*/ - - kfree_scale_cleanup(); - kernel_power_off(); - return -EINVAL; -} - // Used if doing RCU-kfree'ing via call_rcu(). static unsigned long jiffies_at_lazy_cb; static struct rcu_head lazy_test1_rh; @@ -895,13 +879,10 @@ kfree_scale_init(void) kfree_nrealthreads = compute_real(kfree_nthreads); /* Start up the kthreads. */ - if (shutdown) { - init_waitqueue_head(&shutdown_wq); - firsterr = torture_create_kthread(kfree_scale_shutdown, NULL, - shutdown_task); + if (shutdown_secs) { + firsterr = torture_shutdown_init(shutdown_secs, kfree_scale_cleanup); if (torture_init_error(firsterr)) goto unwind; - schedule_timeout_uninterruptible(1); } pr_alert("kfree object size=%zu, kfree_by_call_rcu=%d\n", @@ -1058,20 +1039,6 @@ rcu_scale_cleanup(void) torture_cleanup_end(); } -/* - * RCU scalability shutdown kthread. Just waits to be awakened, then shuts - * down system. - */ -static int -rcu_scale_shutdown(void *arg) -{ - wait_event_idle(shutdown_wq, atomic_read(&n_rcu_scale_writer_finished) >= nrealwriters); - smp_mb(); /* Wake before output. */ - rcu_scale_cleanup(); - kernel_power_off(); - return -EINVAL; -} - static int __init rcu_scale_init(void) { @@ -1121,13 +1088,10 @@ rcu_scale_init(void) /* Start up the kthreads. 
*/ - if (shutdown) { - init_waitqueue_head(&shutdown_wq); - firsterr = torture_create_kthread(rcu_scale_shutdown, NULL, - shutdown_task); + if (shutdown_secs) { + firsterr = torture_shutdown_init(shutdown_secs, rcu_scale_cleanup); if (torture_init_error(firsterr)) goto unwind; - schedule_timeout_uninterruptible(1); } reader_tasks = kzalloc_objs(reader_tasks[0], nrealreaders); if (reader_tasks == NULL) { @@ -1201,7 +1165,7 @@ rcu_scale_init(void) unwind: torture_init_end(); rcu_scale_cleanup(); - if (shutdown) { + if (shutdown_secs) { WARN_ON(!IS_MODULE(CONFIG_RCU_SCALE_TEST)); kernel_power_off(); } diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 8a9282a0245c..5f2848b828dc 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -842,7 +842,14 @@ static unsigned long srcu_torture_completed(void) static void srcu_torture_deferred_free(struct rcu_torture *rp) { + unsigned long flags; + bool lockit = jiffies & 0x1; + + if (lockit) + raw_spin_lock_irqsave(¤t->pi_lock, flags); call_srcu(srcu_ctlp, &rp->rtort_rcu, rcu_torture_cb); + if (lockit) + raw_spin_unlock_irqrestore(¤t->pi_lock, flags); } static void srcu_torture_synchronize(void) @@ -1061,6 +1068,61 @@ static struct rcu_torture_ops trivial_ops = { .name = "trivial" }; +#ifdef CONFIG_TRIVIAL_PREEMPT_RCU + +/* + * Definitions for trivial CONFIG_PREEMPT=y torture testing. This + * implementation does not work well with large numbers of tasks or with + * long-term preemption. Either or both get you RCU CPU stall warnings. 
+ */ + +static void rcu_sync_torture_init_trivial_preempt(void) +{ + rcu_sync_torture_init(); + if (WARN_ONCE(onoff_interval || shuffle_interval, "%s: Non-zero onoff_interval (%d) or shuffle_interval (%d) breaks trivial RCU, resetting to zero", __func__, onoff_interval, shuffle_interval)) { + onoff_interval = 0; + shuffle_interval = 0; + } +} + +static int rcu_torture_read_lock_trivial_preempt(void) +{ + struct task_struct *t = current; + + WRITE_ONCE(t->rcu_trivial_preempt_nesting, t->rcu_trivial_preempt_nesting + 1); + smp_mb(); + return 0; +} + +static void rcu_torture_read_unlock_trivial_preempt(int idx) +{ + struct task_struct *t = current; + + smp_store_release(&t->rcu_trivial_preempt_nesting, t->rcu_trivial_preempt_nesting - 1); +} + +static struct rcu_torture_ops trivial_preempt_ops = { + .ttype = RCU_TRIVIAL_FLAVOR, + .init = rcu_sync_torture_init_trivial_preempt, + .readlock = rcu_torture_read_lock_trivial_preempt, + .read_delay = rcu_read_delay, // just reuse rcu's version. + .readunlock = rcu_torture_read_unlock_trivial_preempt, + .readlock_held = torture_readlock_not_held, + .get_gp_seq = rcu_no_completed, + .sync = synchronize_rcu_trivial_preempt, + .exp_sync = synchronize_rcu_trivial_preempt, + .irq_capable = 0, // In theory it should be, but let's keep it trivial. 
+ .name = "trivial-preempt" +}; + +#define TRIVIAL_PREEMPT_OPS &trivial_preempt_ops, + +#else // #ifdef CONFIG_TRIVIAL_PREEMPT_RCU + +#define TRIVIAL_PREEMPT_OPS + +#endif // #else // #ifdef CONFIG_TRIVIAL_PREEMPT_RCU + #ifdef CONFIG_TASKS_RCU /* @@ -4449,7 +4511,7 @@ rcu_torture_init(void) static struct rcu_torture_ops *torture_ops[] = { &rcu_ops, &rcu_busted_ops, &srcu_ops, &srcud_ops, &busted_srcud_ops, TASKS_OPS TASKS_RUDE_OPS TASKS_TRACING_OPS - &trivial_ops, + &trivial_ops, TRIVIAL_PREEMPT_OPS }; if (!torture_init_begin(torture_type, verbose)) diff --git a/kernel/rcu/refscale.c b/kernel/rcu/refscale.c index c158b6a947cd..a2d9d75d88a1 100644 --- a/kernel/rcu/refscale.c +++ b/kernel/rcu/refscale.c @@ -92,15 +92,9 @@ torture_param(int, nreaders, -1, "Number of readers, -1 for 75% of CPUs."); torture_param(int, nruns, 30, "Number of experiments to run."); // Reader delay in nanoseconds, 0 for no delay. torture_param(int, readdelay, 0, "Read-side delay in nanoseconds."); - -#ifdef MODULE -# define REFSCALE_SHUTDOWN 0 -#else -# define REFSCALE_SHUTDOWN 1 -#endif - -torture_param(bool, shutdown, REFSCALE_SHUTDOWN, - "Shutdown at end of scalability tests."); +// Maximum shutdown delay in seconds, or zero for no shutdown. +torture_param(int, shutdown_secs, !IS_MODULE(CONFIG_REPRO_TEST) * 300, + "Shutdown at end of scalability tests or at specified timeout (s)."); struct reader_task { struct task_struct *task; @@ -109,12 +103,8 @@ struct reader_task { u64 last_duration_ns; }; -static struct task_struct *shutdown_task; -static wait_queue_head_t shutdown_wq; - static struct task_struct *main_task; static wait_queue_head_t main_wq; -static int shutdown_start; static struct reader_task *reader_tasks; @@ -1357,6 +1347,8 @@ static u64 process_durations(int n) return sum; } +static void ref_scale_cleanup(void); + // The main_func is the main orchestrator, it performs a bunch of // experiments. 
For every experiment, it orders all the readers // involved to start and waits for them to finish the experiment. It @@ -1443,9 +1435,10 @@ static int main_func(void *arg) oom_exit: // This will shutdown everything including us. - if (shutdown) { - shutdown_start = 1; - wake_up(&shutdown_wq); + if (shutdown_secs) { + main_task = NULL; // Avoid self-kill deadlock. + ref_scale_cleanup(); + kernel_power_off(); } // Wait for torture to stop us @@ -1463,8 +1456,8 @@ static void ref_scale_print_module_parms(const struct ref_scale_ops *cur_ops, const char *tag) { pr_alert("%s" SCALE_FLAG - "--- %s: verbose=%d verbose_batched=%d shutdown=%d holdoff=%d lookup_instances=%ld loops=%d nreaders=%d nruns=%d readdelay=%d\n", scale_type, tag, - verbose, verbose_batched, shutdown, holdoff, lookup_instances, loops, nreaders, nruns, readdelay); + "--- %s: verbose=%d verbose_batched=%d shutdown_secs=%d holdoff=%d lookup_instances=%ld loops=%d nreaders=%d nruns=%d readdelay=%d\n", scale_type, tag, + verbose, verbose_batched, shutdown_secs, holdoff, lookup_instances, loops, nreaders, nruns, readdelay); } static void @@ -1497,19 +1490,6 @@ ref_scale_cleanup(void) torture_cleanup_end(); } -// Shutdown kthread. Just waits to be awakened, then shuts down system. -static int -ref_scale_shutdown(void *arg) -{ - wait_event_idle(shutdown_wq, shutdown_start); - - smp_mb(); // Wake before output. - ref_scale_cleanup(); - kernel_power_off(); - - return -EINVAL; -} - static int __init ref_scale_init(void) { @@ -1553,13 +1533,10 @@ ref_scale_init(void) ref_scale_print_module_parms(cur_ops, "Start of test"); // Shutdown task - if (shutdown) { - init_waitqueue_head(&shutdown_wq); - firsterr = torture_create_kthread(ref_scale_shutdown, NULL, - shutdown_task); + if (shutdown_secs) { + firsterr = torture_shutdown_init(shutdown_secs, ref_scale_cleanup); if (torture_init_error(firsterr)) goto unwind; - schedule_timeout_uninterruptible(1); } // Reader tasks (default to ~75% of online CPUs). 
@@ -1604,7 +1581,7 @@ ref_scale_init(void) unwind: torture_init_end(); ref_scale_cleanup(); - if (shutdown) { + if (shutdown_secs) { WARN_ON(!IS_MODULE(CONFIG_RCU_REF_SCALE_TEST)); kernel_power_off(); } diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index 2b55e6acf3c1..48f0d803c8e2 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -291,9 +291,9 @@ static void cblist_init_generic(struct rcu_tasks *rtp) shift = ilog2(rcu_task_cpu_ids / lim); if (((rcu_task_cpu_ids - 1) >> shift) >= lim) shift++; - WRITE_ONCE(rtp->percpu_enqueue_shift, shift); - WRITE_ONCE(rtp->percpu_dequeue_lim, lim); - smp_store_release(&rtp->percpu_enqueue_lim, lim); + rtp->percpu_enqueue_shift = shift; + rtp->percpu_dequeue_lim = lim; + rtp->percpu_enqueue_lim = lim; pr_info("%s: Setting shift to %d and lim to %d rcu_task_cb_adjust=%d rcu_task_cpu_ids=%d.\n", rtp->name, data_race(rtp->percpu_enqueue_shift), data_race(rtp->percpu_enqueue_lim), diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h index b3337c7231cc..1047b30cd46b 100644 --- a/kernel/rcu/tree_nocb.h +++ b/kernel/rcu/tree_nocb.h @@ -379,6 +379,38 @@ static void rcu_nocb_try_flush_bypass(struct rcu_data *rdp, unsigned long j) } /* + * Determine if the bypass queue needs to be flushed based on time and size. + * For lazy-only bypass queues, use the lazy flush timeout; otherwise flush + * based on jiffy advancement. The flush_faster controls flush aggressiveness. + */ +static bool nocb_bypass_needs_flush(struct rcu_data *rdp, long bypass_ncbs, + long lazy_ncbs, unsigned long j, + bool flush_faster) +{ + bool bypass_is_lazy; + unsigned long bypass_first; + unsigned long flush_timeout; + long qhimark_thresh; + + if (!bypass_ncbs) + return false; + + qhimark_thresh = flush_faster ? 
qhimark : 2 * qhimark; + if (bypass_ncbs >= qhimark_thresh) + return true; + + bypass_first = READ_ONCE(rdp->nocb_bypass_first); + bypass_is_lazy = (bypass_ncbs == lazy_ncbs); + + if (bypass_is_lazy) + flush_timeout = rcu_get_jiffies_lazy_flush(); + else + flush_timeout = flush_faster ? 0 : 1; + + return time_after(j, bypass_first + flush_timeout); +} + +/* * See whether it is appropriate to use the ->nocb_bypass list in order * to control contention on ->nocb_lock. A limited number of direct * enqueues are permitted into ->cblist per jiffy. If ->nocb_bypass @@ -404,7 +436,8 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, unsigned long cur_gp_seq; unsigned long j = jiffies; long ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass); - bool bypass_is_lazy = (ncbs == READ_ONCE(rdp->lazy_len)); + long lazy_len = READ_ONCE(rdp->lazy_len); + bool bypass_is_lazy = (ncbs == lazy_len); lockdep_assert_irqs_disabled(); @@ -456,10 +489,7 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, // If ->nocb_bypass has been used too long or is too full, // flush ->nocb_bypass to ->cblist. 
- if ((ncbs && !bypass_is_lazy && j != READ_ONCE(rdp->nocb_bypass_first)) || - (ncbs && bypass_is_lazy && - (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + rcu_get_jiffies_lazy_flush()))) || - ncbs >= qhimark) { + if (nocb_bypass_needs_flush(rdp, ncbs, lazy_len, j, true)) { rcu_nocb_lock(rdp); *was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist); @@ -673,15 +703,8 @@ static void nocb_gp_wait(struct rcu_data *my_rdp) bypass_ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass); lazy_ncbs = READ_ONCE(rdp->lazy_len); - if (bypass_ncbs && (lazy_ncbs == bypass_ncbs) && - (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + rcu_get_jiffies_lazy_flush()) || - bypass_ncbs > 2 * qhimark)) { - flush_bypass = true; - } else if (bypass_ncbs && (lazy_ncbs != bypass_ncbs) && - (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + 1) || - bypass_ncbs > 2 * qhimark)) { - flush_bypass = true; - } else if (!bypass_ncbs && rcu_segcblist_empty(&rdp->cblist)) { + flush_bypass = nocb_bypass_needs_flush(rdp, bypass_ncbs, lazy_ncbs, j, false); + if (!flush_bypass && !bypass_ncbs && rcu_segcblist_empty(&rdp->cblist)) { rcu_nocb_unlock_irqrestore(rdp, flags); continue; /* No callbacks here, try next. 
*/ } @@ -1081,30 +1104,6 @@ static int rcu_nocb_rdp_deoffload(struct rcu_data *rdp) return 0; } -int rcu_nocb_cpu_deoffload(int cpu) -{ - struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); - int ret = 0; - - cpus_read_lock(); - mutex_lock(&rcu_state.nocb_mutex); - if (rcu_rdp_is_offloaded(rdp)) { - if (!cpu_online(cpu)) { - ret = rcu_nocb_rdp_deoffload(rdp); - if (!ret) - cpumask_clear_cpu(cpu, rcu_nocb_mask); - } else { - pr_info("NOCB: Cannot CB-deoffload online CPU %d\n", rdp->cpu); - ret = -EINVAL; - } - } - mutex_unlock(&rcu_state.nocb_mutex); - cpus_read_unlock(); - - return ret; -} -EXPORT_SYMBOL_GPL(rcu_nocb_cpu_deoffload); - static bool rcu_nocb_rdp_offload_wait_cond(struct rcu_data *rdp) { unsigned long flags; @@ -1149,28 +1148,52 @@ static int rcu_nocb_rdp_offload(struct rcu_data *rdp) return 0; } -int rcu_nocb_cpu_offload(int cpu) +/* Common helper for CPU offload/deoffload operations. */ +static int rcu_nocb_cpu_toggle_offload(int cpu, bool offload) { struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); int ret = 0; cpus_read_lock(); mutex_lock(&rcu_state.nocb_mutex); - if (!rcu_rdp_is_offloaded(rdp)) { - if (!cpu_online(cpu)) { - ret = rcu_nocb_rdp_offload(rdp); - if (!ret) - cpumask_set_cpu(cpu, rcu_nocb_mask); - } else { - pr_info("NOCB: Cannot CB-offload online CPU %d\n", rdp->cpu); - ret = -EINVAL; - } + + /* Already in desired state, nothing to do. */ + if (rcu_rdp_is_offloaded(rdp) == offload) + goto out_unlock; + + if (cpu_online(cpu)) { + pr_info("NOCB: Cannot CB-%soffload online CPU %d\n", + offload ? 
"" : "de", rdp->cpu); + ret = -EINVAL; + goto out_unlock; } + + if (offload) { + ret = rcu_nocb_rdp_offload(rdp); + if (!ret) + cpumask_set_cpu(cpu, rcu_nocb_mask); + } else { + ret = rcu_nocb_rdp_deoffload(rdp); + if (!ret) + cpumask_clear_cpu(cpu, rcu_nocb_mask); + } + +out_unlock: mutex_unlock(&rcu_state.nocb_mutex); cpus_read_unlock(); - return ret; } + +int rcu_nocb_cpu_deoffload(int cpu) +{ + return rcu_nocb_cpu_toggle_offload(cpu, false /* de-offload */); +} +EXPORT_SYMBOL_GPL(rcu_nocb_cpu_deoffload); + +int rcu_nocb_cpu_offload(int cpu) +{ + return rcu_nocb_cpu_toggle_offload(cpu, true /* offload */); +} EXPORT_SYMBOL_GPL(rcu_nocb_cpu_offload); #ifdef CONFIG_RCU_LAZY diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index d98a5c38e19c..b62735a67884 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -538,6 +538,28 @@ long torture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask, bool do EXPORT_SYMBOL_GPL(torture_sched_setaffinity); #endif +#if IS_ENABLED(CONFIG_TRIVIAL_PREEMPT_RCU) +// Trivial and stupid grace-period wait. Defined here so that lockdep +// kernels can find tasklist_lock. +void synchronize_rcu_trivial_preempt(void) +{ + struct task_struct *g; + struct task_struct *t; + + smp_mb(); // Order prior accesses before grace-period start. + rcu_read_lock(); // Protect task list. + for_each_process_thread(g, t) { + if (t == current) + continue; // Don't deadlock on ourselves! + // Order later rcu_read_lock() on other tasks after QS. 
+ while (smp_load_acquire(&t->rcu_trivial_preempt_nesting)) + continue; + } + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(synchronize_rcu_trivial_preempt); +#endif // #if IS_ENABLED(CONFIG_TRIVIAL_PREEMPT_RCU) + int rcu_cpu_stall_notifiers __read_mostly; // !0 = provide stall notifiers (rarely useful) EXPORT_SYMBOL_GPL(rcu_cpu_stall_notifiers); diff --git a/kernel/torture.c b/kernel/torture.c index ec3370986976..62c1ac777694 100644 --- a/kernel/torture.c +++ b/kernel/torture.c @@ -93,7 +93,7 @@ int torture_hrtimeout_ns(ktime_t baset_ns, u32 fuzzt_ns, const enum hrtimer_mode { ktime_t hto = baset_ns; - if (trsp) + if (trsp && fuzzt_ns) hto += torture_random(trsp) % fuzzt_ns; set_current_state(TASK_IDLE); return schedule_hrtimeout(&hto, mode); |
