summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2026-04-13 09:36:45 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2026-04-13 09:36:45 -0700
commit28483203f7d7fe4f123ed08266c381fac96b0701 (patch)
treee7577f4be920dc08895245d0c3e3cf4d69049f01 /kernel
parent599bbba5a36f6de57ab14c373c25881e2b5273f5 (diff)
parent95c7d025cc8c3c6c41206e2a18332eb04878b7ef (diff)
Merge tag 'rcu.2026.03.31a' of git://git.kernel.org/pub/scm/linux/kernel/git/rcu/linux
Pull RCU updates from Joel Fernandes: "NOCB CPU management: - Consolidate rcu_nocb_cpu_offload() and rcu_nocb_cpu_deoffload() to reduce code duplication - Extract nocb_bypass_needs_flush() helper to reduce duplication in NOCB bypass path rcutorture/torture infrastructure: - Add NOCB01 config for RCU_LAZY torture testing - Add NOCB02 config for NOCB poll mode testing - Add TRIVIAL-PREEMPT config for textbook-style preemptible RCU torture - Test call_srcu() with preemption both disabled and enabled - Remove kvm-check-branches.sh in favor of kvm-series.sh - Make hangs more visible in torture.sh output - Add informative message for tests without a recheck file - Fix numeric test comparison in srcu_lockdep.sh - Use torture_shutdown_init() in refscale and rcuscale instead of open-coded shutdown functions - Fix modulo-zero error in torture_hrtimeout_ns(). SRCU: - Fix SRCU read flavor macro comments - Fix s/they disables/they disable/ typo in srcu_read_unlock_fast() RCU Tasks: - Document that RCU Tasks Trace grace periods now imply RCU grace periods - Remove unnecessary smp_store_release() in cblist_init_generic()" * tag 'rcu.2026.03.31a' of git://git.kernel.org/pub/scm/linux/kernel/git/rcu/linux: rcutorture: Test call_srcu() with preemption disabled and not rcu: Add BOOTPARAM_RCU_STALL_PANIC Kconfig option torture: Avoid modulo-zero error in torture_hrtimeout_ns() rcu/nocb: Extract nocb_bypass_needs_flush() to reduce duplication rcu/nocb: Consolidate rcu_nocb_cpu_offload/deoffload functions rcu-tasks: Remove unnecessary smp_store_release() in cblist_init_generic() rcutorture: Add NOCB02 config for nocb poll mode testing rcutorture: Add NOCB01 config for RCU_LAZY torture testing rcu-tasks: Document that RCU Tasks Trace grace periods now imply RCU grace periods srcu: Fix s/they disables/they disable/ typo in srcu_read_unlock_fast() srcu: Fix SRCU read flavor macro comments rcuscale: Ditch rcu_scale_shutdown in favor of torture_shutdown_init() refscale: Ditch 
ref_scale_shutdown in favor of torture_shutdown_init() rcutorture: Fix numeric "test" comparison in srcu_lockdep.sh torture: Print informative message for test without recheck file torture: Make hangs more visible in torture.sh output kvm-check-branches.sh: Remove in favor of kvm-series.sh rcutorture: Add a textbook-style trivial preemptible RCU
Diffstat (limited to 'kernel')
-rw-r--r--kernel/rcu/Kconfig.debug11
-rw-r--r--kernel/rcu/rcu.h4
-rw-r--r--kernel/rcu/rcuscale.c78
-rw-r--r--kernel/rcu/rcutorture.c64
-rw-r--r--kernel/rcu/refscale.c51
-rw-r--r--kernel/rcu/tasks.h6
-rw-r--r--kernel/rcu/tree_nocb.h121
-rw-r--r--kernel/rcu/update.c22
-rw-r--r--kernel/torture.c2
9 files changed, 211 insertions, 148 deletions
diff --git a/kernel/rcu/Kconfig.debug b/kernel/rcu/Kconfig.debug
index 625d75392647..e078e988773d 100644
--- a/kernel/rcu/Kconfig.debug
+++ b/kernel/rcu/Kconfig.debug
@@ -228,4 +228,15 @@ config RCU_DYNTICKS_TORTURE
This has no value for production and is only for testing.
+config TRIVIAL_PREEMPT_RCU
+ bool "Textbook trivial preemptible RCU in rcutorture"
+ depends on RCU_EXPERT && RCU_TORTURE_TEST
+ default n
+ help
+ This option enables a textbook preemptible RCU that is
+ implemented in rcutorture. Its sole purpose is to validate
+ code used in books, papers, and presentations.
+
+ This has no value for production and is only for testing.
+
endmenu # "RCU Debugging"
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 9b10b57b79ad..fa6d30ce73d1 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -691,4 +691,8 @@ int rcu_stall_notifier_call_chain(unsigned long val, void *v);
static inline int rcu_stall_notifier_call_chain(unsigned long val, void *v) { return NOTIFY_DONE; }
#endif // #else // #if defined(CONFIG_RCU_STALL_COMMON) && defined(CONFIG_RCU_CPU_STALL_NOTIFIER)
+#ifdef CONFIG_TRIVIAL_PREEMPT_RCU
+void synchronize_rcu_trivial_preempt(void);
+#endif // #ifdef CONFIG_TRIVIAL_PREEMPT_RCU
+
#endif /* __LINUX_RCU_H */
diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c
index 4ac2b134a983..ac0b1c6b7dae 100644
--- a/kernel/rcu/rcuscale.c
+++ b/kernel/rcu/rcuscale.c
@@ -79,12 +79,6 @@ MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.ibm.com>");
* test-end checks, and the pair of calls through pointers.
*/
-#ifdef MODULE
-# define RCUSCALE_SHUTDOWN 0
-#else
-# define RCUSCALE_SHUTDOWN 1
-#endif
-
torture_param(bool, gp_async, false, "Use asynchronous GP wait primitives");
torture_param(int, gp_async_max, 1000, "Max # outstanding waits per writer");
torture_param(bool, gp_exp, false, "Use expedited GP wait primitives");
@@ -92,8 +86,8 @@ torture_param(int, holdoff, 10, "Holdoff time before test start (s)");
torture_param(int, minruntime, 0, "Minimum run time (s)");
torture_param(int, nreaders, -1, "Number of RCU reader threads");
torture_param(int, nwriters, -1, "Number of RCU updater threads");
-torture_param(bool, shutdown, RCUSCALE_SHUTDOWN,
- "Shutdown at end of scalability tests.");
+torture_param(int, shutdown_secs, !IS_MODULE(CONFIG_RCU_SCALE_TEST) * 300,
+ "Shutdown at end of scalability tests or at specified timeout (s).");
torture_param(int, verbose, 1, "Enable verbose debugging printk()s");
torture_param(int, writer_holdoff, 0, "Holdoff (us) between GPs, zero to disable");
torture_param(int, writer_holdoff_jiffies, 0, "Holdoff (jiffies) between GPs, zero to disable");
@@ -123,7 +117,6 @@ static int nrealreaders;
static int nrealwriters;
static struct task_struct **writer_tasks;
static struct task_struct **reader_tasks;
-static struct task_struct *shutdown_task;
static u64 **writer_durations;
static bool *writer_done;
@@ -132,7 +125,6 @@ static int *writer_n_durations;
static atomic_t n_rcu_scale_reader_started;
static atomic_t n_rcu_scale_writer_started;
static atomic_t n_rcu_scale_writer_finished;
-static wait_queue_head_t shutdown_wq;
static u64 t_rcu_scale_writer_started;
static u64 t_rcu_scale_writer_finished;
static unsigned long b_rcu_gp_test_started;
@@ -519,6 +511,8 @@ static void rcu_scale_async_cb(struct rcu_head *rhp)
rcu_scale_free(wmbp);
}
+static void rcu_scale_cleanup(void);
+
/*
* RCU scale writer kthread. Repeatedly does a grace period.
*/
@@ -622,9 +616,11 @@ rcu_scale_writer(void *arg)
b_rcu_gp_test_finished =
cur_ops->get_gp_seq();
}
- if (shutdown) {
+ if (shutdown_secs) {
+ writer_tasks[me] = NULL;
smp_mb(); /* Assign before wake. */
- wake_up(&shutdown_wq);
+ rcu_scale_cleanup();
+ kernel_power_off();
}
}
}
@@ -668,8 +664,8 @@ static void
rcu_scale_print_module_parms(struct rcu_scale_ops *cur_ops, const char *tag)
{
pr_alert("%s" SCALE_FLAG
- "--- %s: gp_async=%d gp_async_max=%d gp_exp=%d holdoff=%d minruntime=%d nreaders=%d nwriters=%d writer_holdoff=%d writer_holdoff_jiffies=%d verbose=%d shutdown=%d\n",
- scale_type, tag, gp_async, gp_async_max, gp_exp, holdoff, minruntime, nrealreaders, nrealwriters, writer_holdoff, writer_holdoff_jiffies, verbose, shutdown);
+ "--- %s: gp_async=%d gp_async_max=%d gp_exp=%d holdoff=%d minruntime=%d nreaders=%d nwriters=%d writer_holdoff=%d writer_holdoff_jiffies=%d verbose=%d shutdown_secs=%d\n",
+ scale_type, tag, gp_async, gp_async_max, gp_exp, holdoff, minruntime, nrealreaders, nrealwriters, writer_holdoff, writer_holdoff_jiffies, verbose, shutdown_secs);
}
/*
@@ -722,6 +718,8 @@ static void kfree_call_rcu(struct rcu_head *rh)
kfree(obj);
}
+static void kfree_scale_cleanup(void);
+
static int
kfree_scale_thread(void *arg)
{
@@ -791,9 +789,11 @@ kfree_scale_thread(void *arg)
rcuscale_seq_diff(b_rcu_gp_test_finished, b_rcu_gp_test_started),
PAGES_TO_MB(mem_begin - mem_during));
- if (shutdown) {
+ if (shutdown_secs) {
+ kfree_reader_tasks[me] = NULL;
smp_mb(); /* Assign before wake. */
- wake_up(&shutdown_wq);
+ kfree_scale_cleanup();
+ kernel_power_off();
}
}
@@ -820,22 +820,6 @@ kfree_scale_cleanup(void)
torture_cleanup_end();
}
-/*
- * shutdown kthread. Just waits to be awakened, then shuts down system.
- */
-static int
-kfree_scale_shutdown(void *arg)
-{
- wait_event_idle(shutdown_wq,
- atomic_read(&n_kfree_scale_thread_ended) >= kfree_nrealthreads);
-
- smp_mb(); /* Wake before output. */
-
- kfree_scale_cleanup();
- kernel_power_off();
- return -EINVAL;
-}
-
// Used if doing RCU-kfree'ing via call_rcu().
static unsigned long jiffies_at_lazy_cb;
static struct rcu_head lazy_test1_rh;
@@ -895,13 +879,10 @@ kfree_scale_init(void)
kfree_nrealthreads = compute_real(kfree_nthreads);
/* Start up the kthreads. */
- if (shutdown) {
- init_waitqueue_head(&shutdown_wq);
- firsterr = torture_create_kthread(kfree_scale_shutdown, NULL,
- shutdown_task);
+ if (shutdown_secs) {
+ firsterr = torture_shutdown_init(shutdown_secs, kfree_scale_cleanup);
if (torture_init_error(firsterr))
goto unwind;
- schedule_timeout_uninterruptible(1);
}
pr_alert("kfree object size=%zu, kfree_by_call_rcu=%d\n",
@@ -1058,20 +1039,6 @@ rcu_scale_cleanup(void)
torture_cleanup_end();
}
-/*
- * RCU scalability shutdown kthread. Just waits to be awakened, then shuts
- * down system.
- */
-static int
-rcu_scale_shutdown(void *arg)
-{
- wait_event_idle(shutdown_wq, atomic_read(&n_rcu_scale_writer_finished) >= nrealwriters);
- smp_mb(); /* Wake before output. */
- rcu_scale_cleanup();
- kernel_power_off();
- return -EINVAL;
-}
-
static int __init
rcu_scale_init(void)
{
@@ -1121,13 +1088,10 @@ rcu_scale_init(void)
/* Start up the kthreads. */
- if (shutdown) {
- init_waitqueue_head(&shutdown_wq);
- firsterr = torture_create_kthread(rcu_scale_shutdown, NULL,
- shutdown_task);
+ if (shutdown_secs) {
+ firsterr = torture_shutdown_init(shutdown_secs, rcu_scale_cleanup);
if (torture_init_error(firsterr))
goto unwind;
- schedule_timeout_uninterruptible(1);
}
reader_tasks = kzalloc_objs(reader_tasks[0], nrealreaders);
if (reader_tasks == NULL) {
@@ -1201,7 +1165,7 @@ rcu_scale_init(void)
unwind:
torture_init_end();
rcu_scale_cleanup();
- if (shutdown) {
+ if (shutdown_secs) {
WARN_ON(!IS_MODULE(CONFIG_RCU_SCALE_TEST));
kernel_power_off();
}
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 8a9282a0245c..5f2848b828dc 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -842,7 +842,14 @@ static unsigned long srcu_torture_completed(void)
static void srcu_torture_deferred_free(struct rcu_torture *rp)
{
+ unsigned long flags;
+ bool lockit = jiffies & 0x1;
+
+ if (lockit)
+ raw_spin_lock_irqsave(&current->pi_lock, flags);
call_srcu(srcu_ctlp, &rp->rtort_rcu, rcu_torture_cb);
+ if (lockit)
+ raw_spin_unlock_irqrestore(&current->pi_lock, flags);
}
static void srcu_torture_synchronize(void)
@@ -1061,6 +1068,61 @@ static struct rcu_torture_ops trivial_ops = {
.name = "trivial"
};
+#ifdef CONFIG_TRIVIAL_PREEMPT_RCU
+
+/*
+ * Definitions for trivial CONFIG_PREEMPT=y torture testing. This
+ * implementation does not work well with large numbers of tasks or with
+ * long-term preemption. Either or both get you RCU CPU stall warnings.
+ */
+
+static void rcu_sync_torture_init_trivial_preempt(void)
+{
+ rcu_sync_torture_init();
+ if (WARN_ONCE(onoff_interval || shuffle_interval, "%s: Non-zero onoff_interval (%d) or shuffle_interval (%d) breaks trivial RCU, resetting to zero", __func__, onoff_interval, shuffle_interval)) {
+ onoff_interval = 0;
+ shuffle_interval = 0;
+ }
+}
+
+static int rcu_torture_read_lock_trivial_preempt(void)
+{
+ struct task_struct *t = current;
+
+ WRITE_ONCE(t->rcu_trivial_preempt_nesting, t->rcu_trivial_preempt_nesting + 1);
+ smp_mb();
+ return 0;
+}
+
+static void rcu_torture_read_unlock_trivial_preempt(int idx)
+{
+ struct task_struct *t = current;
+
+ smp_store_release(&t->rcu_trivial_preempt_nesting, t->rcu_trivial_preempt_nesting - 1);
+}
+
+static struct rcu_torture_ops trivial_preempt_ops = {
+ .ttype = RCU_TRIVIAL_FLAVOR,
+ .init = rcu_sync_torture_init_trivial_preempt,
+ .readlock = rcu_torture_read_lock_trivial_preempt,
+ .read_delay = rcu_read_delay, // just reuse rcu's version.
+ .readunlock = rcu_torture_read_unlock_trivial_preempt,
+ .readlock_held = torture_readlock_not_held,
+ .get_gp_seq = rcu_no_completed,
+ .sync = synchronize_rcu_trivial_preempt,
+ .exp_sync = synchronize_rcu_trivial_preempt,
+ .irq_capable = 0, // In theory it should be, but let's keep it trivial.
+ .name = "trivial-preempt"
+};
+
+#define TRIVIAL_PREEMPT_OPS &trivial_preempt_ops,
+
+#else // #ifdef CONFIG_TRIVIAL_PREEMPT_RCU
+
+#define TRIVIAL_PREEMPT_OPS
+
+#endif // #else // #ifdef CONFIG_TRIVIAL_PREEMPT_RCU
+
#ifdef CONFIG_TASKS_RCU
/*
@@ -4449,7 +4511,7 @@ rcu_torture_init(void)
static struct rcu_torture_ops *torture_ops[] = {
&rcu_ops, &rcu_busted_ops, &srcu_ops, &srcud_ops, &busted_srcud_ops,
TASKS_OPS TASKS_RUDE_OPS TASKS_TRACING_OPS
- &trivial_ops,
+ &trivial_ops, TRIVIAL_PREEMPT_OPS
};
if (!torture_init_begin(torture_type, verbose))
diff --git a/kernel/rcu/refscale.c b/kernel/rcu/refscale.c
index c158b6a947cd..a2d9d75d88a1 100644
--- a/kernel/rcu/refscale.c
+++ b/kernel/rcu/refscale.c
@@ -92,15 +92,9 @@ torture_param(int, nreaders, -1, "Number of readers, -1 for 75% of CPUs.");
torture_param(int, nruns, 30, "Number of experiments to run.");
// Reader delay in nanoseconds, 0 for no delay.
torture_param(int, readdelay, 0, "Read-side delay in nanoseconds.");
-
-#ifdef MODULE
-# define REFSCALE_SHUTDOWN 0
-#else
-# define REFSCALE_SHUTDOWN 1
-#endif
-
-torture_param(bool, shutdown, REFSCALE_SHUTDOWN,
- "Shutdown at end of scalability tests.");
+// Maximum shutdown delay in seconds, or zero for no shutdown.
torture_param(int, shutdown_secs, !IS_MODULE(CONFIG_RCU_REF_SCALE_TEST) * 300,
+	      "Shutdown at end of scalability tests or at specified timeout (s).");
struct reader_task {
struct task_struct *task;
@@ -109,12 +103,8 @@ struct reader_task {
u64 last_duration_ns;
};
-static struct task_struct *shutdown_task;
-static wait_queue_head_t shutdown_wq;
-
static struct task_struct *main_task;
static wait_queue_head_t main_wq;
-static int shutdown_start;
static struct reader_task *reader_tasks;
@@ -1357,6 +1347,8 @@ static u64 process_durations(int n)
return sum;
}
+static void ref_scale_cleanup(void);
+
// The main_func is the main orchestrator, it performs a bunch of
// experiments. For every experiment, it orders all the readers
// involved to start and waits for them to finish the experiment. It
@@ -1443,9 +1435,10 @@ static int main_func(void *arg)
oom_exit:
// This will shutdown everything including us.
- if (shutdown) {
- shutdown_start = 1;
- wake_up(&shutdown_wq);
+ if (shutdown_secs) {
+ main_task = NULL; // Avoid self-kill deadlock.
+ ref_scale_cleanup();
+ kernel_power_off();
}
// Wait for torture to stop us
@@ -1463,8 +1456,8 @@ static void
ref_scale_print_module_parms(const struct ref_scale_ops *cur_ops, const char *tag)
{
pr_alert("%s" SCALE_FLAG
- "--- %s: verbose=%d verbose_batched=%d shutdown=%d holdoff=%d lookup_instances=%ld loops=%d nreaders=%d nruns=%d readdelay=%d\n", scale_type, tag,
- verbose, verbose_batched, shutdown, holdoff, lookup_instances, loops, nreaders, nruns, readdelay);
+ "--- %s: verbose=%d verbose_batched=%d shutdown_secs=%d holdoff=%d lookup_instances=%ld loops=%d nreaders=%d nruns=%d readdelay=%d\n", scale_type, tag,
+ verbose, verbose_batched, shutdown_secs, holdoff, lookup_instances, loops, nreaders, nruns, readdelay);
}
static void
@@ -1497,19 +1490,6 @@ ref_scale_cleanup(void)
torture_cleanup_end();
}
-// Shutdown kthread. Just waits to be awakened, then shuts down system.
-static int
-ref_scale_shutdown(void *arg)
-{
- wait_event_idle(shutdown_wq, shutdown_start);
-
- smp_mb(); // Wake before output.
- ref_scale_cleanup();
- kernel_power_off();
-
- return -EINVAL;
-}
-
static int __init
ref_scale_init(void)
{
@@ -1553,13 +1533,10 @@ ref_scale_init(void)
ref_scale_print_module_parms(cur_ops, "Start of test");
// Shutdown task
- if (shutdown) {
- init_waitqueue_head(&shutdown_wq);
- firsterr = torture_create_kthread(ref_scale_shutdown, NULL,
- shutdown_task);
+ if (shutdown_secs) {
+ firsterr = torture_shutdown_init(shutdown_secs, ref_scale_cleanup);
if (torture_init_error(firsterr))
goto unwind;
- schedule_timeout_uninterruptible(1);
}
// Reader tasks (default to ~75% of online CPUs).
@@ -1604,7 +1581,7 @@ ref_scale_init(void)
unwind:
torture_init_end();
ref_scale_cleanup();
- if (shutdown) {
+ if (shutdown_secs) {
WARN_ON(!IS_MODULE(CONFIG_RCU_REF_SCALE_TEST));
kernel_power_off();
}
diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index 2b55e6acf3c1..48f0d803c8e2 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -291,9 +291,9 @@ static void cblist_init_generic(struct rcu_tasks *rtp)
shift = ilog2(rcu_task_cpu_ids / lim);
if (((rcu_task_cpu_ids - 1) >> shift) >= lim)
shift++;
- WRITE_ONCE(rtp->percpu_enqueue_shift, shift);
- WRITE_ONCE(rtp->percpu_dequeue_lim, lim);
- smp_store_release(&rtp->percpu_enqueue_lim, lim);
+ rtp->percpu_enqueue_shift = shift;
+ rtp->percpu_dequeue_lim = lim;
+ rtp->percpu_enqueue_lim = lim;
pr_info("%s: Setting shift to %d and lim to %d rcu_task_cb_adjust=%d rcu_task_cpu_ids=%d.\n",
rtp->name, data_race(rtp->percpu_enqueue_shift), data_race(rtp->percpu_enqueue_lim),
diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
index b3337c7231cc..1047b30cd46b 100644
--- a/kernel/rcu/tree_nocb.h
+++ b/kernel/rcu/tree_nocb.h
@@ -379,6 +379,38 @@ static void rcu_nocb_try_flush_bypass(struct rcu_data *rdp, unsigned long j)
}
/*
+ * Determine if the bypass queue needs to be flushed based on time and size.
+ * For lazy-only bypass queues, use the lazy flush timeout; otherwise flush
+ * based on jiffy advancement. The flush_faster controls flush aggressiveness.
+ */
+static bool nocb_bypass_needs_flush(struct rcu_data *rdp, long bypass_ncbs,
+ long lazy_ncbs, unsigned long j,
+ bool flush_faster)
+{
+ bool bypass_is_lazy;
+ unsigned long bypass_first;
+ unsigned long flush_timeout;
+ long qhimark_thresh;
+
+ if (!bypass_ncbs)
+ return false;
+
+ qhimark_thresh = flush_faster ? qhimark : 2 * qhimark;
+ if (bypass_ncbs >= qhimark_thresh)
+ return true;
+
+ bypass_first = READ_ONCE(rdp->nocb_bypass_first);
+ bypass_is_lazy = (bypass_ncbs == lazy_ncbs);
+
+ if (bypass_is_lazy)
+ flush_timeout = rcu_get_jiffies_lazy_flush();
+ else
+ flush_timeout = flush_faster ? 0 : 1;
+
+ return time_after(j, bypass_first + flush_timeout);
+}
+
+/*
* See whether it is appropriate to use the ->nocb_bypass list in order
* to control contention on ->nocb_lock. A limited number of direct
* enqueues are permitted into ->cblist per jiffy. If ->nocb_bypass
@@ -404,7 +436,8 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
unsigned long cur_gp_seq;
unsigned long j = jiffies;
long ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
- bool bypass_is_lazy = (ncbs == READ_ONCE(rdp->lazy_len));
+ long lazy_len = READ_ONCE(rdp->lazy_len);
+ bool bypass_is_lazy = (ncbs == lazy_len);
lockdep_assert_irqs_disabled();
@@ -456,10 +489,7 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
// If ->nocb_bypass has been used too long or is too full,
// flush ->nocb_bypass to ->cblist.
- if ((ncbs && !bypass_is_lazy && j != READ_ONCE(rdp->nocb_bypass_first)) ||
- (ncbs && bypass_is_lazy &&
- (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + rcu_get_jiffies_lazy_flush()))) ||
- ncbs >= qhimark) {
+ if (nocb_bypass_needs_flush(rdp, ncbs, lazy_len, j, true)) {
rcu_nocb_lock(rdp);
*was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
@@ -673,15 +703,8 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
bypass_ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
lazy_ncbs = READ_ONCE(rdp->lazy_len);
- if (bypass_ncbs && (lazy_ncbs == bypass_ncbs) &&
- (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + rcu_get_jiffies_lazy_flush()) ||
- bypass_ncbs > 2 * qhimark)) {
- flush_bypass = true;
- } else if (bypass_ncbs && (lazy_ncbs != bypass_ncbs) &&
- (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + 1) ||
- bypass_ncbs > 2 * qhimark)) {
- flush_bypass = true;
- } else if (!bypass_ncbs && rcu_segcblist_empty(&rdp->cblist)) {
+ flush_bypass = nocb_bypass_needs_flush(rdp, bypass_ncbs, lazy_ncbs, j, false);
+ if (!flush_bypass && !bypass_ncbs && rcu_segcblist_empty(&rdp->cblist)) {
rcu_nocb_unlock_irqrestore(rdp, flags);
continue; /* No callbacks here, try next. */
}
@@ -1081,30 +1104,6 @@ static int rcu_nocb_rdp_deoffload(struct rcu_data *rdp)
return 0;
}
-int rcu_nocb_cpu_deoffload(int cpu)
-{
- struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
- int ret = 0;
-
- cpus_read_lock();
- mutex_lock(&rcu_state.nocb_mutex);
- if (rcu_rdp_is_offloaded(rdp)) {
- if (!cpu_online(cpu)) {
- ret = rcu_nocb_rdp_deoffload(rdp);
- if (!ret)
- cpumask_clear_cpu(cpu, rcu_nocb_mask);
- } else {
- pr_info("NOCB: Cannot CB-deoffload online CPU %d\n", rdp->cpu);
- ret = -EINVAL;
- }
- }
- mutex_unlock(&rcu_state.nocb_mutex);
- cpus_read_unlock();
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(rcu_nocb_cpu_deoffload);
-
static bool rcu_nocb_rdp_offload_wait_cond(struct rcu_data *rdp)
{
unsigned long flags;
@@ -1149,28 +1148,52 @@ static int rcu_nocb_rdp_offload(struct rcu_data *rdp)
return 0;
}
-int rcu_nocb_cpu_offload(int cpu)
+/* Common helper for CPU offload/deoffload operations. */
+static int rcu_nocb_cpu_toggle_offload(int cpu, bool offload)
{
struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
int ret = 0;
cpus_read_lock();
mutex_lock(&rcu_state.nocb_mutex);
- if (!rcu_rdp_is_offloaded(rdp)) {
- if (!cpu_online(cpu)) {
- ret = rcu_nocb_rdp_offload(rdp);
- if (!ret)
- cpumask_set_cpu(cpu, rcu_nocb_mask);
- } else {
- pr_info("NOCB: Cannot CB-offload online CPU %d\n", rdp->cpu);
- ret = -EINVAL;
- }
+
+ /* Already in desired state, nothing to do. */
+ if (rcu_rdp_is_offloaded(rdp) == offload)
+ goto out_unlock;
+
+ if (cpu_online(cpu)) {
+ pr_info("NOCB: Cannot CB-%soffload online CPU %d\n",
+ offload ? "" : "de", rdp->cpu);
+ ret = -EINVAL;
+ goto out_unlock;
}
+
+ if (offload) {
+ ret = rcu_nocb_rdp_offload(rdp);
+ if (!ret)
+ cpumask_set_cpu(cpu, rcu_nocb_mask);
+ } else {
+ ret = rcu_nocb_rdp_deoffload(rdp);
+ if (!ret)
+ cpumask_clear_cpu(cpu, rcu_nocb_mask);
+ }
+
+out_unlock:
mutex_unlock(&rcu_state.nocb_mutex);
cpus_read_unlock();
-
return ret;
}
+
+int rcu_nocb_cpu_deoffload(int cpu)
+{
+ return rcu_nocb_cpu_toggle_offload(cpu, false /* de-offload */);
+}
+EXPORT_SYMBOL_GPL(rcu_nocb_cpu_deoffload);
+
+int rcu_nocb_cpu_offload(int cpu)
+{
+ return rcu_nocb_cpu_toggle_offload(cpu, true /* offload */);
+}
EXPORT_SYMBOL_GPL(rcu_nocb_cpu_offload);
#ifdef CONFIG_RCU_LAZY
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index d98a5c38e19c..b62735a67884 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -538,6 +538,28 @@ long torture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask, bool do
EXPORT_SYMBOL_GPL(torture_sched_setaffinity);
#endif
+#if IS_ENABLED(CONFIG_TRIVIAL_PREEMPT_RCU)
+// Trivial and stupid grace-period wait. Defined here so that lockdep
+// kernels can find tasklist_lock.
+void synchronize_rcu_trivial_preempt(void)
+{
+ struct task_struct *g;
+ struct task_struct *t;
+
+ smp_mb(); // Order prior accesses before grace-period start.
+ rcu_read_lock(); // Protect task list.
+ for_each_process_thread(g, t) {
+ if (t == current)
+ continue; // Don't deadlock on ourselves!
+ // Order later rcu_read_lock() on other tasks after QS.
+ while (smp_load_acquire(&t->rcu_trivial_preempt_nesting))
+ continue;
+ }
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(synchronize_rcu_trivial_preempt);
+#endif // #if IS_ENABLED(CONFIG_TRIVIAL_PREEMPT_RCU)
+
int rcu_cpu_stall_notifiers __read_mostly; // !0 = provide stall notifiers (rarely useful)
EXPORT_SYMBOL_GPL(rcu_cpu_stall_notifiers);
diff --git a/kernel/torture.c b/kernel/torture.c
index ec3370986976..62c1ac777694 100644
--- a/kernel/torture.c
+++ b/kernel/torture.c
@@ -93,7 +93,7 @@ int torture_hrtimeout_ns(ktime_t baset_ns, u32 fuzzt_ns, const enum hrtimer_mode
{
ktime_t hto = baset_ns;
- if (trsp)
+ if (trsp && fuzzt_ns)
hto += torture_random(trsp) % fuzzt_ns;
set_current_state(TASK_IDLE);
return schedule_hrtimeout(&hto, mode);