diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2026-04-15 17:15:18 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2026-04-15 17:15:18 -0700 |
| commit | fdbfee9fc56e13a1307868829d438ad66ab308a4 (patch) | |
| tree | 228c066fa11b1fdf44cff5d2df75c597580e5993 /kernel | |
| parent | 5ed19574ebf0ba857c8a0d3d80ee409ff9498363 (diff) | |
| parent | 00f0dadde8c5036fe6462621a6920549036dce70 (diff) | |
Merge tag 'trace-rv-v7.1' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace
Pull runtime verification updates from Steven Rostedt:
- Refactor da_monitor header to share handlers across monitor types
No functional changes, only less code duplication.
- Add Hybrid Automata model class
Add a new model class that extends deterministic automata by adding
constraints on transitions and states. Those constraints can take
into account wall-clock time and as such allow RV monitor to make
assertions on real time. Add documentation and code generation
scripts.
- Add stall monitor as hybrid automaton example
Add a monitor that triggers a violation when a task is stalling as an
example of automaton working with real time variables.
- Convert the opid monitor to a hybrid automaton
The opid monitor can be heavily simplified if written as a hybrid
automaton: instead of tracking preempt and interrupt enable/disable
events, it can just run constraints on the preemption/interrupt
states when events like wakeup and need_resched occur.
- Add support for per-object monitors in DA/HA
Allow writing deterministic and hybrid automata monitors for generic
objects (e.g. any struct), by exploiting a hash table where objects
are saved. This makes it possible to track more than just tasks in RV. For
instance it will be used to track deadline entities in deadline
monitors.
- Add deadline tracepoints and move some deadline utilities
Prepare the ground for deadline monitors by defining events and
exporting helpers.
- Add nomiss deadline monitor
Add first example of deadline monitor asserting all entities complete
before their deadline.
- Improve rvgen error handling
Introduce AutomataError exception class and better handle expected
exceptions while showing a backtrace for unexpected ones.
- Improve python code quality in rvgen
Refactor the rvgen generation scripts to align with python best
practices: use f-strings instead of %, use len() instead of
__len__(), remove semicolons, use context managers for file
operations, fix whitespace violations, extract magic strings into
constants, remove unused imports and methods.
- Fix small bugs in rvgen
The generator scripts had some corner-case bugs: a logical error
in validating what a correct dot file looks like, fix an isinstance()
check, enforce a dot file has an initial state, fix type annotations
and typos in comments.
- rvgen refactoring
Refactor automata.py to use iterator-based parsing and handle
required arguments directly in argparse.
- Allow epoll in rtapp-sleep monitor
The epoll_wait call is now rt-friendly so it should be allowed in the
sleep monitor as a valid sleep method.
* tag 'trace-rv-v7.1' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace: (32 commits)
rv: Allow epoll in rtapp-sleep monitor
rv/rvgen: fix _fill_states() return type annotation
rv/rvgen: fix unbound loop variable warning
rv/rvgen: enforce presence of initial state
rv/rvgen: extract node marker string to class constant
rv/rvgen: fix isinstance check in Variable.expand()
rv/rvgen: make monitor arguments required in rvgen
rv/rvgen: remove unused __get_main_name method
rv/rvgen: remove unused sys import from dot2c
rv/rvgen: refactor automata.py to use iterator-based parsing
rv/rvgen: use class constant for init marker
rv/rvgen: fix DOT file validation logic error
rv/rvgen: fix PEP 8 whitespace violations
rv/rvgen: fix typos in automata and generator docstring and comments
rv/rvgen: use context managers for file operations
rv/rvgen: remove unnecessary semicolons
rv/rvgen: replace __len__() calls with len()
rv/rvgen: replace % string formatting with f-strings
rv/rvgen: remove bare except clauses in generator
rv/rvgen: introduce AutomataError exception class
...
Diffstat (limited to 'kernel')
22 files changed, 1205 insertions, 226 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 8952f5764517..da20fb6ea25a 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -122,6 +122,11 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(sched_compute_energy_tp); EXPORT_TRACEPOINT_SYMBOL_GPL(sched_entry_tp); EXPORT_TRACEPOINT_SYMBOL_GPL(sched_exit_tp); EXPORT_TRACEPOINT_SYMBOL_GPL(sched_set_need_resched_tp); +EXPORT_TRACEPOINT_SYMBOL_GPL(sched_dl_throttle_tp); +EXPORT_TRACEPOINT_SYMBOL_GPL(sched_dl_replenish_tp); +EXPORT_TRACEPOINT_SYMBOL_GPL(sched_dl_update_tp); +EXPORT_TRACEPOINT_SYMBOL_GPL(sched_dl_server_start_tp); +EXPORT_TRACEPOINT_SYMBOL_GPL(sched_dl_server_stop_tp); DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); DEFINE_PER_CPU(struct rnd_state, sched_rnd_state); diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 0b8de7156f38..edca7849b165 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -18,6 +18,7 @@ #include <linux/cpuset.h> #include <linux/sched/clock.h> +#include <linux/sched/deadline.h> #include <uapi/linux/sched/types.h> #include "sched.h" #include "pelt.h" @@ -57,17 +58,6 @@ static int __init sched_dl_sysctl_init(void) late_initcall(sched_dl_sysctl_init); #endif /* CONFIG_SYSCTL */ -static bool dl_server(struct sched_dl_entity *dl_se) -{ - return dl_se->dl_server; -} - -static inline struct task_struct *dl_task_of(struct sched_dl_entity *dl_se) -{ - BUG_ON(dl_server(dl_se)); - return container_of(dl_se, struct task_struct, dl); -} - static inline struct rq *rq_of_dl_rq(struct dl_rq *dl_rq) { return container_of(dl_rq, struct rq, dl); @@ -115,6 +105,19 @@ static inline bool is_dl_boosted(struct sched_dl_entity *dl_se) } #endif /* !CONFIG_RT_MUTEXES */ +static inline u8 dl_get_type(struct sched_dl_entity *dl_se, struct rq *rq) +{ + if (!dl_server(dl_se)) + return DL_TASK; + if (dl_se == &rq->fair_server) + return DL_SERVER_FAIR; +#ifdef CONFIG_SCHED_CLASS_EXT + if (dl_se == &rq->ext_server) + return DL_SERVER_EXT; +#endif + return DL_OTHER; +} + 
static inline struct dl_bw *dl_bw_of(int i) { RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(), @@ -733,6 +736,7 @@ static inline void replenish_dl_new_period(struct sched_dl_entity *dl_se, dl_se->dl_throttled = 1; dl_se->dl_defer_armed = 1; } + trace_sched_dl_replenish_tp(dl_se, cpu_of(rq), dl_get_type(dl_se, rq)); } /* @@ -848,6 +852,8 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se) if (dl_se->dl_throttled) dl_se->dl_throttled = 0; + trace_sched_dl_replenish_tp(dl_se, cpu_of(rq), dl_get_type(dl_se, rq)); + /* * If this is the replenishment of a deferred reservation, * clear the flag and return. @@ -975,22 +981,6 @@ update_dl_revised_wakeup(struct sched_dl_entity *dl_se, struct rq *rq) } /* - * Regarding the deadline, a task with implicit deadline has a relative - * deadline == relative period. A task with constrained deadline has a - * relative deadline <= relative period. - * - * We support constrained deadline tasks. However, there are some restrictions - * applied only for tasks which do not have an implicit deadline. See - * update_dl_entity() to know more about such restrictions. - * - * The dl_is_implicit() returns true if the task has an implicit deadline. - */ -static inline bool dl_is_implicit(struct sched_dl_entity *dl_se) -{ - return dl_se->dl_deadline == dl_se->dl_period; -} - -/* * When a deadline entity is placed in the runqueue, its runtime and deadline * might need to be updated. This is done by a CBS wake up rule. There are two * different rules: 1) the original CBS; and 2) the Revisited CBS. 
@@ -1345,6 +1335,7 @@ static inline void dl_check_constrained_dl(struct sched_dl_entity *dl_se) dl_time_before(rq_clock(rq), dl_next_period(dl_se))) { if (unlikely(is_dl_boosted(dl_se) || !start_dl_timer(dl_se))) return; + trace_sched_dl_throttle_tp(dl_se, cpu_of(rq), dl_get_type(dl_se, rq)); dl_se->dl_throttled = 1; if (dl_se->runtime > 0) dl_se->runtime = 0; @@ -1508,6 +1499,7 @@ static void update_curr_dl_se(struct rq *rq, struct sched_dl_entity *dl_se, s64 throttle: if (dl_runtime_exceeded(dl_se) || dl_se->dl_yielded) { + trace_sched_dl_throttle_tp(dl_se, cpu_of(rq), dl_get_type(dl_se, rq)); dl_se->dl_throttled = 1; /* If requested, inform the user about runtime overruns. */ @@ -1532,6 +1524,8 @@ throttle: if (!is_leftmost(dl_se, &rq->dl)) resched_curr(rq); + } else { + trace_sched_dl_update_tp(dl_se, cpu_of(rq), dl_get_type(dl_se, rq)); } /* @@ -1810,6 +1804,7 @@ void dl_server_start(struct sched_dl_entity *dl_se) if (WARN_ON_ONCE(!cpu_online(cpu_of(rq)))) return; + trace_sched_dl_server_start_tp(dl_se, cpu_of(rq), dl_get_type(dl_se, rq)); dl_se->dl_server_active = 1; enqueue_dl_entity(dl_se, ENQUEUE_WAKEUP); if (!dl_task(dl_se->rq->curr) || dl_entity_preempt(dl_se, &rq->curr->dl)) @@ -1821,6 +1816,8 @@ void dl_server_stop(struct sched_dl_entity *dl_se) if (!dl_server(dl_se) || !dl_server_active(dl_se)) return; + trace_sched_dl_server_stop_tp(dl_se, cpu_of(dl_se->rq), + dl_get_type(dl_se, dl_se->rq)); dequeue_dl_entity(dl_se, DEQUEUE_SLEEP); hrtimer_try_to_cancel(&dl_se->dl_timer); dl_se->dl_defer_armed = 0; diff --git a/kernel/trace/rv/Kconfig b/kernel/trace/rv/Kconfig index 5b4be87ba59d..3884b14df375 100644 --- a/kernel/trace/rv/Kconfig +++ b/kernel/trace/rv/Kconfig @@ -23,6 +23,19 @@ config LTL_MON_EVENTS_ID config RV_LTL_MONITOR bool +config RV_HA_MONITOR + bool + +config HA_MON_EVENTS_IMPLICIT + select DA_MON_EVENTS_IMPLICIT + select RV_HA_MONITOR + bool + +config HA_MON_EVENTS_ID + select DA_MON_EVENTS_ID + select RV_HA_MONITOR + bool + menuconfig RV 
bool "Runtime Verification" select TRACING @@ -65,6 +78,11 @@ source "kernel/trace/rv/monitors/pagefault/Kconfig" source "kernel/trace/rv/monitors/sleep/Kconfig" # Add new rtapp monitors here +source "kernel/trace/rv/monitors/stall/Kconfig" +source "kernel/trace/rv/monitors/deadline/Kconfig" +source "kernel/trace/rv/monitors/nomiss/Kconfig" +# Add new deadline monitors here + # Add new monitors here config RV_REACTORS diff --git a/kernel/trace/rv/Makefile b/kernel/trace/rv/Makefile index 750e4ad6fa0f..94498da35b37 100644 --- a/kernel/trace/rv/Makefile +++ b/kernel/trace/rv/Makefile @@ -17,6 +17,9 @@ obj-$(CONFIG_RV_MON_STS) += monitors/sts/sts.o obj-$(CONFIG_RV_MON_NRP) += monitors/nrp/nrp.o obj-$(CONFIG_RV_MON_SSSW) += monitors/sssw/sssw.o obj-$(CONFIG_RV_MON_OPID) += monitors/opid/opid.o +obj-$(CONFIG_RV_MON_STALL) += monitors/stall/stall.o +obj-$(CONFIG_RV_MON_DEADLINE) += monitors/deadline/deadline.o +obj-$(CONFIG_RV_MON_NOMISS) += monitors/nomiss/nomiss.o # Add new monitors here obj-$(CONFIG_RV_REACTORS) += rv_reactors.o obj-$(CONFIG_RV_REACT_PRINTK) += reactor_printk.o diff --git a/kernel/trace/rv/monitors/deadline/Kconfig b/kernel/trace/rv/monitors/deadline/Kconfig new file mode 100644 index 000000000000..38804a6ad91d --- /dev/null +++ b/kernel/trace/rv/monitors/deadline/Kconfig @@ -0,0 +1,10 @@ +config RV_MON_DEADLINE + depends on RV + bool "deadline monitor" + help + Collection of monitors to check the deadline scheduler and server + behave according to specifications. Enable this to enable all + scheduler specification supported by the current kernel. 
+ + For further information, see: + Documentation/trace/rv/monitor_deadline.rst diff --git a/kernel/trace/rv/monitors/deadline/deadline.c b/kernel/trace/rv/monitors/deadline/deadline.c new file mode 100644 index 000000000000..d566d4542ebf --- /dev/null +++ b/kernel/trace/rv/monitors/deadline/deadline.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/rv.h> +#include <linux/kallsyms.h> + +#define MODULE_NAME "deadline" + +#include "deadline.h" + +struct rv_monitor rv_deadline = { + .name = "deadline", + .description = "container for several deadline scheduler specifications.", + .enable = NULL, + .disable = NULL, + .reset = NULL, + .enabled = 0, +}; + +/* Used by other monitors */ +struct sched_class *rv_ext_sched_class; + +static int __init register_deadline(void) +{ + if (IS_ENABLED(CONFIG_SCHED_CLASS_EXT)) { + rv_ext_sched_class = (void *)kallsyms_lookup_name("ext_sched_class"); + if (!rv_ext_sched_class) + pr_warn("rv: Missing ext_sched_class, monitors may not work.\n"); + } + return rv_register_monitor(&rv_deadline, NULL); +} + +static void __exit unregister_deadline(void) +{ + rv_unregister_monitor(&rv_deadline); +} + +module_init(register_deadline); +module_exit(unregister_deadline); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Gabriele Monaco <gmonaco@redhat.com>"); +MODULE_DESCRIPTION("deadline: container for several deadline scheduler specifications."); diff --git a/kernel/trace/rv/monitors/deadline/deadline.h b/kernel/trace/rv/monitors/deadline/deadline.h new file mode 100644 index 000000000000..0bbfd2543329 --- /dev/null +++ b/kernel/trace/rv/monitors/deadline/deadline.h @@ -0,0 +1,202 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include <linux/kernel.h> +#include <linux/uaccess.h> +#include <linux/sched/deadline.h> +#include <asm/syscall.h> +#include <uapi/linux/sched/types.h> +#include <trace/events/sched.h> + +/* + * Dummy values if not available + */ 
+#ifndef __NR_sched_setscheduler +#define __NR_sched_setscheduler -__COUNTER__ +#endif +#ifndef __NR_sched_setattr +#define __NR_sched_setattr -__COUNTER__ +#endif + +extern struct rv_monitor rv_deadline; +/* Initialised when registering the deadline container */ +extern struct sched_class *rv_ext_sched_class; + +/* + * If both have dummy values, the syscalls are not supported and we don't even + * need to register the handler. + */ +static inline bool should_skip_syscall_handle(void) +{ + return __NR_sched_setattr < 0 && __NR_sched_setscheduler < 0; +} + +/* + * is_supported_type - return true if @type is supported by the deadline monitors + */ +static inline bool is_supported_type(u8 type) +{ + return type == DL_TASK || type == DL_SERVER_FAIR || type == DL_SERVER_EXT; +} + +/* + * is_server_type - return true if @type is a supported server + */ +static inline bool is_server_type(u8 type) +{ + return is_supported_type(type) && type != DL_TASK; +} + +/* + * Use negative numbers for the server. + * Currently only one fair server per CPU, may change in the future. + */ +#define fair_server_id(cpu) (-cpu) +#define ext_server_id(cpu) (-cpu - num_possible_cpus()) +#define NO_SERVER_ID (-2 * num_possible_cpus()) +/* + * Get a unique id used for dl entities + * + * The cpu is not required for tasks as the pid is used there, if this function + * is called on a dl_se that for sure corresponds to a task, DL_TASK can be + * used in place of cpu. + * We need the cpu for servers as it is provided in the tracepoint and we + * cannot easily retrieve it from the dl_se (requires the struct rq definition). 
+ */ +static inline int get_entity_id(struct sched_dl_entity *dl_se, int cpu, u8 type) +{ + if (dl_server(dl_se) && type != DL_TASK) { + if (type == DL_SERVER_FAIR) + return fair_server_id(cpu); + if (type == DL_SERVER_EXT) + return ext_server_id(cpu); + return NO_SERVER_ID; + } + return dl_task_of(dl_se)->pid; +} + +static inline bool task_is_scx_enabled(struct task_struct *tsk) +{ + return IS_ENABLED(CONFIG_SCHED_CLASS_EXT) && + tsk->sched_class == rv_ext_sched_class; +} + +/* Expand id and target as arguments for da functions */ +#define EXPAND_ID(dl_se, cpu, type) get_entity_id(dl_se, cpu, type), dl_se +#define EXPAND_ID_TASK(tsk) get_entity_id(&tsk->dl, task_cpu(tsk), DL_TASK), &tsk->dl + +static inline u8 get_server_type(struct task_struct *tsk) +{ + if (tsk->policy == SCHED_NORMAL || tsk->policy == SCHED_EXT || + tsk->policy == SCHED_BATCH || tsk->policy == SCHED_IDLE) + return task_is_scx_enabled(tsk) ? DL_SERVER_EXT : DL_SERVER_FAIR; + return DL_OTHER; +} + +static inline int extract_params(struct pt_regs *regs, long id, pid_t *pid_out) +{ + size_t size = offsetofend(struct sched_attr, sched_flags); + struct sched_attr __user *uattr, attr; + int new_policy = -1, ret; + unsigned long args[6]; + + switch (id) { + case __NR_sched_setscheduler: + syscall_get_arguments(current, regs, args); + *pid_out = args[0]; + new_policy = args[1]; + break; + case __NR_sched_setattr: + syscall_get_arguments(current, regs, args); + *pid_out = args[0]; + uattr = (struct sched_attr __user *)args[1]; + /* + * Just copy up to sched_flags, we are not interested after that + */ + ret = copy_struct_from_user(&attr, size, uattr, size); + if (ret) + return ret; + if (attr.sched_flags & SCHED_FLAG_KEEP_POLICY) + return -EINVAL; + new_policy = attr.sched_policy; + break; + default: + return -EINVAL; + } + + return new_policy & ~SCHED_RESET_ON_FORK; +} + +/* Helper functions requiring DA/HA utilities */ +#ifdef RV_MON_TYPE + +/* + * get_fair_server - get the fair server associated to a 
task + * + * If the task is a boosted task, the server is available in the task_struct, + * otherwise grab the dl entity saved for the CPU where the task is enqueued. + * This function assumes the task is enqueued somewhere. + */ +static inline struct sched_dl_entity *get_server(struct task_struct *tsk, u8 type) +{ + if (tsk->dl_server && get_server_type(tsk) == type) + return tsk->dl_server; + if (type == DL_SERVER_FAIR) + return da_get_target_by_id(fair_server_id(task_cpu(tsk))); + if (type == DL_SERVER_EXT) + return da_get_target_by_id(ext_server_id(task_cpu(tsk))); + return NULL; +} + +/* + * Initialise monitors for all tasks and pre-allocate the storage for servers. + * This is necessary since we don't have access to the servers here and + * allocation can cause deadlocks from their tracepoints. We can only fill + * pre-initialised storage from there. + */ +static inline int init_storage(bool skip_tasks) +{ + struct task_struct *g, *p; + int cpu; + + for_each_possible_cpu(cpu) { + if (!da_create_empty_storage(fair_server_id(cpu))) + goto fail; + if (IS_ENABLED(CONFIG_SCHED_CLASS_EXT) && + !da_create_empty_storage(ext_server_id(cpu))) + goto fail; + } + + if (skip_tasks) + return 0; + + read_lock(&tasklist_lock); + for_each_process_thread(g, p) { + if (p->policy == SCHED_DEADLINE) { + if (!da_create_storage(EXPAND_ID_TASK(p), NULL)) { + read_unlock(&tasklist_lock); + goto fail; + } + } + } + read_unlock(&tasklist_lock); + return 0; + +fail: + da_monitor_destroy(); + return -ENOMEM; +} + +static void __maybe_unused handle_newtask(void *data, struct task_struct *task, u64 flags) +{ + /* Might be superfluous as tasks are not started with this policy.. 
*/ + if (task->policy == SCHED_DEADLINE) + da_create_storage(EXPAND_ID_TASK(task), NULL); +} + +static void __maybe_unused handle_exit(void *data, struct task_struct *p, bool group_dead) +{ + if (p->policy == SCHED_DEADLINE) + da_destroy_storage(get_entity_id(&p->dl, DL_TASK, DL_TASK)); +} + +#endif diff --git a/kernel/trace/rv/monitors/nomiss/Kconfig b/kernel/trace/rv/monitors/nomiss/Kconfig new file mode 100644 index 000000000000..e1886c3a0dd9 --- /dev/null +++ b/kernel/trace/rv/monitors/nomiss/Kconfig @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +config RV_MON_NOMISS + depends on RV + depends on HAVE_SYSCALL_TRACEPOINTS + depends on RV_MON_DEADLINE + default y + select HA_MON_EVENTS_ID + bool "nomiss monitor" + help + Monitor to ensure dl entities run to completion before their deadiline. + This monitor is part of the deadline monitors collection. + + For further information, see: + Documentation/trace/rv/monitor_deadline.rst diff --git a/kernel/trace/rv/monitors/nomiss/nomiss.c b/kernel/trace/rv/monitors/nomiss/nomiss.c new file mode 100644 index 000000000000..31f90f3638d8 --- /dev/null +++ b/kernel/trace/rv/monitors/nomiss/nomiss.c @@ -0,0 +1,293 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/ftrace.h> +#include <linux/tracepoint.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/rv.h> +#include <rv/instrumentation.h> + +#define MODULE_NAME "nomiss" + +#include <uapi/linux/sched/types.h> +#include <trace/events/syscalls.h> +#include <trace/events/sched.h> +#include <trace/events/task.h> +#include <rv_trace.h> + +#define RV_MON_TYPE RV_MON_PER_OBJ +#define HA_TIMER_TYPE HA_TIMER_WHEEL +/* The start condition is on sched_switch, it's dangerous to allocate there */ +#define DA_SKIP_AUTO_ALLOC +typedef struct sched_dl_entity *monitor_target; +#include "nomiss.h" +#include <rv/ha_monitor.h> +#include <monitors/deadline/deadline.h> + +/* + * User configurable deadline threshold. 
If the total utilisation of deadline + * tasks is larger than 1, they are only guaranteed bounded tardiness. See + * Documentation/scheduler/sched-deadline.rst for more details. + * The minimum tardiness without sched_feat(HRTICK_DL) is 1 tick to accommodate + * for throttle enforced on the next tick. + */ +static u64 deadline_thresh = TICK_NSEC; +module_param(deadline_thresh, ullong, 0644); +#define DEADLINE_NS(ha_mon) (ha_get_target(ha_mon)->dl_deadline + deadline_thresh) + +static u64 ha_get_env(struct ha_monitor *ha_mon, enum envs_nomiss env, u64 time_ns) +{ + if (env == clk_nomiss) + return ha_get_clk_ns(ha_mon, env, time_ns); + else if (env == is_constr_dl_nomiss) + return !dl_is_implicit(ha_get_target(ha_mon)); + else if (env == is_defer_nomiss) + return ha_get_target(ha_mon)->dl_defer; + return ENV_INVALID_VALUE; +} + +static void ha_reset_env(struct ha_monitor *ha_mon, enum envs_nomiss env, u64 time_ns) +{ + if (env == clk_nomiss) + ha_reset_clk_ns(ha_mon, env, time_ns); +} + +static inline bool ha_verify_invariants(struct ha_monitor *ha_mon, + enum states curr_state, enum events event, + enum states next_state, u64 time_ns) +{ + if (curr_state == ready_nomiss) + return ha_check_invariant_ns(ha_mon, clk_nomiss, time_ns); + else if (curr_state == running_nomiss) + return ha_check_invariant_ns(ha_mon, clk_nomiss, time_ns); + return true; +} + +static inline void ha_convert_inv_guard(struct ha_monitor *ha_mon, + enum states curr_state, enum events event, + enum states next_state, u64 time_ns) +{ + if (curr_state == next_state) + return; + if (curr_state == ready_nomiss) + ha_inv_to_guard(ha_mon, clk_nomiss, DEADLINE_NS(ha_mon), time_ns); + else if (curr_state == running_nomiss) + ha_inv_to_guard(ha_mon, clk_nomiss, DEADLINE_NS(ha_mon), time_ns); +} + +static inline bool ha_verify_guards(struct ha_monitor *ha_mon, + enum states curr_state, enum events event, + enum states next_state, u64 time_ns) +{ + bool res = true; + + if (curr_state == ready_nomiss && 
event == dl_replenish_nomiss) + ha_reset_env(ha_mon, clk_nomiss, time_ns); + else if (curr_state == ready_nomiss && event == dl_throttle_nomiss) + res = ha_get_env(ha_mon, is_defer_nomiss, time_ns) == 1ull; + else if (curr_state == idle_nomiss && event == dl_replenish_nomiss) + ha_reset_env(ha_mon, clk_nomiss, time_ns); + else if (curr_state == running_nomiss && event == dl_replenish_nomiss) + ha_reset_env(ha_mon, clk_nomiss, time_ns); + else if (curr_state == sleeping_nomiss && event == dl_replenish_nomiss) + ha_reset_env(ha_mon, clk_nomiss, time_ns); + else if (curr_state == sleeping_nomiss && event == dl_throttle_nomiss) + res = ha_get_env(ha_mon, is_constr_dl_nomiss, time_ns) == 1ull || + ha_get_env(ha_mon, is_defer_nomiss, time_ns) == 1ull; + else if (curr_state == throttled_nomiss && event == dl_replenish_nomiss) + ha_reset_env(ha_mon, clk_nomiss, time_ns); + return res; +} + +static inline void ha_setup_invariants(struct ha_monitor *ha_mon, + enum states curr_state, enum events event, + enum states next_state, u64 time_ns) +{ + if (next_state == curr_state && event != dl_replenish_nomiss) + return; + if (next_state == ready_nomiss) + ha_start_timer_ns(ha_mon, clk_nomiss, DEADLINE_NS(ha_mon), time_ns); + else if (next_state == running_nomiss) + ha_start_timer_ns(ha_mon, clk_nomiss, DEADLINE_NS(ha_mon), time_ns); + else if (curr_state == ready_nomiss) + ha_cancel_timer(ha_mon); + else if (curr_state == running_nomiss) + ha_cancel_timer(ha_mon); +} + +static bool ha_verify_constraint(struct ha_monitor *ha_mon, + enum states curr_state, enum events event, + enum states next_state, u64 time_ns) +{ + if (!ha_verify_invariants(ha_mon, curr_state, event, next_state, time_ns)) + return false; + + ha_convert_inv_guard(ha_mon, curr_state, event, next_state, time_ns); + + if (!ha_verify_guards(ha_mon, curr_state, event, next_state, time_ns)) + return false; + + ha_setup_invariants(ha_mon, curr_state, event, next_state, time_ns); + + return true; +} + +static void 
handle_dl_replenish(void *data, struct sched_dl_entity *dl_se, + int cpu, u8 type) +{ + if (is_supported_type(type)) + da_handle_event(EXPAND_ID(dl_se, cpu, type), dl_replenish_nomiss); +} + +static void handle_dl_throttle(void *data, struct sched_dl_entity *dl_se, + int cpu, u8 type) +{ + if (is_supported_type(type)) + da_handle_event(EXPAND_ID(dl_se, cpu, type), dl_throttle_nomiss); +} + +static void handle_dl_server_stop(void *data, struct sched_dl_entity *dl_se, + int cpu, u8 type) +{ + /* + * This isn't the standard use of da_handle_start_run_event since this + * event cannot only occur from the initial state. + * It is fine to use here because it always brings to a known state and + * the fact we "pretend" the transition starts from the initial state + * has no side effect. + */ + if (is_supported_type(type)) + da_handle_start_run_event(EXPAND_ID(dl_se, cpu, type), dl_server_stop_nomiss); +} + +static inline void handle_server_switch(struct task_struct *next, int cpu, u8 type) +{ + struct sched_dl_entity *dl_se = get_server(next, type); + + if (dl_se && is_idle_task(next)) + da_handle_event(EXPAND_ID(dl_se, cpu, type), dl_server_idle_nomiss); +} + +static void handle_sched_switch(void *data, bool preempt, + struct task_struct *prev, + struct task_struct *next, + unsigned int prev_state) +{ + int cpu = task_cpu(next); + + if (prev_state != TASK_RUNNING && !preempt && prev->policy == SCHED_DEADLINE) + da_handle_event(EXPAND_ID_TASK(prev), sched_switch_suspend_nomiss); + if (next->policy == SCHED_DEADLINE) + da_handle_start_run_event(EXPAND_ID_TASK(next), sched_switch_in_nomiss); + + /* + * The server is available in next only if the next task is boosted, + * otherwise we need to retrieve it. + * Here the server continues in the state running/armed until actually + * stopped, this works since we continue expecting a throttle. 
+ */ + if (next->dl_server) + da_handle_start_event(EXPAND_ID(next->dl_server, cpu, + get_server_type(next)), + sched_switch_in_nomiss); + else { + handle_server_switch(next, cpu, DL_SERVER_FAIR); + if (IS_ENABLED(CONFIG_SCHED_CLASS_EXT)) + handle_server_switch(next, cpu, DL_SERVER_EXT); + } +} + +static void handle_sys_enter(void *data, struct pt_regs *regs, long id) +{ + struct task_struct *p; + int new_policy = -1; + pid_t pid = 0; + + new_policy = extract_params(regs, id, &pid); + if (new_policy < 0) + return; + guard(rcu)(); + p = pid ? find_task_by_vpid(pid) : current; + if (unlikely(!p) || new_policy == p->policy) + return; + + if (p->policy == SCHED_DEADLINE) + da_reset(EXPAND_ID_TASK(p)); + else if (new_policy == SCHED_DEADLINE) + da_create_or_get(EXPAND_ID_TASK(p)); +} + +static void handle_sched_wakeup(void *data, struct task_struct *tsk) +{ + if (tsk->policy == SCHED_DEADLINE) + da_handle_event(EXPAND_ID_TASK(tsk), sched_wakeup_nomiss); +} + +static int enable_nomiss(void) +{ + int retval; + + retval = da_monitor_init(); + if (retval) + return retval; + + retval = init_storage(false); + if (retval) + return retval; + rv_attach_trace_probe("nomiss", sched_dl_replenish_tp, handle_dl_replenish); + rv_attach_trace_probe("nomiss", sched_dl_throttle_tp, handle_dl_throttle); + rv_attach_trace_probe("nomiss", sched_dl_server_stop_tp, handle_dl_server_stop); + rv_attach_trace_probe("nomiss", sched_switch, handle_sched_switch); + rv_attach_trace_probe("nomiss", sched_wakeup, handle_sched_wakeup); + if (!should_skip_syscall_handle()) + rv_attach_trace_probe("nomiss", sys_enter, handle_sys_enter); + rv_attach_trace_probe("nomiss", task_newtask, handle_newtask); + rv_attach_trace_probe("nomiss", sched_process_exit, handle_exit); + + return 0; +} + +static void disable_nomiss(void) +{ + rv_this.enabled = 0; + + /* Those are RCU writers, detach earlier hoping to close a bit faster */ + rv_detach_trace_probe("nomiss", task_newtask, handle_newtask); + 
rv_detach_trace_probe("nomiss", sched_process_exit, handle_exit); + if (!should_skip_syscall_handle()) + rv_detach_trace_probe("nomiss", sys_enter, handle_sys_enter); + + rv_detach_trace_probe("nomiss", sched_dl_replenish_tp, handle_dl_replenish); + rv_detach_trace_probe("nomiss", sched_dl_throttle_tp, handle_dl_throttle); + rv_detach_trace_probe("nomiss", sched_dl_server_stop_tp, handle_dl_server_stop); + rv_detach_trace_probe("nomiss", sched_switch, handle_sched_switch); + rv_detach_trace_probe("nomiss", sched_wakeup, handle_sched_wakeup); + + da_monitor_destroy(); +} + +static struct rv_monitor rv_this = { + .name = "nomiss", + .description = "dl entities run to completion before their deadline.", + .enable = enable_nomiss, + .disable = disable_nomiss, + .reset = da_monitor_reset_all, + .enabled = 0, +}; + +static int __init register_nomiss(void) +{ + return rv_register_monitor(&rv_this, &rv_deadline); +} + +static void __exit unregister_nomiss(void) +{ + rv_unregister_monitor(&rv_this); +} + +module_init(register_nomiss); +module_exit(unregister_nomiss); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Gabriele Monaco <gmonaco@redhat.com>"); +MODULE_DESCRIPTION("nomiss: dl entities run to completion before their deadline."); diff --git a/kernel/trace/rv/monitors/nomiss/nomiss.h b/kernel/trace/rv/monitors/nomiss/nomiss.h new file mode 100644 index 000000000000..3d1b436194d7 --- /dev/null +++ b/kernel/trace/rv/monitors/nomiss/nomiss.h @@ -0,0 +1,123 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Automatically generated C representation of nomiss automaton + * For further information about this format, see kernel documentation: + * Documentation/trace/rv/deterministic_automata.rst + */ + +#define MONITOR_NAME nomiss + +enum states_nomiss { + ready_nomiss, + idle_nomiss, + running_nomiss, + sleeping_nomiss, + throttled_nomiss, + state_max_nomiss, +}; + +#define INVALID_STATE state_max_nomiss + +enum events_nomiss { + dl_replenish_nomiss, + dl_server_idle_nomiss, + 
dl_server_stop_nomiss, + dl_throttle_nomiss, + sched_switch_in_nomiss, + sched_switch_suspend_nomiss, + sched_wakeup_nomiss, + event_max_nomiss, +}; + +enum envs_nomiss { + clk_nomiss, + is_constr_dl_nomiss, + is_defer_nomiss, + env_max_nomiss, + env_max_stored_nomiss = is_constr_dl_nomiss, +}; + +_Static_assert(env_max_stored_nomiss <= MAX_HA_ENV_LEN, "Not enough slots"); +#define HA_CLK_NS + +struct automaton_nomiss { + char *state_names[state_max_nomiss]; + char *event_names[event_max_nomiss]; + char *env_names[env_max_nomiss]; + unsigned char function[state_max_nomiss][event_max_nomiss]; + unsigned char initial_state; + bool final_states[state_max_nomiss]; +}; + +static const struct automaton_nomiss automaton_nomiss = { + .state_names = { + "ready", + "idle", + "running", + "sleeping", + "throttled", + }, + .event_names = { + "dl_replenish", + "dl_server_idle", + "dl_server_stop", + "dl_throttle", + "sched_switch_in", + "sched_switch_suspend", + "sched_wakeup", + }, + .env_names = { + "clk", + "is_constr_dl", + "is_defer", + }, + .function = { + { + ready_nomiss, + idle_nomiss, + sleeping_nomiss, + throttled_nomiss, + running_nomiss, + INVALID_STATE, + ready_nomiss, + }, + { + ready_nomiss, + idle_nomiss, + sleeping_nomiss, + throttled_nomiss, + running_nomiss, + INVALID_STATE, + INVALID_STATE, + }, + { + running_nomiss, + idle_nomiss, + sleeping_nomiss, + throttled_nomiss, + running_nomiss, + sleeping_nomiss, + running_nomiss, + }, + { + ready_nomiss, + sleeping_nomiss, + sleeping_nomiss, + throttled_nomiss, + running_nomiss, + INVALID_STATE, + ready_nomiss, + }, + { + ready_nomiss, + throttled_nomiss, + INVALID_STATE, + throttled_nomiss, + INVALID_STATE, + throttled_nomiss, + throttled_nomiss, + }, + }, + .initial_state = ready_nomiss, + .final_states = { 1, 0, 0, 0, 0 }, +}; diff --git a/kernel/trace/rv/monitors/nomiss/nomiss_trace.h b/kernel/trace/rv/monitors/nomiss/nomiss_trace.h new file mode 100644 index 000000000000..42e7efaca4e7 --- /dev/null +++ 
b/kernel/trace/rv/monitors/nomiss/nomiss_trace.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * Snippet to be included in rv_trace.h + */ + +#ifdef CONFIG_RV_MON_NOMISS +DEFINE_EVENT(event_da_monitor_id, event_nomiss, + TP_PROTO(int id, char *state, char *event, char *next_state, bool final_state), + TP_ARGS(id, state, event, next_state, final_state)); + +DEFINE_EVENT(error_da_monitor_id, error_nomiss, + TP_PROTO(int id, char *state, char *event), + TP_ARGS(id, state, event)); + +DEFINE_EVENT(error_env_da_monitor_id, error_env_nomiss, + TP_PROTO(int id, char *state, char *event, char *env), + TP_ARGS(id, state, event, env)); +#endif /* CONFIG_RV_MON_NOMISS */ diff --git a/kernel/trace/rv/monitors/opid/Kconfig b/kernel/trace/rv/monitors/opid/Kconfig index 561d32da572b..6d02e239b684 100644 --- a/kernel/trace/rv/monitors/opid/Kconfig +++ b/kernel/trace/rv/monitors/opid/Kconfig @@ -2,18 +2,13 @@ # config RV_MON_OPID depends on RV - depends on TRACE_IRQFLAGS - depends on TRACE_PREEMPT_TOGGLE depends on RV_MON_SCHED - default y if PREEMPT_RT - select DA_MON_EVENTS_IMPLICIT + default y + select HA_MON_EVENTS_IMPLICIT bool "opid monitor" help Monitor to ensure operations like wakeup and need resched occur with - interrupts and preemption disabled or during IRQs, where preemption - may not be disabled explicitly. - - This monitor is unstable on !PREEMPT_RT, say N unless you are testing it. + interrupts and preemption disabled. 
For further information, see: Documentation/trace/rv/monitor_sched.rst diff --git a/kernel/trace/rv/monitors/opid/opid.c b/kernel/trace/rv/monitors/opid/opid.c index 25a40e90fa40..4594c7c46601 100644 --- a/kernel/trace/rv/monitors/opid/opid.c +++ b/kernel/trace/rv/monitors/opid/opid.c @@ -10,94 +10,63 @@ #define MODULE_NAME "opid" #include <trace/events/sched.h> -#include <trace/events/irq.h> -#include <trace/events/preemptirq.h> #include <rv_trace.h> #include <monitors/sched/sched.h> #define RV_MON_TYPE RV_MON_PER_CPU #include "opid.h" -#include <rv/da_monitor.h> +#include <rv/ha_monitor.h> -#ifdef CONFIG_X86_LOCAL_APIC -#include <asm/trace/irq_vectors.h> - -static void handle_vector_irq_entry(void *data, int vector) +static u64 ha_get_env(struct ha_monitor *ha_mon, enum envs_opid env, u64 time_ns) { - da_handle_event(irq_entry_opid); -} - -static void attach_vector_irq(void) -{ - rv_attach_trace_probe("opid", local_timer_entry, handle_vector_irq_entry); - if (IS_ENABLED(CONFIG_IRQ_WORK)) - rv_attach_trace_probe("opid", irq_work_entry, handle_vector_irq_entry); - if (IS_ENABLED(CONFIG_SMP)) { - rv_attach_trace_probe("opid", reschedule_entry, handle_vector_irq_entry); - rv_attach_trace_probe("opid", call_function_entry, handle_vector_irq_entry); - rv_attach_trace_probe("opid", call_function_single_entry, handle_vector_irq_entry); + if (env == irq_off_opid) + return irqs_disabled(); + else if (env == preempt_off_opid) { + /* + * If CONFIG_PREEMPTION is enabled, then the tracepoint itself disables + * preemption (adding one to the preempt_count). Since we are + * interested in the preempt_count at the time the tracepoint was + * hit, we consider 1 as still enabled. 
+ */ + if (IS_ENABLED(CONFIG_PREEMPTION)) + return (preempt_count() & PREEMPT_MASK) > 1; + return true; } + return ENV_INVALID_VALUE; } -static void detach_vector_irq(void) +static inline bool ha_verify_guards(struct ha_monitor *ha_mon, + enum states curr_state, enum events event, + enum states next_state, u64 time_ns) { - rv_detach_trace_probe("opid", local_timer_entry, handle_vector_irq_entry); - if (IS_ENABLED(CONFIG_IRQ_WORK)) - rv_detach_trace_probe("opid", irq_work_entry, handle_vector_irq_entry); - if (IS_ENABLED(CONFIG_SMP)) { - rv_detach_trace_probe("opid", reschedule_entry, handle_vector_irq_entry); - rv_detach_trace_probe("opid", call_function_entry, handle_vector_irq_entry); - rv_detach_trace_probe("opid", call_function_single_entry, handle_vector_irq_entry); - } + bool res = true; + + if (curr_state == any_opid && event == sched_need_resched_opid) + res = ha_get_env(ha_mon, irq_off_opid, time_ns) == 1ull; + else if (curr_state == any_opid && event == sched_waking_opid) + res = ha_get_env(ha_mon, irq_off_opid, time_ns) == 1ull && + ha_get_env(ha_mon, preempt_off_opid, time_ns) == 1ull; + return res; } -#else -/* We assume irq_entry tracepoints are sufficient on other architectures */ -static void attach_vector_irq(void) { } -static void detach_vector_irq(void) { } -#endif - -static void handle_irq_disable(void *data, unsigned long ip, unsigned long parent_ip) +static bool ha_verify_constraint(struct ha_monitor *ha_mon, + enum states curr_state, enum events event, + enum states next_state, u64 time_ns) { - da_handle_event(irq_disable_opid); -} + if (!ha_verify_guards(ha_mon, curr_state, event, next_state, time_ns)) + return false; -static void handle_irq_enable(void *data, unsigned long ip, unsigned long parent_ip) -{ - da_handle_event(irq_enable_opid); -} - -static void handle_irq_entry(void *data, int irq, struct irqaction *action) -{ - da_handle_event(irq_entry_opid); -} - -static void handle_preempt_disable(void *data, unsigned long ip, unsigned long 
parent_ip) -{ - da_handle_event(preempt_disable_opid); -} - -static void handle_preempt_enable(void *data, unsigned long ip, unsigned long parent_ip) -{ - da_handle_event(preempt_enable_opid); + return true; } static void handle_sched_need_resched(void *data, struct task_struct *tsk, int cpu, int tif) { - /* The monitor's intitial state is not in_irq */ - if (this_cpu_read(hardirq_context)) - da_handle_event(sched_need_resched_opid); - else - da_handle_start_event(sched_need_resched_opid); + da_handle_start_run_event(sched_need_resched_opid); } static void handle_sched_waking(void *data, struct task_struct *p) { - /* The monitor's intitial state is not in_irq */ - if (this_cpu_read(hardirq_context)) - da_handle_event(sched_waking_opid); - else - da_handle_start_event(sched_waking_opid); + da_handle_start_run_event(sched_waking_opid); } static int enable_opid(void) @@ -108,14 +77,8 @@ static int enable_opid(void) if (retval) return retval; - rv_attach_trace_probe("opid", irq_disable, handle_irq_disable); - rv_attach_trace_probe("opid", irq_enable, handle_irq_enable); - rv_attach_trace_probe("opid", irq_handler_entry, handle_irq_entry); - rv_attach_trace_probe("opid", preempt_disable, handle_preempt_disable); - rv_attach_trace_probe("opid", preempt_enable, handle_preempt_enable); rv_attach_trace_probe("opid", sched_set_need_resched_tp, handle_sched_need_resched); rv_attach_trace_probe("opid", sched_waking, handle_sched_waking); - attach_vector_irq(); return 0; } @@ -124,14 +87,8 @@ static void disable_opid(void) { rv_this.enabled = 0; - rv_detach_trace_probe("opid", irq_disable, handle_irq_disable); - rv_detach_trace_probe("opid", irq_enable, handle_irq_enable); - rv_detach_trace_probe("opid", irq_handler_entry, handle_irq_entry); - rv_detach_trace_probe("opid", preempt_disable, handle_preempt_disable); - rv_detach_trace_probe("opid", preempt_enable, handle_preempt_enable); rv_detach_trace_probe("opid", sched_set_need_resched_tp, handle_sched_need_resched); 
rv_detach_trace_probe("opid", sched_waking, handle_sched_waking); - detach_vector_irq(); da_monitor_destroy(); } diff --git a/kernel/trace/rv/monitors/opid/opid.h b/kernel/trace/rv/monitors/opid/opid.h index 092992514970..fb0aa4c28aa6 100644 --- a/kernel/trace/rv/monitors/opid/opid.h +++ b/kernel/trace/rv/monitors/opid/opid.h @@ -8,30 +8,31 @@ #define MONITOR_NAME opid enum states_opid { - disabled_opid, - enabled_opid, - in_irq_opid, - irq_disabled_opid, - preempt_disabled_opid, + any_opid, state_max_opid, }; #define INVALID_STATE state_max_opid enum events_opid { - irq_disable_opid, - irq_enable_opid, - irq_entry_opid, - preempt_disable_opid, - preempt_enable_opid, sched_need_resched_opid, sched_waking_opid, event_max_opid, }; +enum envs_opid { + irq_off_opid, + preempt_off_opid, + env_max_opid, + env_max_stored_opid = irq_off_opid, +}; + +_Static_assert(env_max_stored_opid <= MAX_HA_ENV_LEN, "Not enough slots"); + struct automaton_opid { char *state_names[state_max_opid]; char *event_names[event_max_opid]; + char *env_names[env_max_opid]; unsigned char function[state_max_opid][event_max_opid]; unsigned char initial_state; bool final_states[state_max_opid]; @@ -39,68 +40,19 @@ struct automaton_opid { static const struct automaton_opid automaton_opid = { .state_names = { - "disabled", - "enabled", - "in_irq", - "irq_disabled", - "preempt_disabled", + "any", }, .event_names = { - "irq_disable", - "irq_enable", - "irq_entry", - "preempt_disable", - "preempt_enable", "sched_need_resched", "sched_waking", }, + .env_names = { + "irq_off", + "preempt_off", + }, .function = { - { - INVALID_STATE, - preempt_disabled_opid, - disabled_opid, - INVALID_STATE, - irq_disabled_opid, - disabled_opid, - disabled_opid, - }, - { - irq_disabled_opid, - INVALID_STATE, - INVALID_STATE, - preempt_disabled_opid, - enabled_opid, - INVALID_STATE, - INVALID_STATE, - }, - { - INVALID_STATE, - enabled_opid, - in_irq_opid, - INVALID_STATE, - INVALID_STATE, - in_irq_opid, - in_irq_opid, - }, - 
{ - INVALID_STATE, - enabled_opid, - in_irq_opid, - disabled_opid, - INVALID_STATE, - irq_disabled_opid, - INVALID_STATE, - }, - { - disabled_opid, - INVALID_STATE, - INVALID_STATE, - INVALID_STATE, - enabled_opid, - INVALID_STATE, - INVALID_STATE, - }, + { any_opid, any_opid }, }, - .initial_state = disabled_opid, - .final_states = { 0, 1, 0, 0, 0 }, + .initial_state = any_opid, + .final_states = { 1 }, }; diff --git a/kernel/trace/rv/monitors/opid/opid_trace.h b/kernel/trace/rv/monitors/opid/opid_trace.h index 3df6ff955c30..b04005b64208 100644 --- a/kernel/trace/rv/monitors/opid/opid_trace.h +++ b/kernel/trace/rv/monitors/opid/opid_trace.h @@ -12,4 +12,8 @@ DEFINE_EVENT(event_da_monitor, event_opid, DEFINE_EVENT(error_da_monitor, error_opid, TP_PROTO(char *state, char *event), TP_ARGS(state, event)); + +DEFINE_EVENT(error_env_da_monitor, error_env_opid, + TP_PROTO(char *state, char *event, char *env), + TP_ARGS(state, event, env)); #endif /* CONFIG_RV_MON_OPID */ diff --git a/kernel/trace/rv/monitors/sleep/sleep.c b/kernel/trace/rv/monitors/sleep/sleep.c index c1347da69e9d..8dfe5ec13e19 100644 --- a/kernel/trace/rv/monitors/sleep/sleep.c +++ b/kernel/trace/rv/monitors/sleep/sleep.c @@ -49,6 +49,7 @@ static void ltl_atoms_init(struct task_struct *task, struct ltl_monitor *mon, bo ltl_atom_set(mon, LTL_NANOSLEEP_TIMER_ABSTIME, false); ltl_atom_set(mon, LTL_CLOCK_NANOSLEEP, false); ltl_atom_set(mon, LTL_FUTEX_WAIT, false); + ltl_atom_set(mon, LTL_EPOLL_WAIT, false); ltl_atom_set(mon, LTL_FUTEX_LOCK_PI, false); ltl_atom_set(mon, LTL_BLOCK_ON_RT_MUTEX, false); } @@ -63,6 +64,7 @@ static void ltl_atoms_init(struct task_struct *task, struct ltl_monitor *mon, bo ltl_atom_set(mon, LTL_NANOSLEEP_CLOCK_TAI, false); ltl_atom_set(mon, LTL_NANOSLEEP_TIMER_ABSTIME, false); ltl_atom_set(mon, LTL_CLOCK_NANOSLEEP, false); + ltl_atom_set(mon, LTL_EPOLL_WAIT, false); if (strstarts(task->comm, "migration/")) ltl_atom_set(mon, LTL_TASK_IS_MIGRATION, true); @@ -162,6 +164,11 @@ static 
void handle_sys_enter(void *data, struct pt_regs *regs, long id) break; } break; +#ifdef __NR_epoll_wait + case __NR_epoll_wait: + ltl_atom_update(current, LTL_EPOLL_WAIT, true); + break; +#endif } } @@ -174,6 +181,7 @@ static void handle_sys_exit(void *data, struct pt_regs *regs, long ret) ltl_atom_set(mon, LTL_NANOSLEEP_CLOCK_MONOTONIC, false); ltl_atom_set(mon, LTL_NANOSLEEP_CLOCK_TAI, false); ltl_atom_set(mon, LTL_NANOSLEEP_TIMER_ABSTIME, false); + ltl_atom_set(mon, LTL_EPOLL_WAIT, false); ltl_atom_update(current, LTL_CLOCK_NANOSLEEP, false); } diff --git a/kernel/trace/rv/monitors/sleep/sleep.h b/kernel/trace/rv/monitors/sleep/sleep.h index 2ab46fd218d2..95dc2727c059 100644 --- a/kernel/trace/rv/monitors/sleep/sleep.h +++ b/kernel/trace/rv/monitors/sleep/sleep.h @@ -15,6 +15,7 @@ enum ltl_atom { LTL_ABORT_SLEEP, LTL_BLOCK_ON_RT_MUTEX, LTL_CLOCK_NANOSLEEP, + LTL_EPOLL_WAIT, LTL_FUTEX_LOCK_PI, LTL_FUTEX_WAIT, LTL_KERNEL_THREAD, @@ -40,6 +41,7 @@ static const char *ltl_atom_str(enum ltl_atom atom) "ab_sl", "bl_on_rt_mu", "cl_na", + "ep_wa", "fu_lo_pi", "fu_wa", "ker_th", @@ -75,39 +77,41 @@ static_assert(RV_NUM_BA_STATES <= RV_MAX_BA_STATES); static void ltl_start(struct task_struct *task, struct ltl_monitor *mon) { - bool task_is_migration = test_bit(LTL_TASK_IS_MIGRATION, mon->atoms); - bool task_is_rcu = test_bit(LTL_TASK_IS_RCU, mon->atoms); - bool val40 = task_is_rcu || task_is_migration; - bool futex_lock_pi = test_bit(LTL_FUTEX_LOCK_PI, mon->atoms); - bool val41 = futex_lock_pi || val40; - bool block_on_rt_mutex = test_bit(LTL_BLOCK_ON_RT_MUTEX, mon->atoms); - bool val5 = block_on_rt_mutex || val41; - bool kthread_should_stop = test_bit(LTL_KTHREAD_SHOULD_STOP, mon->atoms); - bool abort_sleep = test_bit(LTL_ABORT_SLEEP, mon->atoms); - bool val32 = abort_sleep || kthread_should_stop; bool woken_by_nmi = test_bit(LTL_WOKEN_BY_NMI, mon->atoms); - bool val33 = woken_by_nmi || val32; bool woken_by_hardirq = test_bit(LTL_WOKEN_BY_HARDIRQ, mon->atoms); - bool 
val34 = woken_by_hardirq || val33; bool woken_by_equal_or_higher_prio = test_bit(LTL_WOKEN_BY_EQUAL_OR_HIGHER_PRIO, mon->atoms); - bool val14 = woken_by_equal_or_higher_prio || val34; bool wake = test_bit(LTL_WAKE, mon->atoms); - bool val13 = !wake; - bool kernel_thread = test_bit(LTL_KERNEL_THREAD, mon->atoms); + bool task_is_rcu = test_bit(LTL_TASK_IS_RCU, mon->atoms); + bool task_is_migration = test_bit(LTL_TASK_IS_MIGRATION, mon->atoms); + bool sleep = test_bit(LTL_SLEEP, mon->atoms); + bool rt = test_bit(LTL_RT, mon->atoms); + bool nanosleep_timer_abstime = test_bit(LTL_NANOSLEEP_TIMER_ABSTIME, mon->atoms); bool nanosleep_clock_tai = test_bit(LTL_NANOSLEEP_CLOCK_TAI, mon->atoms); bool nanosleep_clock_monotonic = test_bit(LTL_NANOSLEEP_CLOCK_MONOTONIC, mon->atoms); - bool val24 = nanosleep_clock_monotonic || nanosleep_clock_tai; - bool nanosleep_timer_abstime = test_bit(LTL_NANOSLEEP_TIMER_ABSTIME, mon->atoms); - bool val25 = nanosleep_timer_abstime && val24; - bool clock_nanosleep = test_bit(LTL_CLOCK_NANOSLEEP, mon->atoms); - bool val18 = clock_nanosleep && val25; + bool kthread_should_stop = test_bit(LTL_KTHREAD_SHOULD_STOP, mon->atoms); + bool kernel_thread = test_bit(LTL_KERNEL_THREAD, mon->atoms); bool futex_wait = test_bit(LTL_FUTEX_WAIT, mon->atoms); - bool val9 = futex_wait || val18; + bool futex_lock_pi = test_bit(LTL_FUTEX_LOCK_PI, mon->atoms); + bool epoll_wait = test_bit(LTL_EPOLL_WAIT, mon->atoms); + bool clock_nanosleep = test_bit(LTL_CLOCK_NANOSLEEP, mon->atoms); + bool block_on_rt_mutex = test_bit(LTL_BLOCK_ON_RT_MUTEX, mon->atoms); + bool abort_sleep = test_bit(LTL_ABORT_SLEEP, mon->atoms); + bool val42 = task_is_rcu || task_is_migration; + bool val43 = futex_lock_pi || val42; + bool val5 = block_on_rt_mutex || val43; + bool val34 = abort_sleep || kthread_should_stop; + bool val35 = woken_by_nmi || val34; + bool val36 = woken_by_hardirq || val35; + bool val14 = woken_by_equal_or_higher_prio || val36; + bool val13 = !wake; + bool val26 = 
nanosleep_clock_monotonic || nanosleep_clock_tai; + bool val27 = nanosleep_timer_abstime && val26; + bool val18 = clock_nanosleep && val27; + bool val20 = val18 || epoll_wait; + bool val9 = futex_wait || val20; bool val11 = val9 || kernel_thread; - bool sleep = test_bit(LTL_SLEEP, mon->atoms); bool val2 = !sleep; - bool rt = test_bit(LTL_RT, mon->atoms); bool val1 = !rt; bool val3 = val1 || val2; @@ -124,39 +128,41 @@ static void ltl_start(struct task_struct *task, struct ltl_monitor *mon) static void ltl_possible_next_states(struct ltl_monitor *mon, unsigned int state, unsigned long *next) { - bool task_is_migration = test_bit(LTL_TASK_IS_MIGRATION, mon->atoms); - bool task_is_rcu = test_bit(LTL_TASK_IS_RCU, mon->atoms); - bool val40 = task_is_rcu || task_is_migration; - bool futex_lock_pi = test_bit(LTL_FUTEX_LOCK_PI, mon->atoms); - bool val41 = futex_lock_pi || val40; - bool block_on_rt_mutex = test_bit(LTL_BLOCK_ON_RT_MUTEX, mon->atoms); - bool val5 = block_on_rt_mutex || val41; - bool kthread_should_stop = test_bit(LTL_KTHREAD_SHOULD_STOP, mon->atoms); - bool abort_sleep = test_bit(LTL_ABORT_SLEEP, mon->atoms); - bool val32 = abort_sleep || kthread_should_stop; bool woken_by_nmi = test_bit(LTL_WOKEN_BY_NMI, mon->atoms); - bool val33 = woken_by_nmi || val32; bool woken_by_hardirq = test_bit(LTL_WOKEN_BY_HARDIRQ, mon->atoms); - bool val34 = woken_by_hardirq || val33; bool woken_by_equal_or_higher_prio = test_bit(LTL_WOKEN_BY_EQUAL_OR_HIGHER_PRIO, mon->atoms); - bool val14 = woken_by_equal_or_higher_prio || val34; bool wake = test_bit(LTL_WAKE, mon->atoms); - bool val13 = !wake; - bool kernel_thread = test_bit(LTL_KERNEL_THREAD, mon->atoms); + bool task_is_rcu = test_bit(LTL_TASK_IS_RCU, mon->atoms); + bool task_is_migration = test_bit(LTL_TASK_IS_MIGRATION, mon->atoms); + bool sleep = test_bit(LTL_SLEEP, mon->atoms); + bool rt = test_bit(LTL_RT, mon->atoms); + bool nanosleep_timer_abstime = test_bit(LTL_NANOSLEEP_TIMER_ABSTIME, mon->atoms); bool 
nanosleep_clock_tai = test_bit(LTL_NANOSLEEP_CLOCK_TAI, mon->atoms); bool nanosleep_clock_monotonic = test_bit(LTL_NANOSLEEP_CLOCK_MONOTONIC, mon->atoms); - bool val24 = nanosleep_clock_monotonic || nanosleep_clock_tai; - bool nanosleep_timer_abstime = test_bit(LTL_NANOSLEEP_TIMER_ABSTIME, mon->atoms); - bool val25 = nanosleep_timer_abstime && val24; - bool clock_nanosleep = test_bit(LTL_CLOCK_NANOSLEEP, mon->atoms); - bool val18 = clock_nanosleep && val25; + bool kthread_should_stop = test_bit(LTL_KTHREAD_SHOULD_STOP, mon->atoms); + bool kernel_thread = test_bit(LTL_KERNEL_THREAD, mon->atoms); bool futex_wait = test_bit(LTL_FUTEX_WAIT, mon->atoms); - bool val9 = futex_wait || val18; + bool futex_lock_pi = test_bit(LTL_FUTEX_LOCK_PI, mon->atoms); + bool epoll_wait = test_bit(LTL_EPOLL_WAIT, mon->atoms); + bool clock_nanosleep = test_bit(LTL_CLOCK_NANOSLEEP, mon->atoms); + bool block_on_rt_mutex = test_bit(LTL_BLOCK_ON_RT_MUTEX, mon->atoms); + bool abort_sleep = test_bit(LTL_ABORT_SLEEP, mon->atoms); + bool val42 = task_is_rcu || task_is_migration; + bool val43 = futex_lock_pi || val42; + bool val5 = block_on_rt_mutex || val43; + bool val34 = abort_sleep || kthread_should_stop; + bool val35 = woken_by_nmi || val34; + bool val36 = woken_by_hardirq || val35; + bool val14 = woken_by_equal_or_higher_prio || val36; + bool val13 = !wake; + bool val26 = nanosleep_clock_monotonic || nanosleep_clock_tai; + bool val27 = nanosleep_timer_abstime && val26; + bool val18 = clock_nanosleep && val27; + bool val20 = val18 || epoll_wait; + bool val9 = futex_wait || val20; bool val11 = val9 || kernel_thread; - bool sleep = test_bit(LTL_SLEEP, mon->atoms); bool val2 = !sleep; - bool rt = test_bit(LTL_RT, mon->atoms); bool val1 = !rt; bool val3 = val1 || val2; diff --git a/kernel/trace/rv/monitors/stall/Kconfig b/kernel/trace/rv/monitors/stall/Kconfig new file mode 100644 index 000000000000..6f846b642544 --- /dev/null +++ b/kernel/trace/rv/monitors/stall/Kconfig @@ -0,0 +1,13 @@ +# 
SPDX-License-Identifier: GPL-2.0-only +# +config RV_MON_STALL + depends on RV + select HA_MON_EVENTS_ID + bool "stall monitor" + help + Enable the stall sample monitor that illustrates the usage of hybrid + automata monitors. It can be used to identify tasks stalled for + longer than a threshold. + + For further information, see: + Documentation/trace/rv/monitor_stall.rst diff --git a/kernel/trace/rv/monitors/stall/stall.c b/kernel/trace/rv/monitors/stall/stall.c new file mode 100644 index 000000000000..9ccfda6b0e73 --- /dev/null +++ b/kernel/trace/rv/monitors/stall/stall.c @@ -0,0 +1,150 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/ftrace.h> +#include <linux/tracepoint.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/rv.h> +#include <rv/instrumentation.h> + +#define MODULE_NAME "stall" + +#include <trace/events/sched.h> +#include <rv_trace.h> + +#define RV_MON_TYPE RV_MON_PER_TASK +#define HA_TIMER_TYPE HA_TIMER_WHEEL +#include "stall.h" +#include <rv/ha_monitor.h> + +static u64 threshold_jiffies = 1000; +module_param(threshold_jiffies, ullong, 0644); + +static u64 ha_get_env(struct ha_monitor *ha_mon, enum envs_stall env, u64 time_ns) +{ + if (env == clk_stall) + return ha_get_clk_jiffy(ha_mon, env); + return ENV_INVALID_VALUE; +} + +static void ha_reset_env(struct ha_monitor *ha_mon, enum envs_stall env, u64 time_ns) +{ + if (env == clk_stall) + ha_reset_clk_jiffy(ha_mon, env); +} + +static inline bool ha_verify_invariants(struct ha_monitor *ha_mon, + enum states curr_state, enum events event, + enum states next_state, u64 time_ns) +{ + if (curr_state == enqueued_stall) + return ha_check_invariant_jiffy(ha_mon, clk_stall, time_ns); + return true; +} + +static inline bool ha_verify_guards(struct ha_monitor *ha_mon, + enum states curr_state, enum events event, + enum states next_state, u64 time_ns) +{ + bool res = true; + + if (curr_state == dequeued_stall && event == sched_wakeup_stall) + 
ha_reset_env(ha_mon, clk_stall, time_ns); + else if (curr_state == running_stall && event == sched_switch_preempt_stall) + ha_reset_env(ha_mon, clk_stall, time_ns); + return res; +} + +static inline void ha_setup_invariants(struct ha_monitor *ha_mon, + enum states curr_state, enum events event, + enum states next_state, u64 time_ns) +{ + if (next_state == curr_state) + return; + if (next_state == enqueued_stall) + ha_start_timer_jiffy(ha_mon, clk_stall, threshold_jiffies, time_ns); + else if (curr_state == enqueued_stall) + ha_cancel_timer(ha_mon); +} + +static bool ha_verify_constraint(struct ha_monitor *ha_mon, + enum states curr_state, enum events event, + enum states next_state, u64 time_ns) +{ + if (!ha_verify_invariants(ha_mon, curr_state, event, next_state, time_ns)) + return false; + + if (!ha_verify_guards(ha_mon, curr_state, event, next_state, time_ns)) + return false; + + ha_setup_invariants(ha_mon, curr_state, event, next_state, time_ns); + + return true; +} + +static void handle_sched_switch(void *data, bool preempt, + struct task_struct *prev, + struct task_struct *next, + unsigned int prev_state) +{ + if (!preempt && prev_state != TASK_RUNNING) + da_handle_start_event(prev, sched_switch_wait_stall); + else + da_handle_event(prev, sched_switch_preempt_stall); + da_handle_event(next, sched_switch_in_stall); +} + +static void handle_sched_wakeup(void *data, struct task_struct *p) +{ + da_handle_event(p, sched_wakeup_stall); +} + +static int enable_stall(void) +{ + int retval; + + retval = da_monitor_init(); + if (retval) + return retval; + + rv_attach_trace_probe("stall", sched_switch, handle_sched_switch); + rv_attach_trace_probe("stall", sched_wakeup, handle_sched_wakeup); + + return 0; +} + +static void disable_stall(void) +{ + rv_this.enabled = 0; + + rv_detach_trace_probe("stall", sched_switch, handle_sched_switch); + rv_detach_trace_probe("stall", sched_wakeup, handle_sched_wakeup); + + da_monitor_destroy(); +} + +static struct rv_monitor rv_this 
= { + .name = "stall", + .description = "identify tasks stalled for longer than a threshold.", + .enable = enable_stall, + .disable = disable_stall, + .reset = da_monitor_reset_all, + .enabled = 0, +}; + +static int __init register_stall(void) +{ + return rv_register_monitor(&rv_this, NULL); +} + +static void __exit unregister_stall(void) +{ + rv_unregister_monitor(&rv_this); +} + +module_init(register_stall); +module_exit(unregister_stall); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Gabriele Monaco <gmonaco@redhat.com>"); +MODULE_DESCRIPTION("stall: identify tasks stalled for longer than a threshold."); diff --git a/kernel/trace/rv/monitors/stall/stall.h b/kernel/trace/rv/monitors/stall/stall.h new file mode 100644 index 000000000000..638520cb1082 --- /dev/null +++ b/kernel/trace/rv/monitors/stall/stall.h @@ -0,0 +1,81 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Automatically generated C representation of stall automaton + * For further information about this format, see kernel documentation: + * Documentation/trace/rv/deterministic_automata.rst + */ + +#define MONITOR_NAME stall + +enum states_stall { + dequeued_stall, + enqueued_stall, + running_stall, + state_max_stall, +}; + +#define INVALID_STATE state_max_stall + +enum events_stall { + sched_switch_in_stall, + sched_switch_preempt_stall, + sched_switch_wait_stall, + sched_wakeup_stall, + event_max_stall, +}; + +enum envs_stall { + clk_stall, + env_max_stall, + env_max_stored_stall = env_max_stall, +}; + +_Static_assert(env_max_stored_stall <= MAX_HA_ENV_LEN, "Not enough slots"); + +struct automaton_stall { + char *state_names[state_max_stall]; + char *event_names[event_max_stall]; + char *env_names[env_max_stall]; + unsigned char function[state_max_stall][event_max_stall]; + unsigned char initial_state; + bool final_states[state_max_stall]; +}; + +static const struct automaton_stall automaton_stall = { + .state_names = { + "dequeued", + "enqueued", + "running", + }, + .event_names = { + 
"sched_switch_in", + "sched_switch_preempt", + "sched_switch_wait", + "sched_wakeup", + }, + .env_names = { + "clk", + }, + .function = { + { + INVALID_STATE, + INVALID_STATE, + INVALID_STATE, + enqueued_stall, + }, + { + running_stall, + INVALID_STATE, + INVALID_STATE, + enqueued_stall, + }, + { + running_stall, + enqueued_stall, + dequeued_stall, + running_stall, + }, + }, + .initial_state = dequeued_stall, + .final_states = { 1, 0, 0 }, +}; diff --git a/kernel/trace/rv/monitors/stall/stall_trace.h b/kernel/trace/rv/monitors/stall/stall_trace.h new file mode 100644 index 000000000000..6a7cc1b1d040 --- /dev/null +++ b/kernel/trace/rv/monitors/stall/stall_trace.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * Snippet to be included in rv_trace.h + */ + +#ifdef CONFIG_RV_MON_STALL +DEFINE_EVENT(event_da_monitor_id, event_stall, + TP_PROTO(int id, char *state, char *event, char *next_state, bool final_state), + TP_ARGS(id, state, event, next_state, final_state)); + +DEFINE_EVENT(error_da_monitor_id, error_stall, + TP_PROTO(int id, char *state, char *event), + TP_ARGS(id, state, event)); + +DEFINE_EVENT(error_env_da_monitor_id, error_env_stall, + TP_PROTO(int id, char *state, char *event, char *env), + TP_ARGS(id, state, event, env)); +#endif /* CONFIG_RV_MON_STALL */ diff --git a/kernel/trace/rv/rv_trace.h b/kernel/trace/rv/rv_trace.h index 4a6faddac614..9622c269789c 100644 --- a/kernel/trace/rv/rv_trace.h +++ b/kernel/trace/rv/rv_trace.h @@ -62,9 +62,39 @@ DECLARE_EVENT_CLASS(error_da_monitor, #include <monitors/scpd/scpd_trace.h> #include <monitors/snep/snep_trace.h> #include <monitors/sts/sts_trace.h> -#include <monitors/opid/opid_trace.h> // Add new monitors based on CONFIG_DA_MON_EVENTS_IMPLICIT here +#ifdef CONFIG_HA_MON_EVENTS_IMPLICIT +/* For simplicity this class is marked as DA although relevant only for HA */ +DECLARE_EVENT_CLASS(error_env_da_monitor, + + TP_PROTO(char *state, char *event, char *env), + + TP_ARGS(state, event, env), + + 
TP_STRUCT__entry( + __string( state, state ) + __string( event, event ) + __string( env, env ) + ), + + TP_fast_assign( + __assign_str(state); + __assign_str(event); + __assign_str(env); + ), + + TP_printk("event %s not expected in the state %s with env %s", + __get_str(event), + __get_str(state), + __get_str(env)) +); + +#include <monitors/opid/opid_trace.h> +// Add new monitors based on CONFIG_HA_MON_EVENTS_IMPLICIT here + +#endif + #endif /* CONFIG_DA_MON_EVENTS_IMPLICIT */ #ifdef CONFIG_DA_MON_EVENTS_ID @@ -128,6 +158,41 @@ DECLARE_EVENT_CLASS(error_da_monitor_id, #include <monitors/sssw/sssw_trace.h> // Add new monitors based on CONFIG_DA_MON_EVENTS_ID here +#ifdef CONFIG_HA_MON_EVENTS_ID +/* For simplicity this class is marked as DA although relevant only for HA */ +DECLARE_EVENT_CLASS(error_env_da_monitor_id, + + TP_PROTO(int id, char *state, char *event, char *env), + + TP_ARGS(id, state, event, env), + + TP_STRUCT__entry( + __field( int, id ) + __string( state, state ) + __string( event, event ) + __string( env, env ) + ), + + TP_fast_assign( + __assign_str(state); + __assign_str(event); + __assign_str(env); + __entry->id = id; + ), + + TP_printk("%d: event %s not expected in the state %s with env %s", + __entry->id, + __get_str(event), + __get_str(state), + __get_str(env)) +); + +#include <monitors/stall/stall_trace.h> +#include <monitors/nomiss/nomiss_trace.h> +// Add new monitors based on CONFIG_HA_MON_EVENTS_ID here + +#endif + #endif /* CONFIG_DA_MON_EVENTS_ID */ #ifdef CONFIG_LTL_MON_EVENTS_ID DECLARE_EVENT_CLASS(event_ltl_monitor_id, |
