summaryrefslogtreecommitdiff
path: root/include/linux
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/cacheinfo.h1
-rw-r--r--include/linux/mm_types.h32
-rw-r--r--include/linux/sched.h102
-rw-r--r--include/linux/sched/clock.h5
-rw-r--r--include/linux/sched/smt.h4
-rw-r--r--include/linux/sched/topology.h32
-rw-r--r--include/linux/topology.h15
7 files changed, 134 insertions, 57 deletions
diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h
index c8f4f0a0b874..fc879ac4cc4f 100644
--- a/include/linux/cacheinfo.h
+++ b/include/linux/cacheinfo.h
@@ -89,6 +89,7 @@ int populate_cache_leaves(unsigned int cpu);
int cache_setup_acpi(unsigned int cpu);
bool last_level_cache_is_valid(unsigned int cpu);
bool last_level_cache_is_shared(unsigned int cpu_x, unsigned int cpu_y);
+struct cacheinfo *get_cpu_cacheinfo_llc(unsigned int cpu);
int fetch_cache_info(unsigned int cpu);
int detect_cache_attributes(unsigned int cpu);
#ifndef CONFIG_ACPI_PPTT
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 2fc552b3924a..5cadb00d9352 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -1223,6 +1223,8 @@ struct mm_struct {
/* MM CID related storage */
struct mm_mm_cid mm_cid;
+ /* sched_cache related statistics */
+ struct sched_cache_stat sc_stat;
#ifdef CONFIG_MMU
atomic_long_t pgtables_bytes; /* size of all page tables */
#endif
@@ -1619,6 +1621,36 @@ static inline unsigned int mm_cid_size(void)
# define MM_CID_STATIC_SIZE 0
#endif /* CONFIG_SCHED_MM_CID */
+#ifdef CONFIG_SCHED_CACHE
+void mm_init_sched(struct mm_struct *mm,
+ struct sched_cache_time __percpu *pcpu_sched);
+
+static inline int mm_alloc_sched_noprof(struct mm_struct *mm)
+{
+ struct sched_cache_time __percpu *pcpu_sched =
+ alloc_percpu_noprof(struct sched_cache_time);
+
+ if (!pcpu_sched)
+ return -ENOMEM;
+
+ mm_init_sched(mm, pcpu_sched);
+ return 0;
+}
+
+#define mm_alloc_sched(...) alloc_hooks(mm_alloc_sched_noprof(__VA_ARGS__))
+
+static inline void mm_destroy_sched(struct mm_struct *mm)
+{
+ free_percpu(mm->sc_stat.pcpu_sched);
+ mm->sc_stat.pcpu_sched = NULL;
+}
+#else /* !CONFIG_SCHED_CACHE */
+
+static inline int mm_alloc_sched(struct mm_struct *mm) { return 0; }
+static inline void mm_destroy_sched(struct mm_struct *mm) { }
+
+#endif /* CONFIG_SCHED_CACHE */
+
struct mmu_gather;
extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm);
extern void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index bd9488751f51..b3204a15d512 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -161,7 +161,7 @@ struct user_event_mm;
*/
#define is_special_task_state(state) \
((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_PARKED | \
- TASK_DEAD | TASK_FROZEN))
+ TASK_DEAD | TASK_WAKING | TASK_FROZEN))
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
# define debug_normal_state_change(state_value) \
@@ -702,6 +702,11 @@ struct sched_dl_entity {
* running, skipping the defer phase.
*
* @dl_defer_idle tracks idle state
+ *
+ * @dl_bw_attached tells if this server's bandwidth currently
+ * contributes to the root domain's total_bw. Only meaningful for server
+ * entities (@dl_server == 1). Allows toggling the reservation on/off
+ * without losing the configured @dl_runtime/@dl_period.
*/
unsigned int dl_throttled : 1;
unsigned int dl_yielded : 1;
@@ -713,6 +718,7 @@ struct sched_dl_entity {
unsigned int dl_defer_armed : 1;
unsigned int dl_defer_running : 1;
unsigned int dl_defer_idle : 1;
+ unsigned int dl_bw_attached : 1;
/*
* Bandwidth enforcement timer. Each -deadline task has its
@@ -846,7 +852,11 @@ struct task_struct {
struct alloc_tag *alloc_tag;
#endif
- int on_cpu;
+ u8 on_cpu;
+ u8 on_rq;
+ u8 is_blocked;
+ u8 __pad;
+
struct __call_single_node wake_entry;
unsigned int wakee_flips;
unsigned long wakee_flip_decay_ts;
@@ -861,7 +871,6 @@ struct task_struct {
*/
int recent_used_cpu;
int wake_cpu;
- int on_rq;
int prio;
int static_prio;
@@ -1243,6 +1252,13 @@ struct task_struct {
struct mutex *blocked_on; /* lock we're blocked on */
raw_spinlock_t blocked_lock;
+ /*
+ * The task that is boosting this task; a back link for the current
+ * donor stack. Set in schedule() -> find_proxy_task() and only stable
+ * under preempt_disable().
+ */
+ struct task_struct *blocked_donor;
+
#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER
/*
* Encoded lock address causing task block (lower 2 bits = type from
@@ -1403,6 +1419,13 @@ struct task_struct {
unsigned long numa_pages_migrated;
#endif /* CONFIG_NUMA_BALANCING */
+#ifdef CONFIG_SCHED_CACHE
+ struct callback_head cache_work;
+ int preferred_llc;
+ /* 1: task was enqueued to its preferred LLC, 0 otherwise */
+ int pref_llc_queued;
+#endif
+
struct rseq_data rseq;
struct sched_mm_cid mm_cid;
@@ -2177,19 +2200,10 @@ extern int __cond_resched_rwlock_write(rwlock_t *lock) __must_hold(lock);
#ifndef CONFIG_PREEMPT_RT
-/*
- * With proxy exec, if a task has been proxy-migrated, it may be a donor
- * on a cpu that it can't actually run on. Thus we need a special state
- * to denote that the task is being woken, but that it needs to be
- * evaluated for return-migration before it is run. So if the task is
- * blocked_on PROXY_WAKING, return migrate it before running it.
- */
-#define PROXY_WAKING ((struct mutex *)(-1L))
-
static inline struct mutex *__get_task_blocked_on(struct task_struct *p)
{
lockdep_assert_held_once(&p->blocked_lock);
- return p->blocked_on == PROXY_WAKING ? NULL : p->blocked_on;
+ return p->blocked_on;
}
static inline void __set_task_blocked_on(struct task_struct *p, struct mutex *m)
@@ -2217,7 +2231,7 @@ static inline void __clear_task_blocked_on(struct task_struct *p, struct mutex *
* blocked_on relationships, but make sure we are not
* clearing the relationship with a different lock.
*/
- WARN_ON_ONCE(m && p->blocked_on && p->blocked_on != m && p->blocked_on != PROXY_WAKING);
+ WARN_ON_ONCE(m && p->blocked_on && p->blocked_on != m);
p->blocked_on = NULL;
}
@@ -2226,35 +2240,6 @@ static inline void clear_task_blocked_on(struct task_struct *p, struct mutex *m)
guard(raw_spinlock_irqsave)(&p->blocked_lock);
__clear_task_blocked_on(p, m);
}
-
-static inline void __set_task_blocked_on_waking(struct task_struct *p, struct mutex *m)
-{
- /* Currently we serialize blocked_on under the task::blocked_lock */
- lockdep_assert_held_once(&p->blocked_lock);
-
- if (!sched_proxy_exec()) {
- __clear_task_blocked_on(p, m);
- return;
- }
-
- /* Don't set PROXY_WAKING if blocked_on was already cleared */
- if (!p->blocked_on)
- return;
- /*
- * There may be cases where we set PROXY_WAKING on tasks that were
- * already set to waking, but make sure we are not changing
- * the relationship with a different lock.
- */
- WARN_ON_ONCE(m && p->blocked_on != m && p->blocked_on != PROXY_WAKING);
- p->blocked_on = PROXY_WAKING;
-}
-
-static inline void set_task_blocked_on_waking(struct task_struct *p, struct mutex *m)
-{
- guard(raw_spinlock_irqsave)(&p->blocked_lock);
- __set_task_blocked_on_waking(p, m);
-}
-
#else
static inline void __clear_task_blocked_on(struct task_struct *p, struct rt_mutex *m)
{
@@ -2263,14 +2248,6 @@ static inline void __clear_task_blocked_on(struct task_struct *p, struct rt_mute
static inline void clear_task_blocked_on(struct task_struct *p, struct rt_mutex *m)
{
}
-
-static inline void __set_task_blocked_on_waking(struct task_struct *p, struct rt_mutex *m)
-{
-}
-
-static inline void set_task_blocked_on_waking(struct task_struct *p, struct rt_mutex *m)
-{
-}
#endif /* !CONFIG_PREEMPT_RT */
static __always_inline bool need_resched(void)
@@ -2403,6 +2380,29 @@ static __always_inline int task_mm_cid(struct task_struct *t)
}
#endif
+#ifdef CONFIG_SCHED_CACHE
+
+struct sched_cache_time {
+ u64 runtime;
+ unsigned long epoch;
+};
+
+struct sched_cache_stat {
+ struct sched_cache_time __percpu *pcpu_sched;
+ raw_spinlock_t lock;
+ unsigned long epoch;
+ u64 nr_running_avg;
+ unsigned long next_scan;
+ unsigned long footprint;
+ int cpu;
+} ____cacheline_aligned_in_smp;
+
+#else
+
+struct sched_cache_stat { };
+
+#endif
+
#ifndef MODULE
#ifndef COMPILE_OFFSETS
diff --git a/include/linux/sched/clock.h b/include/linux/sched/clock.h
index 196f0ca351a2..39f0a7f94bfc 100644
--- a/include/linux/sched/clock.h
+++ b/include/linux/sched/clock.h
@@ -33,6 +33,11 @@ extern u64 sched_clock_cpu(int cpu);
extern void sched_clock_init(void);
#ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
+static inline int sched_clock_stable(void)
+{
+ return 1;
+}
+
static inline void sched_clock_tick(void)
{
}
diff --git a/include/linux/sched/smt.h b/include/linux/sched/smt.h
index 166b19af956f..cde6679c0278 100644
--- a/include/linux/sched/smt.h
+++ b/include/linux/sched/smt.h
@@ -4,16 +4,12 @@
#include <linux/static_key.h>
-#ifdef CONFIG_SCHED_SMT
extern struct static_key_false sched_smt_present;
static __always_inline bool sched_smt_active(void)
{
return static_branch_likely(&sched_smt_present);
}
-#else
-static __always_inline bool sched_smt_active(void) { return false; }
-#endif
void arch_smt_update(void);
diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index 36553e14866d..b5d9d7c2b8ad 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -67,7 +67,25 @@ struct sched_domain_shared {
atomic_t ref;
atomic_t nr_busy_cpus;
int has_idle_cores;
- int nr_idle_scan;
+ union {
+ int nr_idle_scan;
+ /*
+ * Used during allocation to claim the sched_domain_shared
+ * object at multiple levels.
+ *
+ * Note: between build and the first periodic LB tick, which
+ * rewrites the union via update_idle_cpu_scan(), readers of
+ * nr_idle_scan may observe the transient SD_* flag value as
+ * the scan bound. The flag bits are small positive integers,
+ * so the effect is just a slightly relaxed scan bound for one
+ * window and self-heals on the first tick.
+ */
+ int alloc_flags;
+ };
+#ifdef CONFIG_SCHED_CACHE
+ unsigned long util_avg;
+ unsigned long capacity;
+#endif
};
struct sched_domain {
@@ -99,6 +117,12 @@ struct sched_domain {
u64 max_newidle_lb_cost;
unsigned long last_decay_max_lb_cost;
+#ifdef CONFIG_SCHED_CACHE
+ unsigned int llc_max;
+ unsigned int *llc_counts __counted_by_ptr(llc_max);
+ unsigned long llc_bytes;
+#endif
+
#ifdef CONFIG_SCHEDSTATS
/* sched_balance_rq() stats */
unsigned int lb_count[CPU_MAX_IDLE_TYPES];
@@ -256,4 +280,10 @@ static inline int task_node(const struct task_struct *p)
return cpu_to_node(task_cpu(p));
}
+#ifdef CONFIG_SCHED_CACHE
+extern void sched_update_llc_bytes(unsigned int cpu);
+#else
+static inline void sched_update_llc_bytes(unsigned int cpu) { }
+#endif
+
#endif /* _LINUX_SCHED_TOPOLOGY_H */
diff --git a/include/linux/topology.h b/include/linux/topology.h
index 6575af39fd10..709a2dcf4c73 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -230,11 +230,24 @@ static inline int cpu_to_mem(int cpu)
#define topology_drawer_cpumask(cpu) cpumask_of(cpu)
#endif
-#if defined(CONFIG_SCHED_SMT) && !defined(cpu_smt_mask)
+/*
+ * Defining cpu_smt_mask as cpumask_of that CPU helps to get
+ * rid of lot of ifdeffery all around the codebase in case of
+ * CONFIG_SCHED_SMT=n. It just means there are no other siblings, which
+ * is what is expected.
+ */
+#if defined(CONFIG_SCHED_SMT)
+# if !defined(cpu_smt_mask)
static inline const struct cpumask *cpu_smt_mask(int cpu)
{
return topology_sibling_cpumask(cpu);
}
+# endif
+#else /* !CONFIG_SCHED_SMT */
+static inline const struct cpumask *cpu_smt_mask(int cpu)
+{
+ return cpumask_of(cpu);
+}
#endif
#ifndef topology_is_primary_thread