diff options
Diffstat (limited to 'include/linux')
| -rw-r--r-- | include/linux/cacheinfo.h | 1 | ||||
| -rw-r--r-- | include/linux/mm_types.h | 32 | ||||
| -rw-r--r-- | include/linux/sched.h | 102 | ||||
| -rw-r--r-- | include/linux/sched/clock.h | 5 | ||||
| -rw-r--r-- | include/linux/sched/smt.h | 4 | ||||
| -rw-r--r-- | include/linux/sched/topology.h | 32 | ||||
| -rw-r--r-- | include/linux/topology.h | 15 |
7 files changed, 134 insertions, 57 deletions
diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h index c8f4f0a0b874..fc879ac4cc4f 100644 --- a/include/linux/cacheinfo.h +++ b/include/linux/cacheinfo.h @@ -89,6 +89,7 @@ int populate_cache_leaves(unsigned int cpu); int cache_setup_acpi(unsigned int cpu); bool last_level_cache_is_valid(unsigned int cpu); bool last_level_cache_is_shared(unsigned int cpu_x, unsigned int cpu_y); +struct cacheinfo *get_cpu_cacheinfo_llc(unsigned int cpu); int fetch_cache_info(unsigned int cpu); int detect_cache_attributes(unsigned int cpu); #ifndef CONFIG_ACPI_PPTT diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 2fc552b3924a..5cadb00d9352 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1223,6 +1223,8 @@ struct mm_struct { /* MM CID related storage */ struct mm_mm_cid mm_cid; + /* sched_cache related statistics */ + struct sched_cache_stat sc_stat; #ifdef CONFIG_MMU atomic_long_t pgtables_bytes; /* size of all page tables */ #endif @@ -1619,6 +1621,36 @@ static inline unsigned int mm_cid_size(void) # define MM_CID_STATIC_SIZE 0 #endif /* CONFIG_SCHED_MM_CID */ +#ifdef CONFIG_SCHED_CACHE +void mm_init_sched(struct mm_struct *mm, + struct sched_cache_time __percpu *pcpu_sched); + +static inline int mm_alloc_sched_noprof(struct mm_struct *mm) +{ + struct sched_cache_time __percpu *pcpu_sched = + alloc_percpu_noprof(struct sched_cache_time); + + if (!pcpu_sched) + return -ENOMEM; + + mm_init_sched(mm, pcpu_sched); + return 0; +} + +#define mm_alloc_sched(...) 
alloc_hooks(mm_alloc_sched_noprof(__VA_ARGS__)) + +static inline void mm_destroy_sched(struct mm_struct *mm) +{ + free_percpu(mm->sc_stat.pcpu_sched); + mm->sc_stat.pcpu_sched = NULL; +} +#else /* !CONFIG_SCHED_CACHE */ + +static inline int mm_alloc_sched(struct mm_struct *mm) { return 0; } +static inline void mm_destroy_sched(struct mm_struct *mm) { } + +#endif /* CONFIG_SCHED_CACHE */ + struct mmu_gather; extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm); extern void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm); diff --git a/include/linux/sched.h b/include/linux/sched.h index bd9488751f51..b3204a15d512 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -161,7 +161,7 @@ struct user_event_mm; */ #define is_special_task_state(state) \ ((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_PARKED | \ - TASK_DEAD | TASK_FROZEN)) + TASK_DEAD | TASK_WAKING | TASK_FROZEN)) #ifdef CONFIG_DEBUG_ATOMIC_SLEEP # define debug_normal_state_change(state_value) \ @@ -702,6 +702,11 @@ struct sched_dl_entity { * running, skipping the defer phase. * * @dl_defer_idle tracks idle state + * + * @dl_bw_attached tells if this server's bandwidth currently + * contributes to the root domain's total_bw. Only meaningful for server + * entities (@dl_server == 1). Allows toggling the reservation on/off + * without losing the configured @dl_runtime/@dl_period. */ unsigned int dl_throttled : 1; unsigned int dl_yielded : 1; @@ -713,6 +718,7 @@ struct sched_dl_entity { unsigned int dl_defer_armed : 1; unsigned int dl_defer_running : 1; unsigned int dl_defer_idle : 1; + unsigned int dl_bw_attached : 1; /* * Bandwidth enforcement timer. 
Each -deadline task has its @@ -846,7 +852,11 @@ struct task_struct { struct alloc_tag *alloc_tag; #endif - int on_cpu; + u8 on_cpu; + u8 on_rq; + u8 is_blocked; + u8 __pad; + struct __call_single_node wake_entry; unsigned int wakee_flips; unsigned long wakee_flip_decay_ts; @@ -861,7 +871,6 @@ struct task_struct { */ int recent_used_cpu; int wake_cpu; - int on_rq; int prio; int static_prio; @@ -1243,6 +1252,13 @@ struct task_struct { struct mutex *blocked_on; /* lock we're blocked on */ raw_spinlock_t blocked_lock; + /* + * The task that is boosting this task; a back link for the current + * donor stack. Set in schedule() -> find_proxy_task() and only stable + * under preempt_disable(). + */ + struct task_struct *blocked_donor; + #ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER /* * Encoded lock address causing task block (lower 2 bits = type from @@ -1403,6 +1419,13 @@ struct task_struct { unsigned long numa_pages_migrated; #endif /* CONFIG_NUMA_BALANCING */ +#ifdef CONFIG_SCHED_CACHE + struct callback_head cache_work; + int preferred_llc; + /* 1: task was enqueued to its preferred LLC, 0 otherwise */ + int pref_llc_queued; +#endif + struct rseq_data rseq; struct sched_mm_cid mm_cid; @@ -2177,19 +2200,10 @@ extern int __cond_resched_rwlock_write(rwlock_t *lock) __must_hold(lock); #ifndef CONFIG_PREEMPT_RT -/* - * With proxy exec, if a task has been proxy-migrated, it may be a donor - * on a cpu that it can't actually run on. Thus we need a special state - * to denote that the task is being woken, but that it needs to be - * evaluated for return-migration before it is run. So if the task is - * blocked_on PROXY_WAKING, return migrate it before running it. - */ -#define PROXY_WAKING ((struct mutex *)(-1L)) - static inline struct mutex *__get_task_blocked_on(struct task_struct *p) { lockdep_assert_held_once(&p->blocked_lock); - return p->blocked_on == PROXY_WAKING ? 
NULL : p->blocked_on; + return p->blocked_on; } static inline void __set_task_blocked_on(struct task_struct *p, struct mutex *m) @@ -2217,7 +2231,7 @@ static inline void __clear_task_blocked_on(struct task_struct *p, struct mutex * * blocked_on relationships, but make sure we are not * clearing the relationship with a different lock. */ - WARN_ON_ONCE(m && p->blocked_on && p->blocked_on != m && p->blocked_on != PROXY_WAKING); + WARN_ON_ONCE(m && p->blocked_on && p->blocked_on != m); p->blocked_on = NULL; } @@ -2226,35 +2240,6 @@ static inline void clear_task_blocked_on(struct task_struct *p, struct mutex *m) guard(raw_spinlock_irqsave)(&p->blocked_lock); __clear_task_blocked_on(p, m); } - -static inline void __set_task_blocked_on_waking(struct task_struct *p, struct mutex *m) -{ - /* Currently we serialize blocked_on under the task::blocked_lock */ - lockdep_assert_held_once(&p->blocked_lock); - - if (!sched_proxy_exec()) { - __clear_task_blocked_on(p, m); - return; - } - - /* Don't set PROXY_WAKING if blocked_on was already cleared */ - if (!p->blocked_on) - return; - /* - * There may be cases where we set PROXY_WAKING on tasks that were - * already set to waking, but make sure we are not changing - * the relationship with a different lock. 
- */ - WARN_ON_ONCE(m && p->blocked_on != m && p->blocked_on != PROXY_WAKING); - p->blocked_on = PROXY_WAKING; -} - -static inline void set_task_blocked_on_waking(struct task_struct *p, struct mutex *m) -{ - guard(raw_spinlock_irqsave)(&p->blocked_lock); - __set_task_blocked_on_waking(p, m); -} - #else static inline void __clear_task_blocked_on(struct task_struct *p, struct rt_mutex *m) { @@ -2263,14 +2248,6 @@ static inline void __clear_task_blocked_on(struct task_struct *p, struct rt_mute static inline void clear_task_blocked_on(struct task_struct *p, struct rt_mutex *m) { } - -static inline void __set_task_blocked_on_waking(struct task_struct *p, struct rt_mutex *m) -{ -} - -static inline void set_task_blocked_on_waking(struct task_struct *p, struct rt_mutex *m) -{ -} #endif /* !CONFIG_PREEMPT_RT */ static __always_inline bool need_resched(void) @@ -2403,6 +2380,29 @@ static __always_inline int task_mm_cid(struct task_struct *t) } #endif +#ifdef CONFIG_SCHED_CACHE + +struct sched_cache_time { + u64 runtime; + unsigned long epoch; +}; + +struct sched_cache_stat { + struct sched_cache_time __percpu *pcpu_sched; + raw_spinlock_t lock; + unsigned long epoch; + u64 nr_running_avg; + unsigned long next_scan; + unsigned long footprint; + int cpu; +} ____cacheline_aligned_in_smp; + +#else + +struct sched_cache_stat { }; + +#endif + #ifndef MODULE #ifndef COMPILE_OFFSETS diff --git a/include/linux/sched/clock.h b/include/linux/sched/clock.h index 196f0ca351a2..39f0a7f94bfc 100644 --- a/include/linux/sched/clock.h +++ b/include/linux/sched/clock.h @@ -33,6 +33,11 @@ extern u64 sched_clock_cpu(int cpu); extern void sched_clock_init(void); #ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK +static inline int sched_clock_stable(void) +{ + return 1; +} + static inline void sched_clock_tick(void) { } diff --git a/include/linux/sched/smt.h b/include/linux/sched/smt.h index 166b19af956f..cde6679c0278 100644 --- a/include/linux/sched/smt.h +++ b/include/linux/sched/smt.h @@ -4,16 +4,12 @@ 
#include <linux/static_key.h> -#ifdef CONFIG_SCHED_SMT extern struct static_key_false sched_smt_present; static __always_inline bool sched_smt_active(void) { return static_branch_likely(&sched_smt_present); } -#else -static __always_inline bool sched_smt_active(void) { return false; } -#endif void arch_smt_update(void); diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index 36553e14866d..b5d9d7c2b8ad 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -67,7 +67,25 @@ struct sched_domain_shared { atomic_t ref; atomic_t nr_busy_cpus; int has_idle_cores; - int nr_idle_scan; + union { + int nr_idle_scan; + /* + * Used during allocation to claim the sched_domain_shared + * object at multiple levels. + * + * Note: between build and the first periodic LB tick, which + * rewrites the union via update_idle_cpu_scan(), readers of + * nr_idle_scan may observe the transient SD_* flag value as + * the scan bound. The flag bits are small positive integers, + * so the effect is just a slightly relaxed scan bound for one + * window and self-heals on the first tick. 
+ */ + int alloc_flags; + }; +#ifdef CONFIG_SCHED_CACHE + unsigned long util_avg; + unsigned long capacity; +#endif }; struct sched_domain { @@ -99,6 +117,12 @@ struct sched_domain { u64 max_newidle_lb_cost; unsigned long last_decay_max_lb_cost; +#ifdef CONFIG_SCHED_CACHE + unsigned int llc_max; + unsigned int *llc_counts __counted_by_ptr(llc_max); + unsigned long llc_bytes; +#endif + #ifdef CONFIG_SCHEDSTATS /* sched_balance_rq() stats */ unsigned int lb_count[CPU_MAX_IDLE_TYPES]; @@ -256,4 +280,10 @@ static inline int task_node(const struct task_struct *p) return cpu_to_node(task_cpu(p)); } +#ifdef CONFIG_SCHED_CACHE +extern void sched_update_llc_bytes(unsigned int cpu); +#else +static inline void sched_update_llc_bytes(unsigned int cpu) { } +#endif + #endif /* _LINUX_SCHED_TOPOLOGY_H */ diff --git a/include/linux/topology.h b/include/linux/topology.h index 6575af39fd10..709a2dcf4c73 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -230,11 +230,24 @@ static inline int cpu_to_mem(int cpu) #define topology_drawer_cpumask(cpu) cpumask_of(cpu) #endif -#if defined(CONFIG_SCHED_SMT) && !defined(cpu_smt_mask) +/* + * Defining cpu_smt_mask as cpumask_of that CPU helps to get + * rid of a lot of ifdeffery all around the codebase in case of + * CONFIG_SCHED_SMT=n. It just means there are no other siblings, which + * is what is expected. + */ +#if defined(CONFIG_SCHED_SMT) +# if !defined(cpu_smt_mask) static inline const struct cpumask *cpu_smt_mask(int cpu) { return topology_sibling_cpumask(cpu); } +# endif +#else /* !CONFIG_SCHED_SMT */ +static inline const struct cpumask *cpu_smt_mask(int cpu) +{ + return cpumask_of(cpu); +} #endif #ifndef topology_is_primary_thread |
