diff options
Diffstat (limited to 'include/linux')
| -rw-r--r-- | include/linux/bootmem_info.h | 1 | ||||
| -rw-r--r-- | include/linux/compaction.h | 10 | ||||
| -rw-r--r-- | include/linux/damon.h | 136 | ||||
| -rw-r--r-- | include/linux/gfp.h | 4 | ||||
| -rw-r--r-- | include/linux/gfp_types.h | 6 | ||||
| -rw-r--r-- | include/linux/highmem-internal.h | 2 | ||||
| -rw-r--r-- | include/linux/huge_mm.h | 47 | ||||
| -rw-r--r-- | include/linux/list_lru.h | 70 | ||||
| -rw-r--r-- | include/linux/memcontrol.h | 31 | ||||
| -rw-r--r-- | include/linux/memory.h | 7 | ||||
| -rw-r--r-- | include/linux/memory_hotplug.h | 8 | ||||
| -rw-r--r-- | include/linux/mm.h | 62 | ||||
| -rw-r--r-- | include/linux/mm_inline.h | 2 | ||||
| -rw-r--r-- | include/linux/mm_types.h | 19 | ||||
| -rw-r--r-- | include/linux/mmu_notifier.h | 4 | ||||
| -rw-r--r-- | include/linux/mmzone.h | 23 | ||||
| -rw-r--r-- | include/linux/nodemask.h | 18 | ||||
| -rw-r--r-- | include/linux/page_ref.h | 18 | ||||
| -rw-r--r-- | include/linux/pageblock-flags.h | 6 | ||||
| -rw-r--r-- | include/linux/pagemap.h | 2 | ||||
| -rw-r--r-- | include/linux/swap.h | 24 | ||||
| -rw-r--r-- | include/linux/swap_cgroup.h | 47 | ||||
| -rw-r--r-- | include/linux/thread_info.h | 2 | ||||
| -rw-r--r-- | include/linux/userfaultfd_k.h | 40 | ||||
| -rw-r--r-- | include/linux/vmalloc.h | 4 | ||||
| -rw-r--r-- | include/linux/vmpressure.h | 9 |
26 files changed, 374 insertions, 228 deletions
diff --git a/include/linux/bootmem_info.h b/include/linux/bootmem_info.h index 492ceeb1cdf8..f724340755e5 100644 --- a/include/linux/bootmem_info.h +++ b/include/linux/bootmem_info.h @@ -82,7 +82,6 @@ static inline void get_page_bootmem(unsigned long info, struct page *page, static inline void free_bootmem_page(struct page *page) { - kmemleak_free_part_phys(PFN_PHYS(page_to_pfn(page)), PAGE_SIZE); free_reserved_page(page); } #endif diff --git a/include/linux/compaction.h b/include/linux/compaction.h index 173d9c07a895..f29ef0653546 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -2,6 +2,8 @@ #ifndef _LINUX_COMPACTION_H #define _LINUX_COMPACTION_H +#include <linux/swap.h> + /* * Determines how hard direct compaction should try to succeed. * Lower value means higher priority, analogically to reclaim priority. @@ -73,11 +75,9 @@ static inline unsigned long compact_gap(unsigned int order) * effectively limited by COMPACT_CLUSTER_MAX, as that's the maximum * that the migrate scanner can have isolated on migrate list, and free * scanner is only invoked when the number of isolated free pages is - * lower than that. But it's not worth to complicate the formula here - * as a bigger gap for higher orders than strictly necessary can also - * improve chances of compaction success. + * lower than that. */ - return 2UL << order; + return min(2UL << order, COMPACT_CLUSTER_MAX); } static inline int current_is_kcompactd(void) @@ -101,7 +101,7 @@ extern void compaction_defer_reset(struct zone *zone, int order, bool alloc_success); bool compaction_zonelist_suitable(struct alloc_context *ac, int order, - int alloc_flags); + int alloc_flags, gfp_t gfp_mask); extern void __meminit kcompactd_run(int nid); extern void __meminit kcompactd_stop(int nid); diff --git a/include/linux/damon.h b/include/linux/damon.h index f2cdb7c3f5e6..6f7edb3590ef 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -8,23 +8,20 @@ #ifndef _DAMON_H_ #define _DAMON_H_ +#include <linux/math64.h> #include <linux/memcontrol.h> #include <linux/mutex.h> +#include <linux/prandom.h> #include <linux/time64.h> #include <linux/types.h> -#include <linux/random.h> /* Minimal region size. Every damon_region is aligned by this. */ #define DAMON_MIN_REGION_SZ PAGE_SIZE +/* Maximum number of monitoring probes. */ +#define DAMON_MAX_PROBES (4) /* Max priority score for DAMON-based operation schemes */ #define DAMOS_MAX_SCORE (99) -/* Get a random number in [l, r) */ -static inline unsigned long damon_rand(unsigned long l, unsigned long r) -{ - return l + get_random_u32_below(r - l); -} - /** * struct damon_addr_range - Represents an address region of [@start, @end). * @start: Start address of the region (inclusive). @@ -52,6 +49,7 @@ struct damon_size_range { * @nr_accesses: Access frequency of this region. * @nr_accesses_bp: @nr_accesses in basis point (0.01%) that updated for * each sampling interval. + * @probe_hits: Number of probe-positive region samples. * @list: List head for siblings. * @age: Age of this region. * @@ -80,6 +78,7 @@ struct damon_region { unsigned long sampling_addr; unsigned int nr_accesses; unsigned int nr_accesses_bp; + unsigned char probe_hits[DAMON_MAX_PROBES]; struct list_head list; unsigned int age; @@ -121,6 +120,7 @@ struct damon_target { * @DAMOS_PAGEOUT: Reclaim the region. * @DAMOS_HUGEPAGE: Call ``madvise()`` for the region with MADV_HUGEPAGE. * @DAMOS_NOHUGEPAGE: Call ``madvise()`` for the region with MADV_NOHUGEPAGE. + * @DAMOS_COLLAPSE: Call ``madvise()`` for the region with MADV_COLLAPSE. * @DAMOS_LRU_PRIO: Prioritize the region on its LRU lists. * @DAMOS_LRU_DEPRIO: Deprioritize the region on its LRU lists. * @DAMOS_MIGRATE_HOT: Migrate the regions prioritizing warmer regions. @@ -140,6 +140,7 @@ enum damos_action { DAMOS_PAGEOUT, DAMOS_HUGEPAGE, DAMOS_NOHUGEPAGE, + DAMOS_COLLAPSE, DAMOS_LRU_PRIO, DAMOS_LRU_DEPRIO, DAMOS_MIGRATE_HOT, @@ -159,6 +160,8 @@ enum damos_action { * @DAMOS_QUOTA_NODE_MEMCG_FREE_BP: MemFree ratio of a node for a cgroup. * @DAMOS_QUOTA_ACTIVE_MEM_BP: Active to total LRU memory ratio. * @DAMOS_QUOTA_INACTIVE_MEM_BP: Inactive to total LRU memory ratio. + * @DAMOS_QUOTA_NODE_ELIGIBLE_MEM_BP: Scheme-eligible memory ratio of a + * node in basis points (0-10000). * @NR_DAMOS_QUOTA_GOAL_METRICS: Number of DAMOS quota goal metrics. * * Metrics equal to larger than @NR_DAMOS_QUOTA_GOAL_METRICS are unsupported. @@ -172,6 +175,7 @@ enum damos_quota_goal_metric { DAMOS_QUOTA_NODE_MEMCG_FREE_BP, DAMOS_QUOTA_ACTIVE_MEM_BP, DAMOS_QUOTA_INACTIVE_MEM_BP, + DAMOS_QUOTA_NODE_ELIGIBLE_MEM_BP, NR_DAMOS_QUOTA_GOAL_METRICS, }; @@ -233,6 +237,8 @@ enum damos_quota_goal_tuner { * @goals: Head of quota tuning goals (&damos_quota_goal) list. * @goal_tuner: Goal-based @esz tuning algorithm to use. * @esz: Effective size quota in bytes. + * @fail_charge_num: Failed regions charge rate numerator. + * @fail_charge_denom: Failed regions charge rate denominator. * * @weight_sz: Weight of the region's size for prioritization. * @weight_nr_accesses: Weight of the region's nr_accesses for prioritization. @@ -262,6 +268,10 @@ enum damos_quota_goal_tuner { * * The resulting effective size quota in bytes is set to @esz. * + * For DAMOS action applying failed amount of regions, charging those same to + * those that the action has successfully applied may be unfair. For the + * reason, 'the size * @fail_charge_num / @fail_charge_denom' is charged. + * * For selecting regions within the quota, DAMON prioritizes current scheme's * target memory regions using the &struct damon_operations->get_scheme_score. * You could customize the prioritization logic by setting &weight_sz, @@ -276,6 +286,9 @@ struct damos_quota { enum damos_quota_goal_tuner goal_tuner; unsigned long esz; + unsigned int fail_charge_num; + unsigned int fail_charge_denom; + unsigned int weight_sz; unsigned int weight_nr_accesses; unsigned int weight_age; @@ -617,6 +630,7 @@ enum damon_ops_id { * @update: Update operations-related data structures. * @prepare_access_checks: Prepare next access check of target regions. * @check_accesses: Check the accesses to target regions. + * @apply_probes: Apply probes for each region. * @get_scheme_score: Get the score of a region for a scheme. * @apply_scheme: Apply a DAMON-based operation scheme. * @target_valid: Determine if the target is valid. @@ -643,6 +657,8 @@ enum damon_ops_id { * last preparation and update the number of observed accesses of each region. * It should also return max number of observed accesses that made as a result * of its update. The value will be used for regions adjustment threshold. + * @apply_probes should apply the data attribute probes to each region and + * accordingly update the probe hits counter of the region. * @get_scheme_score should return the priority score of a region for a scheme * as an integer in [0, &DAMOS_MAX_SCORE]. * @apply_scheme is called from @kdamond when a region for user provided @@ -660,6 +676,7 @@ struct damon_operations { void (*update)(struct damon_ctx *context); void (*prepare_access_checks)(struct damon_ctx *context); unsigned int (*check_accesses)(struct damon_ctx *context); + void (*apply_probes)(struct damon_ctx *context); int (*get_scheme_score)(struct damon_ctx *context, struct damon_region *r, struct damos *scheme); unsigned long (*apply_scheme)(struct damon_ctx *context, @@ -722,6 +739,47 @@ struct damon_intervals_goal { }; /** + * enum damon_filter_type - Type of &struct damon_filter + * + * @DAMON_FILTER_TYPE_ANON: Anonymous pages. + * @DAMON_FILTER_TYPE_MEMCG: Specific memcg's pages. + */ +enum damon_filter_type { + DAMON_FILTER_TYPE_ANON, + DAMON_FILTER_TYPE_MEMCG, +}; + +/** + * struct damon_filter - DAMON region filter for &struct damon_probe. + * + * @type: Type of the region. + * @matching: Whether this filter is for the type-matching ones. + * @allow: Whether the @type-@matching ones should pass this filter. + * @memcg_id: Memcg id of the question if @type is DAMON_FILTER_MEMCG. + * @list: Siblings list. + */ +struct damon_filter { + enum damon_filter_type type; + bool matching; + bool allow; + union { + u64 memcg_id; + }; + struct list_head list; +}; + +/** + * struct damon_probe - Data region attribute probe. + * + * @filters: Filters for assessing if a given region is for this probe. + * @list: Siblings list. + */ +struct damon_probe { + struct list_head filters; + struct list_head list; +}; + +/** * struct damon_attrs - Monitoring attributes for accuracy/overhead control. * * @sample_interval: The time between access samplings. @@ -787,6 +845,7 @@ struct damon_attrs { * @ops: Set of monitoring operations for given use cases. * @addr_unit: Scale factor for core to ops address conversion. * @min_region_sz: Minimum region size. + * @pause: Pause kdamond main loop. * @adaptive_targets: Head of monitoring targets (&damon_target) list. * @schemes: Head of schemes (&damos) list. */ @@ -838,13 +897,34 @@ struct damon_ctx { /* public: */ struct damon_operations ops; + struct list_head probes; unsigned long addr_unit; unsigned long min_region_sz; + bool pause; struct list_head adaptive_targets; struct list_head schemes; + + /* Per-ctx PRNG state for damon_rand(); kdamond is the sole consumer. */ + struct rnd_state rnd_state; }; +/* Get a random number in [@l, @r) using @ctx's lockless PRNG. */ +static inline unsigned long damon_rand(struct damon_ctx *ctx, + unsigned long l, unsigned long r) +{ + unsigned long span = r - l; + u64 rnd; + + if (span <= U32_MAX) { + rnd = prandom_u32_state(&ctx->rnd_state); + return l + (unsigned long)((rnd * span) >> 32); + } + rnd = ((u64)prandom_u32_state(&ctx->rnd_state) << 32) | + prandom_u32_state(&ctx->rnd_state); + return l + mul_u64_u64_shr(rnd, span, 64); +} + static inline struct damon_region *damon_next_region(struct damon_region *r) { return container_of(r->list.next, struct damon_region, list); @@ -870,15 +950,26 @@ static inline unsigned long damon_sz_region(struct damon_region *r) return r->ar.end - r->ar.start; } +#define damon_for_each_filter(f, p) \ + list_for_each_entry(f, &(p)->filters, list) + +#define damon_for_each_filter_safe(f, next, p) \ + list_for_each_entry_safe(f, next, &(p)->filters, list) + +#define damon_for_each_probe(p, ctx) \ + list_for_each_entry(p, &(ctx)->probes, list) + +#define damon_for_each_probe_safe(p, next, ctx) \ + list_for_each_entry_safe(p, next, &(ctx)->probes, list) #define damon_for_each_region(r, t) \ - list_for_each_entry(r, &t->regions_list, list) + list_for_each_entry(r, &(t)->regions_list, list) #define damon_for_each_region_from(r, t) \ - list_for_each_entry_from(r, &t->regions_list, list) + list_for_each_entry_from(r, &(t)->regions_list, list) #define damon_for_each_region_safe(r, next, t) \ - list_for_each_entry_safe(r, next, &t->regions_list, list) + list_for_each_entry_safe(r, next, &(t)->regions_list, list) #define damon_for_each_target(t, ctx) \ list_for_each_entry(t, &(ctx)->adaptive_targets, list) @@ -893,7 +984,7 @@ static inline unsigned long damon_sz_region(struct damon_region *r) list_for_each_entry_safe(s, next, &(ctx)->schemes, list) #define damos_for_each_quota_goal(goal, quota) \ - list_for_each_entry(goal, "a->goals, list) + list_for_each_entry(goal, &(quota)->goals, list) #define damos_for_each_quota_goal_safe(goal, next, quota) \ list_for_each_entry_safe(goal, next, &(quota)->goals, list) @@ -912,21 +1003,16 @@ static inline unsigned long damon_sz_region(struct damon_region *r) #ifdef CONFIG_DAMON -struct damon_region *damon_new_region(unsigned long start, unsigned long end); +struct damon_filter *damon_new_filter(enum damon_filter_type type, + bool matching, bool allow); +void damon_add_filter(struct damon_probe *probe, struct damon_filter *f); +void damon_destroy_filter(struct damon_filter *f); -/* - * Add a region between two other regions - */ -static inline void damon_insert_region(struct damon_region *r, - struct damon_region *prev, struct damon_region *next, - struct damon_target *t) -{ - __list_add(&r->list, &prev->list, &next->list); - t->nr_regions++; -} +struct damon_probe *damon_new_probe(void); +void damon_add_probe(struct damon_ctx *ctx, struct damon_probe *probe); + +struct damon_region *damon_new_region(unsigned long start, unsigned long end); -void damon_add_region(struct damon_region *r, struct damon_target *t); -void damon_destroy_region(struct damon_region *r, struct damon_target *t); int damon_set_regions(struct damon_target *t, struct damon_addr_range *ranges, unsigned int nr_ranges, unsigned long min_region_sz); void damon_update_region_access_rate(struct damon_region *r, bool accessed, @@ -994,7 +1080,7 @@ int damon_kdamond_pid(struct damon_ctx *ctx); int damon_call(struct damon_ctx *ctx, struct damon_call_control *control); int damos_walk(struct damon_ctx *ctx, struct damos_walk_control *control); -int damon_set_region_biggest_system_ram_default(struct damon_target *t, +int damon_set_region_system_rams_default(struct damon_target *t, unsigned long *start, unsigned long *end, unsigned long addr_unit, unsigned long min_region_sz); diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 51ef13ed756e..cdf95a9f0b87 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -239,6 +239,8 @@ unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid, struct page **page_array); #define __alloc_pages_bulk(...) alloc_hooks(alloc_pages_bulk_noprof(__VA_ARGS__)) +void free_pages_bulk(struct page **page_array, unsigned long nr_pages); + unsigned long alloc_pages_bulk_mempolicy_noprof(gfp_t gfp, unsigned long nr_pages, struct page **page_array); @@ -467,6 +469,8 @@ void free_contig_frozen_range(unsigned long pfn, unsigned long nr_pages); void free_contig_range(unsigned long pfn, unsigned long nr_pages); #endif +void __free_contig_range(unsigned long pfn, unsigned long nr_pages); + DEFINE_FREE(free_page, void *, free_page((unsigned long)_T)) #endif /* __LINUX_GFP_H */ diff --git a/include/linux/gfp_types.h b/include/linux/gfp_types.h index cd4972a7c97c..54ca0c88bab6 100644 --- a/include/linux/gfp_types.h +++ b/include/linux/gfp_types.h @@ -281,9 +281,9 @@ enum { * * %__GFP_SKIP_KASAN makes KASAN skip unpoisoning on page allocation. * Used for userspace and vmalloc pages; the latter are unpoisoned by - * kasan_unpoison_vmalloc instead. For userspace pages, results in - * poisoning being skipped as well, see should_skip_kasan_poison for - * details. Only effective in HW_TAGS mode. + * kasan_unpoison_vmalloc instead. If passed to vmalloc, kasan_unpoison_vmalloc + * is skipped too. For userspace pages, results in poisoning being skipped as + * well, see should_skip_kasan_poison for details. Only effective in HW_TAGS mode. */ #define __GFP_NOWARN ((__force gfp_t)___GFP_NOWARN) #define __GFP_COMP ((__force gfp_t)___GFP_COMP) diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h index 0574c21ca45d..bb71e7dba4f7 100644 --- a/include/linux/highmem-internal.h +++ b/include/linux/highmem-internal.h @@ -262,7 +262,7 @@ static inline bool is_kmap_addr(const void *x) * @__addr: Virtual address to be unmapped * * Unmaps an address previously mapped by kmap_atomic() and re-enables - * pagefaults. Depending on PREEMP_RT configuration, re-enables also + * pagefaults. Depending on PREEMPT_RT configuration, re-enables also * migration and preemption. Users should not count on these side effects. * * Mappings should be unmapped in the reverse order that they were mapped. diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 2949e5acff35..c0d223d0c556 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -238,6 +238,31 @@ static inline bool thp_vma_suitable_order(struct vm_area_struct *vma, } /* + * Make sure huge_gfp is always more limited than limit_gfp. + * Some shmem users want THP allocation to be done less aggressively + * and only in certain zone. + */ +static inline gfp_t thp_shmem_limit_gfp_mask(gfp_t huge_gfp, gfp_t limit_gfp) +{ + gfp_t allowflags = __GFP_IO | __GFP_FS | __GFP_RECLAIM; + gfp_t denyflags = __GFP_NOWARN | __GFP_NORETRY; + gfp_t zoneflags = limit_gfp & GFP_ZONEMASK; + gfp_t result = huge_gfp & ~(allowflags | GFP_ZONEMASK); + + /* Allow allocations only from the originally specified zones. */ + result |= zoneflags; + + /* + * Minimize the result gfp by taking the union with the deny flags, + * and the intersection of the allow flags. + */ + result |= (limit_gfp & denyflags); + result |= (huge_gfp & limit_gfp) & allowflags; + + return result; +} + +/* * Filter the bitfield of input orders to the ones suitable for use in the vma. * See thp_vma_suitable_order(). * All orders that pass the checks are returned as a bitfield. @@ -414,10 +439,10 @@ static inline int split_huge_page(struct page *page) { return split_huge_page_to_list_to_order(page, NULL, 0); } + +int folio_memcg_alloc_deferred(struct folio *folio); + void deferred_split_folio(struct folio *folio, bool partially_mapped); -#ifdef CONFIG_MEMCG -void reparent_deferred_split_queue(struct mem_cgroup *memcg); -#endif void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned long address, bool freeze); @@ -581,6 +606,11 @@ static inline bool thp_vma_suitable_order(struct vm_area_struct *vma, return false; } +static inline gfp_t thp_shmem_limit_gfp_mask(gfp_t huge_gfp, gfp_t limit_gfp) +{ + return huge_gfp; +} + static inline unsigned long thp_vma_suitable_orders(struct vm_area_struct *vma, unsigned long addr, unsigned long orders) { @@ -649,8 +679,15 @@ static inline int try_folio_split_to_order(struct folio *folio, return -EINVAL; } -static inline void deferred_split_folio(struct folio *folio, bool partially_mapped) {} -static inline void reparent_deferred_split_queue(struct mem_cgroup *memcg) {} +static inline int folio_memcg_alloc_deferred(struct folio *folio) +{ + return 0; +} + +static inline void deferred_split_folio(struct folio *folio, bool partially_mapped) +{ +} + #define split_huge_pmd(__vma, __pmd, __address) \ do { } while (0) diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h index fe739d35a864..a450fffe1550 100644 --- a/include/linux/list_lru.h +++ b/include/linux/list_lru.h @@ -81,9 +81,76 @@ static inline int list_lru_init_memcg_key(struct list_lru *lru, struct shrinker int memcg_list_lru_alloc(struct mem_cgroup *memcg, struct list_lru *lru, gfp_t gfp); + +#ifdef CONFIG_MEMCG +/** + * folio_memcg_list_lru_alloc - allocate list_lru heads for shrinkable folio + * @folio: the newly allocated & charged folio + * @lru: the list_lru this might be queued on + * @gfp: gfp mask + * + * Allocate list_lru heads (per-memcg, per-node) needed to queue this + * particular folio down the line. + * + * This does memcg_list_lru_alloc(), but on the memcg that @folio is + * associated with. Handles folio_memcg() access rules in the fast + * path (list_lru heads allocated) and the allocation slowpath. + * + * Returns 0 on success, a negative error value otherwise. + */ +int folio_memcg_list_lru_alloc(struct folio *folio, struct list_lru *lru, + gfp_t gfp); +#else +static inline int folio_memcg_list_lru_alloc(struct folio *folio, + struct list_lru *lru, gfp_t gfp) +{ + return 0; +} +#endif + void memcg_reparent_list_lrus(struct mem_cgroup *memcg, struct mem_cgroup *parent); /** + * list_lru_lock: lock the sublist for the given node and memcg + * @lru: the lru pointer + * @nid: the node id of the sublist to lock. + * @memcg: pointer to the cgroup of the sublist to lock. On return, + * updated to the cgroup whose sublist was actually locked, + * which may be an ancestor if the original memcg was dying. + * + * Returns the locked list_lru_one sublist. The caller must call + * list_lru_unlock() when done. + * + * You must ensure that the memcg is not freed during this call (e.g., with + * rcu or by taking a css refcnt). + * + * Return: the locked list_lru_one, or NULL on failure + */ +struct list_lru_one *list_lru_lock(struct list_lru *lru, int nid, + struct mem_cgroup **memcg); + +/** + * list_lru_unlock: unlock a sublist locked by list_lru_lock() + * @l: the list_lru_one to unlock + */ +void list_lru_unlock(struct list_lru_one *l); + +struct list_lru_one *list_lru_lock_irq(struct list_lru *lru, int nid, + struct mem_cgroup **memcg); +void list_lru_unlock_irq(struct list_lru_one *l); + +struct list_lru_one *list_lru_lock_irqsave(struct list_lru *lru, int nid, + struct mem_cgroup **memcg, unsigned long *irq_flags); +void list_lru_unlock_irqrestore(struct list_lru_one *l, + unsigned long *irq_flags); + +/* Caller-locked variants, see list_lru_add() etc for documentation */ +bool __list_lru_add(struct list_lru *lru, struct list_lru_one *l, + struct list_head *item, int nid, struct mem_cgroup *memcg); +bool __list_lru_del(struct list_lru *lru, struct list_lru_one *l, + struct list_head *item, int nid); + +/** * list_lru_add: add an element to the lru list's tail * @lru: the lru pointer * @item: the item to be added. @@ -115,6 +182,9 @@ void memcg_reparent_list_lrus(struct mem_cgroup *memcg, struct mem_cgroup *paren bool list_lru_add(struct list_lru *lru, struct list_head *item, int nid, struct mem_cgroup *memcg); +bool list_lru_add_irq(struct list_lru *lru, struct list_head *item, int nid, + struct mem_cgroup *memcg); + /** * list_lru_add_obj: add an element to the lru list's tail * @lru: the lru pointer diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index dc3fa687759b..e1f46a0016fc 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -29,6 +29,7 @@ struct obj_cgroup; struct page; struct mm_struct; struct kmem_cache; +struct swap_cluster_info; /* Cgroup-specific page state, on top of universal node page state */ enum memcg_stat_item { @@ -277,10 +278,6 @@ struct mem_cgroup { struct memcg_cgwb_frn cgwb_frn[MEMCG_CGWB_FRN_CNT]; #endif -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - struct deferred_split deferred_split_queue; -#endif - #ifdef CONFIG_LRU_GEN_WALKS_MMU /* per-memcg mm_struct list */ struct lru_gen_mm_list mm_list; @@ -646,8 +643,8 @@ static inline int mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, int mem_cgroup_charge_hugetlb(struct folio* folio, gfp_t gfp); -int mem_cgroup_swapin_charge_folio(struct folio *folio, struct mm_struct *mm, - gfp_t gfp, swp_entry_t entry); +int mem_cgroup_swapin_charge_folio(struct folio *folio, unsigned short id, + struct mm_struct *mm, gfp_t gfp); void __mem_cgroup_uncharge(struct folio *folio); @@ -1137,7 +1134,7 @@ static inline int mem_cgroup_charge_hugetlb(struct folio* folio, gfp_t gfp) } static inline int mem_cgroup_swapin_charge_folio(struct folio *folio, - struct mm_struct *mm, gfp_t gfp, swp_entry_t entry) + unsigned short id, struct mm_struct *mm, gfp_t gfp) { return 0; } @@ -1899,9 +1896,6 @@ static inline void mem_cgroup_exit_user_fault(void) current->in_user_fault = 0; } -void memcg1_swapout(struct folio *folio, swp_entry_t entry); -void memcg1_swapin(swp_entry_t entry, unsigned int nr_pages); - #else /* CONFIG_MEMCG_V1 */ static inline unsigned long memcg1_soft_limit_reclaim(pg_data_t *pgdat, int order, @@ -1929,14 +1923,23 @@ static inline void mem_cgroup_exit_user_fault(void) { } -static inline void memcg1_swapout(struct folio *folio, swp_entry_t entry) +#endif /* CONFIG_MEMCG_V1 */ + +#if defined(CONFIG_MEMCG_V1) && defined(CONFIG_SWAP) + +void __memcg1_swapout(struct folio *folio, struct swap_cluster_info *ci); +void memcg1_swapin(struct folio *folio); + +#else + +static inline void __memcg1_swapout(struct folio *folio, + struct swap_cluster_info *ci) { } -static inline void memcg1_swapin(swp_entry_t entry, unsigned int nr_pages) +static inline void memcg1_swapin(struct folio *folio) { } - -#endif /* CONFIG_MEMCG_V1 */ +#endif #endif /* _LINUX_MEMCONTROL_H */ diff --git a/include/linux/memory.h b/include/linux/memory.h index 5bb5599c6b2b..463dc02f6cff 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -158,7 +158,11 @@ int create_memory_block_devices(unsigned long start, unsigned long size, void remove_memory_block_devices(unsigned long start, unsigned long size); extern void memory_dev_init(void); extern int memory_notify(enum memory_block_state state, void *v); -extern struct memory_block *find_memory_block(unsigned long section_nr); +struct memory_block *memory_block_get(unsigned long block_id); +static inline void memory_block_put(struct memory_block *mem) +{ + put_device(&mem->dev); +} typedef int (*walk_memory_blocks_func_t)(struct memory_block *, void *); extern int walk_memory_blocks(unsigned long start, unsigned long size, void *arg, walk_memory_blocks_func_t func); @@ -171,7 +175,6 @@ struct memory_group *memory_group_find_by_id(int mgid); typedef int (*walk_memory_groups_func_t)(struct memory_group *, void *); int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func, struct memory_group *excluded, void *arg); -struct memory_block *find_memory_block_by_id(unsigned long block_id); #define hotplug_memory_notifier(fn, pri) ({ \ static __meminitdata struct notifier_block fn##_mem_nb =\ { .notifier_call = fn, .priority = pri };\ diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 815e908c4135..7c9d66729c60 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -135,9 +135,10 @@ static inline bool movable_node_is_enabled(void) return movable_node_enabled; } -extern void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap); +extern void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap, + struct dev_pagemap *pgmap); extern void __remove_pages(unsigned long start_pfn, unsigned long nr_pages, - struct vmem_altmap *altmap); + struct vmem_altmap *altmap, struct dev_pagemap *pgmap); /* reasonably generic interface to expand the physical pages */ extern int __add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, @@ -307,7 +308,8 @@ extern int sparse_add_section(int nid, unsigned long pfn, unsigned long nr_pages, struct vmem_altmap *altmap, struct dev_pagemap *pgmap); extern void sparse_remove_section(unsigned long pfn, unsigned long nr_pages, - struct vmem_altmap *altmap); + struct vmem_altmap *altmap, + struct dev_pagemap *pgmap); extern struct zone *zone_for_pfn_range(enum mmop online_type, int nid, struct memory_group *group, unsigned long start_pfn, unsigned long nr_pages); diff --git a/include/linux/mm.h b/include/linux/mm.h index fc2acedf0b76..485df9c2dbdd 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -496,6 +496,21 @@ enum { #else #define VM_UFFD_MINOR VM_NONE #endif + +/* + * vma_flags_t masks for the userfaultfd VMA flags. VMA_UFFD_MINOR is gated on + * the same config as VM_UFFD_MINOR -- which implies 64BIT, where the bit fits + * -- so an out-of-range bit is never fed to mk_vma_flags() on a build whose + * bitmap cannot hold it. + */ +#define VMA_UFFD_MISSING mk_vma_flags(VMA_UFFD_MISSING_BIT) +#define VMA_UFFD_WP mk_vma_flags(VMA_UFFD_WP_BIT) +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR +#define VMA_UFFD_MINOR mk_vma_flags(VMA_UFFD_MINOR_BIT) +#else +#define VMA_UFFD_MINOR EMPTY_VMA_FLAGS +#endif + #ifdef CONFIG_64BIT #define VM_ALLOW_ANY_UNCACHED INIT_VM_FLAG(ALLOW_ANY_UNCACHED) #define VM_SEALED INIT_VM_FLAG(SEALED) @@ -1238,6 +1253,30 @@ static __always_inline void vma_flags_set_mask(vma_flags_t *flags, #define vma_flags_set(flags, ...) \ vma_flags_set_mask(flags, mk_vma_flags(__VA_ARGS__)) +static __always_inline vma_flags_t __mk_vma_flags_from_masks(size_t count, + const vma_flags_t *masks) +{ + vma_flags_t flags = EMPTY_VMA_FLAGS; + size_t i; + + for (i = 0; i < count; i++) + vma_flags_set_mask(&flags, masks[i]); + return flags; +} + +/* + * Combine pre-computed vma_flags_t masks into one value, e.g.: + * + * vma_flags_t flags = mk_vma_flags_from_masks(VMA_UFFD_WP, VMA_UFFD_MINOR); + * + * Unlike mk_vma_flags(), which takes bit numbers, this takes whole masks -- + * each of which may be EMPTY_VMA_FLAGS when its feature is unavailable -- so a + * bit that does not exist on the current build is never materialised. + */ +#define mk_vma_flags_from_masks(...) \ + __mk_vma_flags_from_masks(COUNT_ARGS(__VA_ARGS__), \ + (const vma_flags_t []){__VA_ARGS__}) + /* Clear all of the to-clear flags in flags, non-atomically. */ static __always_inline void vma_flags_clear_mask(vma_flags_t *flags, vma_flags_t to_clear) @@ -1489,6 +1528,11 @@ static inline void vma_set_anonymous(struct vm_area_struct *vma) vma->vm_ops = NULL; } +static inline void vma_desc_set_anonymous(struct vm_area_desc *desc) +{ + desc->vm_ops = NULL; +} + static inline bool vma_is_anonymous(struct vm_area_struct *vma) { return !vma->vm_ops; @@ -1888,16 +1932,6 @@ static inline bool folio_mapped(const struct folio *folio) return folio_mapcount(folio) >= 1; } -/* - * Return true if this page is mapped into pagetables. - * For compound page it returns true if any sub-page of compound page is mapped, - * even if this particular sub-page is not itself mapped by any PTE or PMD. - */ -static inline bool page_mapped(const struct page *page) -{ - return folio_mapped(page_folio(page)); -} - static inline struct page *virt_to_head_page(const void *x) { struct page *page = virt_to_page(x); @@ -4855,18 +4889,10 @@ static inline void print_vma_addr(char *prefix, unsigned long rip) } #endif -void *sparse_buffer_alloc(unsigned long size); unsigned long section_map_size(void); struct page * __populate_section_memmap(unsigned long pfn, unsigned long nr_pages, int nid, struct vmem_altmap *altmap, struct dev_pagemap *pgmap); -pgd_t *vmemmap_pgd_populate(unsigned long addr, int node); -p4d_t *vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node); -pud_t *vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node); -pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node); -pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node, - struct vmem_altmap *altmap, unsigned long ptpfn, - unsigned long flags); void *vmemmap_alloc_block(unsigned long size, int node); struct vmem_altmap; void *vmemmap_alloc_block_buf(unsigned long size, int node, diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index a171070e15f0..a8430a7ae054 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -247,7 +247,7 @@ static inline unsigned long lru_gen_folio_seq(const struct lruvec *lruvec, (folio_test_dirty(folio) || folio_test_writeback(folio)))) gen = MIN_NR_GENS; else - gen = MAX_NR_GENS - folio_test_workingset(folio); + gen = MAX_NR_GENS - (folio_test_workingset(folio) || folio_test_referenced(folio)); return max(READ_ONCE(lrugen->max_seq) - gen + 1, READ_ONCE(lrugen->min_seq[type])); } diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 5cadb00d9352..b18c2b2e7d2c 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -845,23 +845,10 @@ struct mmap_action { enum mmap_action_type type; /* - * If specified, this hook is invoked after the selected action has been - * successfully completed. Note that the VMA write lock still held. - * - * The absolute minimum ought to be done here. - * - * Returns 0 on success, or an error code. - */ - int (*success_hook)(const struct vm_area_struct *vma); - - /* - * If specified, this hook is invoked when an error occurred when - * attempting the selected action. - * - * The hook can return an error code in order to filter the error, but - * it is not valid to clear the error here. + * If non-zero, replace errors that arise from mmap actions with this + * value instead. Only valid error codes may be specified. */ - int (*error_hook)(int err); + int error_override; /* * This should be set in rare instances where the operation required diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index 69c304b467df..a11a44eef521 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -134,8 +134,8 @@ struct mmu_notifier_ops { * Invalidation of multiple concurrent ranges may be * optionally permitted by the driver. Either way the * establishment of sptes is forbidden in the range passed to - * invalidate_range_begin/end for the whole duration of the - * invalidate_range_begin/end critical section. + * invalidate_range_start/end for the whole duration of the + * invalidate_range_start/end critical section. * * invalidate_range_start() is called when all pages in the * range are still mapped and have at least a refcount of one. diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 9adb2ad21da5..ca2712187147 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -177,9 +177,12 @@ static inline bool migratetype_is_mergeable(int mt) return mt < MIGRATE_PCPTYPES; } -#define for_each_migratetype_order(order, type) \ - for (order = 0; order < NR_PAGE_ORDERS; order++) \ - for (type = 0; type < MIGRATE_TYPES; type++) +#define for_each_free_list(list, zone, order) \ + for (order = 0; order < NR_PAGE_ORDERS; order++) \ + for (unsigned int __type = 0; \ + __type < MIGRATE_TYPES && \ + (list = &(zone)->free_area[order].free_list[__type], 1); \ + __type++) extern int page_group_by_mobility_disabled; @@ -211,7 +214,6 @@ enum numa_stat_item { #endif enum zone_stat_item { - /* First 128 byte cacheline (assuming 64 bit words) */ NR_FREE_PAGES, NR_FREE_PAGES_BLOCKS, NR_ZONE_LRU_BASE, /* Used only for compaction and reclaim retry */ @@ -222,7 +224,6 @@ enum zone_stat_item { NR_ZONE_UNEVICTABLE, NR_ZONE_WRITE_PENDING, /* Count of dirty, writeback and unstable pages */ NR_MLOCK, /* mlock()ed pages found and moved off LRU */ - /* Second 128 byte cacheline */ #if IS_ENABLED(CONFIG_ZSMALLOC) NR_ZSPAGES, /* allocated in zsmalloc */ #endif @@ -1428,14 +1429,6 @@ struct zonelist { */ extern struct page *mem_map; -#ifdef CONFIG_TRANSPARENT_HUGEPAGE -struct deferred_split { - spinlock_t split_queue_lock; - struct list_head split_queue; - unsigned long split_queue_len; -}; -#endif - #ifdef CONFIG_MEMORY_FAILURE /* * Per NUMA node memory failure handling statistics. @@ -1561,10 +1554,6 @@ typedef struct pglist_data { unsigned long first_deferred_pfn; #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */ -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - struct deferred_split deferred_split_queue; -#endif - #ifdef CONFIG_NUMA_BALANCING /* start time in ms of current promote rate limit period */ unsigned int nbp_rl_start; diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index 204c92462f3c..b842aa525546 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -24,23 +24,23 @@ * void nodes_setall(mask) set all bits * void nodes_clear(mask) clear all bits * int node_isset(node, mask) true iff bit 'node' set in mask - * int node_test_and_set(node, mask) test and set bit 'node' in mask + * bool node_test_and_set(node, mask) test and set bit 'node' in mask * - * void nodes_and(dst, src1, src2) dst = src1 & src2 [intersection] + * bool nodes_and(dst, src1, src2) dst = src1 & src2 [intersection] * void nodes_or(dst, src1, src2) dst = src1 | src2 [union] * void nodes_xor(dst, src1, src2) dst = src1 ^ src2 - * void nodes_andnot(dst, src1, src2) dst = src1 & ~src2 + * bool nodes_andnot(dst, src1, src2) dst = src1 & ~src2 * void nodes_complement(dst, src) dst = ~src * - * int nodes_equal(mask1, mask2) Does mask1 == mask2? - * int nodes_intersects(mask1, mask2) Do mask1 and mask2 intersect? - * int nodes_subset(mask1, mask2) Is mask1 a subset of mask2? - * int nodes_empty(mask) Is mask empty (no bits sets)? - * int nodes_full(mask) Is mask full (all bits sets)? + * bool nodes_equal(mask1, mask2) Does mask1 == mask2? + * bool nodes_intersects(mask1, mask2) Do mask1 and mask2 intersect? + * bool nodes_subset(mask1, mask2) Is mask1 a subset of mask2? + * bool nodes_empty(mask) Is mask empty (no bits sets)? + * bool nodes_full(mask) Is mask full (all bits sets)? * int nodes_weight(mask) Hamming weight - number of set bits * * unsigned int first_node(mask) Number lowest set bit, or MAX_NUMNODES - * unsigend int next_node(node, mask) Next node past 'node', or MAX_NUMNODES + * unsigned int next_node(node, mask) Next node past 'node', or MAX_NUMNODES * unsigned int next_node_in(node, mask) Next node past 'node', or wrap to first, * or MAX_NUMNODES * unsigned int first_unset_node(mask) First node not set in mask, or diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h index 94d3f0e71c06..9f5c75d06f76 100644 --- a/include/linux/page_ref.h +++ b/include/linux/page_ref.h @@ -71,6 +71,12 @@ static inline int page_ref_count(const struct page *page) * folio_ref_count - The reference count on this folio. * @folio: The folio. * + * Folios contain a reference count. When that reference count reaches + * zero, the folio is referred to as frozen. At this point, it will + * usually be returned to the memory allocator, but some parts of the + * kernel freeze folios in order to perform unusual operations on them + * such as splitting or migration. + * * The refcount is usually incremented by calls to folio_get() and * decremented by calls to folio_put(). Some typical users of the * folio refcount: @@ -82,6 +88,18 @@ static inline int page_ref_count(const struct page *page) * - Pipes * - Direct IO which references this page in the process address space * + * The reference count has three components: expected, temporary and + * spurious. The expected reference count of a folio is that which + * we would logically expect it to be from just reading the code. + * Temporary refcounts are gained by threads which need a temporary + * reference to make sure the folio isn't reallocated while they use it. + * Spurious refcounts are gained by threads which, thanks to RCU walks + * of the page tables or file cache, find a stale pointer to a folio. + * These threads will drop the refcount after discoveering the pointer + * is stale, but it can surprise other users to see the spurious refcount + * on a freshly allocated folio (eg they may see a refcount of 2 instead + * of 1). + * * Return: The number of references to this folio. */ static inline int folio_ref_count(const struct folio *folio) diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h index e046278a01fa..9a6c3ea17684 100644 --- a/include/linux/pageblock-flags.h +++ b/include/linux/pageblock-flags.h @@ -36,12 +36,12 @@ enum pageblock_bits { #define NR_PAGEBLOCK_BITS (roundup_pow_of_two(__NR_PAGEBLOCK_BITS)) -#define MIGRATETYPE_MASK (BIT(PB_migrate_0)|BIT(PB_migrate_1)|BIT(PB_migrate_2)) +#define PAGEBLOCK_MIGRATETYPE_MASK (BIT(PB_migrate_0)|BIT(PB_migrate_1)|BIT(PB_migrate_2)) #ifdef CONFIG_MEMORY_ISOLATION -#define MIGRATETYPE_AND_ISO_MASK (MIGRATETYPE_MASK | BIT(PB_migrate_isolate)) +#define PAGEBLOCK_ISO_MASK BIT(PB_migrate_isolate) #else -#define MIGRATETYPE_AND_ISO_MASK MIGRATETYPE_MASK +#define PAGEBLOCK_ISO_MASK 0 #endif #if defined(CONFIG_HUGETLB_PAGE) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 31a848485ad9..1f50991b43e3 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -1350,6 +1350,7 @@ struct readahead_control { struct file_ra_state *ra; /* private: use the readahead_* accessors instead */ pgoff_t _index; + pgoff_t _max_index; /* limit readahead to _max_index, inclusive */ unsigned int _nr_pages; unsigned int _batch_count; bool dropbehind; @@ -1363,6 +1364,7 @@ struct readahead_control { .mapping = m, \ .ra = r, \ ._index = i, \ + ._max_index = ULONG_MAX, \ } #define VM_READAHEAD_PAGES (SZ_128K / PAGE_SIZE) diff --git a/include/linux/swap.h b/include/linux/swap.h index 7a09df6977a5..8f0f68e245ba 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -213,6 +213,7 @@ enum { SWP_PAGE_DISCARD = (1 << 10), /* freed swap page-cluster discards */ SWP_STABLE_WRITES = (1 << 11), /* no overwrite PG_writeback pages */ SWP_SYNCHRONOUS_IO = (1 << 12), /* synchronous IO is efficient */ + SWP_HIBERNATION = (1 << 13), /* pinned for hibernation */ /* add others here before... */ }; @@ -252,7 +253,6 @@ struct swap_info_struct { struct plist_node list; /* entry in swap_active_head */ signed char type; /* strange name for an index */ unsigned int max; /* size of this swap device */ - unsigned long *zeromap; /* kvmalloc'ed bitmap to track zero pages */ struct swap_cluster_info *cluster_info; /* cluster info. Only for SSD */ struct list_head free_clusters; /* free clusters list */ struct list_head full_clusters; /* full clusters list */ @@ -433,7 +433,9 @@ static inline long get_nr_swap_pages(void) } extern void si_swapinfo(struct sysinfo *); -int swap_type_of(dev_t device, sector_t offset); +extern int pin_hibernation_swap_type(dev_t device, sector_t offset); +extern void unpin_hibernation_swap_type(int type); +extern int find_hibernation_swap_type(dev_t device, sector_t offset); int find_first_swap(dev_t *device); extern unsigned int count_swap_pages(int, int); extern sector_t swapdev_block(int, pgoff_t); @@ -571,33 +573,31 @@ static inline void folio_throttle_swaprate(struct folio *folio, gfp_t gfp) #endif #if defined(CONFIG_MEMCG) && defined(CONFIG_SWAP) -int __mem_cgroup_try_charge_swap(struct folio *folio, swp_entry_t entry); -static inline int mem_cgroup_try_charge_swap(struct folio *folio, - swp_entry_t entry) +int __mem_cgroup_try_charge_swap(struct folio *folio); +static inline int mem_cgroup_try_charge_swap(struct folio *folio) { if (mem_cgroup_disabled()) return 0; - return __mem_cgroup_try_charge_swap(folio, entry); + return __mem_cgroup_try_charge_swap(folio); } -extern void __mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_pages); -static inline void mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_pages) +extern void __mem_cgroup_uncharge_swap(unsigned short id, unsigned int nr_pages); +static inline void mem_cgroup_uncharge_swap(unsigned short id, unsigned int nr_pages) { if (mem_cgroup_disabled()) return; - __mem_cgroup_uncharge_swap(entry, nr_pages); + __mem_cgroup_uncharge_swap(id, nr_pages); } extern long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg); extern bool mem_cgroup_swap_full(struct folio *folio); #else -static inline int mem_cgroup_try_charge_swap(struct folio *folio, - swp_entry_t entry) +static inline int mem_cgroup_try_charge_swap(struct folio *folio) { return 0; } -static inline void mem_cgroup_uncharge_swap(swp_entry_t entry, +static inline void mem_cgroup_uncharge_swap(unsigned short id, unsigned int nr_pages) { } diff --git a/include/linux/swap_cgroup.h b/include/linux/swap_cgroup.h deleted file mode 100644 index 91cdf12190a0..000000000000 --- a/include/linux/swap_cgroup.h +++ /dev/null @@ -1,47 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __LINUX_SWAP_CGROUP_H -#define __LINUX_SWAP_CGROUP_H - -#include <linux/swap.h> - -#if defined(CONFIG_MEMCG) && defined(CONFIG_SWAP) - -extern void swap_cgroup_record(struct folio *folio, unsigned short id, swp_entry_t ent); -extern unsigned short swap_cgroup_clear(swp_entry_t ent, unsigned int nr_ents); -extern unsigned short lookup_swap_cgroup_id(swp_entry_t ent); -extern int swap_cgroup_swapon(int type, unsigned long max_pages); -extern void swap_cgroup_swapoff(int type); - -#else - -static inline -void swap_cgroup_record(struct folio *folio, unsigned short id, swp_entry_t ent) -{ -} - -static inline -unsigned short swap_cgroup_clear(swp_entry_t ent, unsigned int nr_ents) -{ - return 0; -} - -static inline -unsigned short lookup_swap_cgroup_id(swp_entry_t ent) -{ - return 0; -} - -static inline int -swap_cgroup_swapon(int type, unsigned long max_pages) -{ - return 0; -} - -static inline void swap_cgroup_swapoff(int type) -{ - return; -} - -#endif - -#endif /* __LINUX_SWAP_CGROUP_H */ diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 051e42902690..307b8390fc67 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -92,7 +92,7 @@ static inline long set_restart_fn(struct restart_block *restart, #define THREAD_ALIGN THREAD_SIZE #endif -#define THREADINFO_GFP (GFP_KERNEL_ACCOUNT | __GFP_ZERO) +#define THREADINFO_GFP (GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_SKIP_KASAN) /* * flag set/clear/test wrappers diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index d2920f98ab86..68edac4dcd78 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -23,8 +23,8 @@ /* The set of all possible UFFD-related VM flags. */ #define __VM_UFFD_FLAGS (VM_UFFD_MISSING | VM_UFFD_WP | VM_UFFD_MINOR) -#define __VMA_UFFD_FLAGS mk_vma_flags(VMA_UFFD_MISSING_BIT, VMA_UFFD_WP_BIT, \ - VMA_UFFD_MINOR_BIT) +#define __VMA_UFFD_FLAGS mk_vma_flags_from_masks(VMA_UFFD_MISSING, VMA_UFFD_WP, \ + VMA_UFFD_MINOR) /* * CAREFUL: Check include/uapi/asm-generic/fcntl.h when defining @@ -147,26 +147,12 @@ static inline uffd_flags_t uffd_flags_set_mode(uffd_flags_t flags, enum mfill_at /* Flags controlling behavior. These behavior changes are mode-independent. */ #define MFILL_ATOMIC_WP MFILL_ATOMIC_FLAG(0) -extern ssize_t mfill_atomic_copy(struct userfaultfd_ctx *ctx, unsigned long dst_start, - unsigned long src_start, unsigned long len, - uffd_flags_t flags); -extern ssize_t mfill_atomic_zeropage(struct userfaultfd_ctx *ctx, - unsigned long dst_start, - unsigned long len); -extern ssize_t mfill_atomic_continue(struct userfaultfd_ctx *ctx, unsigned long dst_start, - unsigned long len, uffd_flags_t flags); -extern ssize_t mfill_atomic_poison(struct userfaultfd_ctx *ctx, unsigned long start, - unsigned long len, uffd_flags_t flags); -extern int mwriteprotect_range(struct userfaultfd_ctx *ctx, unsigned long start, - unsigned long len, bool enable_wp); extern long uffd_wp_range(struct vm_area_struct *vma, unsigned long start, unsigned long len, bool enable_wp); /* move_pages */ void double_pt_lock(spinlock_t *ptl1, spinlock_t *ptl2); void double_pt_unlock(spinlock_t *ptl1, spinlock_t *ptl2); -ssize_t move_pages(struct userfaultfd_ctx *ctx, unsigned long dst_start, - unsigned long src_start, unsigned long len, __u64 flags); int move_pages_huge_pmd(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, pmd_t dst_pmdval, struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma, @@ -239,9 +225,6 @@ static inline bool userfaultfd_armed(struct vm_area_struct *vma) return vma->vm_flags & __VM_UFFD_FLAGS; } -bool vma_can_userfault(struct vm_area_struct *vma, vm_flags_t vm_flags, - bool wp_async); - static inline bool vma_has_uffd_without_event_remap(struct vm_area_struct *vma) { struct userfaultfd_ctx *uffd_ctx = vma->vm_userfaultfd_ctx.ctx; @@ -271,25 +254,6 @@ extern void userfaultfd_unmap_complete(struct mm_struct *mm, extern bool userfaultfd_wp_unpopulated(struct vm_area_struct *vma); extern bool userfaultfd_wp_async(struct vm_area_struct *vma); -void userfaultfd_reset_ctx(struct vm_area_struct *vma); - -struct vm_area_struct *userfaultfd_clear_vma(struct vma_iterator *vmi, - struct vm_area_struct *prev, - struct vm_area_struct *vma, - unsigned long start, - unsigned long end); - -int userfaultfd_register_range(struct userfaultfd_ctx *ctx, - struct vm_area_struct *vma, - vm_flags_t vm_flags, - unsigned long start, unsigned long end, - bool wp_async); - -void userfaultfd_release_new(struct userfaultfd_ctx *ctx); - -void userfaultfd_release_all(struct mm_struct *mm, - struct userfaultfd_ctx *ctx); - static inline bool userfaultfd_wp_use_markers(struct vm_area_struct *vma) { /* Only wr-protect mode uses pte markers */ diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 3b02c0c6b371..d87dc7f77f4e 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -265,7 +265,9 @@ static inline bool is_vm_area_hugepages(const void *addr) * allocated in the vmalloc layer. */ #ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC - return find_vm_area(addr)->page_order > 0; + struct vm_struct *area = find_vm_area(addr); + + return area && area->page_order > 0; #else return false; #endif diff --git a/include/linux/vmpressure.h b/include/linux/vmpressure.h index 6a2f51ebbfd3..faecd5522401 100644 --- a/include/linux/vmpressure.h +++ b/include/linux/vmpressure.h @@ -30,8 +30,8 @@ struct vmpressure { struct mem_cgroup; #ifdef CONFIG_MEMCG -extern void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree, - unsigned long scanned, unsigned long reclaimed); +void vmpressure(gfp_t gfp, int order, struct mem_cgroup *memcg, bool tree, + unsigned long scanned, unsigned long reclaimed); extern void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio); extern void vmpressure_init(struct vmpressure *vmpr); @@ -44,8 +44,9 @@ extern int vmpressure_register_event(struct mem_cgroup *memcg, extern void vmpressure_unregister_event(struct mem_cgroup *memcg, struct eventfd_ctx *eventfd); #else -static inline void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree, - unsigned long scanned, unsigned long reclaimed) {} +static inline void vmpressure(gfp_t gfp, int order, struct mem_cgroup *memcg, + bool tree, unsigned long scanned, + unsigned long reclaimed) {} static inline void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio) {} #endif /* CONFIG_MEMCG */ |
