| field | value | date |
|---|---|---|
| author | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2026-01-23 11:21:37 +0100 |
| committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2026-01-23 11:21:37 +0100 |
| commit | 3282f1c427e18feb61774c37a034fbb3622177b4 | |
| tree | 109d5873614af7741ae06b968bd5f253ef39c613 /mm/page_alloc.c | |
| parent | 0d579622a3b0bb76742ffa8becc103fe35934dca | |
| parent | 5dfbc5357c34bdf81c84aa78bc8e3d6d9ba10aad | |
Merge v6.18.7 (linux-rolling-stable)
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'mm/page_alloc.c')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | mm/page_alloc.c | 74 |
1 files changed, 58 insertions, 16 deletions
```diff
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6e644f2744c2..623f6e5b583a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -166,6 +166,33 @@ static DEFINE_MUTEX(pcp_batch_high_lock);
 #define pcp_spin_unlock(ptr) \
         pcpu_spin_unlock(lock, ptr)
 
+/*
+ * With the UP spinlock implementation, when we spin_lock(&pcp->lock) (for i.e.
+ * a potentially remote cpu drain) and get interrupted by an operation that
+ * attempts pcp_spin_trylock(), we can't rely on the trylock failure due to UP
+ * spinlock assumptions making the trylock a no-op. So we have to turn that
+ * spin_lock() to a spin_lock_irqsave(). This works because on UP there are no
+ * remote cpu's so we can only be locking the only existing local one.
+ */
+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)
+static inline void __flags_noop(unsigned long *flags) { }
+#define pcp_spin_lock_maybe_irqsave(ptr, flags)         \
+({                                                      \
+        __flags_noop(&(flags));                         \
+        spin_lock(&(ptr)->lock);                        \
+})
+#define pcp_spin_unlock_maybe_irqrestore(ptr, flags)    \
+({                                                      \
+        spin_unlock(&(ptr)->lock);                      \
+        __flags_noop(&(flags));                         \
+})
+#else
+#define pcp_spin_lock_maybe_irqsave(ptr, flags)         \
+        spin_lock_irqsave(&(ptr)->lock, flags)
+#define pcp_spin_unlock_maybe_irqrestore(ptr, flags)    \
+        spin_unlock_irqrestore(&(ptr)->lock, flags)
+#endif
+
 #ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID
 DEFINE_PER_CPU(int, numa_node);
 EXPORT_PER_CPU_SYMBOL(numa_node);
@@ -2552,10 +2579,11 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
  * Called from the vmstat counter updater to decay the PCP high.
  * Return whether there are addition works to do.
  */
-int decay_pcp_high(struct zone *zone, struct per_cpu_pages *pcp)
+bool decay_pcp_high(struct zone *zone, struct per_cpu_pages *pcp)
 {
-        int high_min, to_drain, batch;
-        int todo = 0;
+        int high_min, to_drain, to_drain_batched, batch;
+        unsigned long UP_flags;
+        bool todo = false;
 
         high_min = READ_ONCE(pcp->high_min);
         batch = READ_ONCE(pcp->batch);
@@ -2568,15 +2596,18 @@ int decay_pcp_high(struct zone *zone, struct per_cpu_pages *pcp)
                 pcp->high = max3(pcp->count - (batch << CONFIG_PCP_BATCH_SCALE_MAX),
                                  pcp->high - (pcp->high >> 3), high_min);
                 if (pcp->high > high_min)
-                        todo++;
+                        todo = true;
         }
 
         to_drain = pcp->count - pcp->high;
-        if (to_drain > 0) {
-                spin_lock(&pcp->lock);
-                free_pcppages_bulk(zone, to_drain, pcp, 0);
-                spin_unlock(&pcp->lock);
-                todo++;
+        while (to_drain > 0) {
+                to_drain_batched = min(to_drain, batch);
+                pcp_spin_lock_maybe_irqsave(pcp, UP_flags);
+                free_pcppages_bulk(zone, to_drain_batched, pcp, 0);
+                pcp_spin_unlock_maybe_irqrestore(pcp, UP_flags);
+                todo = true;
+
+                to_drain -= to_drain_batched;
         }
 
         return todo;
@@ -2590,14 +2621,15 @@ int decay_pcp_high(struct zone *zone, struct per_cpu_pages *pcp)
  */
 void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
 {
+        unsigned long UP_flags;
         int to_drain, batch;
 
         batch = READ_ONCE(pcp->batch);
         to_drain = min(pcp->count, batch);
         if (to_drain > 0) {
-                spin_lock(&pcp->lock);
+                pcp_spin_lock_maybe_irqsave(pcp, UP_flags);
                 free_pcppages_bulk(zone, to_drain, pcp, 0);
-                spin_unlock(&pcp->lock);
+                pcp_spin_unlock_maybe_irqrestore(pcp, UP_flags);
         }
 }
 #endif
@@ -2608,10 +2640,11 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
 static void drain_pages_zone(unsigned int cpu, struct zone *zone)
 {
         struct per_cpu_pages *pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu);
+        unsigned long UP_flags;
         int count;
 
         do {
-                spin_lock(&pcp->lock);
+                pcp_spin_lock_maybe_irqsave(pcp, UP_flags);
                 count = pcp->count;
                 if (count) {
                         int to_drain = min(count,
@@ -2620,7 +2653,7 @@ static void drain_pages_zone(unsigned int cpu, struct zone *zone)
                         free_pcppages_bulk(zone, to_drain, pcp, 0);
                         count -= to_drain;
                 }
-                spin_unlock(&pcp->lock);
+                pcp_spin_unlock_maybe_irqrestore(pcp, UP_flags);
         } while (count);
 }
 
@@ -6078,6 +6111,7 @@ static void zone_pcp_update_cacheinfo(struct zone *zone, unsigned int cpu)
 {
         struct per_cpu_pages *pcp;
         struct cpu_cacheinfo *cci;
+        unsigned long UP_flags;
 
         pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu);
         cci = get_cpu_cacheinfo(cpu);
@@ -6088,12 +6122,12 @@ static void zone_pcp_update_cacheinfo(struct zone *zone, unsigned int cpu)
          * This can reduce zone lock contention without hurting
          * cache-hot pages sharing.
          */
-        spin_lock(&pcp->lock);
+        pcp_spin_lock_maybe_irqsave(pcp, UP_flags);
         if ((cci->per_cpu_data_slice_size >> PAGE_SHIFT) > 3 * pcp->batch)
                 pcp->flags |= PCPF_FREE_HIGH_BATCH;
         else
                 pcp->flags &= ~PCPF_FREE_HIGH_BATCH;
-        spin_unlock(&pcp->lock);
+        pcp_spin_unlock_maybe_irqrestore(pcp, UP_flags);
 }
 
 void setup_pcp_cacheinfo(unsigned int cpu)
@@ -6611,11 +6645,19 @@ static int percpu_pagelist_high_fraction_sysctl_handler(const struct ctl_table *
         int old_percpu_pagelist_high_fraction;
         int ret;
 
+        /*
+         * Avoid using pcp_batch_high_lock for reads as the value is read
+         * atomically and a race with offlining is harmless.
+         */
+
+        if (!write)
+                return proc_dointvec_minmax(table, write, buffer, length, ppos);
+
         mutex_lock(&pcp_batch_high_lock);
         old_percpu_pagelist_high_fraction = percpu_pagelist_high_fraction;
 
         ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
-        if (!write || ret < 0)
+        if (ret < 0)
                 goto out;
 
         /* Sanity checking to avoid pcp imbalance */
```
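The core of the change is the new `pcp_spin_lock_maybe_irqsave()` / `pcp_spin_unlock_maybe_irqrestore()` pair: on UP builds `spin_trylock()` is an always-successful no-op, so a drain that only takes `spin_lock()` could be re-entered by an interrupt handler whose `pcp_spin_trylock()` "succeeds"; disabling interrupts for the UP flavour closes that window, while SMP and PREEMPT_RT keep the plain lock. Below is a minimal userspace sketch of that compile-time selection, not the kernel code itself: the `stub_*`, `demo_*` and `flags_noop` names and the `SIMULATE_SMP` switch are invented for the sketch, the lock primitives are printf stubs, and the SMP flavour uses GNU statement expressions just as the kernel macros do (gcc/clang).

```c
/* Build:  cc -DSIMULATE_SMP demo.c   (SMP/PREEMPT_RT flavour)
 *         cc demo.c                  (UP flavour)                           */
#include <stdio.h>

/* printf stubs standing in for the real locking primitives */
static void stub_spin_lock(void)   { puts("spin_lock(&pcp->lock)"); }
static void stub_spin_unlock(void) { puts("spin_unlock(&pcp->lock)"); }
static void stub_spin_lock_irqsave(unsigned long *flags)
{
        *flags = 1;     /* pretend we saved the irq state */
        puts("spin_lock_irqsave(&pcp->lock, flags)      <- irqs off, no trylock can interrupt us");
}
static void stub_spin_unlock_irqrestore(unsigned long *flags)
{
        *flags = 0;
        puts("spin_unlock_irqrestore(&pcp->lock, flags) <- irqs restored");
}

/* mirrors __flags_noop(): keeps "flags" referenced so both flavours share one call site */
static inline void flags_noop(unsigned long *flags) { (void)flags; }

#ifdef SIMULATE_SMP
/* SMP/PREEMPT_RT flavour: a plain spin_lock is enough, a concurrent trylock really fails */
#define demo_lock_maybe_irqsave(flags)        ({ flags_noop(&(flags)); stub_spin_lock(); })
#define demo_unlock_maybe_irqrestore(flags)   ({ stub_spin_unlock(); flags_noop(&(flags)); })
#else
/* UP flavour: spin_trylock() is an always-successful no-op, so the only way to keep
 * an interrupt from re-entering the locked section is to disable interrupts */
#define demo_lock_maybe_irqsave(flags)        stub_spin_lock_irqsave(&(flags))
#define demo_unlock_maybe_irqrestore(flags)   stub_spin_unlock_irqrestore(&(flags))
#endif

int main(void)
{
        unsigned long flags = 0;

        demo_lock_maybe_irqsave(flags);
        puts("  ... free a batch of pcp pages ...");
        demo_unlock_maybe_irqrestore(flags);
        return 0;
}
```

Building it with and without `-DSIMULATE_SMP` shows which primitive the single, unchanged call site ends up using, which is the point of keeping one macro name for both configurations.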
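The `decay_pcp_high()` hunk also changes the shape of the drain: instead of freeing everything above `pcp->high` in one critical section, it frees at most one `batch` per lock hold, which bounds how long the lock (and, with the UP flavour, interrupts) stays held. A standalone sketch of just that loop arithmetic follows; `drain_in_batches`, `min_int` and the example numbers are invented for the sketch and are not kernel symbols.

```c
#include <stdio.h>

static int min_int(int a, int b) { return a < b ? a : b; }

/* Drain "count" pages down to "high", at most "batch" pages per lock hold.
 * Returns how many separate lock sections were needed. */
static int drain_in_batches(int count, int high, int batch)
{
        int to_drain = count - high;
        int lock_sections = 0;

        while (to_drain > 0) {
                int to_drain_batched = min_int(to_drain, batch);

                /* lock would be taken here, one batch freed ... */
                printf("  drained %d pages under one lock hold\n", to_drain_batched);
                /* ... and released again before the next batch */

                to_drain -= to_drain_batched;
                lock_sections++;
        }
        return lock_sections;
}

int main(void)
{
        /* e.g. 1000 cached pages, target high watermark 100, batch of 63 */
        int sections = drain_in_batches(1000, 100, 63);

        printf("%d short lock sections instead of 1 long one\n", sections);
        return 0;
}
```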

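The sysctl-handler hunk follows a common reader/writer split: reads of `percpu_pagelist_high_fraction` return early without taking `pcp_batch_high_lock`, since (per the added comment) the value is read atomically and a race with offlining is harmless, while writes keep the mutex because they go on to adjust the per-cpu limits. A minimal userspace sketch of that pattern, under the same "plain int reads are atomic enough" assumption; `read_fraction`, `write_fraction`, `update_lock` and `high_fraction` are local to the sketch, not the kernel API.

```c
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t update_lock = PTHREAD_MUTEX_INITIALIZER;
static int high_fraction;       /* the tunable; racy reads assumed harmless here */

static int read_fraction(void)
{
        /* Lockless fast path: a concurrent writer can only make us see the
         * old or the new value, both of which are valid, so no mutex. */
        return high_fraction;
}

static void write_fraction(int new_value)
{
        pthread_mutex_lock(&update_lock);
        /* The real handler also recomputes per-cpu high/batch values here,
         * which is why writers still have to be serialized. */
        high_fraction = new_value;
        pthread_mutex_unlock(&update_lock);
}

int main(void)
{
        write_fraction(8);
        printf("fraction = %d\n", read_fraction());
        return 0;
}
```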