diff options
| author | Dave Airlie <airlied@redhat.com> | 2026-03-30 06:04:57 +1000 |
|---|---|---|
| committer | Dave Airlie <airlied@redhat.com> | 2026-03-30 06:04:59 +1000 |
| commit | a51973c5dff8a0f01cc7d1b2007306ea0004fa16 (patch) | |
| tree | dd854810489a6d1ab7373c965bb97fae840fb26b | |
| parent | 0d270f0df6170fa56eefbb836577c961a791b1fd (diff) | |
| parent | 05c8b1cdc54036465ea457a0501a8c2f9409fce7 (diff) | |
Merge tag 'drm-xe-next-2026-03-26-1' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-next
Hi Dave and Sima,
Here goes our late, final drm-xe-next PR towards 7.1. We just got the purgeable
BO uAPI in today, hence the late pull.
In the big things we have:
- Add support for purgeable buffer objects
Thanks,
Matt
UAPI Changes:
- Add support for purgeable buffer objects (Arvind, Himal)
Driver Changes:
- Remove useless comment (Maarten)
- Issue GGTT invalidation under lock in ggtt_node_remove (Brost, Fixes)
- Fix mismatched include guards in header files (Shuicheng)
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Matthew Brost <matthew.brost@intel.com>
Link: https://patch.msgid.link/acX4fWxPkZrrfwnT@gsse-cloud1.jf.intel.com
| -rw-r--r-- | drivers/gpu/drm/xe/xe_bo.c | 194 | ||||
| -rw-r--r-- | drivers/gpu/drm/xe/xe_bo.h | 58 | ||||
| -rw-r--r-- | drivers/gpu/drm/xe/xe_bo_types.h | 6 | ||||
| -rw-r--r-- | drivers/gpu/drm/xe/xe_device.c | 3 | ||||
| -rw-r--r-- | drivers/gpu/drm/xe/xe_dma_buf.c | 24 | ||||
| -rw-r--r-- | drivers/gpu/drm/xe/xe_ggtt.c | 9 | ||||
| -rw-r--r-- | drivers/gpu/drm/xe/xe_gt_idle_types.h | 6 | ||||
| -rw-r--r-- | drivers/gpu/drm/xe/xe_guc_exec_queue_types.h | 4 | ||||
| -rw-r--r-- | drivers/gpu/drm/xe/xe_heci_gsc.h | 6 | ||||
| -rw-r--r-- | drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h | 4 | ||||
| -rw-r--r-- | drivers/gpu/drm/xe/xe_late_bind_fw_types.h | 4 | ||||
| -rw-r--r-- | drivers/gpu/drm/xe/xe_pagefault.c | 15 | ||||
| -rw-r--r-- | drivers/gpu/drm/xe/xe_platform_types.h | 4 | ||||
| -rw-r--r-- | drivers/gpu/drm/xe/xe_pt.c | 40 | ||||
| -rw-r--r-- | drivers/gpu/drm/xe/xe_query.c | 2 | ||||
| -rw-r--r-- | drivers/gpu/drm/xe/xe_svm.c | 1 | ||||
| -rw-r--r-- | drivers/gpu/drm/xe/xe_tile_printk.h | 4 | ||||
| -rw-r--r-- | drivers/gpu/drm/xe/xe_vm.c | 112 | ||||
| -rw-r--r-- | drivers/gpu/drm/xe/xe_vm_madvise.c | 303 | ||||
| -rw-r--r-- | drivers/gpu/drm/xe/xe_vm_madvise.h | 3 | ||||
| -rw-r--r-- | drivers/gpu/drm/xe/xe_vm_types.h | 11 | ||||
| -rw-r--r-- | include/uapi/drm/xe_drm.h | 69 |
22 files changed, 814 insertions, 68 deletions
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 7545d2fa3255..a7c2dc7f224c 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -838,6 +838,122 @@ static int xe_bo_move_notify(struct xe_bo *bo, return 0; } +/** + * xe_bo_set_purgeable_shrinker() - Update shrinker accounting for purgeable state + * @bo: Buffer object + * @new_state: New purgeable state being set + * + * Transfers pages between shrinkable and purgeable buckets when the BO + * purgeable state changes. Called automatically from xe_bo_set_purgeable_state(). + */ +static void xe_bo_set_purgeable_shrinker(struct xe_bo *bo, + enum xe_madv_purgeable_state new_state) +{ + struct ttm_buffer_object *ttm_bo = &bo->ttm; + struct ttm_tt *tt = ttm_bo->ttm; + struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev); + struct xe_ttm_tt *xe_tt; + long tt_pages; + + xe_bo_assert_held(bo); + + if (!tt || !ttm_tt_is_populated(tt)) + return; + + xe_tt = container_of(tt, struct xe_ttm_tt, ttm); + tt_pages = tt->num_pages; + + if (!xe_tt->purgeable && new_state == XE_MADV_PURGEABLE_DONTNEED) { + xe_tt->purgeable = true; + /* Transfer pages from shrinkable to purgeable count */ + xe_shrinker_mod_pages(xe->mem.shrinker, -tt_pages, tt_pages); + } else if (xe_tt->purgeable && new_state == XE_MADV_PURGEABLE_WILLNEED) { + xe_tt->purgeable = false; + /* Transfer pages from purgeable to shrinkable count */ + xe_shrinker_mod_pages(xe->mem.shrinker, tt_pages, -tt_pages); + } +} + +/** + * xe_bo_set_purgeable_state() - Set BO purgeable state with validation + * @bo: Buffer object + * @new_state: New purgeable state + * + * Sets the purgeable state with lockdep assertions and validates state + * transitions. Once a BO is PURGED, it cannot transition to any other state. + * Invalid transitions are caught with xe_assert(). Shrinker page accounting + * is updated automatically. 
+ */ +void xe_bo_set_purgeable_state(struct xe_bo *bo, + enum xe_madv_purgeable_state new_state) +{ + struct xe_device *xe = xe_bo_device(bo); + + xe_bo_assert_held(bo); + + /* Validate state is one of the known values */ + xe_assert(xe, new_state == XE_MADV_PURGEABLE_WILLNEED || + new_state == XE_MADV_PURGEABLE_DONTNEED || + new_state == XE_MADV_PURGEABLE_PURGED); + + /* Once purged, always purged - cannot transition out */ + xe_assert(xe, !(bo->madv_purgeable == XE_MADV_PURGEABLE_PURGED && + new_state != XE_MADV_PURGEABLE_PURGED)); + + bo->madv_purgeable = new_state; + xe_bo_set_purgeable_shrinker(bo, new_state); +} + +/** + * xe_ttm_bo_purge() - Purge buffer object backing store + * @ttm_bo: The TTM buffer object to purge + * @ctx: TTM operation context + * + * This function purges the backing store of a BO marked as DONTNEED and + * triggers rebind to invalidate stale GPU mappings. For fault-mode VMs, + * this zaps the PTEs. The next GPU access will trigger a page fault and + * perform NULL rebind (scratch pages or clear PTEs based on VM config). + * + * Return: 0 on success, negative error code on failure + */ +static int xe_ttm_bo_purge(struct ttm_buffer_object *ttm_bo, struct ttm_operation_ctx *ctx) +{ + struct xe_bo *bo = ttm_to_xe_bo(ttm_bo); + struct ttm_placement place = {}; + int ret; + + xe_bo_assert_held(bo); + + if (!ttm_bo->ttm) + return 0; + + if (!xe_bo_madv_is_dontneed(bo)) + return 0; + + /* + * Use the standard pre-move hook so we share the same cleanup/invalidate + * path as migrations: drop any CPU vmap and schedule the necessary GPU + * unbind/rebind work. + * + * This must be called before ttm_bo_validate() frees the pages. + * May fail in no-wait contexts (fault/shrinker) or if the BO is + * pinned. Keep state unchanged on failure so we don't end up "PURGED" + * with stale mappings. 
+ */ + ret = xe_bo_move_notify(bo, ctx); + if (ret) + return ret; + + ret = ttm_bo_validate(ttm_bo, &place, ctx); + if (ret) + return ret; + + /* Commit the state transition only once invalidation was queued */ + xe_bo_set_purgeable_state(bo, XE_MADV_PURGEABLE_PURGED); + + return 0; +} + static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, struct ttm_operation_ctx *ctx, struct ttm_resource *new_mem, @@ -857,6 +973,20 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, ttm && ttm_tt_is_populated(ttm)) ? true : false; int ret = 0; + /* + * Purge only non-shared BOs explicitly marked DONTNEED by userspace. + * The move_notify callback will handle invalidation asynchronously. + */ + if (evict && xe_bo_madv_is_dontneed(bo)) { + ret = xe_ttm_bo_purge(ttm_bo, ctx); + if (ret) + return ret; + + /* Free the unused eviction destination resource */ + ttm_resource_free(ttm_bo, &new_mem); + return 0; + } + /* Bo creation path, moving to system or TT. */ if ((!old_mem && ttm) && !handle_system_ccs) { if (new_mem->mem_type == XE_PL_TT) @@ -1154,6 +1284,9 @@ long xe_bo_shrink(struct ttm_operation_ctx *ctx, struct ttm_buffer_object *bo, lret = xe_bo_move_notify(xe_bo, ctx); if (!lret) lret = xe_bo_shrink_purge(ctx, bo, scanned); + if (lret > 0 && xe_bo_madv_is_dontneed(xe_bo)) + xe_bo_set_purgeable_state(xe_bo, + XE_MADV_PURGEABLE_PURGED); goto out_unref; } @@ -1606,18 +1739,6 @@ static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object *ttm_bo) } } -static void xe_ttm_bo_purge(struct ttm_buffer_object *ttm_bo, struct ttm_operation_ctx *ctx) -{ - struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev); - - if (ttm_bo->ttm) { - struct ttm_placement place = {}; - int ret = ttm_bo_validate(ttm_bo, &place, ctx); - - drm_WARN_ON(&xe->drm, ret); - } -} - static void xe_ttm_bo_swap_notify(struct ttm_buffer_object *ttm_bo) { struct ttm_operation_ctx ctx = { @@ -1902,6 +2023,16 @@ static vm_fault_t xe_bo_cpu_fault_fastpath(struct vm_fault *vmf, struct 
xe_devic if (!dma_resv_trylock(tbo->base.resv)) goto out_validation; + /* + * Reject CPU faults to purgeable BOs. DONTNEED BOs can be purged + * at any time, and purged BOs have no backing store. Either case + * is undefined behavior for CPU access. + */ + if (xe_bo_madv_is_dontneed(bo) || xe_bo_is_purged(bo)) { + ret = VM_FAULT_SIGBUS; + goto out_unlock; + } + if (xe_ttm_bo_is_imported(tbo)) { ret = VM_FAULT_SIGBUS; drm_dbg(&xe->drm, "CPU trying to access an imported buffer object.\n"); @@ -1992,6 +2123,15 @@ static vm_fault_t xe_bo_cpu_fault(struct vm_fault *vmf) if (err) break; + /* + * Reject CPU faults to purgeable BOs. DONTNEED BOs can be + * purged at any time, and purged BOs have no backing store. + */ + if (xe_bo_madv_is_dontneed(bo) || xe_bo_is_purged(bo)) { + err = -EFAULT; + break; + } + if (xe_ttm_bo_is_imported(tbo)) { err = -EFAULT; drm_dbg(&xe->drm, "CPU trying to access an imported buffer object.\n"); @@ -2069,10 +2209,35 @@ static const struct vm_operations_struct xe_gem_vm_ops = { .access = xe_bo_vm_access, }; +static int xe_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) +{ + struct xe_bo *bo = gem_to_xe_bo(obj); + int err = 0; + + /* + * Reject mmap of purgeable BOs. DONTNEED BOs can be purged + * at any time, making CPU access undefined behavior. Purged BOs have + * no backing store and are permanently invalid. 
+ */ + err = xe_bo_lock(bo, true); + if (err) + return err; + + if (xe_bo_madv_is_dontneed(bo)) + err = -EBUSY; + else if (xe_bo_is_purged(bo)) + err = -EINVAL; + xe_bo_unlock(bo); + if (err) + return err; + + return drm_gem_ttm_mmap(obj, vma); +} + static const struct drm_gem_object_funcs xe_gem_object_funcs = { .free = xe_gem_object_free, .close = xe_gem_object_close, - .mmap = drm_gem_ttm_mmap, + .mmap = xe_gem_object_mmap, .export = xe_gem_prime_export, .vm_ops = &xe_gem_vm_ops, }; @@ -2198,6 +2363,9 @@ struct xe_bo *xe_bo_init_locked(struct xe_device *xe, struct xe_bo *bo, #endif INIT_LIST_HEAD(&bo->vram_userfault_link); + /* Initialize purge advisory state */ + bo->madv_purgeable = XE_MADV_PURGEABLE_WILLNEED; + drm_gem_private_object_init(&xe->drm, &bo->ttm.base, size); if (resv) { diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index a0ad846e9450..68dea7d25a6b 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -87,6 +87,28 @@ #define XE_PCI_BARRIER_MMAP_OFFSET (0x50 << XE_PTE_SHIFT) +/** + * enum xe_madv_purgeable_state - Buffer object purgeable state enumeration + * + * This enum defines the possible purgeable states for a buffer object, + * allowing userspace to provide memory usage hints to the kernel for + * better memory management under pressure. + * + * @XE_MADV_PURGEABLE_WILLNEED: The buffer object is needed and should not be purged. + * This is the default state. + * @XE_MADV_PURGEABLE_DONTNEED: The buffer object is not currently needed and can be + * purged by the kernel under memory pressure. + * @XE_MADV_PURGEABLE_PURGED: The buffer object has been purged by the kernel. + * + * Accessing a purged buffer will result in an error. Per i915 semantics, + * once purged, a BO remains permanently invalid and must be destroyed and recreated. 
+ */ +enum xe_madv_purgeable_state { + XE_MADV_PURGEABLE_WILLNEED, + XE_MADV_PURGEABLE_DONTNEED, + XE_MADV_PURGEABLE_PURGED, +}; + struct sg_table; struct xe_bo *xe_bo_alloc(void); @@ -215,6 +237,42 @@ static inline bool xe_bo_is_protected(const struct xe_bo *bo) return bo->pxp_key_instance; } +/** + * xe_bo_is_purged() - Check if buffer object has been purged + * @bo: The buffer object to check + * + * Checks if the buffer object's backing store has been discarded by the + * kernel due to memory pressure after being marked as purgeable (DONTNEED). + * Once purged, the BO cannot be restored and any attempt to use it will fail. + * + * Context: Caller must hold the BO's dma-resv lock + * Return: true if the BO has been purged, false otherwise + */ +static inline bool xe_bo_is_purged(struct xe_bo *bo) +{ + xe_bo_assert_held(bo); + return bo->madv_purgeable == XE_MADV_PURGEABLE_PURGED; +} + +/** + * xe_bo_madv_is_dontneed() - Check if BO is marked as DONTNEED + * @bo: The buffer object to check + * + * Checks if userspace has marked this BO as DONTNEED (i.e., its contents + * are not currently needed and can be discarded under memory pressure). + * This is used internally to decide whether a BO is eligible for purging. 
+ * + * Context: Caller must hold the BO's dma-resv lock + * Return: true if the BO is marked DONTNEED, false otherwise + */ +static inline bool xe_bo_madv_is_dontneed(struct xe_bo *bo) +{ + xe_bo_assert_held(bo); + return bo->madv_purgeable == XE_MADV_PURGEABLE_DONTNEED; +} + +void xe_bo_set_purgeable_state(struct xe_bo *bo, enum xe_madv_purgeable_state new_state); + static inline void xe_bo_unpin_map_no_vm(struct xe_bo *bo) { if (likely(bo)) { diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h index d4fe3c8dca5b..ff8317bfc1ae 100644 --- a/drivers/gpu/drm/xe/xe_bo_types.h +++ b/drivers/gpu/drm/xe/xe_bo_types.h @@ -108,6 +108,12 @@ struct xe_bo { * from default */ u64 min_align; + + /** + * @madv_purgeable: user space advise on BO purgeability, protected + * by BO's dma-resv lock. + */ + u32 madv_purgeable; }; #endif diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index b17d4a878686..9b0540e3e289 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -390,9 +390,6 @@ bool xe_is_xe_file(const struct file *file) } static struct drm_driver driver = { - /* Don't use MTRRs here; the Xserver or userspace app should - * deal with them for Intel hardware. - */ .driver_features = DRIVER_GEM | DRIVER_RENDER | DRIVER_SYNCOBJ | diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c index ea370cd373e9..7f9602b3363d 100644 --- a/drivers/gpu/drm/xe/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/xe_dma_buf.c @@ -223,6 +223,26 @@ struct dma_buf *xe_gem_prime_export(struct drm_gem_object *obj, int flags) if (bo->vm) return ERR_PTR(-EPERM); + /* + * Reject exporting purgeable BOs. DONTNEED BOs can be purged + * at any time, making the exported dma-buf unusable. Purged BOs + * have no backing store and are permanently invalid. 
+ */ + ret = xe_bo_lock(bo, true); + if (ret) + return ERR_PTR(ret); + + if (xe_bo_madv_is_dontneed(bo)) { + ret = -EBUSY; + goto out_unlock; + } + + if (xe_bo_is_purged(bo)) { + ret = -EINVAL; + goto out_unlock; + } + xe_bo_unlock(bo); + ret = ttm_bo_setup_export(&bo->ttm, &ctx); if (ret) return ERR_PTR(ret); @@ -232,6 +252,10 @@ struct dma_buf *xe_gem_prime_export(struct drm_gem_object *obj, int flags) buf->ops = &xe_dmabuf_ops; return buf; + +out_unlock: + xe_bo_unlock(bo); + return ERR_PTR(ret); } static struct drm_gem_object * diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 21071b64b09d..a848d1a41b9b 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -481,15 +481,10 @@ static void ggtt_node_remove(struct xe_ggtt_node *node) xe_ggtt_clear(ggtt, xe_ggtt_node_addr(node), xe_ggtt_node_size(node)); drm_mm_remove_node(&node->base); node->base.size = 0; - mutex_unlock(&ggtt->lock); - - if (!bound) - goto free_node; - - if (node->invalidate_on_remove) + if (bound && node->invalidate_on_remove) xe_ggtt_invalidate(ggtt); + mutex_unlock(&ggtt->lock); -free_node: ggtt_node_fini(node); } diff --git a/drivers/gpu/drm/xe/xe_gt_idle_types.h b/drivers/gpu/drm/xe/xe_gt_idle_types.h index a3667c567f8a..5b5d2e835102 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle_types.h +++ b/drivers/gpu/drm/xe/xe_gt_idle_types.h @@ -3,8 +3,8 @@ * Copyright © 2023 Intel Corporation */ -#ifndef _XE_GT_IDLE_SYSFS_TYPES_H_ -#define _XE_GT_IDLE_SYSFS_TYPES_H_ +#ifndef _XE_GT_IDLE_TYPES_H_ +#define _XE_GT_IDLE_TYPES_H_ #include <linux/spinlock.h> #include <linux/types.h> @@ -40,4 +40,4 @@ struct xe_gt_idle { u64 (*idle_residency)(struct xe_guc_pc *pc); }; -#endif /* _XE_GT_IDLE_SYSFS_TYPES_H_ */ +#endif /* _XE_GT_IDLE_TYPES_H_ */ diff --git a/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h b/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h index fd0915ed8eb1..e5e53b421f29 100644 --- a/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h +++ 
b/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h @@ -3,8 +3,8 @@ * Copyright © 2022 Intel Corporation */ -#ifndef _XE_GUC_ENGINE_TYPES_H_ -#define _XE_GUC_ENGINE_TYPES_H_ +#ifndef _XE_GUC_EXEC_QUEUE_TYPES_H_ +#define _XE_GUC_EXEC_QUEUE_TYPES_H_ #include <linux/spinlock.h> #include <linux/workqueue.h> diff --git a/drivers/gpu/drm/xe/xe_heci_gsc.h b/drivers/gpu/drm/xe/xe_heci_gsc.h index 745eb6783942..a76f4122b778 100644 --- a/drivers/gpu/drm/xe/xe_heci_gsc.h +++ b/drivers/gpu/drm/xe/xe_heci_gsc.h @@ -2,8 +2,8 @@ /* * Copyright(c) 2023, Intel Corporation. All rights reserved. */ -#ifndef __XE_HECI_GSC_DEV_H__ -#define __XE_HECI_GSC_DEV_H__ +#ifndef _XE_HECI_GSC_H_ +#define _XE_HECI_GSC_H_ #include <linux/types.h> @@ -37,4 +37,4 @@ int xe_heci_gsc_init(struct xe_device *xe); void xe_heci_gsc_irq_handler(struct xe_device *xe, u32 iir); void xe_heci_csc_irq_handler(struct xe_device *xe, u32 iir); -#endif /* __XE_HECI_GSC_DEV_H__ */ +#endif /* _XE_HECI_GSC_H_ */ diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h index 28a0d7c909c0..37225c1ae528 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h +++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h @@ -3,8 +3,8 @@ * Copyright © 2023 Intel Corporation */ -#ifndef _XE_ENGINE_CLASS_SYSFS_H_ -#define _XE_ENGINE_CLASS_SYSFS_H_ +#ifndef _XE_HW_ENGINE_CLASS_SYSFS_H_ +#define _XE_HW_ENGINE_CLASS_SYSFS_H_ #include <linux/kobject.h> diff --git a/drivers/gpu/drm/xe/xe_late_bind_fw_types.h b/drivers/gpu/drm/xe/xe_late_bind_fw_types.h index 2a8a985c37e7..7fdb24e810b3 100644 --- a/drivers/gpu/drm/xe/xe_late_bind_fw_types.h +++ b/drivers/gpu/drm/xe/xe_late_bind_fw_types.h @@ -3,8 +3,8 @@ * Copyright © 2025 Intel Corporation */ -#ifndef _XE_LATE_BIND_TYPES_H_ -#define _XE_LATE_BIND_TYPES_H_ +#ifndef _XE_LATE_BIND_FW_TYPES_H_ +#define _XE_LATE_BIND_FW_TYPES_H_ #include <linux/iosys-map.h> #include <linux/mutex.h> diff --git a/drivers/gpu/drm/xe/xe_pagefault.c 
b/drivers/gpu/drm/xe/xe_pagefault.c index 2fd55d7c98f9..c2d0a6c34917 100644 --- a/drivers/gpu/drm/xe/xe_pagefault.c +++ b/drivers/gpu/drm/xe/xe_pagefault.c @@ -59,6 +59,19 @@ static int xe_pagefault_begin(struct drm_exec *exec, struct xe_vma *vma, if (!bo) return 0; + /* + * Skip validate/migrate for DONTNEED/purged BOs - repopulating + * their pages would prevent the shrinker from reclaiming them. + * For non-scratch VMs there is no safe fallback so fail the fault. + * For scratch VMs let xe_vma_rebind() run normally; it will install + * scratch PTEs so the GPU gets safe zero reads instead of faulting. + */ + if (unlikely(xe_bo_madv_is_dontneed(bo) || xe_bo_is_purged(bo))) { + if (!xe_vm_has_scratch(vm)) + return -EACCES; + return 0; + } + return need_vram_move ? xe_bo_migrate(bo, vram->placement, NULL, exec) : xe_bo_validate(bo, vm, true, exec); } @@ -145,7 +158,7 @@ static struct xe_vm *xe_pagefault_asid_to_vm(struct xe_device *xe, u32 asid) down_read(&xe->usm.lock); vm = xa_load(&xe->usm.asid_to_vm, asid); - if (vm && xe_vm_in_fault_mode(vm)) + if (vm && (xe_vm_in_fault_mode(vm) || xe_vm_has_scratch(vm))) xe_vm_get(vm); else vm = ERR_PTR(-EINVAL); diff --git a/drivers/gpu/drm/xe/xe_platform_types.h b/drivers/gpu/drm/xe/xe_platform_types.h index 6cff385227ea..3447848b74e3 100644 --- a/drivers/gpu/drm/xe/xe_platform_types.h +++ b/drivers/gpu/drm/xe/xe_platform_types.h @@ -3,8 +3,8 @@ * Copyright © 2022 Intel Corporation */ -#ifndef _XE_PLATFORM_INFO_TYPES_H_ -#define _XE_PLATFORM_INFO_TYPES_H_ +#ifndef _XE_PLATFORM_TYPES_H_ +#define _XE_PLATFORM_TYPES_H_ /* * Keep this in graphics version based order and chronological order within a diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 713a303c9053..8e5f4f0dea3f 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -531,20 +531,26 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, /* Is this a leaf entry ?*/ if (level == 0 || xe_pt_hugepte_possible(addr, 
next, level, xe_walk)) { struct xe_res_cursor *curs = xe_walk->curs; - bool is_null = xe_vma_is_null(xe_walk->vma); - bool is_vram = is_null ? false : xe_res_is_vram(curs); + struct xe_bo *bo = xe_vma_bo(xe_walk->vma); + bool is_null_or_purged = xe_vma_is_null(xe_walk->vma) || + (bo && xe_bo_is_purged(bo)); + bool is_vram = is_null_or_purged ? false : xe_res_is_vram(curs); XE_WARN_ON(xe_walk->va_curs_start != addr); if (xe_walk->clear_pt) { pte = 0; } else { - pte = vm->pt_ops->pte_encode_vma(is_null ? 0 : + /* + * For purged BOs, treat like null VMAs - pass address 0. + * The pte_encode_vma will set XE_PTE_NULL flag for scratch mapping. + */ + pte = vm->pt_ops->pte_encode_vma(is_null_or_purged ? 0 : xe_res_dma(curs) + xe_walk->dma_offset, xe_walk->vma, pat_index, level); - if (!is_null) + if (!is_null_or_purged) pte |= is_vram ? xe_walk->default_vram_pte : xe_walk->default_system_pte; @@ -568,7 +574,7 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, if (unlikely(ret)) return ret; - if (!is_null && !xe_walk->clear_pt) + if (!is_null_or_purged && !xe_walk->clear_pt) xe_res_next(curs, next - addr); xe_walk->va_curs_start = next; xe_walk->vma->gpuva.flags |= (XE_VMA_PTE_4K << level); @@ -721,6 +727,26 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, }; struct xe_pt *pt = vm->pt_root[tile->id]; int ret; + bool is_purged = false; + + /* + * Check if BO is purged: + * - Scratch VMs: Use scratch PTEs (XE_PTE_NULL) for safe zero reads + * - Non-scratch VMs: Clear PTEs to zero (non-present) to avoid mapping to phys addr 0 + * + * For non-scratch VMs, we force clear_pt=true so leaf PTEs become completely + * zero instead of creating a PRESENT mapping to physical address 0. + */ + if (bo && xe_bo_is_purged(bo)) { + is_purged = true; + + /* + * For non-scratch VMs, a NULL rebind should use zero PTEs + * (non-present), not a present PTE to phys 0. 
+ */ + if (!xe_vm_has_scratch(vm)) + xe_walk.clear_pt = true; + } if (range) { /* Move this entire thing to xe_svm.c? */ @@ -756,11 +782,11 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, } xe_walk.default_vram_pte |= XE_PPGTT_PTE_DM; - xe_walk.dma_offset = bo ? vram_region_gpu_offset(bo->ttm.resource) : 0; + xe_walk.dma_offset = (bo && !is_purged) ? vram_region_gpu_offset(bo->ttm.resource) : 0; if (!range) xe_bo_assert_held(bo); - if (!xe_vma_is_null(vma) && !range) { + if (!xe_vma_is_null(vma) && !range && !is_purged) { if (xe_vma_is_userptr(vma)) xe_res_first_dma(to_userptr_vma(vma)->userptr.pages.dma_addr, 0, xe_vma_size(vma), &curs); diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 4852fdcb4b95..d84d6a422c45 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -342,6 +342,8 @@ static int query_config(struct xe_device *xe, struct drm_xe_device_query *query) DRM_XE_QUERY_CONFIG_FLAG_HAS_LOW_LATENCY; config->info[DRM_XE_QUERY_CONFIG_FLAGS] |= DRM_XE_QUERY_CONFIG_FLAG_HAS_DISABLE_STATE_CACHE_PERF_FIX; + config->info[DRM_XE_QUERY_CONFIG_FLAGS] |= + DRM_XE_QUERY_CONFIG_FLAG_HAS_PURGING_SUPPORT; config->info[DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT] = xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? 
SZ_64K : SZ_4K; config->info[DRM_XE_QUERY_CONFIG_VA_BITS] = xe->info.va_bits; diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index 0251098650af..5b627eed1eab 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -322,6 +322,7 @@ static void xe_vma_set_default_attributes(struct xe_vma *vma) .preferred_loc.migration_policy = DRM_XE_MIGRATE_ALL_PAGES, .pat_index = vma->attr.default_pat_index, .atomic_access = DRM_XE_ATOMIC_UNDEFINED, + .purgeable_state = XE_MADV_PURGEABLE_WILLNEED, }; xe_vma_mem_attr_copy(&vma->attr, &default_attr); diff --git a/drivers/gpu/drm/xe/xe_tile_printk.h b/drivers/gpu/drm/xe/xe_tile_printk.h index 63640a42685d..738433a764bd 100644 --- a/drivers/gpu/drm/xe/xe_tile_printk.h +++ b/drivers/gpu/drm/xe/xe_tile_printk.h @@ -3,8 +3,8 @@ * Copyright © 2025 Intel Corporation */ -#ifndef _xe_tile_printk_H_ -#define _xe_tile_printk_H_ +#ifndef _XE_TILE_PRINTK_H_ +#define _XE_TILE_PRINTK_H_ #include "xe_printk.h" diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index d96e0a0c5605..56e2db50bb36 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -40,6 +40,7 @@ #include "xe_tile.h" #include "xe_tlb_inval.h" #include "xe_trace_bo.h" +#include "xe_vm_madvise.h" #include "xe_wa.h" static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm) @@ -327,6 +328,7 @@ void xe_vm_kill(struct xe_vm *vm, bool unlocked) static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec) { struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm); + struct xe_bo *bo = gem_to_xe_bo(vm_bo->obj); struct drm_gpuva *gpuva; int ret; @@ -335,10 +337,16 @@ static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec) list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind, &vm->rebind_list); + /* Skip re-populating purged BOs, rebind maps scratch pages. 
*/ + if (xe_bo_is_purged(bo)) { + vm_bo->evicted = false; + return 0; + } + if (!try_wait_for_completion(&vm->xe->pm_block)) return -EAGAIN; - ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false, exec); + ret = xe_bo_validate(bo, vm, false, exec); if (ret) return ret; @@ -1147,6 +1155,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, static void xe_vma_destroy_late(struct xe_vma *vma) { struct xe_vm *vm = xe_vma_vm(vma); + struct xe_bo *bo = xe_vma_bo(vma); if (vma->ufence) { xe_sync_ufence_put(vma->ufence); @@ -1161,7 +1170,7 @@ static void xe_vma_destroy_late(struct xe_vma *vma) } else if (xe_vma_is_null(vma) || xe_vma_is_cpu_addr_mirror(vma)) { xe_vm_put(vm); } else { - xe_bo_put(xe_vma_bo(vma)); + xe_bo_put(bo); } xe_vma_free(vma); @@ -1187,6 +1196,7 @@ static void vma_destroy_cb(struct dma_fence *fence, static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence) { struct xe_vm *vm = xe_vma_vm(vma); + struct xe_bo *bo = xe_vma_bo(vma); lockdep_assert_held_write(&vm->lock); xe_assert(vm->xe, list_empty(&vma->combined_links.destroy)); @@ -1195,9 +1205,10 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence) xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED); xe_userptr_destroy(to_userptr_vma(vma)); } else if (!xe_vma_is_null(vma) && !xe_vma_is_cpu_addr_mirror(vma)) { - xe_bo_assert_held(xe_vma_bo(vma)); + xe_bo_assert_held(bo); drm_gpuva_unlink(&vma->gpuva); + xe_bo_recompute_purgeable_state(bo); } xe_vm_assert_held(vm); @@ -1427,6 +1438,9 @@ static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma, u16 pat_index, u32 pt_level) { + struct xe_bo *bo = xe_vma_bo(vma); + struct xe_vm *vm = xe_vma_vm(vma); + pte |= XE_PAGE_PRESENT; if (likely(!xe_vma_read_only(vma))) @@ -1435,7 +1449,13 @@ static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma, pte |= pte_encode_pat_index(pat_index, pt_level); pte |= pte_encode_ps(pt_level); - if 
(unlikely(xe_vma_is_null(vma))) + /* + * NULL PTEs redirect to scratch page (return zeros on read). + * Set for: 1) explicit null VMAs, 2) purged BOs on scratch VMs. + * Never set NULL flag without scratch page - causes undefined behavior. + */ + if (unlikely(xe_vma_is_null(vma) || + (bo && xe_bo_is_purged(bo) && xe_vm_has_scratch(vm)))) pte |= XE_PTE_NULL; return pte; @@ -2751,6 +2771,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, .atomic_access = DRM_XE_ATOMIC_UNDEFINED, .default_pat_index = op->map.pat_index, .pat_index = op->map.pat_index, + .purgeable_state = XE_MADV_PURGEABLE_WILLNEED, }; flags |= op->map.vma_flags & XE_VMA_CREATE_MASK; @@ -2990,8 +3011,22 @@ static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm, } } +/** + * struct xe_vma_lock_and_validate_flags - Flags for vma_lock_and_validate() + * @res_evict: Allow evicting resources during validation + * @validate: Perform BO validation + * @request_decompress: Request BO decompression + * @check_purged: Reject operation if BO is purged + */ +struct xe_vma_lock_and_validate_flags { + u32 res_evict : 1; + u32 validate : 1; + u32 request_decompress : 1; + u32 check_purged : 1; +}; + static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma, - bool res_evict, bool validate, bool request_decompress) + struct xe_vma_lock_and_validate_flags flags) { struct xe_bo *bo = xe_vma_bo(vma); struct xe_vm *vm = xe_vma_vm(vma); @@ -3000,15 +3035,24 @@ static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma, if (bo) { if (!bo->vm) err = drm_exec_lock_obj(exec, &bo->ttm.base); - if (!err && validate) + + /* Reject new mappings to DONTNEED/purged BOs; allow cleanup operations */ + if (!err && flags.check_purged) { + if (xe_bo_madv_is_dontneed(bo)) + err = -EBUSY; /* BO marked purgeable */ + else if (xe_bo_is_purged(bo)) + err = -EINVAL; /* BO already purged */ + } + + if (!err && flags.validate) err = xe_bo_validate(bo, vm, 
xe_vm_allow_vm_eviction(vm) && - res_evict, exec); + flags.res_evict, exec); if (err) return err; - if (request_decompress) + if (flags.request_decompress) err = xe_bo_decompress(bo); } @@ -3102,10 +3146,14 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, case DRM_GPUVA_OP_MAP: if (!op->map.invalidate_on_bind) err = vma_lock_and_validate(exec, op->map.vma, - res_evict, - !xe_vm_in_fault_mode(vm) || - op->map.immediate, - op->map.request_decompress); + (struct xe_vma_lock_and_validate_flags) { + .res_evict = res_evict, + .validate = !xe_vm_in_fault_mode(vm) || + op->map.immediate, + .request_decompress = + op->map.request_decompress, + .check_purged = true, + }); break; case DRM_GPUVA_OP_REMAP: err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va)); @@ -3114,13 +3162,28 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, err = vma_lock_and_validate(exec, gpuva_to_vma(op->base.remap.unmap->va), - res_evict, false, false); + (struct xe_vma_lock_and_validate_flags) { + .res_evict = res_evict, + .validate = false, + .request_decompress = false, + .check_purged = false, + }); if (!err && op->remap.prev) err = vma_lock_and_validate(exec, op->remap.prev, - res_evict, true, false); + (struct xe_vma_lock_and_validate_flags) { + .res_evict = res_evict, + .validate = true, + .request_decompress = false, + .check_purged = true, + }); if (!err && op->remap.next) err = vma_lock_and_validate(exec, op->remap.next, - res_evict, true, false); + (struct xe_vma_lock_and_validate_flags) { + .res_evict = res_evict, + .validate = true, + .request_decompress = false, + .check_purged = true, + }); break; case DRM_GPUVA_OP_UNMAP: err = check_ufence(gpuva_to_vma(op->base.unmap.va)); @@ -3129,7 +3192,12 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, err = vma_lock_and_validate(exec, gpuva_to_vma(op->base.unmap.va), - res_evict, false, false); + (struct xe_vma_lock_and_validate_flags) { + .res_evict = res_evict, + .validate 
= false, + .request_decompress = false, + .check_purged = false, + }); break; case DRM_GPUVA_OP_PREFETCH: { @@ -3142,9 +3210,19 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, region <= ARRAY_SIZE(region_to_mem_type)); } + /* + * Prefetch attempts to migrate BO's backing store without + * repopulating it first. Purged BOs have no backing store + * to migrate, so reject the operation. + */ err = vma_lock_and_validate(exec, gpuva_to_vma(op->base.prefetch.va), - res_evict, false, false); + (struct xe_vma_lock_and_validate_flags) { + .res_evict = res_evict, + .validate = false, + .request_decompress = false, + .check_purged = true, + }); if (!err && !xe_vma_has_no_bo(vma)) err = xe_bo_migrate(xe_vma_bo(vma), region_to_mem_type[region], diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c index e564b12c02d9..66f00d3f5c07 100644 --- a/drivers/gpu/drm/xe/xe_vm_madvise.c +++ b/drivers/gpu/drm/xe/xe_vm_madvise.c @@ -13,6 +13,7 @@ #include "xe_pt.h" #include "xe_svm.h" #include "xe_tlb_inval.h" +#include "xe_vm.h" struct xe_vmas_in_madvise_range { u64 addr; @@ -26,6 +27,8 @@ struct xe_vmas_in_madvise_range { /** * struct xe_madvise_details - Argument to madvise_funcs * @dpagemap: Reference-counted pointer to a struct drm_pagemap. 
+ * @has_purged_bo: Track if any BO was purged (for purgeable state) + * @retained_ptr: User pointer for retained value (for purgeable state) * * The madvise IOCTL handler may, in addition to the user-space * args, have additional info to pass into the madvise_func that @@ -34,6 +37,8 @@ struct xe_vmas_in_madvise_range { */ struct xe_madvise_details { struct drm_pagemap *dpagemap; + bool has_purged_bo; + u64 retained_ptr; }; static int get_vmas(struct xe_vm *vm, struct xe_vmas_in_madvise_range *madvise_range) @@ -180,6 +185,222 @@ static void madvise_pat_index(struct xe_device *xe, struct xe_vm *vm, } } +/** + * xe_bo_is_dmabuf_shared() - Check if BO is shared via dma-buf + * @bo: Buffer object + * + * Prevent marking imported or exported dma-bufs as purgeable. + * For imported BOs, Xe doesn't own the backing store and cannot + * safely reclaim pages (exporter or other devices may still be + * using them). For exported BOs, external devices may have active + * mappings we cannot track. + * + * Return: true if BO is imported or exported, false otherwise + */ +static bool xe_bo_is_dmabuf_shared(struct xe_bo *bo) +{ + struct drm_gem_object *obj = &bo->ttm.base; + + /* Imported: exporter owns backing store */ + if (drm_gem_is_imported(obj)) + return true; + + /* Exported: external devices may be accessing */ + if (obj->dma_buf) + return true; + + return false; +} + +/** + * enum xe_bo_vmas_purge_state - VMA purgeable state aggregation + * + * Distinguishes whether a BO's VMAs are all DONTNEED, have at least + * one WILLNEED, or have no VMAs at all. + * + * Enum values align with XE_MADV_PURGEABLE_* states for consistency. 
+ */ +enum xe_bo_vmas_purge_state { + /** @XE_BO_VMAS_STATE_WILLNEED: At least one VMA is WILLNEED */ + XE_BO_VMAS_STATE_WILLNEED = 0, + /** @XE_BO_VMAS_STATE_DONTNEED: All VMAs are DONTNEED */ + XE_BO_VMAS_STATE_DONTNEED = 1, + /** @XE_BO_VMAS_STATE_NO_VMAS: BO has no VMAs */ + XE_BO_VMAS_STATE_NO_VMAS = 2, +}; + +/* + * xe_bo_recompute_purgeable_state() casts between xe_bo_vmas_purge_state and + * xe_madv_purgeable_state. Enforce that WILLNEED=0 and DONTNEED=1 match across + * both enums so the single-line cast is always valid. + */ +static_assert(XE_BO_VMAS_STATE_WILLNEED == (int)XE_MADV_PURGEABLE_WILLNEED, + "VMA purge state WILLNEED must equal madv purgeable WILLNEED"); +static_assert(XE_BO_VMAS_STATE_DONTNEED == (int)XE_MADV_PURGEABLE_DONTNEED, + "VMA purge state DONTNEED must equal madv purgeable DONTNEED"); + +/** + * xe_bo_all_vmas_dontneed() - Determine BO VMA purgeable state + * @bo: Buffer object + * + * Check all VMAs across all VMs to determine aggregate purgeable state. + * Shared BOs require unanimous DONTNEED state from all mappings. + * + * Caller must hold BO dma-resv lock. 
+ * + * Return: XE_BO_VMAS_STATE_DONTNEED if all VMAs are DONTNEED, + * XE_BO_VMAS_STATE_WILLNEED if at least one VMA is not DONTNEED, + * XE_BO_VMAS_STATE_NO_VMAS if BO has no VMAs + */ +static enum xe_bo_vmas_purge_state xe_bo_all_vmas_dontneed(struct xe_bo *bo) +{ + struct drm_gpuvm_bo *vm_bo; + struct drm_gpuva *gpuva; + struct drm_gem_object *obj = &bo->ttm.base; + bool has_vmas = false; + + xe_bo_assert_held(bo); + + /* Shared dma-bufs cannot be purgeable */ + if (xe_bo_is_dmabuf_shared(bo)) + return XE_BO_VMAS_STATE_WILLNEED; + + drm_gem_for_each_gpuvm_bo(vm_bo, obj) { + drm_gpuvm_bo_for_each_va(gpuva, vm_bo) { + struct xe_vma *vma = gpuva_to_vma(gpuva); + + has_vmas = true; + + /* Any non-DONTNEED VMA prevents purging */ + if (vma->attr.purgeable_state != XE_MADV_PURGEABLE_DONTNEED) + return XE_BO_VMAS_STATE_WILLNEED; + } + } + + /* + * No VMAs => preserve existing BO purgeable state. + * Avoids incorrectly flipping DONTNEED -> WILLNEED when last VMA unmapped. + */ + if (!has_vmas) + return XE_BO_VMAS_STATE_NO_VMAS; + + return XE_BO_VMAS_STATE_DONTNEED; +} + +/** + * xe_bo_recompute_purgeable_state() - Recompute BO purgeable state from VMAs + * @bo: Buffer object + * + * Walk all VMAs to determine if BO should be purgeable or not. + * Shared BOs require unanimous DONTNEED state from all mappings. + * If the BO has no VMAs the existing state is preserved. + * + * Locking: Caller must hold BO dma-resv lock. When iterating GPUVM lists, + * VM lock must also be held (write) to prevent concurrent VMA modifications. + * This is satisfied at both call sites: + * - xe_vma_destroy(): holds vm->lock write + * - madvise_purgeable(): holds vm->lock write (from madvise ioctl path) + * + * Return: nothing + */ +void xe_bo_recompute_purgeable_state(struct xe_bo *bo) +{ + enum xe_bo_vmas_purge_state vma_state; + + if (!bo) + return; + + xe_bo_assert_held(bo); + + /* + * Once purged, always purged. Cannot transition back to WILLNEED. 
+ * This matches i915 semantics where purged BOs are permanently invalid. + */ + if (bo->madv_purgeable == XE_MADV_PURGEABLE_PURGED) + return; + + vma_state = xe_bo_all_vmas_dontneed(bo); + + if (vma_state != (enum xe_bo_vmas_purge_state)bo->madv_purgeable && + vma_state != XE_BO_VMAS_STATE_NO_VMAS) + xe_bo_set_purgeable_state(bo, (enum xe_madv_purgeable_state)vma_state); +} + +/** + * madvise_purgeable - Handle purgeable buffer object advice + * @xe: XE device + * @vm: VM + * @vmas: Array of VMAs + * @num_vmas: Number of VMAs + * @op: Madvise operation + * @details: Madvise details for return values + * + * Handles DONTNEED/WILLNEED/PURGED states. Tracks if any BO was purged + * in details->has_purged_bo for later copy to userspace. + */ +static void madvise_purgeable(struct xe_device *xe, struct xe_vm *vm, + struct xe_vma **vmas, int num_vmas, + struct drm_xe_madvise *op, + struct xe_madvise_details *details) +{ + int i; + + xe_assert(vm->xe, op->type == DRM_XE_VMA_ATTR_PURGEABLE_STATE); + + for (i = 0; i < num_vmas; i++) { + struct xe_bo *bo = xe_vma_bo(vmas[i]); + + if (!bo) { + /* Purgeable state applies to BOs only, skip non-BO VMAs */ + vmas[i]->skip_invalidation = true; + continue; + } + + /* BO must be locked before modifying madv state */ + xe_bo_assert_held(bo); + + /* Skip shared dma-bufs - no PTEs to zap */ + if (xe_bo_is_dmabuf_shared(bo)) { + vmas[i]->skip_invalidation = true; + continue; + } + + /* + * Once purged, always purged. Cannot transition back to WILLNEED. + * This matches i915 semantics where purged BOs are permanently invalid. 
+ */ + if (xe_bo_is_purged(bo)) { + details->has_purged_bo = true; + vmas[i]->skip_invalidation = true; + continue; + } + + switch (op->purge_state_val.val) { + case DRM_XE_VMA_PURGEABLE_STATE_WILLNEED: + vmas[i]->attr.purgeable_state = XE_MADV_PURGEABLE_WILLNEED; + vmas[i]->skip_invalidation = true; + + xe_bo_recompute_purgeable_state(bo); + break; + case DRM_XE_VMA_PURGEABLE_STATE_DONTNEED: + vmas[i]->attr.purgeable_state = XE_MADV_PURGEABLE_DONTNEED; + /* + * Don't zap PTEs at DONTNEED time -- pages are still + * alive. The zap happens in xe_bo_move_notify() right + * before the shrinker frees them. + */ + vmas[i]->skip_invalidation = true; + + xe_bo_recompute_purgeable_state(bo); + break; + default: + /* Should never hit - values validated in madvise_args_are_sane() */ + xe_assert(vm->xe, 0); + return; + } + } +} + typedef void (*madvise_func)(struct xe_device *xe, struct xe_vm *vm, struct xe_vma **vmas, int num_vmas, struct drm_xe_madvise *op, @@ -189,6 +410,7 @@ static const madvise_func madvise_funcs[] = { [DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC] = madvise_preferred_mem_loc, [DRM_XE_MEM_RANGE_ATTR_ATOMIC] = madvise_atomic, [DRM_XE_MEM_RANGE_ATTR_PAT] = madvise_pat_index, + [DRM_XE_VMA_ATTR_PURGEABLE_STATE] = madvise_purgeable, }; static u8 xe_zap_ptes_in_madvise_range(struct xe_vm *vm, u64 start, u64 end) @@ -319,6 +541,19 @@ static bool madvise_args_are_sane(struct xe_device *xe, const struct drm_xe_madv return false; break; } + case DRM_XE_VMA_ATTR_PURGEABLE_STATE: + { + u32 val = args->purge_state_val.val; + + if (XE_IOCTL_DBG(xe, !(val == DRM_XE_VMA_PURGEABLE_STATE_WILLNEED || + val == DRM_XE_VMA_PURGEABLE_STATE_DONTNEED))) + return false; + + if (XE_IOCTL_DBG(xe, args->purge_state_val.pad)) + return false; + + break; + } default: if (XE_IOCTL_DBG(xe, 1)) return false; @@ -337,6 +572,12 @@ static int xe_madvise_details_init(struct xe_vm *vm, const struct drm_xe_madvise memset(details, 0, sizeof(*details)); + /* Store retained pointer for purgeable state */ 
+ if (args->type == DRM_XE_VMA_ATTR_PURGEABLE_STATE) { + details->retained_ptr = args->purge_state_val.retained_ptr; + return 0; + } + if (args->type == DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC) { int fd = args->preferred_mem_loc.devmem_fd; struct drm_pagemap *dpagemap; @@ -365,6 +606,21 @@ static void xe_madvise_details_fini(struct xe_madvise_details *details) drm_pagemap_put(details->dpagemap); } +static int xe_madvise_purgeable_retained_to_user(const struct xe_madvise_details *details) +{ + u32 retained; + + if (!details->retained_ptr) + return 0; + + retained = !details->has_purged_bo; + + if (put_user(retained, (u32 __user *)u64_to_user_ptr(details->retained_ptr))) + return -EFAULT; + + return 0; +} + static bool check_bo_args_are_sane(struct xe_vm *vm, struct xe_vma **vmas, int num_vmas, u32 atomic_val) { @@ -416,13 +672,21 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil struct xe_device *xe = to_xe_device(dev); struct xe_file *xef = to_xe_file(file); struct drm_xe_madvise *args = data; - struct xe_vmas_in_madvise_range madvise_range = {.addr = args->start, - .range = args->range, }; + struct xe_vmas_in_madvise_range madvise_range = { + /* + * Userspace may pass canonical (sign-extended) addresses. + * Strip the sign extension to get the internal non-canonical + * form used by the GPUVM, matching xe_vm_bind_ioctl() behavior. 
+ */ + .addr = xe_device_uncanonicalize_addr(xe, args->start), + .range = args->range, + }; struct xe_madvise_details details; u16 pat_index, coh_mode; struct xe_vm *vm; struct drm_exec exec; int err, attr_type; + bool do_retained; vm = xe_vm_lookup(xef, args->vm_id); if (XE_IOCTL_DBG(xe, !vm)) @@ -433,6 +697,25 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil goto put_vm; } + /* Cache whether we need to write retained, and validate it's initialized to 0 */ + do_retained = args->type == DRM_XE_VMA_ATTR_PURGEABLE_STATE && + args->purge_state_val.retained_ptr; + if (do_retained) { + u32 retained; + u32 __user *retained_ptr; + + retained_ptr = u64_to_user_ptr(args->purge_state_val.retained_ptr); + if (get_user(retained, retained_ptr)) { + err = -EFAULT; + goto put_vm; + } + + if (XE_IOCTL_DBG(xe, retained != 0)) { + err = -EINVAL; + goto put_vm; + } + } + xe_svm_flush(vm); err = down_write_killable(&vm->lock); @@ -448,7 +731,7 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil if (err) goto unlock_vm; - err = xe_vm_alloc_madvise_vma(vm, args->start, args->range); + err = xe_vm_alloc_madvise_vma(vm, madvise_range.addr, args->range); if (err) goto madv_fini; @@ -510,10 +793,18 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil } attr_type = array_index_nospec(args->type, ARRAY_SIZE(madvise_funcs)); + + /* Ensure the madvise function exists for this type */ + if (!madvise_funcs[attr_type]) { + err = -EINVAL; + goto err_fini; + } + madvise_funcs[attr_type](xe, vm, madvise_range.vmas, madvise_range.num_vmas, args, &details); - err = xe_vm_invalidate_madvise_range(vm, args->start, args->start + args->range); + err = xe_vm_invalidate_madvise_range(vm, madvise_range.addr, + madvise_range.addr + args->range); if (madvise_range.has_svm_userptr_vmas) xe_svm_notifier_unlock(vm); @@ -528,6 +819,10 @@ madv_fini: xe_madvise_details_fini(&details); unlock_vm: up_write(&vm->lock); + + 
/* Write retained value to user after releasing all locks */ + if (!err && do_retained) + err = xe_madvise_purgeable_retained_to_user(&details); put_vm: xe_vm_put(vm); return err; diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.h b/drivers/gpu/drm/xe/xe_vm_madvise.h index b0e1fc445f23..39acd2689ca0 100644 --- a/drivers/gpu/drm/xe/xe_vm_madvise.h +++ b/drivers/gpu/drm/xe/xe_vm_madvise.h @@ -8,8 +8,11 @@ struct drm_device; struct drm_file; +struct xe_bo; int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +void xe_bo_recompute_purgeable_state(struct xe_bo *bo); + #endif diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 3ab2cef25426..a94827d7fbec 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -96,6 +96,17 @@ struct xe_vma_mem_attr { * same as default_pat_index unless overwritten by madvise. */ u16 pat_index; + + /** + * @purgeable_state: Purgeable hint for this VMA mapping + * + * Per-VMA purgeable state from madvise. Valid states are WILLNEED (0) + * or DONTNEED (1). Shared BOs require all VMAs to be DONTNEED before + * the BO can be purged. PURGED state exists only at BO level. + * + * Protected by BO dma-resv lock. Set via DRM_IOCTL_XE_MADVISE. 
+ */ + u32 purgeable_state; }; struct xe_vma { diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 6c99514a85e1..ae2fda23ce7c 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -432,6 +432,7 @@ struct drm_xe_query_config { #define DRM_XE_QUERY_CONFIG_FLAG_HAS_CPU_ADDR_MIRROR (1 << 2) #define DRM_XE_QUERY_CONFIG_FLAG_HAS_NO_COMPRESSION_HINT (1 << 3) #define DRM_XE_QUERY_CONFIG_FLAG_HAS_DISABLE_STATE_CACHE_PERF_FIX (1 << 4) + #define DRM_XE_QUERY_CONFIG_FLAG_HAS_PURGING_SUPPORT (1 << 5) #define DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT 2 #define DRM_XE_QUERY_CONFIG_VA_BITS 3 #define DRM_XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY 4 @@ -2171,6 +2172,7 @@ struct drm_xe_query_eu_stall { * - DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC: Set preferred memory location. * - DRM_XE_MEM_RANGE_ATTR_ATOMIC: Set atomic access policy. * - DRM_XE_MEM_RANGE_ATTR_PAT: Set page attribute table index. + * - DRM_XE_VMA_ATTR_PURGEABLE_STATE: Set purgeable state for BOs. * * Example: * @@ -2203,6 +2205,7 @@ struct drm_xe_madvise { #define DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC 0 #define DRM_XE_MEM_RANGE_ATTR_ATOMIC 1 #define DRM_XE_MEM_RANGE_ATTR_PAT 2 +#define DRM_XE_VMA_ATTR_PURGEABLE_STATE 3 /** @type: type of attribute */ __u32 type; @@ -2293,6 +2296,72 @@ struct drm_xe_madvise { /** @pat_index.reserved: Reserved */ __u64 reserved; } pat_index; + + /** + * @purge_state_val: Purgeable state configuration + * + * Used when @type == DRM_XE_VMA_ATTR_PURGEABLE_STATE. + * + * Configures the purgeable state of buffer objects in the specified + * virtual address range. This allows applications to hint to the kernel + * about the BO's usage patterns for better memory management. + * + * By default all VMAs are in WILLNEED state. + * + * Supported values for @purge_state_val.val: + * - DRM_XE_VMA_PURGEABLE_STATE_WILLNEED (0): Marks BO as needed. 
+ * If the BO was previously purged, the kernel sets the __u32 at + * @retained_ptr to 0 (backing store lost) so the application knows + * it must recreate the BO. + * + * - DRM_XE_VMA_PURGEABLE_STATE_DONTNEED (1): Marks BO as not currently + * needed. The kernel may purge it under memory pressure to reclaim memory. + * Only applies to non-shared BOs. The kernel sets the __u32 at + * @retained_ptr to 1 if the backing store still exists (not yet purged), + * or 0 if it was already purged. + * + * Important: Once marked as DONTNEED, touching the BO's memory + * is undefined behavior. It may succeed temporarily (before the + * kernel purges the backing store) but will suddenly fail once + * the BO transitions to PURGED state. + * + * To transition back: use WILLNEED and check @retained_ptr — + * if 0, backing store was lost and the BO must be recreated. + * + * The following operations are blocked in DONTNEED state to + * prevent the BO from being re-mapped after madvise: + * - New mmap() calls: Fail with -EBUSY + * - VM_BIND operations: Fail with -EBUSY + * - New dma-buf exports: Fail with -EBUSY + * - CPU page faults (existing mmap): Fail with SIGBUS + * - GPU page faults (fault-mode VMs): Fail with -EACCES + */ + struct { +#define DRM_XE_VMA_PURGEABLE_STATE_WILLNEED 0 +#define DRM_XE_VMA_PURGEABLE_STATE_DONTNEED 1 + /** @purge_state_val.val: value for DRM_XE_VMA_ATTR_PURGEABLE_STATE */ + __u32 val; + + /** @purge_state_val.pad: Must be zero (MBZ) */ + __u32 pad; + /** + * @purge_state_val.retained_ptr: Pointer to a __u32 output + * field for backing store status. + * + * Userspace must initialize the __u32 value at this address + * to 0 before the ioctl. Kernel writes a __u32 after the + * operation: + * - 1 if backing store exists (not purged) + * - 0 if backing store was purged + * + * If userspace fails to initialize to 0, ioctl returns -EINVAL. + * This ensures a safe default (0 = assume purged) if the kernel + * cannot write the result. 
+ * + * Similar to i915's drm_i915_gem_madvise.retained field. + */ + __u64 retained_ptr; + } purge_state_val; }; /** @reserved: Reserved */ |
