summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Dave Airlie <airlied@redhat.com> 2026-03-30 06:04:57 +1000
committer: Dave Airlie <airlied@redhat.com> 2026-03-30 06:04:59 +1000
commit: a51973c5dff8a0f01cc7d1b2007306ea0004fa16 (patch)
tree: dd854810489a6d1ab7373c965bb97fae840fb26b
parent: 0d270f0df6170fa56eefbb836577c961a791b1fd (diff)
parent: 05c8b1cdc54036465ea457a0501a8c2f9409fce7 (diff)
Merge tag 'drm-xe-next-2026-03-26-1' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-next
Hi Dave and Sima,

Here goes our late, final drm-xe-next PR towards 7.1. We just got the purgeable BO uAPI in today, hence the late pull.

In the big things we have:
- Add support for purgeable buffer objects

Thanks,
Matt

UAPI Changes:
- Add support for purgeable buffer objects (Arvind, Himal)

Driver Changes:
- Remove useless comment (Maarten)
- Issue GGTT invalidation under lock in ggtt_node_remove (Brost, Fixes)
- Fix mismatched include guards in header files (Shuicheng)

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Matthew Brost <matthew.brost@intel.com>
Link: https://patch.msgid.link/acX4fWxPkZrrfwnT@gsse-cloud1.jf.intel.com
-rw-r--r-- drivers/gpu/drm/xe/xe_bo.c 194
-rw-r--r-- drivers/gpu/drm/xe/xe_bo.h 58
-rw-r--r-- drivers/gpu/drm/xe/xe_bo_types.h 6
-rw-r--r-- drivers/gpu/drm/xe/xe_device.c 3
-rw-r--r-- drivers/gpu/drm/xe/xe_dma_buf.c 24
-rw-r--r-- drivers/gpu/drm/xe/xe_ggtt.c 9
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_idle_types.h 6
-rw-r--r-- drivers/gpu/drm/xe/xe_guc_exec_queue_types.h 4
-rw-r--r-- drivers/gpu/drm/xe/xe_heci_gsc.h 6
-rw-r--r-- drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h 4
-rw-r--r-- drivers/gpu/drm/xe/xe_late_bind_fw_types.h 4
-rw-r--r-- drivers/gpu/drm/xe/xe_pagefault.c 15
-rw-r--r-- drivers/gpu/drm/xe/xe_platform_types.h 4
-rw-r--r-- drivers/gpu/drm/xe/xe_pt.c 40
-rw-r--r-- drivers/gpu/drm/xe/xe_query.c 2
-rw-r--r-- drivers/gpu/drm/xe/xe_svm.c 1
-rw-r--r-- drivers/gpu/drm/xe/xe_tile_printk.h 4
-rw-r--r-- drivers/gpu/drm/xe/xe_vm.c 112
-rw-r--r-- drivers/gpu/drm/xe/xe_vm_madvise.c 303
-rw-r--r-- drivers/gpu/drm/xe/xe_vm_madvise.h 3
-rw-r--r-- drivers/gpu/drm/xe/xe_vm_types.h 11
-rw-r--r-- include/uapi/drm/xe_drm.h 69
22 files changed, 814 insertions, 68 deletions
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 7545d2fa3255..a7c2dc7f224c 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -838,6 +838,122 @@ static int xe_bo_move_notify(struct xe_bo *bo,
return 0;
}
+/**
+ * xe_bo_set_purgeable_shrinker() - Update shrinker accounting for purgeable state
+ * @bo: Buffer object
+ * @new_state: New purgeable state being set
+ *
+ * Transfers pages between shrinkable and purgeable buckets when the BO
+ * purgeable state changes. Called automatically from xe_bo_set_purgeable_state().
+ */
+static void xe_bo_set_purgeable_shrinker(struct xe_bo *bo,
+ enum xe_madv_purgeable_state new_state)
+{
+ struct ttm_buffer_object *ttm_bo = &bo->ttm;
+ struct ttm_tt *tt = ttm_bo->ttm;
+ struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
+ struct xe_ttm_tt *xe_tt;
+ long tt_pages;
+
+ xe_bo_assert_held(bo);
+
+ if (!tt || !ttm_tt_is_populated(tt))
+ return;
+
+ xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
+ tt_pages = tt->num_pages;
+
+ if (!xe_tt->purgeable && new_state == XE_MADV_PURGEABLE_DONTNEED) {
+ xe_tt->purgeable = true;
+ /* Transfer pages from shrinkable to purgeable count */
+ xe_shrinker_mod_pages(xe->mem.shrinker, -tt_pages, tt_pages);
+ } else if (xe_tt->purgeable && new_state == XE_MADV_PURGEABLE_WILLNEED) {
+ xe_tt->purgeable = false;
+ /* Transfer pages from purgeable to shrinkable count */
+ xe_shrinker_mod_pages(xe->mem.shrinker, tt_pages, -tt_pages);
+ }
+}
+
+/**
+ * xe_bo_set_purgeable_state() - Set BO purgeable state with validation
+ * @bo: Buffer object
+ * @new_state: New purgeable state
+ *
+ * Sets the purgeable state with lockdep assertions and validates state
+ * transitions. Once a BO is PURGED, it cannot transition to any other state.
+ * Invalid transitions are caught with xe_assert(). Shrinker page accounting
+ * is updated automatically.
+ */
+void xe_bo_set_purgeable_state(struct xe_bo *bo,
+ enum xe_madv_purgeable_state new_state)
+{
+ struct xe_device *xe = xe_bo_device(bo);
+
+ xe_bo_assert_held(bo);
+
+ /* Validate state is one of the known values */
+ xe_assert(xe, new_state == XE_MADV_PURGEABLE_WILLNEED ||
+ new_state == XE_MADV_PURGEABLE_DONTNEED ||
+ new_state == XE_MADV_PURGEABLE_PURGED);
+
+ /* Once purged, always purged - cannot transition out */
+ xe_assert(xe, !(bo->madv_purgeable == XE_MADV_PURGEABLE_PURGED &&
+ new_state != XE_MADV_PURGEABLE_PURGED));
+
+ bo->madv_purgeable = new_state;
+ xe_bo_set_purgeable_shrinker(bo, new_state);
+}
+
+/**
+ * xe_ttm_bo_purge() - Purge buffer object backing store
+ * @ttm_bo: The TTM buffer object to purge
+ * @ctx: TTM operation context
+ *
+ * This function purges the backing store of a BO marked as DONTNEED and
+ * triggers rebind to invalidate stale GPU mappings. For fault-mode VMs,
+ * this zaps the PTEs. The next GPU access will trigger a page fault and
+ * perform NULL rebind (scratch pages or clear PTEs based on VM config).
+ *
+ * Return: 0 on success, negative error code on failure
+ */
+static int xe_ttm_bo_purge(struct ttm_buffer_object *ttm_bo, struct ttm_operation_ctx *ctx)
+{
+ struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
+ struct ttm_placement place = {};
+ int ret;
+
+ xe_bo_assert_held(bo);
+
+ if (!ttm_bo->ttm)
+ return 0;
+
+ if (!xe_bo_madv_is_dontneed(bo))
+ return 0;
+
+ /*
+ * Use the standard pre-move hook so we share the same cleanup/invalidate
+ * path as migrations: drop any CPU vmap and schedule the necessary GPU
+ * unbind/rebind work.
+ *
+ * This must be called before ttm_bo_validate() frees the pages.
+ * May fail in no-wait contexts (fault/shrinker) or if the BO is
+ * pinned. Keep state unchanged on failure so we don't end up "PURGED"
+ * with stale mappings.
+ */
+ ret = xe_bo_move_notify(bo, ctx);
+ if (ret)
+ return ret;
+
+ ret = ttm_bo_validate(ttm_bo, &place, ctx);
+ if (ret)
+ return ret;
+
+ /* Commit the state transition only once invalidation was queued */
+ xe_bo_set_purgeable_state(bo, XE_MADV_PURGEABLE_PURGED);
+
+ return 0;
+}
+
static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
struct ttm_operation_ctx *ctx,
struct ttm_resource *new_mem,
@@ -857,6 +973,20 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
ttm && ttm_tt_is_populated(ttm)) ? true : false;
int ret = 0;
+ /*
+ * Purge only non-shared BOs explicitly marked DONTNEED by userspace.
+ * The move_notify callback will handle invalidation asynchronously.
+ */
+ if (evict && xe_bo_madv_is_dontneed(bo)) {
+ ret = xe_ttm_bo_purge(ttm_bo, ctx);
+ if (ret)
+ return ret;
+
+ /* Free the unused eviction destination resource */
+ ttm_resource_free(ttm_bo, &new_mem);
+ return 0;
+ }
+
/* Bo creation path, moving to system or TT. */
if ((!old_mem && ttm) && !handle_system_ccs) {
if (new_mem->mem_type == XE_PL_TT)
@@ -1154,6 +1284,9 @@ long xe_bo_shrink(struct ttm_operation_ctx *ctx, struct ttm_buffer_object *bo,
lret = xe_bo_move_notify(xe_bo, ctx);
if (!lret)
lret = xe_bo_shrink_purge(ctx, bo, scanned);
+ if (lret > 0 && xe_bo_madv_is_dontneed(xe_bo))
+ xe_bo_set_purgeable_state(xe_bo,
+ XE_MADV_PURGEABLE_PURGED);
goto out_unref;
}
@@ -1606,18 +1739,6 @@ static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object *ttm_bo)
}
}
-static void xe_ttm_bo_purge(struct ttm_buffer_object *ttm_bo, struct ttm_operation_ctx *ctx)
-{
- struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
-
- if (ttm_bo->ttm) {
- struct ttm_placement place = {};
- int ret = ttm_bo_validate(ttm_bo, &place, ctx);
-
- drm_WARN_ON(&xe->drm, ret);
- }
-}
-
static void xe_ttm_bo_swap_notify(struct ttm_buffer_object *ttm_bo)
{
struct ttm_operation_ctx ctx = {
@@ -1902,6 +2023,16 @@ static vm_fault_t xe_bo_cpu_fault_fastpath(struct vm_fault *vmf, struct xe_devic
if (!dma_resv_trylock(tbo->base.resv))
goto out_validation;
+ /*
+ * Reject CPU faults to purgeable BOs. DONTNEED BOs can be purged
+ * at any time, and purged BOs have no backing store. Either case
+ * is undefined behavior for CPU access.
+ */
+ if (xe_bo_madv_is_dontneed(bo) || xe_bo_is_purged(bo)) {
+ ret = VM_FAULT_SIGBUS;
+ goto out_unlock;
+ }
+
if (xe_ttm_bo_is_imported(tbo)) {
ret = VM_FAULT_SIGBUS;
drm_dbg(&xe->drm, "CPU trying to access an imported buffer object.\n");
@@ -1992,6 +2123,15 @@ static vm_fault_t xe_bo_cpu_fault(struct vm_fault *vmf)
if (err)
break;
+ /*
+ * Reject CPU faults to purgeable BOs. DONTNEED BOs can be
+ * purged at any time, and purged BOs have no backing store.
+ */
+ if (xe_bo_madv_is_dontneed(bo) || xe_bo_is_purged(bo)) {
+ err = -EFAULT;
+ break;
+ }
+
if (xe_ttm_bo_is_imported(tbo)) {
err = -EFAULT;
drm_dbg(&xe->drm, "CPU trying to access an imported buffer object.\n");
@@ -2069,10 +2209,35 @@ static const struct vm_operations_struct xe_gem_vm_ops = {
.access = xe_bo_vm_access,
};
+static int xe_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
+{
+ struct xe_bo *bo = gem_to_xe_bo(obj);
+ int err = 0;
+
+ /*
+ * Reject mmap of purgeable BOs. DONTNEED BOs can be purged
+ * at any time, making CPU access undefined behavior. Purged BOs have
+ * no backing store and are permanently invalid.
+ */
+ err = xe_bo_lock(bo, true);
+ if (err)
+ return err;
+
+ if (xe_bo_madv_is_dontneed(bo))
+ err = -EBUSY;
+ else if (xe_bo_is_purged(bo))
+ err = -EINVAL;
+ xe_bo_unlock(bo);
+ if (err)
+ return err;
+
+ return drm_gem_ttm_mmap(obj, vma);
+}
+
static const struct drm_gem_object_funcs xe_gem_object_funcs = {
.free = xe_gem_object_free,
.close = xe_gem_object_close,
- .mmap = drm_gem_ttm_mmap,
+ .mmap = xe_gem_object_mmap,
.export = xe_gem_prime_export,
.vm_ops = &xe_gem_vm_ops,
};
@@ -2198,6 +2363,9 @@ struct xe_bo *xe_bo_init_locked(struct xe_device *xe, struct xe_bo *bo,
#endif
INIT_LIST_HEAD(&bo->vram_userfault_link);
+ /* Initialize purge advisory state */
+ bo->madv_purgeable = XE_MADV_PURGEABLE_WILLNEED;
+
drm_gem_private_object_init(&xe->drm, &bo->ttm.base, size);
if (resv) {
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index a0ad846e9450..68dea7d25a6b 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -87,6 +87,28 @@
#define XE_PCI_BARRIER_MMAP_OFFSET (0x50 << XE_PTE_SHIFT)
+/**
+ * enum xe_madv_purgeable_state - Buffer object purgeable state enumeration
+ *
+ * This enum defines the possible purgeable states for a buffer object,
+ * allowing userspace to provide memory usage hints to the kernel for
+ * better memory management under pressure.
+ *
+ * @XE_MADV_PURGEABLE_WILLNEED: The buffer object is needed and should not be purged.
+ * This is the default state.
+ * @XE_MADV_PURGEABLE_DONTNEED: The buffer object is not currently needed and can be
+ * purged by the kernel under memory pressure.
+ * @XE_MADV_PURGEABLE_PURGED: The buffer object has been purged by the kernel.
+ *
+ * Accessing a purged buffer will result in an error. Per i915 semantics,
+ * once purged, a BO remains permanently invalid and must be destroyed and recreated.
+ */
+enum xe_madv_purgeable_state {
+ XE_MADV_PURGEABLE_WILLNEED,
+ XE_MADV_PURGEABLE_DONTNEED,
+ XE_MADV_PURGEABLE_PURGED,
+};
+
struct sg_table;
struct xe_bo *xe_bo_alloc(void);
@@ -215,6 +237,42 @@ static inline bool xe_bo_is_protected(const struct xe_bo *bo)
return bo->pxp_key_instance;
}
+/**
+ * xe_bo_is_purged() - Check if buffer object has been purged
+ * @bo: The buffer object to check
+ *
+ * Checks if the buffer object's backing store has been discarded by the
+ * kernel due to memory pressure after being marked as purgeable (DONTNEED).
+ * Once purged, the BO cannot be restored and any attempt to use it will fail.
+ *
+ * Context: Caller must hold the BO's dma-resv lock
+ * Return: true if the BO has been purged, false otherwise
+ */
+static inline bool xe_bo_is_purged(struct xe_bo *bo)
+{
+ xe_bo_assert_held(bo);
+ return bo->madv_purgeable == XE_MADV_PURGEABLE_PURGED;
+}
+
+/**
+ * xe_bo_madv_is_dontneed() - Check if BO is marked as DONTNEED
+ * @bo: The buffer object to check
+ *
+ * Checks if userspace has marked this BO as DONTNEED (i.e., its contents
+ * are not currently needed and can be discarded under memory pressure).
+ * This is used internally to decide whether a BO is eligible for purging.
+ *
+ * Context: Caller must hold the BO's dma-resv lock
+ * Return: true if the BO is marked DONTNEED, false otherwise
+ */
+static inline bool xe_bo_madv_is_dontneed(struct xe_bo *bo)
+{
+ xe_bo_assert_held(bo);
+ return bo->madv_purgeable == XE_MADV_PURGEABLE_DONTNEED;
+}
+
+void xe_bo_set_purgeable_state(struct xe_bo *bo, enum xe_madv_purgeable_state new_state);
+
static inline void xe_bo_unpin_map_no_vm(struct xe_bo *bo)
{
if (likely(bo)) {
diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
index d4fe3c8dca5b..ff8317bfc1ae 100644
--- a/drivers/gpu/drm/xe/xe_bo_types.h
+++ b/drivers/gpu/drm/xe/xe_bo_types.h
@@ -108,6 +108,12 @@ struct xe_bo {
* from default
*/
u64 min_align;
+
+ /**
+ * @madv_purgeable: userspace advice on BO purgeability, protected
+ * by BO's dma-resv lock.
+ */
+ u32 madv_purgeable;
};
#endif
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index b17d4a878686..9b0540e3e289 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -390,9 +390,6 @@ bool xe_is_xe_file(const struct file *file)
}
static struct drm_driver driver = {
- /* Don't use MTRRs here; the Xserver or userspace app should
- * deal with them for Intel hardware.
- */
.driver_features =
DRIVER_GEM |
DRIVER_RENDER | DRIVER_SYNCOBJ |
diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c
index ea370cd373e9..7f9602b3363d 100644
--- a/drivers/gpu/drm/xe/xe_dma_buf.c
+++ b/drivers/gpu/drm/xe/xe_dma_buf.c
@@ -223,6 +223,26 @@ struct dma_buf *xe_gem_prime_export(struct drm_gem_object *obj, int flags)
if (bo->vm)
return ERR_PTR(-EPERM);
+ /*
+ * Reject exporting purgeable BOs. DONTNEED BOs can be purged
+ * at any time, making the exported dma-buf unusable. Purged BOs
+ * have no backing store and are permanently invalid.
+ */
+ ret = xe_bo_lock(bo, true);
+ if (ret)
+ return ERR_PTR(ret);
+
+ if (xe_bo_madv_is_dontneed(bo)) {
+ ret = -EBUSY;
+ goto out_unlock;
+ }
+
+ if (xe_bo_is_purged(bo)) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+ xe_bo_unlock(bo);
+
ret = ttm_bo_setup_export(&bo->ttm, &ctx);
if (ret)
return ERR_PTR(ret);
@@ -232,6 +252,10 @@ struct dma_buf *xe_gem_prime_export(struct drm_gem_object *obj, int flags)
buf->ops = &xe_dmabuf_ops;
return buf;
+
+out_unlock:
+ xe_bo_unlock(bo);
+ return ERR_PTR(ret);
}
static struct drm_gem_object *
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index 21071b64b09d..a848d1a41b9b 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -481,15 +481,10 @@ static void ggtt_node_remove(struct xe_ggtt_node *node)
xe_ggtt_clear(ggtt, xe_ggtt_node_addr(node), xe_ggtt_node_size(node));
drm_mm_remove_node(&node->base);
node->base.size = 0;
- mutex_unlock(&ggtt->lock);
-
- if (!bound)
- goto free_node;
-
- if (node->invalidate_on_remove)
+ if (bound && node->invalidate_on_remove)
xe_ggtt_invalidate(ggtt);
+ mutex_unlock(&ggtt->lock);
-free_node:
ggtt_node_fini(node);
}
diff --git a/drivers/gpu/drm/xe/xe_gt_idle_types.h b/drivers/gpu/drm/xe/xe_gt_idle_types.h
index a3667c567f8a..5b5d2e835102 100644
--- a/drivers/gpu/drm/xe/xe_gt_idle_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_idle_types.h
@@ -3,8 +3,8 @@
* Copyright © 2023 Intel Corporation
*/
-#ifndef _XE_GT_IDLE_SYSFS_TYPES_H_
-#define _XE_GT_IDLE_SYSFS_TYPES_H_
+#ifndef _XE_GT_IDLE_TYPES_H_
+#define _XE_GT_IDLE_TYPES_H_
#include <linux/spinlock.h>
#include <linux/types.h>
@@ -40,4 +40,4 @@ struct xe_gt_idle {
u64 (*idle_residency)(struct xe_guc_pc *pc);
};
-#endif /* _XE_GT_IDLE_SYSFS_TYPES_H_ */
+#endif /* _XE_GT_IDLE_TYPES_H_ */
diff --git a/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h b/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h
index fd0915ed8eb1..e5e53b421f29 100644
--- a/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h
@@ -3,8 +3,8 @@
* Copyright © 2022 Intel Corporation
*/
-#ifndef _XE_GUC_ENGINE_TYPES_H_
-#define _XE_GUC_ENGINE_TYPES_H_
+#ifndef _XE_GUC_EXEC_QUEUE_TYPES_H_
+#define _XE_GUC_EXEC_QUEUE_TYPES_H_
#include <linux/spinlock.h>
#include <linux/workqueue.h>
diff --git a/drivers/gpu/drm/xe/xe_heci_gsc.h b/drivers/gpu/drm/xe/xe_heci_gsc.h
index 745eb6783942..a76f4122b778 100644
--- a/drivers/gpu/drm/xe/xe_heci_gsc.h
+++ b/drivers/gpu/drm/xe/xe_heci_gsc.h
@@ -2,8 +2,8 @@
/*
* Copyright(c) 2023, Intel Corporation. All rights reserved.
*/
-#ifndef __XE_HECI_GSC_DEV_H__
-#define __XE_HECI_GSC_DEV_H__
+#ifndef _XE_HECI_GSC_H_
+#define _XE_HECI_GSC_H_
#include <linux/types.h>
@@ -37,4 +37,4 @@ int xe_heci_gsc_init(struct xe_device *xe);
void xe_heci_gsc_irq_handler(struct xe_device *xe, u32 iir);
void xe_heci_csc_irq_handler(struct xe_device *xe, u32 iir);
-#endif /* __XE_HECI_GSC_DEV_H__ */
+#endif /* _XE_HECI_GSC_H_ */
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h
index 28a0d7c909c0..37225c1ae528 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h
+++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h
@@ -3,8 +3,8 @@
* Copyright © 2023 Intel Corporation
*/
-#ifndef _XE_ENGINE_CLASS_SYSFS_H_
-#define _XE_ENGINE_CLASS_SYSFS_H_
+#ifndef _XE_HW_ENGINE_CLASS_SYSFS_H_
+#define _XE_HW_ENGINE_CLASS_SYSFS_H_
#include <linux/kobject.h>
diff --git a/drivers/gpu/drm/xe/xe_late_bind_fw_types.h b/drivers/gpu/drm/xe/xe_late_bind_fw_types.h
index 2a8a985c37e7..7fdb24e810b3 100644
--- a/drivers/gpu/drm/xe/xe_late_bind_fw_types.h
+++ b/drivers/gpu/drm/xe/xe_late_bind_fw_types.h
@@ -3,8 +3,8 @@
* Copyright © 2025 Intel Corporation
*/
-#ifndef _XE_LATE_BIND_TYPES_H_
-#define _XE_LATE_BIND_TYPES_H_
+#ifndef _XE_LATE_BIND_FW_TYPES_H_
+#define _XE_LATE_BIND_FW_TYPES_H_
#include <linux/iosys-map.h>
#include <linux/mutex.h>
diff --git a/drivers/gpu/drm/xe/xe_pagefault.c b/drivers/gpu/drm/xe/xe_pagefault.c
index 2fd55d7c98f9..c2d0a6c34917 100644
--- a/drivers/gpu/drm/xe/xe_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_pagefault.c
@@ -59,6 +59,19 @@ static int xe_pagefault_begin(struct drm_exec *exec, struct xe_vma *vma,
if (!bo)
return 0;
+ /*
+ * Skip validate/migrate for DONTNEED/purged BOs - repopulating
+ * their pages would prevent the shrinker from reclaiming them.
+ * For non-scratch VMs there is no safe fallback so fail the fault.
+ * For scratch VMs let xe_vma_rebind() run normally; it will install
+ * scratch PTEs so the GPU gets safe zero reads instead of faulting.
+ */
+ if (unlikely(xe_bo_madv_is_dontneed(bo) || xe_bo_is_purged(bo))) {
+ if (!xe_vm_has_scratch(vm))
+ return -EACCES;
+ return 0;
+ }
+
return need_vram_move ? xe_bo_migrate(bo, vram->placement, NULL, exec) :
xe_bo_validate(bo, vm, true, exec);
}
@@ -145,7 +158,7 @@ static struct xe_vm *xe_pagefault_asid_to_vm(struct xe_device *xe, u32 asid)
down_read(&xe->usm.lock);
vm = xa_load(&xe->usm.asid_to_vm, asid);
- if (vm && xe_vm_in_fault_mode(vm))
+ if (vm && (xe_vm_in_fault_mode(vm) || xe_vm_has_scratch(vm)))
xe_vm_get(vm);
else
vm = ERR_PTR(-EINVAL);
diff --git a/drivers/gpu/drm/xe/xe_platform_types.h b/drivers/gpu/drm/xe/xe_platform_types.h
index 6cff385227ea..3447848b74e3 100644
--- a/drivers/gpu/drm/xe/xe_platform_types.h
+++ b/drivers/gpu/drm/xe/xe_platform_types.h
@@ -3,8 +3,8 @@
* Copyright © 2022 Intel Corporation
*/
-#ifndef _XE_PLATFORM_INFO_TYPES_H_
-#define _XE_PLATFORM_INFO_TYPES_H_
+#ifndef _XE_PLATFORM_TYPES_H_
+#define _XE_PLATFORM_TYPES_H_
/*
* Keep this in graphics version based order and chronological order within a
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 713a303c9053..8e5f4f0dea3f 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -531,20 +531,26 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
/* Is this a leaf entry ?*/
if (level == 0 || xe_pt_hugepte_possible(addr, next, level, xe_walk)) {
struct xe_res_cursor *curs = xe_walk->curs;
- bool is_null = xe_vma_is_null(xe_walk->vma);
- bool is_vram = is_null ? false : xe_res_is_vram(curs);
+ struct xe_bo *bo = xe_vma_bo(xe_walk->vma);
+ bool is_null_or_purged = xe_vma_is_null(xe_walk->vma) ||
+ (bo && xe_bo_is_purged(bo));
+ bool is_vram = is_null_or_purged ? false : xe_res_is_vram(curs);
XE_WARN_ON(xe_walk->va_curs_start != addr);
if (xe_walk->clear_pt) {
pte = 0;
} else {
- pte = vm->pt_ops->pte_encode_vma(is_null ? 0 :
+ /*
+ * For purged BOs, treat like null VMAs - pass address 0.
+ * The pte_encode_vma will set XE_PTE_NULL flag for scratch mapping.
+ */
+ pte = vm->pt_ops->pte_encode_vma(is_null_or_purged ? 0 :
xe_res_dma(curs) +
xe_walk->dma_offset,
xe_walk->vma,
pat_index, level);
- if (!is_null)
+ if (!is_null_or_purged)
pte |= is_vram ? xe_walk->default_vram_pte :
xe_walk->default_system_pte;
@@ -568,7 +574,7 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
if (unlikely(ret))
return ret;
- if (!is_null && !xe_walk->clear_pt)
+ if (!is_null_or_purged && !xe_walk->clear_pt)
xe_res_next(curs, next - addr);
xe_walk->va_curs_start = next;
xe_walk->vma->gpuva.flags |= (XE_VMA_PTE_4K << level);
@@ -721,6 +727,26 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
};
struct xe_pt *pt = vm->pt_root[tile->id];
int ret;
+ bool is_purged = false;
+
+ /*
+ * Check if BO is purged:
+ * - Scratch VMs: Use scratch PTEs (XE_PTE_NULL) for safe zero reads
+ * - Non-scratch VMs: Clear PTEs to zero (non-present) to avoid mapping to phys addr 0
+ *
+ * For non-scratch VMs, we force clear_pt=true so leaf PTEs become completely
+ * zero instead of creating a PRESENT mapping to physical address 0.
+ */
+ if (bo && xe_bo_is_purged(bo)) {
+ is_purged = true;
+
+ /*
+ * For non-scratch VMs, a NULL rebind should use zero PTEs
+ * (non-present), not a present PTE to phys 0.
+ */
+ if (!xe_vm_has_scratch(vm))
+ xe_walk.clear_pt = true;
+ }
if (range) {
/* Move this entire thing to xe_svm.c? */
@@ -756,11 +782,11 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
}
xe_walk.default_vram_pte |= XE_PPGTT_PTE_DM;
- xe_walk.dma_offset = bo ? vram_region_gpu_offset(bo->ttm.resource) : 0;
+ xe_walk.dma_offset = (bo && !is_purged) ? vram_region_gpu_offset(bo->ttm.resource) : 0;
if (!range)
xe_bo_assert_held(bo);
- if (!xe_vma_is_null(vma) && !range) {
+ if (!xe_vma_is_null(vma) && !range && !is_purged) {
if (xe_vma_is_userptr(vma))
xe_res_first_dma(to_userptr_vma(vma)->userptr.pages.dma_addr, 0,
xe_vma_size(vma), &curs);
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index 4852fdcb4b95..d84d6a422c45 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -342,6 +342,8 @@ static int query_config(struct xe_device *xe, struct drm_xe_device_query *query)
DRM_XE_QUERY_CONFIG_FLAG_HAS_LOW_LATENCY;
config->info[DRM_XE_QUERY_CONFIG_FLAGS] |=
DRM_XE_QUERY_CONFIG_FLAG_HAS_DISABLE_STATE_CACHE_PERF_FIX;
+ config->info[DRM_XE_QUERY_CONFIG_FLAGS] |=
+ DRM_XE_QUERY_CONFIG_FLAG_HAS_PURGING_SUPPORT;
config->info[DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT] =
xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K;
config->info[DRM_XE_QUERY_CONFIG_VA_BITS] = xe->info.va_bits;
diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
index 0251098650af..5b627eed1eab 100644
--- a/drivers/gpu/drm/xe/xe_svm.c
+++ b/drivers/gpu/drm/xe/xe_svm.c
@@ -322,6 +322,7 @@ static void xe_vma_set_default_attributes(struct xe_vma *vma)
.preferred_loc.migration_policy = DRM_XE_MIGRATE_ALL_PAGES,
.pat_index = vma->attr.default_pat_index,
.atomic_access = DRM_XE_ATOMIC_UNDEFINED,
+ .purgeable_state = XE_MADV_PURGEABLE_WILLNEED,
};
xe_vma_mem_attr_copy(&vma->attr, &default_attr);
diff --git a/drivers/gpu/drm/xe/xe_tile_printk.h b/drivers/gpu/drm/xe/xe_tile_printk.h
index 63640a42685d..738433a764bd 100644
--- a/drivers/gpu/drm/xe/xe_tile_printk.h
+++ b/drivers/gpu/drm/xe/xe_tile_printk.h
@@ -3,8 +3,8 @@
* Copyright © 2025 Intel Corporation
*/
-#ifndef _xe_tile_printk_H_
-#define _xe_tile_printk_H_
+#ifndef _XE_TILE_PRINTK_H_
+#define _XE_TILE_PRINTK_H_
#include "xe_printk.h"
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index d96e0a0c5605..56e2db50bb36 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -40,6 +40,7 @@
#include "xe_tile.h"
#include "xe_tlb_inval.h"
#include "xe_trace_bo.h"
+#include "xe_vm_madvise.h"
#include "xe_wa.h"
static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
@@ -327,6 +328,7 @@ void xe_vm_kill(struct xe_vm *vm, bool unlocked)
static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
{
struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
+ struct xe_bo *bo = gem_to_xe_bo(vm_bo->obj);
struct drm_gpuva *gpuva;
int ret;
@@ -335,10 +337,16 @@ static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind,
&vm->rebind_list);
+ /* Skip re-populating purged BOs, rebind maps scratch pages. */
+ if (xe_bo_is_purged(bo)) {
+ vm_bo->evicted = false;
+ return 0;
+ }
+
if (!try_wait_for_completion(&vm->xe->pm_block))
return -EAGAIN;
- ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false, exec);
+ ret = xe_bo_validate(bo, vm, false, exec);
if (ret)
return ret;
@@ -1147,6 +1155,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
static void xe_vma_destroy_late(struct xe_vma *vma)
{
struct xe_vm *vm = xe_vma_vm(vma);
+ struct xe_bo *bo = xe_vma_bo(vma);
if (vma->ufence) {
xe_sync_ufence_put(vma->ufence);
@@ -1161,7 +1170,7 @@ static void xe_vma_destroy_late(struct xe_vma *vma)
} else if (xe_vma_is_null(vma) || xe_vma_is_cpu_addr_mirror(vma)) {
xe_vm_put(vm);
} else {
- xe_bo_put(xe_vma_bo(vma));
+ xe_bo_put(bo);
}
xe_vma_free(vma);
@@ -1187,6 +1196,7 @@ static void vma_destroy_cb(struct dma_fence *fence,
static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
{
struct xe_vm *vm = xe_vma_vm(vma);
+ struct xe_bo *bo = xe_vma_bo(vma);
lockdep_assert_held_write(&vm->lock);
xe_assert(vm->xe, list_empty(&vma->combined_links.destroy));
@@ -1195,9 +1205,10 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED);
xe_userptr_destroy(to_userptr_vma(vma));
} else if (!xe_vma_is_null(vma) && !xe_vma_is_cpu_addr_mirror(vma)) {
- xe_bo_assert_held(xe_vma_bo(vma));
+ xe_bo_assert_held(bo);
drm_gpuva_unlink(&vma->gpuva);
+ xe_bo_recompute_purgeable_state(bo);
}
xe_vm_assert_held(vm);
@@ -1427,6 +1438,9 @@ static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma,
u16 pat_index, u32 pt_level)
{
+ struct xe_bo *bo = xe_vma_bo(vma);
+ struct xe_vm *vm = xe_vma_vm(vma);
+
pte |= XE_PAGE_PRESENT;
if (likely(!xe_vma_read_only(vma)))
@@ -1435,7 +1449,13 @@ static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma,
pte |= pte_encode_pat_index(pat_index, pt_level);
pte |= pte_encode_ps(pt_level);
- if (unlikely(xe_vma_is_null(vma)))
+ /*
+ * NULL PTEs redirect to scratch page (return zeros on read).
+ * Set for: 1) explicit null VMAs, 2) purged BOs on scratch VMs.
+ * Never set NULL flag without scratch page - causes undefined behavior.
+ */
+ if (unlikely(xe_vma_is_null(vma) ||
+ (bo && xe_bo_is_purged(bo) && xe_vm_has_scratch(vm))))
pte |= XE_PTE_NULL;
return pte;
@@ -2751,6 +2771,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
.atomic_access = DRM_XE_ATOMIC_UNDEFINED,
.default_pat_index = op->map.pat_index,
.pat_index = op->map.pat_index,
+ .purgeable_state = XE_MADV_PURGEABLE_WILLNEED,
};
flags |= op->map.vma_flags & XE_VMA_CREATE_MASK;
@@ -2990,8 +3011,22 @@ static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
}
}
+/**
+ * struct xe_vma_lock_and_validate_flags - Flags for vma_lock_and_validate()
+ * @res_evict: Allow evicting resources during validation
+ * @validate: Perform BO validation
+ * @request_decompress: Request BO decompression
+ * @check_purged: Reject operation if BO is purged
+ */
+struct xe_vma_lock_and_validate_flags {
+ u32 res_evict : 1;
+ u32 validate : 1;
+ u32 request_decompress : 1;
+ u32 check_purged : 1;
+};
+
static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma,
- bool res_evict, bool validate, bool request_decompress)
+ struct xe_vma_lock_and_validate_flags flags)
{
struct xe_bo *bo = xe_vma_bo(vma);
struct xe_vm *vm = xe_vma_vm(vma);
@@ -3000,15 +3035,24 @@ static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma,
if (bo) {
if (!bo->vm)
err = drm_exec_lock_obj(exec, &bo->ttm.base);
- if (!err && validate)
+
+ /* Reject new mappings to DONTNEED/purged BOs; allow cleanup operations */
+ if (!err && flags.check_purged) {
+ if (xe_bo_madv_is_dontneed(bo))
+ err = -EBUSY; /* BO marked purgeable */
+ else if (xe_bo_is_purged(bo))
+ err = -EINVAL; /* BO already purged */
+ }
+
+ if (!err && flags.validate)
err = xe_bo_validate(bo, vm,
xe_vm_allow_vm_eviction(vm) &&
- res_evict, exec);
+ flags.res_evict, exec);
if (err)
return err;
- if (request_decompress)
+ if (flags.request_decompress)
err = xe_bo_decompress(bo);
}
@@ -3102,10 +3146,14 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
case DRM_GPUVA_OP_MAP:
if (!op->map.invalidate_on_bind)
err = vma_lock_and_validate(exec, op->map.vma,
- res_evict,
- !xe_vm_in_fault_mode(vm) ||
- op->map.immediate,
- op->map.request_decompress);
+ (struct xe_vma_lock_and_validate_flags) {
+ .res_evict = res_evict,
+ .validate = !xe_vm_in_fault_mode(vm) ||
+ op->map.immediate,
+ .request_decompress =
+ op->map.request_decompress,
+ .check_purged = true,
+ });
break;
case DRM_GPUVA_OP_REMAP:
err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va));
@@ -3114,13 +3162,28 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
err = vma_lock_and_validate(exec,
gpuva_to_vma(op->base.remap.unmap->va),
- res_evict, false, false);
+ (struct xe_vma_lock_and_validate_flags) {
+ .res_evict = res_evict,
+ .validate = false,
+ .request_decompress = false,
+ .check_purged = false,
+ });
if (!err && op->remap.prev)
err = vma_lock_and_validate(exec, op->remap.prev,
- res_evict, true, false);
+ (struct xe_vma_lock_and_validate_flags) {
+ .res_evict = res_evict,
+ .validate = true,
+ .request_decompress = false,
+ .check_purged = true,
+ });
if (!err && op->remap.next)
err = vma_lock_and_validate(exec, op->remap.next,
- res_evict, true, false);
+ (struct xe_vma_lock_and_validate_flags) {
+ .res_evict = res_evict,
+ .validate = true,
+ .request_decompress = false,
+ .check_purged = true,
+ });
break;
case DRM_GPUVA_OP_UNMAP:
err = check_ufence(gpuva_to_vma(op->base.unmap.va));
@@ -3129,7 +3192,12 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
err = vma_lock_and_validate(exec,
gpuva_to_vma(op->base.unmap.va),
- res_evict, false, false);
+ (struct xe_vma_lock_and_validate_flags) {
+ .res_evict = res_evict,
+ .validate = false,
+ .request_decompress = false,
+ .check_purged = false,
+ });
break;
case DRM_GPUVA_OP_PREFETCH:
{
@@ -3142,9 +3210,19 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
region <= ARRAY_SIZE(region_to_mem_type));
}
+ /*
+ * Prefetch attempts to migrate BO's backing store without
+ * repopulating it first. Purged BOs have no backing store
+ * to migrate, so reject the operation.
+ */
err = vma_lock_and_validate(exec,
gpuva_to_vma(op->base.prefetch.va),
- res_evict, false, false);
+ (struct xe_vma_lock_and_validate_flags) {
+ .res_evict = res_evict,
+ .validate = false,
+ .request_decompress = false,
+ .check_purged = true,
+ });
if (!err && !xe_vma_has_no_bo(vma))
err = xe_bo_migrate(xe_vma_bo(vma),
region_to_mem_type[region],
diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c
index e564b12c02d9..66f00d3f5c07 100644
--- a/drivers/gpu/drm/xe/xe_vm_madvise.c
+++ b/drivers/gpu/drm/xe/xe_vm_madvise.c
@@ -13,6 +13,7 @@
#include "xe_pt.h"
#include "xe_svm.h"
#include "xe_tlb_inval.h"
+#include "xe_vm.h"
struct xe_vmas_in_madvise_range {
u64 addr;
@@ -26,6 +27,8 @@ struct xe_vmas_in_madvise_range {
/**
* struct xe_madvise_details - Argument to madvise_funcs
* @dpagemap: Reference-counted pointer to a struct drm_pagemap.
+ * @has_purged_bo: Set if any BO in the range was found already purged
+ *	(purgeable state only)
+ * @retained_ptr: Userspace address of the __u32 "retained" result, 0 if
+ *	not requested (purgeable state only)
*
* The madvise IOCTL handler may, in addition to the user-space
* args, have additional info to pass into the madvise_func that
@@ -34,6 +37,8 @@ struct xe_vmas_in_madvise_range {
*/
struct xe_madvise_details {
struct drm_pagemap *dpagemap;
+ bool has_purged_bo;
+ u64 retained_ptr;
};
static int get_vmas(struct xe_vm *vm, struct xe_vmas_in_madvise_range *madvise_range)
@@ -180,6 +185,222 @@ static void madvise_pat_index(struct xe_device *xe, struct xe_vm *vm,
}
}
+/**
+ * xe_bo_is_dmabuf_shared() - Test whether a BO takes part in dma-buf sharing
+ * @bo: Buffer object
+ *
+ * Imported and exported dma-bufs must never be marked purgeable:
+ *  - an imported BO's backing store belongs to the exporter, which (like
+ *    other devices) may still be using the pages, so Xe cannot safely
+ *    reclaim them;
+ *  - an exported BO may have active mappings on external devices that Xe
+ *    cannot track.
+ *
+ * Return: true if @bo is imported or exported, false otherwise
+ */
+static bool xe_bo_is_dmabuf_shared(struct xe_bo *bo)
+{
+	struct drm_gem_object *gem = &bo->ttm.base;
+
+	/* Imported: exporter owns the pages. Exported: a dma_buf is attached. */
+	return drm_gem_is_imported(gem) || gem->dma_buf;
+}
+
+/**
+ * enum xe_bo_vmas_purge_state - VMA purgeable state aggregation
+ *
+ * Result of xe_bo_all_vmas_dontneed(): summarizes the purgeable hints of
+ * all VMAs that map a BO.
+ *
+ * Only WILLNEED and DONTNEED share their numeric value with the
+ * corresponding XE_MADV_PURGEABLE_* state, so only those two may be cast
+ * to enum xe_madv_purgeable_state. XE_BO_VMAS_STATE_NO_VMAS is not a
+ * purgeable state and must be filtered out before casting, as
+ * xe_bo_recompute_purgeable_state() does.
+ */
+enum xe_bo_vmas_purge_state {
+ /**
+  * @XE_BO_VMAS_STATE_WILLNEED: At least one VMA is not DONTNEED, or the
+  * BO is shared via dma-buf
+  */
+ XE_BO_VMAS_STATE_WILLNEED = 0,
+ /** @XE_BO_VMAS_STATE_DONTNEED: BO has VMAs and all of them are DONTNEED */
+ XE_BO_VMAS_STATE_DONTNEED = 1,
+ /** @XE_BO_VMAS_STATE_NO_VMAS: BO has no VMAs; existing BO state is kept */
+ XE_BO_VMAS_STATE_NO_VMAS = 2,
+};
+
+/*
+ * xe_bo_recompute_purgeable_state() casts between xe_bo_vmas_purge_state and
+ * xe_madv_purgeable_state. Enforce at build time that WILLNEED and DONTNEED
+ * have the same numeric value in both enums so that cast is always valid.
+ * XE_BO_VMAS_STATE_NO_VMAS is deliberately not asserted here: it is never
+ * cast to xe_madv_purgeable_state.
+ */
+static_assert(XE_BO_VMAS_STATE_WILLNEED == (int)XE_MADV_PURGEABLE_WILLNEED,
+ "VMA purge state WILLNEED must equal madv purgeable WILLNEED");
+static_assert(XE_BO_VMAS_STATE_DONTNEED == (int)XE_MADV_PURGEABLE_DONTNEED,
+ "VMA purge state DONTNEED must equal madv purgeable DONTNEED");
+
+/**
+ * xe_bo_all_vmas_dontneed() - Aggregate the purgeable hints of a BO's VMAs
+ * @bo: Buffer object
+ *
+ * Walks every VMA of every VM that maps @bo. A BO mapped more than once is
+ * only considered DONTNEED when all of its mappings agree.
+ *
+ * Caller must hold BO dma-resv lock.
+ *
+ * Return: XE_BO_VMAS_STATE_DONTNEED if all VMAs are DONTNEED,
+ *         XE_BO_VMAS_STATE_WILLNEED if at least one VMA is not DONTNEED
+ *         or the BO is shared via dma-buf,
+ *         XE_BO_VMAS_STATE_NO_VMAS if BO has no VMAs
+ */
+static enum xe_bo_vmas_purge_state xe_bo_all_vmas_dontneed(struct xe_bo *bo)
+{
+	/*
+	 * Start from "no VMAs" so that a BO whose last VMA was unmapped keeps
+	 * its current purgeable state instead of flipping DONTNEED -> WILLNEED.
+	 */
+	enum xe_bo_vmas_purge_state state = XE_BO_VMAS_STATE_NO_VMAS;
+	struct drm_gem_object *gem = &bo->ttm.base;
+	struct drm_gpuvm_bo *gpuvm_bo;
+	struct drm_gpuva *va;
+
+	xe_bo_assert_held(bo);
+
+	/* dma-buf shared BOs are never purgeable */
+	if (xe_bo_is_dmabuf_shared(bo))
+		return XE_BO_VMAS_STATE_WILLNEED;
+
+	drm_gem_for_each_gpuvm_bo(gpuvm_bo, gem) {
+		drm_gpuvm_bo_for_each_va(va, gpuvm_bo) {
+			/* A single mapping that is not DONTNEED vetoes purging */
+			if (gpuva_to_vma(va)->attr.purgeable_state !=
+			    XE_MADV_PURGEABLE_DONTNEED)
+				return XE_BO_VMAS_STATE_WILLNEED;
+
+			state = XE_BO_VMAS_STATE_DONTNEED;
+		}
+	}
+
+	return state;
+}
+
+/**
+ * xe_bo_recompute_purgeable_state() - Recompute BO purgeable state from VMAs
+ * @bo: Buffer object, may be NULL (no-op)
+ *
+ * Derives the BO-level purgeable state from the hints of all VMAs mapping
+ * the BO. A BO with several mappings only becomes DONTNEED when every
+ * mapping is DONTNEED. A BO without any VMA, or one that has already been
+ * purged, is left untouched.
+ *
+ * Locking: Caller must hold BO dma-resv lock. When iterating GPUVM lists,
+ * VM lock must also be held (write) to prevent concurrent VMA modifications.
+ * This is satisfied at both call sites:
+ *   - xe_vma_destroy(): holds vm->lock write
+ *   - madvise_purgeable(): holds vm->lock write (from madvise ioctl path)
+ *
+ * Return: nothing
+ */
+void xe_bo_recompute_purgeable_state(struct xe_bo *bo)
+{
+	enum xe_bo_vmas_purge_state aggregate;
+
+	if (!bo)
+		return;
+
+	xe_bo_assert_held(bo);
+
+	/*
+	 * PURGED is terminal: a purged BO never goes back to WILLNEED.
+	 * This matches i915 semantics where purged BOs are permanently invalid.
+	 */
+	if (bo->madv_purgeable == XE_MADV_PURGEABLE_PURGED)
+		return;
+
+	aggregate = xe_bo_all_vmas_dontneed(bo);
+
+	/* No VMAs left: keep whatever state the BO already has */
+	if (aggregate == XE_BO_VMAS_STATE_NO_VMAS)
+		return;
+
+	/* Already in the aggregated state: nothing to update */
+	if (aggregate == (enum xe_bo_vmas_purge_state)bo->madv_purgeable)
+		return;
+
+	xe_bo_set_purgeable_state(bo, (enum xe_madv_purgeable_state)aggregate);
+}
+
+/**
+ * madvise_purgeable() - Apply a purgeable-state hint to a set of VMAs
+ * @xe: XE device
+ * @vm: VM
+ * @vmas: Array of VMAs
+ * @num_vmas: Number of VMAs
+ * @op: Madvise operation
+ * @details: Madvise details for return values
+ *
+ * Records the requested WILLNEED/DONTNEED hint on each BO-backed VMA and
+ * recomputes the state of the backing BO. VMAs without a BO, VMAs of
+ * dma-buf shared BOs and VMAs of already purged BOs are skipped; the latter
+ * are reported through details->has_purged_bo for later copy to userspace.
+ * Every VMA that is processed or skipped gets skip_invalidation set.
+ */
+static void madvise_purgeable(struct xe_device *xe, struct xe_vm *vm,
+			      struct xe_vma **vmas, int num_vmas,
+			      struct drm_xe_madvise *op,
+			      struct xe_madvise_details *details)
+{
+	int idx;
+
+	xe_assert(vm->xe, op->type == DRM_XE_VMA_ATTR_PURGEABLE_STATE);
+
+	for (idx = 0; idx < num_vmas; idx++) {
+		struct xe_vma *vma = vmas[idx];
+		struct xe_bo *bo = xe_vma_bo(vma);
+		u32 new_state;
+
+		/* Purgeable state applies to BOs only, skip non-BO VMAs */
+		if (!bo) {
+			vma->skip_invalidation = true;
+			continue;
+		}
+
+		/* madv state may only be modified with the BO locked */
+		xe_bo_assert_held(bo);
+
+		/* Shared dma-bufs are never purgeable - no PTEs to zap */
+		if (xe_bo_is_dmabuf_shared(bo)) {
+			vma->skip_invalidation = true;
+			continue;
+		}
+
+		/*
+		 * PURGED is terminal: a purged BO never goes back to WILLNEED.
+		 * This matches i915 semantics where purged BOs are permanently
+		 * invalid. Only remember it for the retained report.
+		 */
+		if (xe_bo_is_purged(bo)) {
+			details->has_purged_bo = true;
+			vma->skip_invalidation = true;
+			continue;
+		}
+
+		switch (op->purge_state_val.val) {
+		case DRM_XE_VMA_PURGEABLE_STATE_WILLNEED:
+			new_state = XE_MADV_PURGEABLE_WILLNEED;
+			break;
+		case DRM_XE_VMA_PURGEABLE_STATE_DONTNEED:
+			new_state = XE_MADV_PURGEABLE_DONTNEED;
+			break;
+		default:
+			/* Should never hit - values validated in madvise_args_are_sane() */
+			xe_assert(vm->xe, 0);
+			return;
+		}
+
+		vma->attr.purgeable_state = new_state;
+		/*
+		 * No PTE zap for either hint. In particular, at DONTNEED time
+		 * the pages are still alive; the zap happens in
+		 * xe_bo_move_notify() right before the shrinker frees them.
+		 */
+		vma->skip_invalidation = true;
+
+		xe_bo_recompute_purgeable_state(bo);
+	}
+}
+
typedef void (*madvise_func)(struct xe_device *xe, struct xe_vm *vm,
struct xe_vma **vmas, int num_vmas,
struct drm_xe_madvise *op,
@@ -189,6 +410,7 @@ static const madvise_func madvise_funcs[] = {
[DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC] = madvise_preferred_mem_loc,
[DRM_XE_MEM_RANGE_ATTR_ATOMIC] = madvise_atomic,
[DRM_XE_MEM_RANGE_ATTR_PAT] = madvise_pat_index,
+ [DRM_XE_VMA_ATTR_PURGEABLE_STATE] = madvise_purgeable,
};
static u8 xe_zap_ptes_in_madvise_range(struct xe_vm *vm, u64 start, u64 end)
@@ -319,6 +541,19 @@ static bool madvise_args_are_sane(struct xe_device *xe, const struct drm_xe_madv
return false;
break;
}
+ case DRM_XE_VMA_ATTR_PURGEABLE_STATE:
+ {
+ u32 val = args->purge_state_val.val;
+
+ if (XE_IOCTL_DBG(xe, !(val == DRM_XE_VMA_PURGEABLE_STATE_WILLNEED ||
+ val == DRM_XE_VMA_PURGEABLE_STATE_DONTNEED)))
+ return false;
+
+ if (XE_IOCTL_DBG(xe, args->purge_state_val.pad))
+ return false;
+
+ break;
+ }
default:
if (XE_IOCTL_DBG(xe, 1))
return false;
@@ -337,6 +572,12 @@ static int xe_madvise_details_init(struct xe_vm *vm, const struct drm_xe_madvise
memset(details, 0, sizeof(*details));
+ /* Store retained pointer for purgeable state */
+ if (args->type == DRM_XE_VMA_ATTR_PURGEABLE_STATE) {
+ details->retained_ptr = args->purge_state_val.retained_ptr;
+ return 0;
+ }
+
if (args->type == DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC) {
int fd = args->preferred_mem_loc.devmem_fd;
struct drm_pagemap *dpagemap;
@@ -365,6 +606,21 @@ static void xe_madvise_details_fini(struct xe_madvise_details *details)
drm_pagemap_put(details->dpagemap);
}
+/**
+ * xe_madvise_purgeable_retained_to_user() - Report the retained result
+ * @details: Madvise details filled in by madvise_purgeable()
+ *
+ * Writes a __u32 to the user address in @details->retained_ptr: 1 if no
+ * purged BO was encountered, 0 otherwise. Does nothing when no address
+ * was supplied.
+ *
+ * Return: 0 on success or when there is nothing to write, -EFAULT if the
+ * write to userspace fails
+ */
+static int xe_madvise_purgeable_retained_to_user(const struct xe_madvise_details *details)
+{
+	u32 retained = details->has_purged_bo ? 0 : 1;
+	u32 __user *uptr;
+
+	if (!details->retained_ptr)
+		return 0;
+
+	uptr = u64_to_user_ptr(details->retained_ptr);
+
+	return put_user(retained, uptr) ? -EFAULT : 0;
+}
+
static bool check_bo_args_are_sane(struct xe_vm *vm, struct xe_vma **vmas,
int num_vmas, u32 atomic_val)
{
@@ -416,13 +672,21 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil
struct xe_device *xe = to_xe_device(dev);
struct xe_file *xef = to_xe_file(file);
struct drm_xe_madvise *args = data;
- struct xe_vmas_in_madvise_range madvise_range = {.addr = args->start,
- .range = args->range, };
+ struct xe_vmas_in_madvise_range madvise_range = {
+ /*
+ * Userspace may pass canonical (sign-extended) addresses.
+ * Strip the sign extension to get the internal non-canonical
+ * form used by the GPUVM, matching xe_vm_bind_ioctl() behavior.
+ */
+ .addr = xe_device_uncanonicalize_addr(xe, args->start),
+ .range = args->range,
+ };
struct xe_madvise_details details;
u16 pat_index, coh_mode;
struct xe_vm *vm;
struct drm_exec exec;
int err, attr_type;
+ bool do_retained;
vm = xe_vm_lookup(xef, args->vm_id);
if (XE_IOCTL_DBG(xe, !vm))
@@ -433,6 +697,25 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil
goto put_vm;
}
+ /* Cache whether we need to write retained, and validate it's initialized to 0 */
+ do_retained = args->type == DRM_XE_VMA_ATTR_PURGEABLE_STATE &&
+ args->purge_state_val.retained_ptr;
+ if (do_retained) {
+ u32 retained;
+ u32 __user *retained_ptr;
+
+ retained_ptr = u64_to_user_ptr(args->purge_state_val.retained_ptr);
+ if (get_user(retained, retained_ptr)) {
+ err = -EFAULT;
+ goto put_vm;
+ }
+
+ if (XE_IOCTL_DBG(xe, retained != 0)) {
+ err = -EINVAL;
+ goto put_vm;
+ }
+ }
+
xe_svm_flush(vm);
err = down_write_killable(&vm->lock);
@@ -448,7 +731,7 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil
if (err)
goto unlock_vm;
- err = xe_vm_alloc_madvise_vma(vm, args->start, args->range);
+ err = xe_vm_alloc_madvise_vma(vm, madvise_range.addr, args->range);
if (err)
goto madv_fini;
@@ -510,10 +793,18 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil
}
attr_type = array_index_nospec(args->type, ARRAY_SIZE(madvise_funcs));
+
+ /* Ensure the madvise function exists for this type */
+ if (!madvise_funcs[attr_type]) {
+ err = -EINVAL;
+ goto err_fini;
+ }
+
madvise_funcs[attr_type](xe, vm, madvise_range.vmas, madvise_range.num_vmas, args,
&details);
- err = xe_vm_invalidate_madvise_range(vm, args->start, args->start + args->range);
+ err = xe_vm_invalidate_madvise_range(vm, madvise_range.addr,
+ madvise_range.addr + args->range);
if (madvise_range.has_svm_userptr_vmas)
xe_svm_notifier_unlock(vm);
@@ -528,6 +819,10 @@ madv_fini:
xe_madvise_details_fini(&details);
unlock_vm:
up_write(&vm->lock);
+
+ /* Write retained value to user after releasing all locks */
+ if (!err && do_retained)
+ err = xe_madvise_purgeable_retained_to_user(&details);
put_vm:
xe_vm_put(vm);
return err;
diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.h b/drivers/gpu/drm/xe/xe_vm_madvise.h
index b0e1fc445f23..39acd2689ca0 100644
--- a/drivers/gpu/drm/xe/xe_vm_madvise.h
+++ b/drivers/gpu/drm/xe/xe_vm_madvise.h
@@ -8,8 +8,11 @@
struct drm_device;
struct drm_file;
+struct xe_bo;
int xe_vm_madvise_ioctl(struct drm_device *dev, void *data,
struct drm_file *file);
+void xe_bo_recompute_purgeable_state(struct xe_bo *bo);
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index 3ab2cef25426..a94827d7fbec 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -96,6 +96,17 @@ struct xe_vma_mem_attr {
* same as default_pat_index unless overwritten by madvise.
*/
u16 pat_index;
+
+ /**
+ * @purgeable_state: Purgeable hint for this VMA mapping
+ *
+ * Per-VMA purgeable state from madvise. Valid states are WILLNEED (0)
+ * or DONTNEED (1). Shared BOs require all VMAs to be DONTNEED before
+ * the BO can be purged. PURGED state exists only at BO level.
+ *
+ * Protected by BO dma-resv lock. Set via DRM_IOCTL_XE_MADVISE.
+ */
+ u32 purgeable_state;
};
struct xe_vma {
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 6c99514a85e1..ae2fda23ce7c 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -432,6 +432,7 @@ struct drm_xe_query_config {
#define DRM_XE_QUERY_CONFIG_FLAG_HAS_CPU_ADDR_MIRROR (1 << 2)
#define DRM_XE_QUERY_CONFIG_FLAG_HAS_NO_COMPRESSION_HINT (1 << 3)
#define DRM_XE_QUERY_CONFIG_FLAG_HAS_DISABLE_STATE_CACHE_PERF_FIX (1 << 4)
+ #define DRM_XE_QUERY_CONFIG_FLAG_HAS_PURGING_SUPPORT (1 << 5)
#define DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT 2
#define DRM_XE_QUERY_CONFIG_VA_BITS 3
#define DRM_XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY 4
@@ -2171,6 +2172,7 @@ struct drm_xe_query_eu_stall {
* - DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC: Set preferred memory location.
* - DRM_XE_MEM_RANGE_ATTR_ATOMIC: Set atomic access policy.
* - DRM_XE_MEM_RANGE_ATTR_PAT: Set page attribute table index.
+ * - DRM_XE_VMA_ATTR_PURGEABLE_STATE: Set purgeable state for BOs.
*
* Example:
*
@@ -2203,6 +2205,7 @@ struct drm_xe_madvise {
#define DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC 0
#define DRM_XE_MEM_RANGE_ATTR_ATOMIC 1
#define DRM_XE_MEM_RANGE_ATTR_PAT 2
+#define DRM_XE_VMA_ATTR_PURGEABLE_STATE 3
/** @type: type of attribute */
__u32 type;
@@ -2293,6 +2296,72 @@ struct drm_xe_madvise {
/** @pat_index.reserved: Reserved */
__u64 reserved;
} pat_index;
+
+ /**
+ * @purge_state_val: Purgeable state configuration
+ *
+ * Used when @type == DRM_XE_VMA_ATTR_PURGEABLE_STATE.
+ *
+ * Configures the purgeable state of the buffer objects mapped in the
+ * specified virtual address range. This lets applications tell the kernel
+ * which BOs are currently not needed, so that their backing store can be
+ * reclaimed under memory pressure.
+ *
+ * By default all VMAs are in WILLNEED state.
+ *
+ * Supported values for @purge_state_val.val:
+ * - DRM_XE_VMA_PURGEABLE_STATE_WILLNEED (0): Marks BO as needed.
+ * If the BO was previously purged, the kernel sets the __u32 at
+ * @retained_ptr to 0 (backing store lost) so the application knows
+ * it must recreate the BO.
+ *
+ * - DRM_XE_VMA_PURGEABLE_STATE_DONTNEED (1): Marks BO as not currently
+ * needed. Kernel may purge it under memory pressure to reclaim memory.
+ * Only applies to non-shared BOs. The kernel sets the __u32 at
+ * @retained_ptr to 1 if the backing store still exists (not yet purged),
+ * or 0 if it was already purged.
+ *
+ * Important: Once marked as DONTNEED, touching the BO's memory
+ * is undefined behavior. It may succeed temporarily (before the
+ * kernel purges the backing store) but will suddenly fail once
+ * the BO transitions to PURGED state.
+ *
+ * To transition back: use WILLNEED and check @retained_ptr —
+ * if 0, backing store was lost and the BO must be recreated.
+ *
+ * The following operations are blocked in DONTNEED state to
+ * prevent the BO from being re-mapped after madvise:
+ * - New mmap() calls: Fail with -EBUSY
+ * - VM_BIND operations: Fail with -EBUSY
+ * - New dma-buf exports: Fail with -EBUSY
+ * - CPU page faults (existing mmap): Fail with SIGBUS
+ * - GPU page faults (fault-mode VMs): Fail with -EACCES
+ */
+ struct {
+#define DRM_XE_VMA_PURGEABLE_STATE_WILLNEED 0
+#define DRM_XE_VMA_PURGEABLE_STATE_DONTNEED 1
+ /** @purge_state_val.val: value for DRM_XE_VMA_ATTR_PURGEABLE_STATE */
+ __u32 val;
+
+ /** @purge_state_val.pad: MBZ */
+ __u32 pad;
+ /**
+ * @purge_state_val.retained_ptr: Pointer to a __u32 output
+ * field for backing store status.
+ *
+ * Userspace must initialize the __u32 value at this address
+ * to 0 before the ioctl. Kernel writes a __u32 after the
+ * operation:
+ * - 1 if backing store exists (not purged)
+ * - 0 if backing store was purged
+ *
+ * If userspace fails to initialize to 0, ioctl returns -EINVAL.
+ * This ensures a safe default (0 = assume purged) if kernel
+ * cannot write the result.
+ *
+ * Similar to i915's drm_i915_gem_madvise.retained field.
+ */
+ __u64 retained_ptr;
+ } purge_state_val;
};
/** @reserved: Reserved */