summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSunil Khatri <sunil.khatri@amd.com>2026-05-18 19:58:08 +0530
committerAlex Deucher <alexander.deucher@amd.com>2026-05-27 11:58:31 -0400
commitcf4aafdccefccc7f8236fed028d06725246e289e (patch)
tree8022e148e5b6dad4fd6d9dcc3435a23e1c092952
parenta00caed2302c604c19a5cab781e34d7ba4fa7558 (diff)
drm/amdgpu/userq: make sure queue is valid in the hang_detect_work
Thread 1: Running amdgpu_userq_destroy which eventually remove the queue from door bell and set userq_mgr = NULL. Thread2: An interrupt might have scheduled the hang_detect_work which still need userq_mgr to be valid but could get an NULL ptrs. To fix that make sure we cancel the hang_detect_work again before setting userq_mgr to NULL. Along with that we also need all the queue va to remain valid till we could be running anything on the queue and hence moving the userq_va post hang_detect handler is cancelled. Signed-off-by: Sunil Khatri <sunil.khatri@amd.com> Reviewed-by: Christian König <christian.koenig@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> (cherry picked from commit 1a66ceb98b137d18d303b9889f0e7d8c4db73943)
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c14
1 files changed, 7 insertions, 7 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
index 5da107dffcce..8d7dad1d30eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -427,8 +427,6 @@ static void amdgpu_userq_cleanup(struct amdgpu_usermode_queue *queue)
xa_erase_irq(&adev->userq_doorbell_xa, queue->doorbell_index);
amdgpu_userq_fence_driver_free(queue);
queue->fence_drv = NULL;
- queue->userq_mgr = NULL;
- list_del(&queue->userq_va_list);
up_read(&adev->reset_domain->sem);
}
@@ -619,11 +617,6 @@ amdgpu_userq_destroy(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_que
/* Cancel any pending hang detection work and cleanup */
cancel_delayed_work_sync(&queue->hang_detect_work);
-
- amdgpu_bo_reserve(vm->root.bo, true);
- amdgpu_userq_buffer_vas_list_cleanup(adev, queue);
- amdgpu_bo_unreserve(vm->root.bo);
-
mutex_lock(&uq_mgr->userq_mutex);
amdgpu_userq_wait_for_last_fence(queue);
@@ -635,6 +628,13 @@ amdgpu_userq_destroy(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_que
amdgpu_userq_cleanup(queue);
mutex_unlock(&uq_mgr->userq_mutex);
+ cancel_delayed_work_sync(&queue->hang_detect_work);
+ amdgpu_bo_reserve(vm->root.bo, true);
+ amdgpu_userq_buffer_vas_list_cleanup(adev, queue);
+ amdgpu_bo_unreserve(vm->root.bo);
+ list_del(&queue->userq_va_list);
+ queue->userq_mgr = NULL;
+
amdgpu_bo_reserve(queue->db_obj.obj, true);
amdgpu_bo_unpin(queue->db_obj.obj);
amdgpu_bo_unreserve(queue->db_obj.obj);