diff options
| author | Riana Tauro <riana.tauro@intel.com> | 2026-03-04 13:14:09 +0530 |
|---|---|---|
| committer | Rodrigo Vivi <rodrigo.vivi@intel.com> | 2026-03-05 19:38:55 -0500 |
| commit | b40db12b542f503b5ec689d18d473299d49eeb60 (patch) | |
| tree | d46a94b2129d7925709fc51c6cc06231c75ed858 /include/uapi | |
| parent | c36218dc49f5e9ef9e3074670fdae7ac3a7e794f (diff) | |
drm/xe/xe_drm_ras: Add support for XE DRM RAS
Allocate correctable and uncorrectable nodes for every xe device. Each node
contains error components, counters and the respective counter query functions.
Add basic functionality to create and register drm nodes.
The following operations can be performed using the generic netlink DRM RAS interface:
1) List Nodes:
$ sudo ynl --family drm_ras --dump list-nodes
[{'device-name': '0000:03:00.0',
'node-id': 0,
'node-name': 'correctable-errors',
'node-type': 'error-counter'},
{'device-name': '0000:03:00.0',
'node-id': 1,
'node-name': 'uncorrectable-errors',
'node-type': 'error-counter'}]
2) Get Error counters:
$ sudo ynl --family drm_ras --dump get-error-counter --json '{"node-id":0}'
[{'error-id': 1, 'error-name': 'core-compute', 'error-value': 0},
{'error-id': 2, 'error-name': 'soc-internal', 'error-value': 0}]
3) Get specific Error counter:
$ sudo ynl --family drm_ras --do get-error-counter --json '{"node-id":0, "error-id":1}'
{'error-id': 1, 'error-name': 'core-compute', 'error-value': 0}
Signed-off-by: Riana Tauro <riana.tauro@intel.com>
Reviewed-by: Raag Jadav <raag.jadav@intel.com>
Link: https://patch.msgid.link/20260304074412.464435-9-riana.tauro@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Diffstat (limited to 'include/uapi')
| -rw-r--r-- | include/uapi/drm/xe_drm.h | 79 |
1 files changed, 79 insertions, 0 deletions
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index ef2565048bdf..b0264c32ceb2 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -2357,6 +2357,85 @@ struct drm_xe_exec_queue_set_property { __u64 reserved[2]; }; +/** + * DOC: Xe DRM RAS + * + * The enums and strings defined below map to the attributes of the DRM RAS Netlink Interface. + * Refer to Documentation/netlink/specs/drm_ras.yaml for the complete interface specification. + * + * Node Registration + * ================= + * + * The driver registers DRM RAS nodes for each error severity level. + * enum drm_xe_ras_error_severity defines the node-id, while DRM_XE_RAS_ERROR_SEVERITY_NAMES maps + * node-id to node-name. + * + * Error Classification + * ==================== + * + * Each node contains a list of error counters. Each error is identified by an error-id and + * an error-name. enum drm_xe_ras_error_component defines the error-id, while + * DRM_XE_RAS_ERROR_COMPONENT_NAMES maps error-id to error-name. + * + * User Interface + * ============== + * + * To retrieve error values of an error counter, userspace applications should + * follow the steps below: + * + * 1. Use command LIST_NODES to enumerate all available nodes + * 2. Select node by node-id or node-name + * 3. Use command GET_ERROR_COUNTER to list errors of a specific node + * 4. Query specific error values using either error-id or error-name + * + * .. code-block:: C + * + * // Lookup tables for ID-to-name resolution + * static const char *nodes[] = DRM_XE_RAS_ERROR_SEVERITY_NAMES; + * static const char *errors[] = DRM_XE_RAS_ERROR_COMPONENT_NAMES; + * + */ + +/** + * enum drm_xe_ras_error_severity - DRM RAS error severity. 
+ */ +enum drm_xe_ras_error_severity { + /** @DRM_XE_RAS_ERR_SEV_CORRECTABLE: Correctable Error */ + DRM_XE_RAS_ERR_SEV_CORRECTABLE = 0, + /** @DRM_XE_RAS_ERR_SEV_UNCORRECTABLE: Uncorrectable Error */ + DRM_XE_RAS_ERR_SEV_UNCORRECTABLE, + /** @DRM_XE_RAS_ERR_SEV_MAX: Max severity */ + DRM_XE_RAS_ERR_SEV_MAX /* non-ABI */ +}; + +/** + * enum drm_xe_ras_error_component - DRM RAS error component. + */ +enum drm_xe_ras_error_component { + /** @DRM_XE_RAS_ERR_COMP_CORE_COMPUTE: Core Compute Error */ + DRM_XE_RAS_ERR_COMP_CORE_COMPUTE = 1, + /** @DRM_XE_RAS_ERR_COMP_SOC_INTERNAL: SoC Internal Error */ + DRM_XE_RAS_ERR_COMP_SOC_INTERNAL, + /** @DRM_XE_RAS_ERR_COMP_MAX: Max Error */ + DRM_XE_RAS_ERR_COMP_MAX /* non-ABI */ +}; + +/* + * Error severity to name mapping. + */ +#define DRM_XE_RAS_ERROR_SEVERITY_NAMES { \ + [DRM_XE_RAS_ERR_SEV_CORRECTABLE] = "correctable-errors", \ + [DRM_XE_RAS_ERR_SEV_UNCORRECTABLE] = "uncorrectable-errors", \ +} + +/* + * Error component to name mapping. + */ +#define DRM_XE_RAS_ERROR_COMPONENT_NAMES { \ + [DRM_XE_RAS_ERR_COMP_CORE_COMPUTE] = "core-compute", \ + [DRM_XE_RAS_ERR_COMP_SOC_INTERNAL] = "soc-internal" \ +} + #if defined(__cplusplus) } #endif |
