55 files changed, 5994 insertions, 1135 deletions
diff --git a/rust/kernel/alloc/allocator/iter.rs b/rust/kernel/alloc/allocator/iter.rs
index 5759f86029b7..e0a70b7a744a 100644
--- a/rust/kernel/alloc/allocator/iter.rs
+++ b/rust/kernel/alloc/allocator/iter.rs
@@ -42,15 +42,9 @@ impl<'a> Iterator for VmallocPageIter<'a> {
             return None;
         }
 
-        // TODO: Use `NonNull::add()` instead, once the minimum supported compiler version is
-        // bumped to 1.80 or later.
-        //
         // SAFETY: `offset` is in the interval `[0, (self.page_count() - 1) * page::PAGE_SIZE]`,
         // hence the resulting pointer is guaranteed to be within the same allocation.
-        let ptr = unsafe { self.buf.as_ptr().add(offset) };
-
-        // SAFETY: `ptr` is guaranteed to be non-null given that it is derived from `self.buf`.
-        let ptr = unsafe { NonNull::new_unchecked(ptr) };
+        let ptr = unsafe { self.buf.add(offset) };
 
         // SAFETY:
         // - `ptr` is a valid pointer to a `Vmalloc` allocation.
diff --git a/rust/kernel/alloc/kbox.rs b/rust/kernel/alloc/kbox.rs
index 622b3529edfc..bd6da02c7ab8 100644
--- a/rust/kernel/alloc/kbox.rs
+++ b/rust/kernel/alloc/kbox.rs
@@ -77,33 +77,8 @@ use pin_init::{InPlaceWrite, Init, PinInit, ZeroableOption};
 /// `self.0` is always properly aligned and either points to memory allocated with `A` or, for
 /// zero-sized types, is a dangling, well aligned pointer.
 #[repr(transparent)]
-#[cfg_attr(CONFIG_RUSTC_HAS_COERCE_POINTEE, derive(core::marker::CoercePointee))]
-pub struct Box<#[cfg_attr(CONFIG_RUSTC_HAS_COERCE_POINTEE, pointee)] T: ?Sized, A: Allocator>(
-    NonNull<T>,
-    PhantomData<A>,
-);
-
-// This is to allow coercion from `Box<T, A>` to `Box<U, A>` if `T` can be converted to the
-// dynamically-sized type (DST) `U`.
-#[cfg(not(CONFIG_RUSTC_HAS_COERCE_POINTEE))]
-impl<T, U, A> core::ops::CoerceUnsized<Box<U, A>> for Box<T, A>
-where
-    T: ?Sized + core::marker::Unsize<U>,
-    U: ?Sized,
-    A: Allocator,
-{
-}
-
-// This is to allow `Box<U, A>` to be dispatched on when `Box<T, A>` can be coerced into `Box<U,
-// A>`.
-#[cfg(not(CONFIG_RUSTC_HAS_COERCE_POINTEE))]
-impl<T, U, A> core::ops::DispatchFromDyn<Box<U, A>> for Box<T, A>
-where
-    T: ?Sized + core::marker::Unsize<U>,
-    U: ?Sized,
-    A: Allocator,
-{
-}
+#[derive(core::marker::CoercePointee)]
+pub struct Box<#[pointee] T: ?Sized, A: Allocator>(NonNull<T>, PhantomData<A>);
 
 /// Type alias for [`Box`] with a [`Kmalloc`] allocator.
 ///
diff --git a/rust/kernel/alloc/kvec/errors.rs b/rust/kernel/alloc/kvec/errors.rs
index e7de5049ee47..985c5f2c3962 100644
--- a/rust/kernel/alloc/kvec/errors.rs
+++ b/rust/kernel/alloc/kvec/errors.rs
@@ -15,6 +15,7 @@ impl<T> fmt::Debug for PushError<T> {
 }
 
 impl<T> From<PushError<T>> for Error {
+    #[inline]
     fn from(_: PushError<T>) -> Error {
         // Returning ENOMEM isn't appropriate because the system is not out of memory. The vector
         // is just full and we are refusing to resize it.
@@ -32,6 +33,7 @@ impl fmt::Debug for RemoveError {
 }
 
 impl From<RemoveError> for Error {
+    #[inline]
     fn from(_: RemoveError) -> Error {
         EINVAL
     }
@@ -55,6 +57,7 @@ impl<T> fmt::Debug for InsertError<T> {
 }
 
 impl<T> From<InsertError<T>> for Error {
+    #[inline]
     fn from(_: InsertError<T>) -> Error {
         EINVAL
     }
diff --git a/rust/kernel/block/mq/gen_disk.rs b/rust/kernel/block/mq/gen_disk.rs
index c8b0ecb17082..912cb805caf5 100644
--- a/rust/kernel/block/mq/gen_disk.rs
+++ b/rust/kernel/block/mq/gen_disk.rs
@@ -140,9 +140,7 @@ impl GenDiskBuilder {
             devnode: None,
             alternative_gpt_sector: None,
             get_unique_id: None,
-            // TODO: Set to THIS_MODULE. Waiting for const_refs_to_static feature to
-            // be merged (unstable in rustc 1.78 which is staged for linux 6.10)
-            // <https://github.com/rust-lang/rust/issues/119618>
+            // TODO: Set to `THIS_MODULE`.
             owner: core::ptr::null_mut(),
             pr_ops: core::ptr::null_mut(),
             free_disk: None,
diff --git a/rust/kernel/build_assert.rs b/rust/kernel/build_assert.rs
index f8124dbc663f..2ea2154ec30c 100644
--- a/rust/kernel/build_assert.rs
+++ b/rust/kernel/build_assert.rs
@@ -1,10 +1,144 @@
 // SPDX-License-Identifier: GPL-2.0
 
-//! Build-time assert.
+//! Various assertions that happen during build-time.
+//!
+//! There are three types of build-time assertions that you can use:
+//! - [`static_assert!`]
+//! - [`const_assert!`]
+//! - [`build_assert!`]
+//!
+//! The ones towards the bottom of the list are more expressive, while the ones towards the top of
+//! the list are more robust and trigger earlier in the compilation pipeline. Therefore, you should
+//! prefer the ones towards the top of the list wherever possible.
+//!
+//! # Choosing the correct assertion
+//!
+//! If you're asserting outside any bodies (e.g. initializers or function bodies), you should use
+//! [`static_assert!`] as it is the only assertion that can be used in that context.
+//!
+//! Inside bodies, if your assertion condition does not depend on any variable or generics, you
+//! should use [`static_assert!`]. If the condition depends on generics, but not variables
+//! (including function arguments), you should use [`const_assert!`]. Otherwise, use
+//! [`build_assert!`]. The same is true regardless if the function is `const fn`.
+//!
+//! ```
+//! // Outside any bodies.
+//! static_assert!(core::mem::size_of::<u8>() == 1);
+//! // `const_assert!` and `build_assert!` cannot be used here, they will fail to compile.
+//!
+//! #[inline(always)]
+//! fn foo<const N: usize>(v: usize) {
+//!     static_assert!(core::mem::size_of::<u8>() == 1); // Preferred.
+//!     const_assert!(core::mem::size_of::<u8>() == 1); // Discouraged.
+//!     build_assert!(core::mem::size_of::<u8>() == 1); // Discouraged.
+//!
+//!     // `static_assert!(N > 1);` is not allowed.
+//!     const_assert!(N > 1); // Preferred.
+//!     build_assert!(N > 1); // Discouraged.
+//!
+//!     // `static_assert!(v > 1);` is not allowed.
+//!     // `const_assert!(v > 1);` is not allowed.
+//!     build_assert!(v > 1); // Works.
+//! }
+//! ```
+//!
+//! # Detailed behavior
+//!
+//! `static_assert!()` is equivalent to `static_assert` in C. It requires `expr` to be a constant
+//! expression. This expression cannot refer to any generics. A `static_assert!(expr)` in a program
+//! is always evaluated, regardless if the function it appears in is used or not. This is also the
+//! only usable assertion outside a body.
+//!
+//! `const_assert!()` has no direct C equivalence. It is a more powerful version of
+//! `static_assert!()`, where it may refer to generics in a function. Note that due to the ability
+//! to refer to generics, the assertion is tied to a specific instance of a function. So if it is
+//! used in a generic function that is not instantiated, the assertion will not be checked. For this
+//! reason, `static_assert!()` is preferred wherever possible.
+//!
+//! `build_assert!()` is equivalent to `BUILD_BUG_ON`. It is even more powerful than
+//! `const_assert!()` because it can be used to check tautologies that depend on runtime value (this
+//! is the same as `BUILD_BUG_ON`). However, the assertion failure mechanism can possibly be
+//! undefined symbols and linker errors, it is not developer friendly to debug, so it is recommended
+//! to avoid it and prefer other two assertions where possible.
+
+pub use crate::{
+    build_assert,
+    build_error,
+    const_assert,
+    static_assert, //
+};
 
 #[doc(hidden)]
 pub use build_error::build_error;
 
+/// Static assert (i.e. compile-time assert).
+///
+/// Similar to C11 [`_Static_assert`] and C++11 [`static_assert`].
+///
+/// An optional panic message can be supplied after the expression.
+/// Currently only a string literal without formatting is supported
+/// due to constness limitations of the [`assert!`] macro.
+///
+/// The feature may be added to Rust in the future: see [RFC 2790].
+///
+/// You cannot refer to generics or variables with [`static_assert!`]. If you need to refer to
+/// generics, use [`const_assert!`]; if you need to refer to variables, use [`build_assert!`]. See
+/// the [module documentation](self).
+///
+/// [`_Static_assert`]: https://en.cppreference.com/w/c/language/_Static_assert
+/// [`static_assert`]: https://en.cppreference.com/w/cpp/language/static_assert
+/// [RFC 2790]: https://github.com/rust-lang/rfcs/issues/2790
+///
+/// # Examples
+///
+/// ```
+/// static_assert!(42 > 24);
+/// static_assert!(core::mem::size_of::<u8>() == 1);
+///
+/// const X: &[u8] = b"bar";
+/// static_assert!(X[1] == b'a');
+///
+/// const fn f(x: i32) -> i32 {
+///     x + 2
+/// }
+/// static_assert!(f(40) == 42);
+/// static_assert!(f(40) == 42, "f(x) must add 2 to the given input.");
+/// ```
+#[macro_export]
+macro_rules! static_assert {
+    ($condition:expr $(,$arg:literal)?) => {
+        const _: () = ::core::assert!($condition $(,$arg)?);
+    };
+}
+
+/// Assertion during constant evaluation.
+///
+/// This is a more powerful version of [`static_assert!`] that can refer to generics inside
+/// functions or implementation blocks. However, it also has a limitation where it can only appear
+/// in places where statements can appear; for example, you cannot use it as an item in the module.
+///
+/// [`static_assert!`] should be preferred if no generics are referred to in the condition. You
+/// cannot refer to variables with [`const_assert!`] (even inside `const fn`); if you need the
+/// capability, use [`build_assert!`]. See the [module documentation](self).
+///
+/// # Examples
+///
+/// ```
+/// fn foo<const N: usize>() {
+///     const_assert!(N > 1);
+/// }
+///
+/// fn bar<T>() {
+///     const_assert!(size_of::<T>() > 0, "T cannot be ZST");
+/// }
+/// ```
+#[macro_export]
+macro_rules! const_assert {
+    ($condition:expr $(,$arg:literal)?) => {
+        const { ::core::assert!($condition $(,$arg)?) };
+    };
+}
+
 /// Fails the build if the code path calling `build_error!` can possibly be executed.
 ///
 /// If the macro is executed in const context, `build_error!` will panic.
@@ -38,44 +172,33 @@ macro_rules! build_error {
 /// will panic. If the compiler or optimizer cannot guarantee the condition will
 /// be evaluated to `true`, a build error will be triggered.
 ///
-/// [`static_assert!`] should be preferred to `build_assert!` whenever possible.
+/// When a condition depends on a function argument, the function must be annotated with
+/// `#[inline(always)]`. Without this attribute, the compiler may choose to not inline the
+/// function, preventing it from optimizing out the error path.
+///
+/// If the assertion condition does not depend on any variables or generics, you should use
+/// [`static_assert!`]. If the assertion condition does not depend on variables, but does depend on
+/// generics, you should use [`const_assert!`]. See the [module documentation](self).
 ///
 /// # Examples
 ///
-/// These examples show that different types of [`assert!`] will trigger errors
-/// at different stage of compilation. It is preferred to err as early as
-/// possible, so [`static_assert!`] should be used whenever possible.
-/// ```ignore
-/// fn foo() {
-///     static_assert!(1 > 1); // Compile-time error
-///     build_assert!(1 > 1); // Build-time error
-///     assert!(1 > 1); // Run-time error
-/// }
 /// ```
+/// #[inline(always)] // Important.
+/// fn bar(n: usize) {
+///     build_assert!(n > 1);
+/// }
 ///
-/// When the condition refers to generic parameters or parameters of an inline function,
-/// [`static_assert!`] cannot be used. Use `build_assert!` in this scenario.
-/// ```
-/// fn foo<const N: usize>() {
-///     // `static_assert!(N > 1);` is not allowed
-///     build_assert!(N > 1); // Build-time check
-///     assert!(N > 1); // Run-time check
+/// fn foo() {
+///     bar(2);
 /// }
-/// ```
 ///
-/// When a condition depends on a function argument, the function must be annotated with
-/// `#[inline(always)]`. Without this attribute, the compiler may choose to not inline the
-/// function, preventing it from optimizing out the error path.
-/// ```
-/// #[inline(always)]
-/// fn bar(n: usize) {
-///     // `static_assert!(n > 1);` is not allowed
-///     build_assert!(n > 1); // Build-time check
-///     assert!(n > 1); // Run-time check
+/// #[inline(always)] // Important.
+/// const fn const_bar(n: usize) {
+///     build_assert!(n > 1);
 /// }
-/// ```
 ///
-/// [`static_assert!`]: crate::static_assert!
+/// const _: () = const_bar(2);
+/// ```
 #[macro_export]
 macro_rules! build_assert {
     ($cond:expr $(,)?) => {{
diff --git a/rust/kernel/clk.rs b/rust/kernel/clk.rs
index 4059aff34d09..7abbd0767d8c 100644
--- a/rust/kernel/clk.rs
+++ b/rust/kernel/clk.rs
@@ -128,6 +128,13 @@ mod common_clk {
     #[repr(transparent)]
     pub struct Clk(*mut bindings::clk);
 
+    // SAFETY: It is safe to call `clk_put` on another thread than where `clk_get` was called.
+    unsafe impl Send for Clk {}
+
+    // SAFETY: It is safe to call any combination of the `&self` methods in parallel, as the
+    // methods are synchronized internally.
+    unsafe impl Sync for Clk {}
+
     impl Clk {
         /// Gets [`Clk`] corresponding to a [`Device`] and a connection id.
         ///
diff --git a/rust/kernel/cpufreq.rs b/rust/kernel/cpufreq.rs
index 76faa1ac8501..d8d26870bea2 100644
--- a/rust/kernel/cpufreq.rs
+++ b/rust/kernel/cpufreq.rs
@@ -401,6 +401,7 @@ impl TableBuilder {
 /// ```
 /// use kernel::cpufreq::{DEFAULT_TRANSITION_LATENCY_NS, Policy};
 ///
+/// #[allow(clippy::double_parens, reason = "False positive before 1.92.0")]
 /// fn update_policy(policy: &mut Policy) {
 ///     policy
 ///         .set_dvfs_possible_from_any_cpu(true)
@@ -1256,18 +1257,17 @@ impl<T: Driver> Registration<T> {
     /// # Safety
     ///
     /// - This function may only be called from the cpufreq C infrastructure.
+    /// - The pointer arguments must be valid pointers.
     unsafe extern "C" fn adjust_perf_callback(
-        cpu: c_uint,
+        ptr: *mut bindings::cpufreq_policy,
         min_perf: c_ulong,
         target_perf: c_ulong,
         capacity: c_ulong,
     ) {
-        // SAFETY: The C API guarantees that `cpu` refers to a valid CPU number.
-        let cpu_id = unsafe { CpuId::from_u32_unchecked(cpu) };
-
-        if let Ok(mut policy) = PolicyCpu::from_cpu(cpu_id) {
-            T::adjust_perf(&mut policy, min_perf, target_perf, capacity);
-        }
+        // SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the
+        // lifetime of `policy`.
+        let policy = unsafe { Policy::from_raw_mut(ptr) };
+        T::adjust_perf(policy, min_perf, target_perf, capacity);
     }
 
     /// Driver's `get_intermediate` callback.
diff --git a/rust/kernel/device.rs b/rust/kernel/device.rs
index 94e0548e7687..6d5396a43ebe 100644
--- a/rust/kernel/device.rs
+++ b/rust/kernel/device.rs
@@ -489,6 +489,17 @@ impl<Ctx: DeviceContext> Device<Ctx> {
         // defined as a `#[repr(transparent)]` wrapper around `fwnode_handle`.
         Some(unsafe { &*fwnode_handle.cast() })
     }
+
+    /// Returns the name of the device.
+    ///
+    /// This is the kobject name of the device, or its initial name if the kobject is not yet
+    /// available.
+    #[inline]
+    pub fn name(&self) -> &CStr {
+        // SAFETY: By its type invariant `self.as_raw()` is a valid pointer to a `struct device`.
+        // The returned string is valid for the lifetime of the device.
+        unsafe { CStr::from_char_ptr(bindings::dev_name(self.as_raw())) }
+    }
 }
 
 // SAFETY: `Device` is a transparent wrapper of a type that doesn't depend on `Device`'s generic
@@ -575,7 +586,7 @@ pub struct CoreInternal;
 /// The bound context indicates that for the entire duration of the lifetime of a [`Device<Bound>`]
 /// reference, the [`Device`] is guaranteed to be bound to a driver.
 ///
-/// Some APIs, such as [`dma::CoherentAllocation`] or [`Devres`] rely on the [`Device`] to be bound,
+/// Some APIs, such as [`dma::Coherent`] or [`Devres`] rely on the [`Device`] to be bound,
 /// which can be proven with the [`Bound`] device context.
 ///
 /// Any abstraction that can guarantee a scope where the corresponding bus device is bound, should
@@ -584,7 +595,7 @@ pub struct CoreInternal;
 ///
 /// [`Devres`]: kernel::devres::Devres
 /// [`Devres::access`]: kernel::devres::Devres::access
-/// [`dma::CoherentAllocation`]: kernel::dma::CoherentAllocation
+/// [`dma::Coherent`]: kernel::dma::Coherent
 pub struct Bound;
 
 mod private {
diff --git a/rust/kernel/devres.rs b/rust/kernel/devres.rs
index 6afe196be42c..9e5f93aed20c 100644
--- a/rust/kernel/devres.rs
+++ b/rust/kernel/devres.rs
@@ -23,9 +23,22 @@ use crate::{
         rcu,
         Arc, //
     },
-    types::ForeignOwnable,
+    types::{
+        ForeignOwnable,
+        Opaque, //
+    },
 };
 
+/// Inner type that embeds a `struct devres_node` and the `Revocable<T>`.
+#[repr(C)]
+#[pin_data]
+struct Inner<T> {
+    #[pin]
+    node: Opaque<bindings::devres_node>,
+    #[pin]
+    data: Revocable<T>,
+}
+
 /// This abstraction is meant to be used by subsystems to containerize [`Device`] bound resources to
 /// manage their lifetime.
 ///
@@ -111,12 +124,64 @@ use crate::{
 /// ```
 pub struct Devres<T: Send> {
     dev: ARef<Device>,
-    /// Pointer to [`Self::devres_callback`].
-    ///
-    /// Has to be stored, since Rust does not guarantee to always return the same address for a
-    /// function. However, the C API uses the address as a key.
-    callback: unsafe extern "C" fn(*mut c_void),
-    data: Arc<Revocable<T>>,
+    inner: Arc<Inner<T>>,
+}
+
+// Calling the FFI functions from the `base` module directly from the `Devres<T>` impl may result in
+// them being called directly from driver modules. This happens since the Rust compiler will use
+// monomorphisation, so it might happen that functions are instantiated within the calling driver
+// module. For now, work around this with `#[inline(never)]` helpers.
+//
+// TODO: Remove once a more generic solution has been implemented. For instance, we may be able to
+// leverage `bindgen` to take care of this depending on whether a symbol is (already) exported.
+mod base {
+    use kernel::{
+        bindings,
+        prelude::*, //
+    };
+
+    #[inline(never)]
+    #[allow(clippy::missing_safety_doc)]
+    pub(super) unsafe fn devres_node_init(
+        node: *mut bindings::devres_node,
+        release: bindings::dr_node_release_t,
+        free: bindings::dr_node_free_t,
+    ) {
+        // SAFETY: Safety requirements are the same as `bindings::devres_node_init`.
+        unsafe { bindings::devres_node_init(node, release, free) }
+    }
+
+    #[inline(never)]
+    #[allow(clippy::missing_safety_doc)]
+    pub(super) unsafe fn devres_set_node_dbginfo(
+        node: *mut bindings::devres_node,
+        name: *const c_char,
+        size: usize,
+    ) {
+        // SAFETY: Safety requirements are the same as `bindings::devres_set_node_dbginfo`.
+        unsafe { bindings::devres_set_node_dbginfo(node, name, size) }
+    }
+
+    #[inline(never)]
+    #[allow(clippy::missing_safety_doc)]
+    pub(super) unsafe fn devres_node_add(
+        dev: *mut bindings::device,
+        node: *mut bindings::devres_node,
+    ) {
+        // SAFETY: Safety requirements are the same as `bindings::devres_node_add`.
+        unsafe { bindings::devres_node_add(dev, node) }
+    }
+
+    #[must_use]
+    #[inline(never)]
+    #[allow(clippy::missing_safety_doc)]
+    pub(super) unsafe fn devres_node_remove(
+        dev: *mut bindings::device,
+        node: *mut bindings::devres_node,
+    ) -> bool {
+        // SAFETY: Safety requirements are the same as `bindings::devres_node_remove`.
+        unsafe { bindings::devres_node_remove(dev, node) }
+    }
 }
 
 impl<T: Send> Devres<T> {
@@ -128,58 +193,86 @@ impl<T: Send> Devres<T> {
     where
         Error: From<E>,
     {
-        let callback = Self::devres_callback;
-        let data = Arc::pin_init(Revocable::new(data), GFP_KERNEL)?;
-        let devres_data = data.clone();
+        let inner = Arc::pin_init::<Error>(
+            try_pin_init!(Inner {
+                node <- Opaque::ffi_init(|node: *mut bindings::devres_node| {
+                    // SAFETY: `node` is a valid pointer to an uninitialized `struct devres_node`.
+                    unsafe {
+                        base::devres_node_init(
+                            node,
+                            Some(Self::devres_node_release),
+                            Some(Self::devres_node_free_node),
+                        )
+                    };
+
+                    // SAFETY: `node` is a valid pointer to an uninitialized `struct devres_node`.
+                    unsafe {
+                        base::devres_set_node_dbginfo(
+                            node,
+                            // TODO: Use `core::any::type_name::<T>()` once it is a `const fn`,
+                            // such that we can convert the `&str` to a `&CStr` at compile-time.
+                            c"Devres<T>".as_char_ptr(),
+                            core::mem::size_of::<Revocable<T>>(),
+                        )
+                    };
+                }),
+                data <- Revocable::new(data),
+            }),
+            GFP_KERNEL,
+        )?;
 
         // SAFETY:
-        // - `dev.as_raw()` is a pointer to a valid bound device.
-        // - `data` is guaranteed to be a valid for the duration of the lifetime of `Self`.
-        // - `devm_add_action()` is guaranteed not to call `callback` for the entire lifetime of
-        //   `dev`.
-        to_result(unsafe {
-            bindings::devm_add_action(
-                dev.as_raw(),
-                Some(callback),
-                Arc::as_ptr(&data).cast_mut().cast(),
-            )
-        })?;
-
-        // `devm_add_action()` was successful and has consumed the reference count.
-        core::mem::forget(devres_data);
+        // - `dev` is a valid pointer to a bound `struct device`.
+        // - `node` is a valid pointer to a `struct devres_node`.
+        // - `devres_node_add()` is guaranteed not to call `devres_node_release()` for the entire
+        //    lifetime of `dev`.
+        unsafe { base::devres_node_add(dev.as_raw(), inner.node.get()) };
+
+        // Take additional reference count for `devres_node_add()`.
+        core::mem::forget(inner.clone());
 
         Ok(Self {
             dev: dev.into(),
-            callback,
-            data,
+            inner,
         })
     }
 
     fn data(&self) -> &Revocable<T> {
-        &self.data
+        &self.inner.data
     }
 
     #[allow(clippy::missing_safety_doc)]
-    unsafe extern "C" fn devres_callback(ptr: *mut kernel::ffi::c_void) {
-        // SAFETY: In `Self::new` we've passed a valid pointer of `Revocable<T>` to
-        // `devm_add_action()`, hence `ptr` must be a valid pointer to `Revocable<T>`.
-        let data = unsafe { Arc::from_raw(ptr.cast::<Revocable<T>>()) };
+    unsafe extern "C" fn devres_node_release(
+        _dev: *mut bindings::device,
+        node: *mut bindings::devres_node,
+    ) {
+        let node = Opaque::cast_from(node);
+
+        // SAFETY: `node` is in the same allocation as its container.
+        let inner = unsafe { kernel::container_of!(node, Inner<T>, node) };
+
+        // SAFETY: `inner` is a valid `Inner<T>` pointer.
+        let inner = unsafe { &*inner };
+
+        inner.data.revoke();
+    }
+
+    #[allow(clippy::missing_safety_doc)]
+    unsafe extern "C" fn devres_node_free_node(node: *mut bindings::devres_node) {
+        let node = Opaque::cast_from(node);
+
+        // SAFETY: `node` is in the same allocation as its container.
+        let inner = unsafe { kernel::container_of!(node, Inner<T>, node) };
 
-        data.revoke();
+        // SAFETY: `inner` points to the entire `Inner<T>` allocation.
+        drop(unsafe { Arc::from_raw(inner) });
     }
 
-    fn remove_action(&self) -> bool {
+    fn remove_node(&self) -> bool {
         // SAFETY:
-        // - `self.dev` is a valid `Device`,
-        // - the `action` and `data` pointers are the exact same ones as given to
-        //   `devm_add_action()` previously,
-        (unsafe {
-            bindings::devm_remove_action_nowarn(
-                self.dev.as_raw(),
-                Some(self.callback),
-                core::ptr::from_ref(self.data()).cast_mut().cast(),
-            )
-        } == 0)
+        // - `self.device().as_raw()` is a valid pointer to a bound `struct device`.
+        // - `self.inner.node.get()` is a valid pointer to a `struct devres_node`.
+        unsafe { base::devres_node_remove(self.device().as_raw(), self.inner.node.get()) }
     }
 
     /// Return a reference of the [`Device`] this [`Devres`] instance has been created with.
@@ -261,12 +354,12 @@ impl<T: Send> Drop for Devres<T> {
         // SAFETY: When `drop` runs, it is guaranteed that nobody is accessing the revocable data
         // anymore, hence it is safe not to wait for the grace period to finish.
         if unsafe { self.data().revoke_nosync() } {
-            // We revoked `self.data` before the devres action did, hence try to remove it.
-            if self.remove_action() {
+            // We revoked `self.data` before devres did, hence try to remove it.
+            if self.remove_node() {
                 // SAFETY: In `Self::new` we have taken an additional reference count of `self.data`
-                // for `devm_add_action()`. Since `remove_action()` was successful, we have to drop
+                // for `devres_node_add()`. Since `remove_node()` was successful, we have to drop
                 // this additional reference count.
-                drop(unsafe { Arc::from_raw(Arc::as_ptr(&self.data)) });
+                drop(unsafe { Arc::from_raw(Arc::as_ptr(&self.inner)) });
             }
         }
     }
diff --git a/rust/kernel/dma.rs b/rust/kernel/dma.rs
index 909d56fd5118..4995ee5dc689 100644
--- a/rust/kernel/dma.rs
+++ b/rust/kernel/dma.rs
@@ -5,14 +5,31 @@
 //! C header: [`include/linux/dma-mapping.h`](srctree/include/linux/dma-mapping.h)
 
 use crate::{
-    bindings, build_assert, device,
-    device::{Bound, Core},
-    error::{to_result, Result},
+    bindings,
+    debugfs,
+    device::{
+        self,
+        Bound,
+        Core, //
+    },
+    error::to_result,
+    fs::file,
     prelude::*,
+    ptr::KnownSize,
     sync::aref::ARef,
-    transmute::{AsBytes, FromBytes},
+    transmute::{
+        AsBytes,
+        FromBytes, //
+    }, //
+    uaccess::UserSliceWriter,
+};
+use core::{
+    ops::{
+        Deref,
+        DerefMut, //
+    },
+    ptr::NonNull, //
 };
-use core::ptr::NonNull;
 
 /// DMA address type.
 ///
@@ -39,7 +56,7 @@ pub trait Device: AsRef<device::Device<Core>> {
     /// # Safety
     ///
     /// This method must not be called concurrently with any DMA allocation or mapping primitives,
-    /// such as [`CoherentAllocation::alloc_attrs`].
+    /// such as [`Coherent::zeroed`].
     unsafe fn dma_set_mask(&self, mask: DmaMask) -> Result {
         // SAFETY:
         // - By the type invariant of `device::Device`, `self.as_ref().as_raw()` is valid.
@@ -56,7 +73,7 @@ pub trait Device: AsRef<device::Device<Core>> {
     /// # Safety
     ///
     /// This method must not be called concurrently with any DMA allocation or mapping primitives,
-    /// such as [`CoherentAllocation::alloc_attrs`].
+    /// such as [`Coherent::zeroed`].
     unsafe fn dma_set_coherent_mask(&self, mask: DmaMask) -> Result {
         // SAFETY:
         // - By the type invariant of `device::Device`, `self.as_ref().as_raw()` is valid.
@@ -75,7 +92,7 @@ pub trait Device: AsRef<device::Device<Core>> {
     /// # Safety
     ///
     /// This method must not be called concurrently with any DMA allocation or mapping primitives,
-    /// such as [`CoherentAllocation::alloc_attrs`].
+    /// such as [`Coherent::zeroed`].
     unsafe fn dma_set_mask_and_coherent(&self, mask: DmaMask) -> Result {
         // SAFETY:
         // - By the type invariant of `device::Device`, `self.as_ref().as_raw()` is valid.
@@ -94,7 +111,7 @@ pub trait Device: AsRef<device::Device<Core>> {
     /// # Safety
     ///
     /// This method must not be called concurrently with any DMA allocation or mapping primitives,
-    /// such as [`CoherentAllocation::alloc_attrs`].
+    /// such as [`Coherent::zeroed`].
     unsafe fn dma_set_max_seg_size(&self, size: u32) {
         // SAFETY:
         // - By the type invariant of `device::Device`, `self.as_ref().as_raw()` is valid.
@@ -194,12 +211,12 @@ impl DmaMask {
 ///
 /// ```
 /// # use kernel::device::{Bound, Device};
-/// use kernel::dma::{attrs::*, CoherentAllocation};
+/// use kernel::dma::{attrs::*, Coherent};
 ///
 /// # fn test(dev: &Device<Bound>) -> Result {
 /// let attribs = DMA_ATTR_FORCE_CONTIGUOUS | DMA_ATTR_NO_WARN;
-/// let c: CoherentAllocation<u64> =
-///     CoherentAllocation::alloc_attrs(dev, 4, GFP_KERNEL, attribs)?;
+/// let c: Coherent<[u64]> =
+///     Coherent::zeroed_slice_with_attrs(dev, 4, GFP_KERNEL, attribs)?;
 /// # Ok::<(), Error>(()) }
 /// ```
 #[derive(Clone, Copy, PartialEq)]
@@ -250,9 +267,6 @@ pub mod attrs {
     /// Specifies that writes to the mapping may be buffered to improve performance.
     pub const DMA_ATTR_WRITE_COMBINE: Attrs = Attrs(bindings::DMA_ATTR_WRITE_COMBINE);
 
-    /// Lets the platform to avoid creating a kernel virtual mapping for the allocated buffer.
-    pub const DMA_ATTR_NO_KERNEL_MAPPING: Attrs = Attrs(bindings::DMA_ATTR_NO_KERNEL_MAPPING);
-
     /// Allows platform code to skip synchronization of the CPU cache for the given buffer assuming
     /// that it has been already transferred to 'device' domain.
     pub const DMA_ATTR_SKIP_CPU_SYNC: Attrs = Attrs(bindings::DMA_ATTR_SKIP_CPU_SYNC);
@@ -344,23 +358,228 @@ impl From<DataDirection> for bindings::dma_data_direction {
     }
 }
 
+/// CPU-owned DMA allocation that can be converted into a device-shared [`Coherent`] object.
+///
+/// Unlike [`Coherent`], a [`CoherentBox`] is guaranteed to be fully owned by the CPU -- its DMA
+/// address is not exposed and it cannot be accessed by a device. This means it can safely be used
+/// like a normal boxed allocation (e.g. direct reads, writes, and mutable slices are all safe).
+///
+/// A typical use is to allocate a [`CoherentBox`], populate it with normal CPU access, and then
+/// convert it into a [`Coherent`] object to share it with the device.
+///
+/// # Examples
+///
+/// `CoherentBox<T>`:
+///
+/// ```
+/// # use kernel::device::{
+/// #     Bound,
+/// #     Device,
+/// # };
+/// use kernel::dma::{attrs::*,
+///     Coherent,
+///     CoherentBox,
+/// };
+///
+/// # fn test(dev: &Device<Bound>) -> Result {
+/// let mut dmem: CoherentBox<u64> = CoherentBox::zeroed(dev, GFP_KERNEL)?;
+/// *dmem = 42;
+/// let dmem: Coherent<u64> = dmem.into();
+/// # Ok::<(), Error>(()) }
+/// ```
+///
+/// `CoherentBox<[T]>`:
+///
+///
+/// ```
+/// # use kernel::device::{
+/// #     Bound,
+/// #     Device,
+/// # };
+/// use kernel::dma::{attrs::*,
+///     Coherent,
+///     CoherentBox,
+/// };
+///
+/// # fn test(dev: &Device<Bound>) -> Result {
+/// let mut dmem: CoherentBox<[u64]> = CoherentBox::zeroed_slice(dev, 4, GFP_KERNEL)?;
+/// dmem.fill(42);
+/// let dmem: Coherent<[u64]> = dmem.into();
+/// # Ok::<(), Error>(()) }
+/// ```
+pub struct CoherentBox<T: KnownSize + ?Sized>(Coherent<T>);
+
+impl<T: AsBytes + FromBytes> CoherentBox<[T]> {
+    /// [`CoherentBox`] variant of [`Coherent::zeroed_slice_with_attrs`].
+    #[inline]
+    pub fn zeroed_slice_with_attrs(
+        dev: &device::Device<Bound>,
+        count: usize,
+        gfp_flags: kernel::alloc::Flags,
+        dma_attrs: Attrs,
+    ) -> Result<Self> {
+        Coherent::zeroed_slice_with_attrs(dev, count, gfp_flags, dma_attrs).map(Self)
+    }
+
+    /// Same as [CoherentBox::zeroed_slice_with_attrs], but with `dma::Attrs(0)`.
+    #[inline]
+    pub fn zeroed_slice(
+        dev: &device::Device<Bound>,
+        count: usize,
+        gfp_flags: kernel::alloc::Flags,
+    ) -> Result<Self> {
+        Self::zeroed_slice_with_attrs(dev, count, gfp_flags, Attrs(0))
+    }
+
+    /// Initializes the element at `i` using the given initializer.
+    ///
+    /// Returns `EINVAL` if `i` is out of bounds.
+    pub fn init_at<E>(&mut self, i: usize, init: impl Init<T, E>) -> Result
+    where
+        Error: From<E>,
+    {
+        if i >= self.0.len() {
+            return Err(EINVAL);
+        }
+
+        let ptr = &raw mut self[i];
+
+        // SAFETY:
+        // - `ptr` is valid, properly aligned, and within this allocation.
+        // - `T: AsBytes + FromBytes` guarantees all bit patterns are valid, so partial writes on
+        //   error cannot leave the element in an invalid state.
+        // - The DMA address has not been exposed yet, so there is no concurrent device access.
+        unsafe { init.__init(ptr)? };
+
+        Ok(())
+    }
+
+    /// Allocates a region of coherent memory of the same size as `data` and initializes it with a
+    /// copy of its contents.
+    ///
+    /// This is the [`CoherentBox`] variant of [`Coherent::from_slice_with_attrs`].
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use core::ops::Deref;
+    ///
+    /// # use kernel::device::{Bound, Device};
+    /// use kernel::dma::{
+    ///     attrs::*,
+    ///     CoherentBox
+    /// };
+    ///
+    /// # fn test(dev: &Device<Bound>) -> Result {
+    /// let data = [0u8, 1u8, 2u8, 3u8];
+    /// let c: CoherentBox<[u8]> =
+    ///     CoherentBox::from_slice_with_attrs(dev, &data, GFP_KERNEL, DMA_ATTR_NO_WARN)?;
+    ///
+    /// assert_eq!(c.deref(), &data);
+    /// # Ok::<(), Error>(()) }
+    /// ```
+    pub fn from_slice_with_attrs(
+        dev: &device::Device<Bound>,
+        data: &[T],
+        gfp_flags: kernel::alloc::Flags,
+        dma_attrs: Attrs,
+    ) -> Result<Self>
+    where
+        T: Copy,
+    {
+        let mut slice = Self(Coherent::<T>::alloc_slice_with_attrs(
+            dev,
+            data.len(),
+            gfp_flags,
+            dma_attrs,
+        )?);
+
+        // PANIC: `slice` was created with length `data.len()`.
+        slice.copy_from_slice(data);
+
+        Ok(slice)
+    }
+
+    /// Performs the same functionality as [`CoherentBox::from_slice_with_attrs`], except the
+    /// `dma_attrs` is 0 by default.
+    #[inline]
+    pub fn from_slice(
+        dev: &device::Device<Bound>,
+        data: &[T],
+        gfp_flags: kernel::alloc::Flags,
+    ) -> Result<Self>
+    where
+        T: Copy,
+    {
+        Self::from_slice_with_attrs(dev, data, gfp_flags, Attrs(0))
+    }
+}
+
+impl<T: AsBytes + FromBytes> CoherentBox<T> {
+    /// Same as [`CoherentBox::zeroed_slice_with_attrs`], but for a single element.
+    #[inline]
+    pub fn zeroed_with_attrs(
+        dev: &device::Device<Bound>,
+        gfp_flags: kernel::alloc::Flags,
+        dma_attrs: Attrs,
+    ) -> Result<Self> {
+        Coherent::zeroed_with_attrs(dev, gfp_flags, dma_attrs).map(Self)
+    }
+
+    /// Same as [`CoherentBox::zeroed_slice`], but for a single element.
+    #[inline]
+    pub fn zeroed(dev: &device::Device<Bound>, gfp_flags: kernel::alloc::Flags) -> Result<Self> {
+        Self::zeroed_with_attrs(dev, gfp_flags, Attrs(0))
+    }
+}
+
+impl<T: KnownSize + ?Sized> Deref for CoherentBox<T> {
+    type Target = T;
+
+    #[inline]
+    fn deref(&self) -> &Self::Target {
+        // SAFETY:
+        // - We have not exposed the DMA address yet, so there can't be any concurrent access by a
+        //   device.
+        // - We have exclusive access to `self.0`.
+        unsafe { self.0.as_ref() }
+    }
+}
+
+impl<T: AsBytes + FromBytes + KnownSize + ?Sized> DerefMut for CoherentBox<T> {
+    #[inline]
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        // SAFETY:
+        // - We have not exposed the DMA address yet, so there can't be any concurrent access by a
+        //   device.
+        // - We have exclusive access to `self.0`.
+        unsafe { self.0.as_mut() }
+    }
+}
+
+impl<T: AsBytes + FromBytes + KnownSize + ?Sized> From<CoherentBox<T>> for Coherent<T> {
+    #[inline]
+    fn from(value: CoherentBox<T>) -> Self {
+        value.0
+    }
+}
+
 /// An abstraction of the `dma_alloc_coherent` API.
 ///
 /// This is an abstraction around the `dma_alloc_coherent` API which is used to allocate and map
 /// large coherent DMA regions.
 ///
-/// A [`CoherentAllocation`] instance contains a pointer to the allocated region (in the
+/// A [`Coherent`] instance contains a pointer to the allocated region (in the
 /// processor's virtual address space) and the device address which can be given to the device
-/// as the DMA address base of the region. The region is released once [`CoherentAllocation`]
+/// as the DMA address base of the region. The region is released once [`Coherent`]
 /// is dropped.
 ///
 /// # Invariants
 ///
-/// - For the lifetime of an instance of [`CoherentAllocation`], the `cpu_addr` is a valid pointer
+/// - For the lifetime of an instance of [`Coherent`], the `cpu_addr` is a valid pointer
 ///   to an allocated region of coherent memory and `dma_handle` is the DMA address base of the
 ///   region.
-/// - The size in bytes of the allocation is equal to `size_of::<T> * count`.
-/// - `size_of::<T> * count` fits into a `usize`.
+/// - The size in bytes of the allocation is equal to size information via pointer.
 // TODO
 //
 // DMA allocations potentially carry device resources (e.g.IOMMU mappings), hence for soundness
@@ -371,142 +590,43 @@ impl From<DataDirection> for bindings::dma_data_direction {
 // allocation from surviving device unbind; it would require RCU read side critical sections to
 // access the memory, which may require subsequent unnecessary copies.
 //
-// Hence, find a way to revoke the device resources of a `CoherentAllocation`, but not the
-// entire `CoherentAllocation` including the allocated memory itself.
-pub struct CoherentAllocation<T: AsBytes + FromBytes> {
+// Hence, find a way to revoke the device resources of a `Coherent`, but not the
+// entire `Coherent` including the allocated memory itself.
+pub struct Coherent<T: KnownSize + ?Sized> {
     dev: ARef<device::Device>,
     dma_handle: DmaAddress,
-    count: usize,
     cpu_addr: NonNull<T>,
     dma_attrs: Attrs,
 }
 
-impl<T: AsBytes + FromBytes> CoherentAllocation<T> {
-    /// Allocates a region of `size_of::<T> * count` of coherent memory.
-    ///
-    /// # Examples
-    ///
-    /// ```
-    /// # use kernel::device::{Bound, Device};
-    /// use kernel::dma::{attrs::*, CoherentAllocation};
-    ///
-    /// # fn test(dev: &Device<Bound>) -> Result {
-    /// let c: CoherentAllocation<u64> =
-    ///     CoherentAllocation::alloc_attrs(dev, 4, GFP_KERNEL, DMA_ATTR_NO_WARN)?;
-    /// # Ok::<(), Error>(()) }
-    /// ```
-    pub fn alloc_attrs(
-        dev: &device::Device<Bound>,
-        count: usize,
-        gfp_flags: kernel::alloc::Flags,
-        dma_attrs: Attrs,
-    ) -> Result<CoherentAllocation<T>> {
-        build_assert!(
-            core::mem::size_of::<T>() > 0,
-            "It doesn't make sense for the allocated type to be a ZST"
-        );
-
-        let size = count
-            .checked_mul(core::mem::size_of::<T>())
-            .ok_or(EOVERFLOW)?;
-        let mut dma_handle = 0;
-        // SAFETY: Device pointer is guaranteed as valid by the type invariant on `Device`.
-        let addr = unsafe {
-            bindings::dma_alloc_attrs(
-                dev.as_raw(),
-                size,
-                &mut dma_handle,
-                gfp_flags.as_raw(),
-                dma_attrs.as_raw(),
-            )
-        };
-        let addr = NonNull::new(addr).ok_or(ENOMEM)?;
-        // INVARIANT:
-        // - We just successfully allocated a coherent region which is accessible for
-        //   `count` elements, hence the cpu address is valid. We also hold a refcounted reference
-        //   to the device.
-        // - The allocated `size` is equal to `size_of::<T> * count`.
-        // - The allocated `size` fits into a `usize`.
-        Ok(Self {
-            dev: dev.into(),
-            dma_handle,
-            count,
-            cpu_addr: addr.cast(),
-            dma_attrs,
-        })
-    }
-
-    /// Performs the same functionality as [`CoherentAllocation::alloc_attrs`], except the
-    /// `dma_attrs` is 0 by default.
-    pub fn alloc_coherent(
-        dev: &device::Device<Bound>,
-        count: usize,
-        gfp_flags: kernel::alloc::Flags,
-    ) -> Result<CoherentAllocation<T>> {
-        CoherentAllocation::alloc_attrs(dev, count, gfp_flags, Attrs(0))
-    }
-
-    /// Returns the number of elements `T` in this allocation.
-    ///
-    /// Note that this is not the size of the allocation in bytes, which is provided by
-    /// [`Self::size`].
-    pub fn count(&self) -> usize {
-        self.count
-    }
-
+impl<T: KnownSize + ?Sized> Coherent<T> {
     /// Returns the size in bytes of this allocation.
+    #[inline]
     pub fn size(&self) -> usize {
-        // INVARIANT: The type invariant of `Self` guarantees that `size_of::<T> * count` fits into
-        // a `usize`.
-        self.count * core::mem::size_of::<T>()
+        T::size(self.cpu_addr.as_ptr())
     }
 
-    /// Returns the base address to the allocated region in the CPU's virtual address space.
-    pub fn start_ptr(&self) -> *const T {
+    /// Returns the raw pointer to the allocated region in the CPU's virtual address space.
+    #[inline]
+    pub fn as_ptr(&self) -> *const T {
         self.cpu_addr.as_ptr()
     }
 
-    /// Returns the base address to the allocated region in the CPU's virtual address space as
+    /// Returns the raw pointer to the allocated region in the CPU's virtual address space as
     /// a mutable pointer.
-    pub fn start_ptr_mut(&mut self) -> *mut T {
+    #[inline]
+    pub fn as_mut_ptr(&self) -> *mut T {
         self.cpu_addr.as_ptr()
     }
 
     /// Returns a DMA handle which may be given to the device as the DMA address base of
     /// the region.
+    #[inline]
     pub fn dma_handle(&self) -> DmaAddress {
         self.dma_handle
     }
 
-    /// Returns a DMA handle starting at `offset` (in units of `T`) which may be given to the
-    /// device as the DMA address base of the region.
-    ///
-    /// Returns `EINVAL` if `offset` is not within the bounds of the allocation.
-    pub fn dma_handle_with_offset(&self, offset: usize) -> Result<DmaAddress> {
-        if offset >= self.count {
-            Err(EINVAL)
-        } else {
-            // INVARIANT: The type invariant of `Self` guarantees that `size_of::<T> * count` fits
-            // into a `usize`, and `offset` is inferior to `count`.
-            Ok(self.dma_handle + (offset * core::mem::size_of::<T>()) as DmaAddress)
-        }
-    }
-
-    /// Common helper to validate a range applied from the allocated region in the CPU's virtual
-    /// address space.
-    fn validate_range(&self, offset: usize, count: usize) -> Result {
-        if offset.checked_add(count).ok_or(EOVERFLOW)? > self.count {
-            return Err(EINVAL);
-        }
-        Ok(())
-    }
-
-    /// Returns the data from the region starting from `offset` as a slice.
-    /// `offset` and `count` are in units of `T`, not the number of bytes.
-    ///
-    /// For ringbuffer type of r/w access or use-cases where the pointer to the live data is needed,
-    /// [`CoherentAllocation::start_ptr`] or [`CoherentAllocation::start_ptr_mut`] could be used
-    /// instead.
+    /// Returns a reference to the data in the region.
     ///
     /// # Safety
     ///
@@ -514,19 +634,13 @@ impl<T: AsBytes + FromBytes> CoherentAllocation<T> {
     ///   slice is live.
     /// * Callers must ensure that this call does not race with a write to the same region while
     ///   the returned slice is live.
-    pub unsafe fn as_slice(&self, offset: usize, count: usize) -> Result<&[T]> {
-        self.validate_range(offset, count)?;
-        // SAFETY:
-        // - The pointer is valid due to type invariant on `CoherentAllocation`,
-        //   we've just checked that the range and index is within bounds. The immutability of the
-        //   data is also guaranteed by the safety requirements of the function.
-        // - `offset + count` can't overflow since it is smaller than `self.count` and we've checked
-        //   that `self.count` won't overflow early in the constructor.
-        Ok(unsafe { core::slice::from_raw_parts(self.start_ptr().add(offset), count) })
+    #[inline]
+    pub unsafe fn as_ref(&self) -> &T {
+        // SAFETY: per safety requirement.
+        unsafe { &*self.as_ptr() }
     }
 
-    /// Performs the same functionality as [`CoherentAllocation::as_slice`], except that a mutable
-    /// slice is returned.
+    /// Returns a mutable reference to the data in the region.
     ///
     /// # Safety
     ///
@@ -534,68 +648,11 @@ impl<T: AsBytes + FromBytes> CoherentAllocation<T> {
     ///   slice is live.
     /// * Callers must ensure that this call does not race with a read or write to the same region
     ///   while the returned slice is live.
-    pub unsafe fn as_slice_mut(&mut self, offset: usize, count: usize) -> Result<&mut [T]> {
-        self.validate_range(offset, count)?;
-        // SAFETY:
-        // - The pointer is valid due to type invariant on `CoherentAllocation`,
-        //   we've just checked that the range and index is within bounds. The immutability of the
-        //   data is also guaranteed by the safety requirements of the function.
-        // - `offset + count` can't overflow since it is smaller than `self.count` and we've checked
-        //   that `self.count` won't overflow early in the constructor.
-        Ok(unsafe { core::slice::from_raw_parts_mut(self.start_ptr_mut().add(offset), count) })
-    }
-
-    /// Writes data to the region starting from `offset`. `offset` is in units of `T`, not the
-    /// number of bytes.
-    ///
-    /// # Safety
-    ///
-    /// * Callers must ensure that this call does not race with a read or write to the same region
-    ///   that overlaps with this write.
-    ///
-    /// # Examples
-    ///
-    /// ```
-    /// # fn test(alloc: &mut kernel::dma::CoherentAllocation<u8>) -> Result {
-    /// let somedata: [u8; 4] = [0xf; 4];
-    /// let buf: &[u8] = &somedata;
-    /// // SAFETY: There is no concurrent HW operation on the device and no other R/W access to the
-    /// // region.
-    /// unsafe { alloc.write(buf, 0)?; }
-    /// # Ok::<(), Error>(()) }
-    /// ```
-    pub unsafe fn write(&mut self, src: &[T], offset: usize) -> Result {
-        self.validate_range(offset, src.len())?;
-        // SAFETY:
-        // - The pointer is valid due to type invariant on `CoherentAllocation`
-        //   and we've just checked that the range and index is within bounds.
-        // - `offset + count` can't overflow since it is smaller than `self.count` and we've checked
-        //   that `self.count` won't overflow early in the constructor.
-        unsafe {
-            core::ptr::copy_nonoverlapping(
-                src.as_ptr(),
-                self.start_ptr_mut().add(offset),
-                src.len(),
-            )
-        };
-        Ok(())
-    }
-
-    /// Returns a pointer to an element from the region with bounds checking. `offset` is in
-    /// units of `T`, not the number of bytes.
-    ///
-    /// Public but hidden since it should only be used from [`dma_read`] and [`dma_write`] macros.
-    #[doc(hidden)]
-    pub fn item_from_index(&self, offset: usize) -> Result<*mut T> {
-        if offset >= self.count {
-            return Err(EINVAL);
-        }
-        // SAFETY:
-        // - The pointer is valid due to type invariant on `CoherentAllocation`
-        // and we've just checked that the range and index is within bounds.
-        // - `offset` can't overflow since it is smaller than `self.count` and we've checked
-        // that `self.count` won't overflow early in the constructor.
-        Ok(unsafe { self.cpu_addr.as_ptr().add(offset) })
+    #[expect(clippy::mut_from_ref, reason = "unsafe to use API")]
+    #[inline]
+    pub unsafe fn as_mut(&self) -> &mut T {
+        // SAFETY: per safety requirement.
+        unsafe { &mut *self.as_mut_ptr() }
     }
 
     /// Reads the value of `field` and ensures that its type is [`FromBytes`].
@@ -645,18 +702,276 @@ impl<T: AsBytes + FromBytes> CoherentAllocation<T> {
     }
 }
 
+impl<T: AsBytes + FromBytes> Coherent<T> {
+    /// Allocates a region of `T` of coherent memory.
+    fn alloc_with_attrs(
+        dev: &device::Device<Bound>,
+        gfp_flags: kernel::alloc::Flags,
+        dma_attrs: Attrs,
+    ) -> Result<Self> {
+        const {
+            assert!(
+                core::mem::size_of::<T>() > 0,
+                "It doesn't make sense for the allocated type to be a ZST"
+            );
+        }
+
+        let mut dma_handle = 0;
+        // SAFETY: Device pointer is guaranteed as valid by the type invariant on `Device`.
+        let addr = unsafe {
+            bindings::dma_alloc_attrs(
+                dev.as_raw(),
+                core::mem::size_of::<T>(),
+                &mut dma_handle,
+                gfp_flags.as_raw(),
+                dma_attrs.as_raw(),
+            )
+        };
+        let cpu_addr = NonNull::new(addr.cast()).ok_or(ENOMEM)?;
+        // INVARIANT:
+        // - We just successfully allocated a coherent region which is adequately sized for `T`,
+        //   hence the cpu address is valid.
+        // - We also hold a refcounted reference to the device.
+        Ok(Self {
+            dev: dev.into(),
+            dma_handle,
+            cpu_addr,
+            dma_attrs,
+        })
+    }
+
+    /// Allocates a region of type `T` of coherent memory.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use kernel::device::{
+    /// #     Bound,
+    /// #     Device,
+    /// # };
+    /// use kernel::dma::{
+    ///     attrs::*,
+    ///     Coherent,
+    /// };
+    ///
+    /// # fn test(dev: &Device<Bound>) -> Result {
+    /// let c: Coherent<[u64; 4]> =
+    ///     Coherent::zeroed_with_attrs(dev, GFP_KERNEL, DMA_ATTR_NO_WARN)?;
+    /// # Ok::<(), Error>(()) }
+    /// ```
+    #[inline]
+    pub fn zeroed_with_attrs(
+        dev: &device::Device<Bound>,
+        gfp_flags: kernel::alloc::Flags,
+        dma_attrs: Attrs,
+    ) -> Result<Self> {
+        Self::alloc_with_attrs(dev, gfp_flags | __GFP_ZERO, dma_attrs)
+    }
+
+    /// Performs the same functionality as [`Coherent::zeroed_with_attrs`], except the
+    /// `dma_attrs` is 0 by default.
+    #[inline]
+    pub fn zeroed(dev: &device::Device<Bound>, gfp_flags: kernel::alloc::Flags) -> Result<Self> {
+        Self::zeroed_with_attrs(dev, gfp_flags, Attrs(0))
+    }
+
+    /// Same as [`Coherent::zeroed_with_attrs`], but instead of a zero-initialization the memory is
+    /// initialized with `init`.
+    pub fn init_with_attrs<E>(
+        dev: &device::Device<Bound>,
+        gfp_flags: kernel::alloc::Flags,
+        dma_attrs: Attrs,
+        init: impl Init<T, E>,
+    ) -> Result<Self>
+    where
+        Error: From<E>,
+    {
+        let dmem = Self::alloc_with_attrs(dev, gfp_flags, dma_attrs)?;
+        let ptr = dmem.as_mut_ptr();
+
+        // SAFETY:
+        // - `ptr` is valid, properly aligned, and points to exclusively owned memory.
+        // - If `__init` fails, `self` is dropped, which safely frees the underlying `Coherent`'s
+        //   DMA memory. `T: AsBytes + FromBytes` ensures there are no complex `Drop` requirements
+        //   we are bypassing.
+        unsafe { init.__init(ptr)? };
+
+        Ok(dmem)
+    }
+
+    /// Same as [`Coherent::zeroed`], but instead of a zero-initialization the memory is initialized
+    /// with `init`.
+    #[inline]
+    pub fn init<E>(
+        dev: &device::Device<Bound>,
+        gfp_flags: kernel::alloc::Flags,
+        init: impl Init<T, E>,
+    ) -> Result<Self>
+    where
+        Error: From<E>,
+    {
+        Self::init_with_attrs(dev, gfp_flags, Attrs(0), init)
+    }
+
+    /// Allocates a region of `[T; len]` of coherent memory.
+    fn alloc_slice_with_attrs(
+        dev: &device::Device<Bound>,
+        len: usize,
+        gfp_flags: kernel::alloc::Flags,
+        dma_attrs: Attrs,
+    ) -> Result<Coherent<[T]>> {
+        const {
+            assert!(
+                core::mem::size_of::<T>() > 0,
+                "It doesn't make sense for the allocated type to be a ZST"
+            );
+        }
+
+        // `dma_alloc_attrs` cannot handle zero-length allocation, bail early.
+        if len == 0 {
+            Err(EINVAL)?;
+        }
+
+        let size = core::mem::size_of::<T>().checked_mul(len).ok_or(ENOMEM)?;
+        let mut dma_handle = 0;
+        // SAFETY: Device pointer is guaranteed as valid by the type invariant on `Device`.
+        let addr = unsafe {
+            bindings::dma_alloc_attrs(
+                dev.as_raw(),
+                size,
+                &mut dma_handle,
+                gfp_flags.as_raw(),
+                dma_attrs.as_raw(),
+            )
+        };
+        let cpu_addr = NonNull::slice_from_raw_parts(NonNull::new(addr.cast()).ok_or(ENOMEM)?, len);
+        // INVARIANT:
+        // - We just successfully allocated a coherent region which is adequately sized for
+        //   `[T; len]`, hence the cpu address is valid.
+        // - We also hold a refcounted reference to the device.
+        Ok(Coherent {
+            dev: dev.into(),
+            dma_handle,
+            cpu_addr,
+            dma_attrs,
+        })
+    }
+
+    /// Allocates a zeroed region of type `T` of coherent memory.
+    ///
+    /// Unlike `Coherent::<[T; N]>::zeroed_with_attrs`, `Coherent::<T>::zeroed_slices` support
+    /// a runtime length.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use kernel::device::{
+    /// #     Bound,
+    /// #     Device,
+    /// # };
+    /// use kernel::dma::{
+    ///     attrs::*,
+    ///     Coherent,
+    /// };
+    ///
+    /// # fn test(dev: &Device<Bound>) -> Result {
+    /// let c: Coherent<[u64]> =
+    ///     Coherent::zeroed_slice_with_attrs(dev, 4, GFP_KERNEL, DMA_ATTR_NO_WARN)?;
+    /// # Ok::<(), Error>(()) }
+    /// ```
+    #[inline]
+    pub fn zeroed_slice_with_attrs(
+        dev: &device::Device<Bound>,
+        len: usize,
+        gfp_flags: kernel::alloc::Flags,
+        dma_attrs: Attrs,
+    ) -> Result<Coherent<[T]>> {
+        Coherent::alloc_slice_with_attrs(dev, len, gfp_flags | __GFP_ZERO, dma_attrs)
+    }
+
+    /// Performs the same functionality as [`Coherent::zeroed_slice_with_attrs`], except the
+    /// `dma_attrs` is 0 by default.
+    #[inline]
+    pub fn zeroed_slice(
+        dev: &device::Device<Bound>,
+        len: usize,
+        gfp_flags: kernel::alloc::Flags,
+    ) -> Result<Coherent<[T]>> {
+        Self::zeroed_slice_with_attrs(dev, len, gfp_flags, Attrs(0))
+    }
+
+    /// Allocates a region of coherent memory of the same size as `data` and initializes it with a
+    /// copy of its contents.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use kernel::device::{Bound, Device};
+    /// use kernel::dma::{
+    ///     attrs::*,
+    ///     Coherent
+    /// };
+    ///
+    /// # fn test(dev: &Device<Bound>) -> Result {
+    /// let data = [0u8, 1u8, 2u8, 3u8];
+    /// // `c` has the same content as `data`.
+    /// let c: Coherent<[u8]> =
+    ///     Coherent::from_slice_with_attrs(dev, &data, GFP_KERNEL, DMA_ATTR_NO_WARN)?;
+    ///
+    /// # Ok::<(), Error>(()) }
+    /// ```
+    #[inline]
+    pub fn from_slice_with_attrs(
+        dev: &device::Device<Bound>,
+        data: &[T],
+        gfp_flags: kernel::alloc::Flags,
+        dma_attrs: Attrs,
+    ) -> Result<Coherent<[T]>>
+    where
+        T: Copy,
+    {
+        CoherentBox::from_slice_with_attrs(dev, data, gfp_flags, dma_attrs).map(Into::into)
+    }
+
+    /// Performs the same functionality as [`Coherent::from_slice_with_attrs`], except the
+    /// `dma_attrs` is 0 by default.
+    #[inline]
+    pub fn from_slice(
+        dev: &device::Device<Bound>,
+        data: &[T],
+        gfp_flags: kernel::alloc::Flags,
+    ) -> Result<Coherent<[T]>>
+    where
+        T: Copy,
+    {
+        Self::from_slice_with_attrs(dev, data, gfp_flags, Attrs(0))
+    }
+}
+
+impl<T> Coherent<[T]> {
+    /// Returns the number of elements `T` in this allocation.
+    ///
+    /// Note that this is not the size of the allocation in bytes, which is provided by
+    /// [`Self::size`].
+    #[inline]
+    #[expect(clippy::len_without_is_empty, reason = "Coherent slice is never empty")]
+    pub fn len(&self) -> usize {
+        self.cpu_addr.len()
+    }
+}
+
 /// Note that the device configured to do DMA must be halted before this object is dropped.
-impl<T: AsBytes + FromBytes> Drop for CoherentAllocation<T> {
+impl<T: KnownSize + ?Sized> Drop for Coherent<T> {
     fn drop(&mut self) {
-        let size = self.count * core::mem::size_of::<T>();
+        let size = T::size(self.cpu_addr.as_ptr());
         // SAFETY: Device pointer is guaranteed as valid by the type invariant on `Device`.
         // The cpu address, and the dma handle are valid due to the type invariants on
-        // `CoherentAllocation`.
+        // `Coherent`.
         unsafe {
             bindings::dma_free_attrs(
                 self.dev.as_raw(),
                 size,
-                self.start_ptr_mut().cast(),
+                self.cpu_addr.as_ptr().cast(),
                 self.dma_handle,
                 self.dma_attrs.as_raw(),
             )
@@ -664,17 +979,170 @@ impl<T: AsBytes + FromBytes> Drop for CoherentAllocation<T> {
     }
 }
 
-// SAFETY: It is safe to send a `CoherentAllocation` to another thread if `T`
+// SAFETY: It is safe to send a `Coherent` to another thread if `T`
 // can be sent to another thread.
-unsafe impl<T: AsBytes + FromBytes + Send> Send for CoherentAllocation<T> {}
+unsafe impl<T: KnownSize + Send + ?Sized> Send for Coherent<T> {}
+
+// SAFETY: Sharing `&Coherent` across threads is safe if `T` is `Sync`, because all
+// methods that access the buffer contents (`field_read`, `field_write`, `as_slice`,
+// `as_slice_mut`) are `unsafe`, and callers are responsible for ensuring no data races occur.
+// The safe methods only return metadata or raw pointers whose use requires `unsafe`.
+unsafe impl<T: KnownSize + ?Sized + AsBytes + FromBytes + Sync> Sync for Coherent<T> {}
+
+impl<T: KnownSize + AsBytes + ?Sized> debugfs::BinaryWriter for Coherent<T> {
+    fn write_to_slice(
+        &self,
+        writer: &mut UserSliceWriter,
+        offset: &mut file::Offset,
+    ) -> Result<usize> {
+        if offset.is_negative() {
+            return Err(EINVAL);
+        }
+
+        // If the offset is too large for a usize (e.g. on 32-bit platforms),
+        // then consider that as past EOF and just return 0 bytes.
+        let Ok(offset_val) = usize::try_from(*offset) else {
+            return Ok(0);
+        };
+
+        let count = self.size().saturating_sub(offset_val).min(writer.len());
+
+        writer.write_dma(self, offset_val, count)?;
+
+        *offset += count as i64;
+        Ok(count)
+    }
+}
+
+/// An opaque DMA allocation without a kernel virtual mapping.
+///
+/// Unlike [`Coherent`], a `CoherentHandle` does not provide CPU access to the allocated memory.
+/// The allocation is always performed with `DMA_ATTR_NO_KERNEL_MAPPING`, meaning no kernel
+/// virtual mapping is created for the buffer. The value returned by the C API as the CPU
+/// address is an opaque handle used only to free the allocation.
+///
+/// This is useful for buffers that are only ever accessed by hardware.
+///
+/// # Invariants
+///
+/// - `cpu_handle` holds the opaque handle returned by `dma_alloc_attrs` with
+///   `DMA_ATTR_NO_KERNEL_MAPPING` set, and is only valid for passing back to `dma_free_attrs`.
+/// - `dma_handle` is the corresponding bus address for device DMA.
+/// - `size` is the allocation size in bytes as passed to `dma_alloc_attrs`.
+/// - `dma_attrs` contains the attributes used for the allocation, always including
+///   `DMA_ATTR_NO_KERNEL_MAPPING`.
+pub struct CoherentHandle {
+    dev: ARef<device::Device>,
+    dma_handle: DmaAddress,
+    cpu_handle: NonNull<c_void>,
+    size: usize,
+    dma_attrs: Attrs,
+}
+
+impl CoherentHandle {
+    /// Allocates `size` bytes of coherent DMA memory without creating a kernel virtual mapping.
+    ///
+    /// Additional DMA attributes may be passed via `dma_attrs`; `DMA_ATTR_NO_KERNEL_MAPPING` is
+    /// always set implicitly.
+    ///
+    /// Returns `EINVAL` if `size` is zero, `ENOMEM` if the allocation fails.
+    pub fn alloc_with_attrs(
+        dev: &device::Device<Bound>,
+        size: usize,
+        gfp_flags: kernel::alloc::Flags,
+        dma_attrs: Attrs,
+    ) -> Result<Self> {
+        if size == 0 {
+            return Err(EINVAL);
+        }
+
+        let dma_attrs = dma_attrs | Attrs(bindings::DMA_ATTR_NO_KERNEL_MAPPING);
+        let mut dma_handle = 0;
+        // SAFETY: `dev.as_raw()` is valid by the type invariant on `device::Device`.
+        let cpu_handle = unsafe {
+            bindings::dma_alloc_attrs(
+                dev.as_raw(),
+                size,
+                &mut dma_handle,
+                gfp_flags.as_raw(),
+                dma_attrs.as_raw(),
+            )
+        };
+
+        let cpu_handle = NonNull::new(cpu_handle).ok_or(ENOMEM)?;
+
+        // INVARIANT: `cpu_handle` is the opaque handle from a successful `dma_alloc_attrs` call
+        // with `DMA_ATTR_NO_KERNEL_MAPPING`, `dma_handle` is the corresponding DMA address,
+        // and we hold a refcounted reference to the device.
+        Ok(Self {
+            dev: dev.into(),
+            dma_handle,
+            cpu_handle,
+            size,
+            dma_attrs,
+        })
+    }
+
+    /// Allocates `size` bytes of coherent DMA memory without creating a kernel virtual mapping.
+    #[inline]
+    pub fn alloc(
+        dev: &device::Device<Bound>,
+        size: usize,
+        gfp_flags: kernel::alloc::Flags,
+    ) -> Result<Self> {
+        Self::alloc_with_attrs(dev, size, gfp_flags, Attrs(0))
+    }
+
+    /// Returns the DMA handle for this allocation.
+    ///
+    /// This address can be programmed into device hardware for DMA access.
+    #[inline]
+    pub fn dma_handle(&self) -> DmaAddress {
+        self.dma_handle
+    }
+
+    /// Returns the size in bytes of this allocation.
+    #[inline]
+    pub fn size(&self) -> usize {
+        self.size
+    }
+}
+
+impl Drop for CoherentHandle {
+    fn drop(&mut self) {
+        // SAFETY: All values are valid by the type invariants on `CoherentHandle`.
+        // `cpu_handle` is the opaque handle from `dma_alloc_attrs` and is passed back unchanged.
+        unsafe {
+            bindings::dma_free_attrs(
+                self.dev.as_raw(),
+                self.size,
+                self.cpu_handle.as_ptr(),
+                self.dma_handle,
+                self.dma_attrs.as_raw(),
+            )
+        }
+    }
+}
+
+// SAFETY: `CoherentHandle` only holds a device reference, a DMA handle, an opaque CPU handle,
+// and a size. None of these are tied to a specific thread.
+unsafe impl Send for CoherentHandle {}
+
+// SAFETY: `CoherentHandle` provides no CPU access to the underlying allocation. The only
+// operations on `&CoherentHandle` are reading the DMA handle and size, both of which are
+// plain `Copy` values.
+unsafe impl Sync for CoherentHandle {}
 
 /// Reads a field of an item from an allocated region of structs.
 ///
+/// The syntax is of the form `kernel::dma_read!(dma, proj)` where `dma` is an expression evaluating
+/// to a [`Coherent`] and `proj` is a [projection specification](kernel::ptr::project!).
+///
 /// # Examples
 ///
 /// ```
 /// use kernel::device::Device;
-/// use kernel::dma::{attrs::*, CoherentAllocation};
+/// use kernel::dma::{attrs::*, Coherent};
 ///
 /// struct MyStruct { field: u32, }
 ///
@@ -683,42 +1151,35 @@ unsafe impl<T: AsBytes + FromBytes + Send> Send for CoherentAllocation<T> {}
 /// // SAFETY: Instances of `MyStruct` have no uninitialized portions.
 /// unsafe impl kernel::transmute::AsBytes for MyStruct{};
 ///
-/// # fn test(alloc: &kernel::dma::CoherentAllocation<MyStruct>) -> Result {
-/// let whole = kernel::dma_read!(alloc[2]);
-/// let field = kernel::dma_read!(alloc[1].field);
+/// # fn test(alloc: &kernel::dma::Coherent<[MyStruct]>) -> Result {
+/// let whole = kernel::dma_read!(alloc, [2]?);
+/// let field = kernel::dma_read!(alloc, [1]?.field);
 /// # Ok::<(), Error>(()) }
 /// ```
 #[macro_export]
 macro_rules! dma_read {
-    ($dma:expr, $idx: expr, $($field:tt)*) => {{
-        (|| -> ::core::result::Result<_, $crate::error::Error> {
-            let item = $crate::dma::CoherentAllocation::item_from_index(&$dma, $idx)?;
-            // SAFETY: `item_from_index` ensures that `item` is always a valid pointer and can be
-            // dereferenced. The compiler also further validates the expression on whether `field`
-            // is a member of `item` when expanded by the macro.
-            unsafe {
-                let ptr_field = ::core::ptr::addr_of!((*item) $($field)*);
-                ::core::result::Result::Ok(
-                    $crate::dma::CoherentAllocation::field_read(&$dma, ptr_field)
-                )
-            }
-        })()
+    ($dma:expr, $($proj:tt)*) => {{
+        let dma = &$dma;
+        let ptr = $crate::ptr::project!(
+            $crate::dma::Coherent::as_ptr(dma), $($proj)*
+        );
+        // SAFETY: The pointer created by the projection is within the DMA region.
+        unsafe { $crate::dma::Coherent::field_read(dma, ptr) }
     }};
-    ($dma:ident [ $idx:expr ] $($field:tt)* ) => {
-        $crate::dma_read!($dma, $idx, $($field)*)
-    };
-    ($($dma:ident).* [ $idx:expr ] $($field:tt)* ) => {
-        $crate::dma_read!($($dma).*, $idx, $($field)*)
-    };
 }
 
 /// Writes to a field of an item from an allocated region of structs.
 ///
+/// The syntax is of the form `kernel::dma_write!(dma, proj, val)` where `dma` is an expression
+/// evaluating to a [`Coherent`], `proj` is a
+/// [projection specification](kernel::ptr::project!), and `val` is the value to be written to the
+/// projected location.
+///
 /// # Examples
 ///
 /// ```
 /// use kernel::device::Device;
-/// use kernel::dma::{attrs::*, CoherentAllocation};
+/// use kernel::dma::{attrs::*, Coherent};
 ///
 /// struct MyStruct { member: u32, }
 ///
@@ -727,38 +1188,32 @@ macro_rules! dma_read {
 /// // SAFETY: Instances of `MyStruct` have no uninitialized portions.
 /// unsafe impl kernel::transmute::AsBytes for MyStruct{};
 ///
-/// # fn test(alloc: &kernel::dma::CoherentAllocation<MyStruct>) -> Result {
-/// kernel::dma_write!(alloc[2].member = 0xf);
-/// kernel::dma_write!(alloc[1] = MyStruct { member: 0xf });
+/// # fn test(alloc: &kernel::dma::Coherent<[MyStruct]>) -> Result {
+/// kernel::dma_write!(alloc, [2]?.member, 0xf);
+/// kernel::dma_write!(alloc, [1]?, MyStruct { member: 0xf });
 /// # Ok::<(), Error>(()) }
 /// ```
 #[macro_export]
 macro_rules! dma_write {
-    ($dma:ident [ $idx:expr ] $($field:tt)*) => {{
-        $crate::dma_write!($dma, $idx, $($field)*)
-    }};
-    ($($dma:ident).* [ $idx:expr ] $($field:tt)* ) => {{
-        $crate::dma_write!($($dma).*, $idx, $($field)*)
+    (@parse [$dma:expr] [$($proj:tt)*] [, $val:expr]) => {{
+        let dma = &$dma;
+        let ptr = $crate::ptr::project!(
+            mut $crate::dma::Coherent::as_mut_ptr(dma), $($proj)*
+        );
+        let val = $val;
+        // SAFETY: The pointer created by the projection is within the DMA region.
+        unsafe { $crate::dma::Coherent::field_write(dma, ptr, val) }
     }};
-    ($dma:expr, $idx: expr, = $val:expr) => {
-        (|| -> ::core::result::Result<_, $crate::error::Error> {
-            let item = $crate::dma::CoherentAllocation::item_from_index(&$dma, $idx)?;
-            // SAFETY: `item_from_index` ensures that `item` is always a valid item.
-            unsafe { $crate::dma::CoherentAllocation::field_write(&$dma, item, $val) }
-            ::core::result::Result::Ok(())
-        })()
+    (@parse [$dma:expr] [$($proj:tt)*] [.$field:tt $($rest:tt)*]) => {
+        $crate::dma_write!(@parse [$dma] [$($proj)* .$field] [$($rest)*])
+    };
+    (@parse [$dma:expr] [$($proj:tt)*] [[$index:expr]? $($rest:tt)*]) => {
+        $crate::dma_write!(@parse [$dma] [$($proj)* [$index]?] [$($rest)*])
+    };
+    (@parse [$dma:expr] [$($proj:tt)*] [[$index:expr] $($rest:tt)*]) => {
+        $crate::dma_write!(@parse [$dma] [$($proj)* [$index]] [$($rest)*])
     };
-    ($dma:expr, $idx: expr, $(.$field:ident)* = $val:expr) => {
-        (|| -> ::core::result::Result<_, $crate::error::Error> {
-            let item = $crate::dma::CoherentAllocation::item_from_index(&$dma, $idx)?;
-            // SAFETY: `item_from_index` ensures that `item` is always a valid pointer and can be
-            // dereferenced. The compiler also further validates the expression on whether `field`
-            // is a member of `item` when expanded by the macro.
-            unsafe {
-                let ptr_field = ::core::ptr::addr_of_mut!((*item) $(.$field)*);
-                $crate::dma::CoherentAllocation::field_write(&$dma, ptr_field, $val)
-            }
-            ::core::result::Result::Ok(())
-        })()
+    ($dma:expr, $($rest:tt)*) => {
+        $crate::dma_write!(@parse [$dma] [] [$($rest)*])
     };
 }
diff --git a/rust/kernel/drm/device.rs b/rust/kernel/drm/device.rs
index 3ce8f62a0056..adbafe8db54d 100644
--- a/rust/kernel/drm/device.rs
+++ b/rust/kernel/drm/device.rs
@@ -6,15 +6,34 @@
 
 use crate::{
     alloc::allocator::Kmalloc,
-    bindings, device, drm,
-    drm::driver::AllocImpl,
+    bindings, device,
+    drm::{
+        self,
+        driver::AllocImpl, //
+    },
     error::from_err_ptr,
-    error::Result,
     prelude::*,
-    sync::aref::{ARef, AlwaysRefCounted},
+    sync::aref::{
+        ARef,
+        AlwaysRefCounted, //
+    },
     types::Opaque,
+    workqueue::{
+        HasDelayedWork,
+        HasWork,
+        Work,
+        WorkItem, //
+    },
+};
+use core::{
+    alloc::Layout,
+    mem,
+    ops::Deref,
+    ptr::{
+        self,
+        NonNull, //
+    },
 };
-use core::{alloc::Layout, mem, ops::Deref, ptr, ptr::NonNull};
 
 #[cfg(CONFIG_DRM_LEGACY)]
 macro_rules! drm_legacy_fields {
@@ -227,3 +246,61 @@ unsafe impl<T: drm::Driver> Send for Device<T> {}
 // SAFETY: A `drm::Device` can be shared among threads because all immutable methods are protected
 // by the synchronization in `struct drm_device`.
 unsafe impl<T: drm::Driver> Sync for Device<T> {}
+
+impl<T, const ID: u64> WorkItem<ID> for Device<T>
+where
+    T: drm::Driver,
+    T::Data: WorkItem<ID, Pointer = ARef<Device<T>>>,
+    T::Data: HasWork<Device<T>, ID>,
+{
+    type Pointer = ARef<Device<T>>;
+
+    fn run(ptr: ARef<Device<T>>) {
+        T::Data::run(ptr);
+    }
+}
+
+// SAFETY:
+//
+// - `raw_get_work` and `work_container_of` return valid pointers by relying on
+// `T::Data::raw_get_work` and `container_of`. In particular, `T::Data` is
+// stored inline in `drm::Device`, so the `container_of` call is valid.
+//
+// - The two methods are true inverses of each other: given `ptr: *mut
+// Device<T>`, `raw_get_work` will return a `*mut Work<Device<T>, ID>` through
+// `T::Data::raw_get_work` and given a `ptr: *mut Work<Device<T>, ID>`,
+// `work_container_of` will return a `*mut Device<T>` through `container_of`.
+unsafe impl<T, const ID: u64> HasWork<Device<T>, ID> for Device<T>
+where
+    T: drm::Driver,
+    T::Data: HasWork<Device<T>, ID>,
+{
+    unsafe fn raw_get_work(ptr: *mut Self) -> *mut Work<Device<T>, ID> {
+        // SAFETY: The caller promises that `ptr` points to a valid `Device<T>`.
+        let data_ptr = unsafe { &raw mut (*ptr).data };
+
+        // SAFETY: `data_ptr` is a valid pointer to `T::Data`.
+        unsafe { T::Data::raw_get_work(data_ptr) }
+    }
+
+    unsafe fn work_container_of(ptr: *mut Work<Device<T>, ID>) -> *mut Self {
+        // SAFETY: The caller promises that `ptr` points at a `Work` field in
+        // `T::Data`.
+        let data_ptr = unsafe { T::Data::work_container_of(ptr) };
+
+        // SAFETY: `T::Data` is stored as the `data` field in `Device<T>`.
+        unsafe { crate::container_of!(data_ptr, Self, data) }
+    }
+}
+
+// SAFETY: Our `HasWork<T, ID>` implementation returns a `work_struct` that is
+// stored in the `work` field of a `delayed_work` with the same access rules as
+// the `work_struct` owing to the bound on `T::Data: HasDelayedWork<Device<T>,
+// ID>`, which requires that `T::Data::raw_get_work` return a `work_struct` that
+// is inside a `delayed_work`.
+unsafe impl<T, const ID: u64> HasDelayedWork<Device<T>, ID> for Device<T>
+where
+    T: drm::Driver,
+    T::Data: HasDelayedWork<Device<T>, ID>,
+{
+}
diff --git a/rust/kernel/drm/driver.rs b/rust/kernel/drm/driver.rs
index e09f977b5b51..5233bdebc9fc 100644
--- a/rust/kernel/drm/driver.rs
+++ b/rust/kernel/drm/driver.rs
@@ -5,12 +5,14 @@
 //! C header: [`include/drm/drm_drv.h`](srctree/include/drm/drm_drv.h)
 
 use crate::{
-    bindings, device, devres, drm,
-    error::{to_result, Result},
+    bindings,
+    device,
+    devres,
+    drm,
+    error::to_result,
     prelude::*,
-    sync::aref::ARef,
+    sync::aref::ARef, //
 };
-use macros::vtable;
 
 /// Driver use the GEM memory manager. This should be set for all modern drivers.
 pub(crate) const FEAT_GEM: u32 = bindings::drm_driver_feature_DRIVER_GEM;
diff --git a/rust/kernel/drm/file.rs b/rust/kernel/drm/file.rs
index 8c46f8d51951..10160601ce5a 100644
--- a/rust/kernel/drm/file.rs
+++ b/rust/kernel/drm/file.rs
@@ -4,9 +4,13 @@
 //!
 //! C header: [`include/drm/drm_file.h`](srctree/include/drm/drm_file.h)
 
-use crate::{bindings, drm, error::Result, prelude::*, types::Opaque};
+use crate::{
+    bindings,
+    drm,
+    prelude::*,
+    types::Opaque, //
+};
 use core::marker::PhantomData;
-use core::pin::Pin;
 
 /// Trait that must be implemented by DRM drivers to represent a DRM File (a client instance).
 pub trait DriverFile {
diff --git a/rust/kernel/drm/gem/mod.rs b/rust/kernel/drm/gem/mod.rs
index d49a9ba02635..75acda7ba500 100644
--- a/rust/kernel/drm/gem/mod.rs
+++ b/rust/kernel/drm/gem/mod.rs
@@ -5,15 +5,66 @@
 //! C header: [`include/drm/drm_gem.h`](srctree/include/drm/drm_gem.h)
 
 use crate::{
-    alloc::flags::*,
-    bindings, drm,
-    drm::driver::{AllocImpl, AllocOps},
-    error::{to_result, Result},
+    bindings,
+    drm::{
+        self,
+        driver::{
+            AllocImpl,
+            AllocOps, //
+        },
+    },
+    error::to_result,
     prelude::*,
-    sync::aref::{ARef, AlwaysRefCounted},
+    sync::aref::{
+        ARef,
+        AlwaysRefCounted, //
+    },
     types::Opaque,
 };
-use core::{ops::Deref, ptr::NonNull};
+use core::{
+    ops::Deref,
+    ptr::NonNull, //
+};
+
+#[cfg(CONFIG_RUST_DRM_GEM_SHMEM_HELPER)]
+pub mod shmem;
+
+/// A macro for implementing [`AlwaysRefCounted`] for any GEM object type.
+///
+/// Since all GEM objects use the same refcounting scheme.
+#[macro_export]
+macro_rules! impl_aref_for_gem_obj {
+    (
+        impl $( <$( $tparam_id:ident ),+> )? for $type:ty
+        $(
+            where
+                $( $bind_param:path : $bind_trait:path ),+
+        )?
+    ) => {
+        // SAFETY: All GEM objects are refcounted.
+        unsafe impl $( <$( $tparam_id ),+> )? $crate::sync::aref::AlwaysRefCounted for $type
+        where
+            Self: IntoGEMObject,
+            $( $( $bind_param : $bind_trait ),+ )?
+        {
+            fn inc_ref(&self) {
+                // SAFETY: The existence of a shared reference guarantees that the refcount is
+                // non-zero.
+                unsafe { bindings::drm_gem_object_get(self.as_raw()) };
+            }
+
+            unsafe fn dec_ref(obj: core::ptr::NonNull<Self>) {
+                // SAFETY: `obj` is a valid pointer to an `Object<T>`.
+                let obj = unsafe { obj.as_ref() }.as_raw();
+
+                // SAFETY: The safety requirements guarantee that the refcount is non-zero.
+                unsafe { bindings::drm_gem_object_put(obj) };
+            }
+        }
+    };
+}
+#[cfg_attr(not(CONFIG_RUST_DRM_GEM_SHMEM_HELPER), allow(unused))]
+pub(crate) use impl_aref_for_gem_obj;
 
 /// A type alias for retrieving a [`Driver`]s [`DriverFile`] implementation from its
 /// [`DriverObject`] implementation.
@@ -27,8 +78,15 @@ pub trait DriverObject: Sync + Send + Sized {
     /// Parent `Driver` for this object.
     type Driver: drm::Driver;
 
+    /// The data type to use for passing arguments to [`DriverObject::new`].
+    type Args;
+
     /// Create a new driver data object for a GEM object of a given size.
-    fn new(dev: &drm::Device<Self::Driver>, size: usize) -> impl PinInit<Self, Error>;
+    fn new(
+        dev: &drm::Device<Self::Driver>,
+        size: usize,
+        args: Self::Args,
+    ) -> impl PinInit<Self, Error>;
 
     /// Open a new handle to an existing object, associated with a File.
     fn open(_obj: &<Self::Driver as drm::Driver>::Object, _file: &DriverFile<Self>) -> Result {
@@ -162,6 +220,18 @@ pub trait BaseObject: IntoGEMObject {
 
 impl<T: IntoGEMObject> BaseObject for T {}
 
+/// Crate-private base operations shared by all GEM object classes.
+#[cfg_attr(not(CONFIG_RUST_DRM_GEM_SHMEM_HELPER), expect(unused))]
+pub(crate) trait BaseObjectPrivate: IntoGEMObject {
+    /// Return a pointer to this object's dma_resv.
+    fn raw_dma_resv(&self) -> *mut bindings::dma_resv {
+        // SAFETY: `self.as_raw()` always returns a valid pointer to the base DRM GEM object.
+        unsafe { (*self.as_raw()).resv }
+    }
+}
+
+impl<T: IntoGEMObject> BaseObjectPrivate for T {}
+
 /// A base GEM object.
 ///
 /// # Invariants
@@ -195,11 +265,11 @@ impl<T: DriverObject> Object<T> {
     };
 
     /// Create a new GEM object.
-    pub fn new(dev: &drm::Device<T::Driver>, size: usize) -> Result<ARef<Self>> {
+    pub fn new(dev: &drm::Device<T::Driver>, size: usize, args: T::Args) -> Result<ARef<Self>> {
         let obj: Pin<KBox<Self>> = KBox::pin_init(
             try_pin_init!(Self {
                 obj: Opaque::new(bindings::drm_gem_object::default()),
-                data <- T::new(dev, size),
+                data <- T::new(dev, size, args),
             }),
             GFP_KERNEL,
         )?;
@@ -252,21 +322,7 @@ impl<T: DriverObject> Object<T> {
     }
 }
 
-// SAFETY: Instances of `Object<T>` are always reference-counted.
-unsafe impl<T: DriverObject> crate::sync::aref::AlwaysRefCounted for Object<T> {
-    fn inc_ref(&self) {
-        // SAFETY: The existence of a shared reference guarantees that the refcount is non-zero.
-        unsafe { bindings::drm_gem_object_get(self.as_raw()) };
-    }
-
-    unsafe fn dec_ref(obj: NonNull<Self>) {
-        // SAFETY: `obj` is a valid pointer to an `Object<T>`.
-        let obj = unsafe { obj.as_ref() };
-
-        // SAFETY: The safety requirements guarantee that the refcount is non-zero.
-        unsafe { bindings::drm_gem_object_put(obj.as_raw()) }
-    }
-}
+impl_aref_for_gem_obj!(impl<T> for Object<T> where T: DriverObject);
 
 impl<T: DriverObject> super::private::Sealed for Object<T> {}
 
diff --git a/rust/kernel/drm/gem/shmem.rs b/rust/kernel/drm/gem/shmem.rs
new file mode 100644
index 000000000000..d025fb035195
--- /dev/null
+++ b/rust/kernel/drm/gem/shmem.rs
@@ -0,0 +1,228 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! DRM GEM shmem helper objects
+//!
+//! C header: [`include/linux/drm/drm_gem_shmem_helper.h`](srctree/include/drm/drm_gem_shmem_helper.h)
+
+// TODO:
+// - There are a number of spots here that manually acquire/release the DMA reservation lock using
+//   dma_resv_(un)lock(). In the future we should add support for ww mutex, expose a method to
+//   acquire a reference to the WwMutex, and then use that directly instead of the C functions here.
+
+use crate::{
+    container_of,
+    drm::{
+        device,
+        driver,
+        gem,
+        private::Sealed, //
+    },
+    error::to_result,
+    prelude::*,
+    types::{
+        ARef,
+        Opaque, //
+    }, //
+};
+use core::{
+    ops::{
+        Deref,
+        DerefMut, //
+    },
+    ptr::NonNull,
+};
+use gem::{
+    BaseObjectPrivate,
+    DriverObject,
+    IntoGEMObject, //
+};
+
+/// A struct for controlling the creation of shmem-backed GEM objects.
+///
+/// This is used with [`Object::new()`] to control various properties that can only be set when
+/// initially creating a shmem-backed GEM object.
+#[derive(Default)]
+pub struct ObjectConfig<'a, T: DriverObject> {
+    /// Whether to set the write-combine map flag.
+    pub map_wc: bool,
+
+    /// Reuse the DMA reservation from another GEM object.
+    ///
+    /// The newly created [`Object`] will hold an owned refcount to `parent_resv_obj` if specified.
+    pub parent_resv_obj: Option<&'a Object<T>>,
+}
+
+/// A shmem-backed GEM object.
+///
+/// # Invariants
+///
+/// `obj` contains a valid initialized `struct drm_gem_shmem_object` for the lifetime of this
+/// object.
+#[repr(C)]
+#[pin_data]
+pub struct Object<T: DriverObject> {
+    #[pin]
+    obj: Opaque<bindings::drm_gem_shmem_object>,
+    /// Parent object that owns this object's DMA reservation object.
+    parent_resv_obj: Option<ARef<Object<T>>>,
+    #[pin]
+    inner: T,
+}
+
+super::impl_aref_for_gem_obj!(impl<T> for Object<T> where T: DriverObject);
+
+// SAFETY: All GEM objects are thread-safe.
+unsafe impl<T: DriverObject> Send for Object<T> {}
+
+// SAFETY: All GEM objects are thread-safe.
+unsafe impl<T: DriverObject> Sync for Object<T> {}
+
+impl<T: DriverObject> Object<T> {
+    /// `drm_gem_object_funcs` vtable suitable for GEM shmem objects.
+    const VTABLE: bindings::drm_gem_object_funcs = bindings::drm_gem_object_funcs {
+        free: Some(Self::free_callback),
+        open: Some(super::open_callback::<T>),
+        close: Some(super::close_callback::<T>),
+        print_info: Some(bindings::drm_gem_shmem_object_print_info),
+        export: None,
+        pin: Some(bindings::drm_gem_shmem_object_pin),
+        unpin: Some(bindings::drm_gem_shmem_object_unpin),
+        get_sg_table: Some(bindings::drm_gem_shmem_object_get_sg_table),
+        vmap: Some(bindings::drm_gem_shmem_object_vmap),
+        vunmap: Some(bindings::drm_gem_shmem_object_vunmap),
+        mmap: Some(bindings::drm_gem_shmem_object_mmap),
+        status: None,
+        rss: None,
+        #[allow(unused_unsafe, reason = "Safe since Rust 1.82.0")]
+        // SAFETY: `drm_gem_shmem_vm_ops` is a valid, static const on the C side.
+        vm_ops: unsafe { &raw const bindings::drm_gem_shmem_vm_ops },
+        evict: None,
+    };
+
+    /// Return a raw pointer to the embedded drm_gem_shmem_object.
+    fn as_raw_shmem(&self) -> *mut bindings::drm_gem_shmem_object {
+        self.obj.get()
+    }
+
+    /// Create a new shmem-backed DRM object of the given size.
+    ///
+    /// Additional config options can be specified using `config`.
+    pub fn new(
+        dev: &device::Device<T::Driver>,
+        size: usize,
+        config: ObjectConfig<'_, T>,
+        args: T::Args,
+    ) -> Result<ARef<Self>> {
+        let new: Pin<KBox<Self>> = KBox::try_pin_init(
+            try_pin_init!(Self {
+                obj <- Opaque::init_zeroed(),
+                parent_resv_obj: config.parent_resv_obj.map(|p| p.into()),
+                inner <- T::new(dev, size, args),
+            }),
+            GFP_KERNEL,
+        )?;
+
+        // SAFETY: `obj.as_raw()` is guaranteed to be valid by the initialization above.
+        unsafe { (*new.as_raw()).funcs = &Self::VTABLE };
+
+        // SAFETY: The arguments are all valid via the type invariants.
+        to_result(unsafe { bindings::drm_gem_shmem_init(dev.as_raw(), new.as_raw_shmem(), size) })?;
+
+        // SAFETY: We never move out of `self`.
+        let new = KBox::into_raw(unsafe { Pin::into_inner_unchecked(new) });
+
+        // SAFETY: We're taking over the owned refcount from `drm_gem_shmem_init`.
+        let obj = unsafe { ARef::from_raw(NonNull::new_unchecked(new)) };
+
+        // Start filling out values from `config`
+        if let Some(parent_resv) = config.parent_resv_obj {
+            // SAFETY: We have yet to expose the new gem object outside of this function, so it is
+            // safe to modify this field.
+            unsafe { (*obj.obj.get()).base.resv = parent_resv.raw_dma_resv() };
+        }
+
+        // SAFETY: We have yet to expose this object outside of this function, so we're guaranteed
+        // to have exclusive access - thus making this safe to hold a mutable reference to.
+        let shmem = unsafe { &mut *obj.as_raw_shmem() };
+        shmem.set_map_wc(config.map_wc);
+
+        Ok(obj)
+    }
+
+    /// Returns the `Device` that owns this GEM object.
+    pub fn dev(&self) -> &device::Device<T::Driver> {
+        // SAFETY: `dev` will have been initialized in `Self::new()` by `drm_gem_shmem_init()`.
+        unsafe { device::Device::from_raw((*self.as_raw()).dev) }
+    }
+
+    extern "C" fn free_callback(obj: *mut bindings::drm_gem_object) {
+        // SAFETY:
+        // - DRM always passes a valid gem object here
+        // - We used drm_gem_shmem_create() in our create_gem_object callback, so we know that
+        //   `obj` is contained within a drm_gem_shmem_object
+        let this = unsafe { container_of!(obj, bindings::drm_gem_shmem_object, base) };
+
+        // SAFETY:
+        // - We're in free_callback - so this function is safe to call.
+        // - We won't be using the gem resources on `this` after this call.
+        unsafe { bindings::drm_gem_shmem_release(this) };
+
+        // SAFETY:
+        // - We verified above that `obj` is valid, which makes `this` valid
+        // - This function is set in AllocOps, so we know that `this` is contained within a
+        //   `Object<T>`
+        let this = unsafe { container_of!(Opaque::cast_from(this), Self, obj) }.cast_mut();
+
+        // SAFETY: We're recovering the Kbox<> we created in gem_create_object()
+        let _ = unsafe { KBox::from_raw(this) };
+    }
+}
+
+impl<T: DriverObject> Deref for Object<T> {
+    type Target = T;
+
+    fn deref(&self) -> &Self::Target {
+        &self.inner
+    }
+}
+
+impl<T: DriverObject> DerefMut for Object<T> {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.inner
+    }
+}
+
+impl<T: DriverObject> Sealed for Object<T> {}
+
+impl<T: DriverObject> gem::IntoGEMObject for Object<T> {
+    fn as_raw(&self) -> *mut bindings::drm_gem_object {
+        // SAFETY:
+        // - Our immutable reference is proof that this is safe to dereference.
+        // - `obj` is always a valid drm_gem_shmem_object via our type invariants.
+        unsafe { &raw mut (*self.obj.get()).base }
+    }
+
+    unsafe fn from_raw<'a>(obj: *mut bindings::drm_gem_object) -> &'a Object<T> {
+        // SAFETY: The safety contract of from_gem_obj() guarantees that `obj` is contained within
+        // `Self`
+        unsafe {
+            let obj = Opaque::cast_from(container_of!(obj, bindings::drm_gem_shmem_object, base));
+
+            &*container_of!(obj, Object<T>, obj)
+        }
+    }
+}
+
+impl<T: DriverObject> driver::AllocImpl for Object<T> {
+    type Driver = T::Driver;
+
+    const ALLOC_OPS: driver::AllocOps = driver::AllocOps {
+        gem_create_object: None,
+        prime_handle_to_fd: None,
+        prime_fd_to_handle: None,
+        gem_prime_import: None,
+        gem_prime_import_sg_table: Some(bindings::drm_gem_shmem_prime_import_sg_table),
+        dumb_create: Some(bindings::drm_gem_shmem_dumb_create),
+        dumb_map_offset: None,
+    };
+}
diff --git a/rust/kernel/error.rs b/rust/kernel/error.rs
index 258b12afdcba..05cf869ac090 100644
--- a/rust/kernel/error.rs
+++ b/rust/kernel/error.rs
@@ -67,6 +67,7 @@ pub mod code {
     declare_err!(EDOM, "Math argument out of domain of func.");
     declare_err!(ERANGE, "Math result not representable.");
     declare_err!(EOVERFLOW, "Value too large for defined data type.");
+    declare_err!(EMSGSIZE, "Message too long.");
     declare_err!(ETIMEDOUT, "Connection timed out.");
     declare_err!(ERESTARTSYS, "Restart the system call.");
     declare_err!(ERESTARTNOINTR, "System call was interrupted by a signal and will be restarted.");
@@ -216,36 +217,42 @@ impl fmt::Debug for Error {
 }
 
 impl From<AllocError> for Error {
+    #[inline]
     fn from(_: AllocError) -> Error {
         code::ENOMEM
     }
 }
 
 impl From<TryFromIntError> for Error {
+    #[inline]
     fn from(_: TryFromIntError) -> Error {
         code::EINVAL
     }
 }
 
 impl From<Utf8Error> for Error {
+    #[inline]
     fn from(_: Utf8Error) -> Error {
         code::EINVAL
     }
 }
 
 impl From<LayoutError> for Error {
+    #[inline]
     fn from(_: LayoutError) -> Error {
         code::ENOMEM
     }
 }
 
 impl From<fmt::Error> for Error {
+    #[inline]
     fn from(_: fmt::Error) -> Error {
         code::EINVAL
     }
 }
 
 impl From<core::convert::Infallible> for Error {
+    #[inline]
     fn from(e: core::convert::Infallible) -> Error {
         match e {}
     }
@@ -446,6 +453,9 @@ pub fn to_result(err: crate::ffi::c_int) -> Result {
 /// for errors. This function performs the check and converts the "error pointer"
 /// to a normal pointer in an idiomatic fashion.
 ///
+/// Note that a `NULL` pointer is not considered an error pointer, and is returned
+/// as-is, wrapped in [`Ok`].
+///
 /// # Examples
 ///
 /// ```ignore
@@ -460,6 +470,34 @@ pub fn to_result(err: crate::ffi::c_int) -> Result {
 ///     from_err_ptr(unsafe { bindings::devm_platform_ioremap_resource(pdev.to_ptr(), index) })
 /// }
 /// ```
+///
+/// ```
+/// # use kernel::error::from_err_ptr;
+/// # mod bindings {
+/// #     #![expect(clippy::missing_safety_doc)]
+/// #     use kernel::prelude::*;
+/// #     pub(super) unsafe fn einval_err_ptr() -> *mut kernel::ffi::c_void {
+/// #         EINVAL.to_ptr()
+/// #     }
+/// #     pub(super) unsafe fn null_ptr() -> *mut kernel::ffi::c_void {
+/// #         core::ptr::null_mut()
+/// #     }
+/// #     pub(super) unsafe fn non_null_ptr() -> *mut kernel::ffi::c_void {
+/// #         0x1234 as *mut kernel::ffi::c_void
+/// #     }
+/// # }
+/// // SAFETY: ...
+/// let einval_err = from_err_ptr(unsafe { bindings::einval_err_ptr() });
+/// assert_eq!(einval_err, Err(EINVAL));
+///
+/// // SAFETY: ...
+/// let null_ok = from_err_ptr(unsafe { bindings::null_ptr() });
+/// assert_eq!(null_ok, Ok(core::ptr::null_mut()));
+///
+/// // SAFETY: ...
+/// let non_null = from_err_ptr(unsafe { bindings::non_null_ptr() }).unwrap();
+/// assert_ne!(non_null, core::ptr::null_mut());
+/// ```
 pub fn from_err_ptr<T>(ptr: *mut T) -> Result<*mut T> {
     // CAST: Casting a pointer to `*const crate::ffi::c_void` is always valid.
     let const_ptr: *const crate::ffi::c_void = ptr.cast();
diff --git a/rust/kernel/gpu.rs b/rust/kernel/gpu.rs
new file mode 100644
index 000000000000..1dc5d0c8c09d
--- /dev/null
+++ b/rust/kernel/gpu.rs
@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! GPU subsystem abstractions.
+
+#[cfg(CONFIG_GPU_BUDDY = "y")]
+pub mod buddy;
diff --git a/rust/kernel/gpu/buddy.rs b/rust/kernel/gpu/buddy.rs
new file mode 100644
index 000000000000..d502ada6ebbd
--- /dev/null
+++ b/rust/kernel/gpu/buddy.rs
@@ -0,0 +1,614 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! GPU buddy allocator bindings.
+//!
+//! C header: [`include/linux/gpu_buddy.h`](srctree/include/linux/gpu_buddy.h)
+//!
+//! This module provides Rust abstractions over the Linux kernel's GPU buddy
+//! allocator, which implements a binary buddy memory allocator.
+//!
+//! The buddy allocator manages a contiguous address space and allocates blocks
+//! in power-of-two sizes, useful for GPU physical memory management.
+//!
+//! # Examples
+//!
+//! Create a buddy allocator and perform a basic range allocation:
+//!
+//! ```
+//! use kernel::{
+//!     gpu::buddy::{
+//!         GpuBuddy,
+//!         GpuBuddyAllocFlags,
+//!         GpuBuddyAllocMode,
+//!         GpuBuddyParams, //
+//!     },
+//!     prelude::*,
+//!     ptr::Alignment,
+//!     sizes::*, //
+//! };
+//!
+//! // Create a 1GB buddy allocator with 4KB minimum chunk size.
+//! let buddy = GpuBuddy::new(GpuBuddyParams {
+//!     base_offset: 0,
+//!     size: SZ_1G as u64,
+//!     chunk_size: Alignment::new::<SZ_4K>(),
+//! })?;
+//!
+//! assert_eq!(buddy.size(), SZ_1G as u64);
+//! assert_eq!(buddy.chunk_size(), Alignment::new::<SZ_4K>());
+//! let initial_free = buddy.avail();
+//!
+//! // Allocate 16MB. Block lands at the top of the address range.
+//! let allocated = KBox::pin_init(
+//!     buddy.alloc_blocks(
+//!         GpuBuddyAllocMode::Simple,
+//!         SZ_16M as u64,
+//!         Alignment::new::<SZ_16M>(),
+//!         GpuBuddyAllocFlags::default(),
+//!     ),
+//!     GFP_KERNEL,
+//! )?;
+//! assert_eq!(buddy.avail(), initial_free - SZ_16M as u64);
+//!
+//! let block = allocated.iter().next().expect("expected one block");
+//! assert_eq!(block.offset(), (SZ_1G - SZ_16M) as u64);
+//! assert_eq!(block.order(), 12); // 2^12 pages = 16MB
+//! assert_eq!(block.size(), SZ_16M as u64);
+//! assert_eq!(allocated.iter().count(), 1);
+//!
+//! // Dropping the allocation returns the range to the buddy allocator.
+//! drop(allocated);
+//! assert_eq!(buddy.avail(), initial_free);
+//! # Ok::<(), Error>(())
+//! ```
+//!
+//! Top-down allocation allocates from the highest addresses:
+//!
+//! ```
+//! # use kernel::{
+//! #     gpu::buddy::{GpuBuddy, GpuBuddyAllocMode, GpuBuddyAllocFlags, GpuBuddyParams},
+//! #     prelude::*,
+//! #     ptr::Alignment,
+//! #     sizes::*, //
+//! # };
+//! # let buddy = GpuBuddy::new(GpuBuddyParams {
+//! #     base_offset: 0,
+//! #     size: SZ_1G as u64,
+//! #     chunk_size: Alignment::new::<SZ_4K>(),
+//! # })?;
+//! # let initial_free = buddy.avail();
+//! let topdown = KBox::pin_init(
+//!     buddy.alloc_blocks(
+//!         GpuBuddyAllocMode::TopDown,
+//!         SZ_16M as u64,
+//!         Alignment::new::<SZ_16M>(),
+//!         GpuBuddyAllocFlags::default(),
+//!     ),
+//!     GFP_KERNEL,
+//! )?;
+//! assert_eq!(buddy.avail(), initial_free - SZ_16M as u64);
+//!
+//! let block = topdown.iter().next().expect("expected one block");
+//! assert_eq!(block.offset(), (SZ_1G - SZ_16M) as u64);
+//! assert_eq!(block.order(), 12);
+//! assert_eq!(block.size(), SZ_16M as u64);
+//!
+//! // Dropping the allocation returns the range to the buddy allocator.
+//! drop(topdown);
+//! assert_eq!(buddy.avail(), initial_free);
+//! # Ok::<(), Error>(())
+//! ```
+//!
+//! Non-contiguous allocation can fill fragmented memory by returning multiple
+//! blocks:
+//!
+//! ```
+//! # use kernel::{
+//! #     gpu::buddy::{
+//! #         GpuBuddy, GpuBuddyAllocFlags, GpuBuddyAllocMode, GpuBuddyParams,
+//! #     },
+//! #     prelude::*,
+//! #     ptr::Alignment,
+//! #     sizes::*, //
+//! # };
+//! # let buddy = GpuBuddy::new(GpuBuddyParams {
+//! #     base_offset: 0,
+//! #     size: SZ_1G as u64,
+//! #     chunk_size: Alignment::new::<SZ_4K>(),
+//! # })?;
+//! # let initial_free = buddy.avail();
+//! // Create fragmentation by allocating 4MB blocks at [0,4M) and [8M,12M).
+//! let frag1 = KBox::pin_init(
+//!     buddy.alloc_blocks(
+//!         GpuBuddyAllocMode::Range(0..SZ_4M as u64),
+//!         SZ_4M as u64,
+//!         Alignment::new::<SZ_4M>(),
+//!         GpuBuddyAllocFlags::default(),
+//!     ),
+//!     GFP_KERNEL,
+//! )?;
+//! assert_eq!(buddy.avail(), initial_free - SZ_4M as u64);
+//!
+//! let frag2 = KBox::pin_init(
+//!     buddy.alloc_blocks(
+//!         GpuBuddyAllocMode::Range(SZ_8M as u64..(SZ_8M + SZ_4M) as u64),
+//!         SZ_4M as u64,
+//!         Alignment::new::<SZ_4M>(),
+//!         GpuBuddyAllocFlags::default(),
+//!     ),
+//!     GFP_KERNEL,
+//! )?;
+//! assert_eq!(buddy.avail(), initial_free - SZ_8M as u64);
+//!
+//! // Allocate 8MB, this returns 2 blocks from the holes.
+//! let fragmented = KBox::pin_init(
+//!     buddy.alloc_blocks(
+//!         GpuBuddyAllocMode::Range(0..SZ_16M as u64),
+//!         SZ_8M as u64,
+//!         Alignment::new::<SZ_4M>(),
+//!         GpuBuddyAllocFlags::default(),
+//!     ),
+//!     GFP_KERNEL,
+//! )?;
+//! assert_eq!(buddy.avail(), initial_free - SZ_16M as u64);
+//!
+//! let (mut count, mut total) = (0u32, 0u64);
+//! for block in fragmented.iter() {
+//!     assert_eq!(block.size(), SZ_4M as u64);
+//!     total += block.size();
+//!     count += 1;
+//! }
+//! assert_eq!(total, SZ_8M as u64);
+//! assert_eq!(count, 2);
+//! # Ok::<(), Error>(())
+//! ```
+//!
+//! Contiguous allocation fails when only fragmented space is available:
+//!
+//! ```
+//! # use kernel::{
+//! #     gpu::buddy::{
+//! #         GpuBuddy, GpuBuddyAllocFlag, GpuBuddyAllocFlags, GpuBuddyAllocMode, GpuBuddyParams,
+//! #     },
+//! #     prelude::*,
+//! #     ptr::Alignment,
+//! #     sizes::*, //
+//! # };
+//! // Create a small 16MB buddy allocator with fragmented memory.
+//! let small = GpuBuddy::new(GpuBuddyParams {
+//!     base_offset: 0,
+//!     size: SZ_16M as u64,
+//!     chunk_size: Alignment::new::<SZ_4K>(),
+//! })?;
+//!
+//! let _hole1 = KBox::pin_init(
+//!     small.alloc_blocks(
+//!         GpuBuddyAllocMode::Range(0..SZ_4M as u64),
+//!         SZ_4M as u64,
+//!         Alignment::new::<SZ_4M>(),
+//!         GpuBuddyAllocFlags::default(),
+//!     ),
+//!     GFP_KERNEL,
+//! )?;
+//!
+//! let _hole2 = KBox::pin_init(
+//!     small.alloc_blocks(
+//!         GpuBuddyAllocMode::Range(SZ_8M as u64..(SZ_8M + SZ_4M) as u64),
+//!         SZ_4M as u64,
+//!         Alignment::new::<SZ_4M>(),
+//!         GpuBuddyAllocFlags::default(),
+//!     ),
+//!     GFP_KERNEL,
+//! )?;
+//!
+//! // 8MB contiguous should fail, only two non-contiguous 4MB holes exist.
+//! let result = KBox::pin_init(
+//!     small.alloc_blocks(
+//!         GpuBuddyAllocMode::Simple,
+//!         SZ_8M as u64,
+//!         Alignment::new::<SZ_4M>(),
+//!         GpuBuddyAllocFlag::Contiguous,
+//!     ),
+//!     GFP_KERNEL,
+//! );
+//! assert!(result.is_err());
+//! # Ok::<(), Error>(())
+//! ```
+
+use core::ops::Range;
+
+use crate::{
+    bindings,
+    clist_create,
+    error::to_result,
+    interop::list::CListHead,
+    new_mutex,
+    prelude::*,
+    ptr::Alignment,
+    sync::{
+        lock::mutex::MutexGuard,
+        Arc,
+        Mutex, //
+    },
+    types::Opaque, //
+};
+
+/// Allocation mode for the GPU buddy allocator.
+///
+/// The mode determines the primary allocation strategy. Modes are mutually
+/// exclusive: an allocation is either simple, range-constrained, or top-down.
+///
+/// Orthogonal modifier flags (e.g., contiguous, clear) are specified separately
+/// via [`GpuBuddyAllocFlags`].
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub enum GpuBuddyAllocMode {
+    /// Simple allocation without constraints.
+    Simple,
+    /// Range-based allocation within the given address range.
+    Range(Range<u64>),
+    /// Allocate from top of address space downward.
+    TopDown,
+}
+
+impl GpuBuddyAllocMode {
+    /// Returns the C flags corresponding to the allocation mode.
+    fn as_flags(&self) -> usize {
+        match self {
+            Self::Simple => 0,
+            Self::Range(_) => bindings::GPU_BUDDY_RANGE_ALLOCATION,
+            Self::TopDown => bindings::GPU_BUDDY_TOPDOWN_ALLOCATION,
+        }
+    }
+
+    /// Extracts the range start/end, defaulting to `(0, 0)` for non-range modes.
+    fn range(&self) -> (u64, u64) {
+        match self {
+            Self::Range(range) => (range.start, range.end),
+            _ => (0, 0),
+        }
+    }
+}
+
+crate::impl_flags!(
+    /// Modifier flags for GPU buddy allocation.
+    ///
+    /// These flags can be combined with any [`GpuBuddyAllocMode`] to control
+    /// additional allocation behavior.
+    #[derive(Clone, Copy, Default, PartialEq, Eq)]
+    pub struct GpuBuddyAllocFlags(usize);
+
+    /// Individual modifier flag for GPU buddy allocation.
+    #[derive(Clone, Copy, PartialEq, Eq)]
+    pub enum GpuBuddyAllocFlag {
+        /// Allocate physically contiguous blocks.
+        Contiguous = bindings::GPU_BUDDY_CONTIGUOUS_ALLOCATION,
+
+        /// Request allocation from cleared (zeroed) memory.
+        Clear = bindings::GPU_BUDDY_CLEAR_ALLOCATION,
+
+        /// Disable trimming of partially used blocks.
+        TrimDisable = bindings::GPU_BUDDY_TRIM_DISABLE,
+    }
+);
+
+/// Parameters for creating a GPU buddy allocator.
+pub struct GpuBuddyParams {
+    /// Base offset (in bytes) where the managed memory region starts.
+    /// Allocations will be offset by this value.
+    pub base_offset: u64,
+    /// Total size (in bytes) of the address space managed by the allocator.
+    pub size: u64,
+    /// Minimum allocation unit / chunk size; must be >= 4KB.
+    pub chunk_size: Alignment,
+}
+
+/// Inner structure holding the actual buddy allocator.
+///
+/// # Synchronization
+///
+/// The C `gpu_buddy` API requires synchronization (see `include/linux/gpu_buddy.h`).
+/// Internal locking ensures all allocator and free operations are properly
+/// synchronized, preventing races between concurrent allocations and the
+/// freeing that occurs when [`AllocatedBlocks`] is dropped.
+///
+/// # Invariants
+///
+/// The inner [`Opaque`] contains an initialized buddy allocator.
+#[pin_data(PinnedDrop)]
+struct GpuBuddyInner {
+    #[pin]
+    inner: Opaque<bindings::gpu_buddy>,
+
+    // TODO: Replace `Mutex<()>` with `Mutex<Opaque<..>>` once `Mutex::new()`
+    // accepts `impl PinInit<T>`.
+    #[pin]
+    lock: Mutex<()>,
+    /// Cached creation parameters (do not change after init).
+    params: GpuBuddyParams,
+}
+
+impl GpuBuddyInner {
+    /// Create a pin-initializer for the buddy allocator.
+    fn new(params: GpuBuddyParams) -> impl PinInit<Self, Error> {
+        let size = params.size;
+        let chunk_size = params.chunk_size;
+
+        // INVARIANT: `gpu_buddy_init` returns 0 on success, at which point the
+        // `gpu_buddy` structure is initialized and ready for use with all
+        // `gpu_buddy_*` APIs. `try_pin_init!` only completes if all fields succeed,
+        // so the invariant holds when construction finishes.
+        try_pin_init!(Self {
+            inner <- Opaque::try_ffi_init(|ptr| {
+                // SAFETY: `ptr` points to valid uninitialized memory from the pin-init
+                // infrastructure. `gpu_buddy_init` will initialize the structure.
+                to_result(unsafe {
+                    bindings::gpu_buddy_init(ptr, size, chunk_size.as_usize() as u64)
+                })
+            }),
+            lock <- new_mutex!(()),
+            params,
+        })
+    }
+
+    /// Lock the mutex and return a guard for accessing the allocator.
+    fn lock(&self) -> GpuBuddyGuard<'_> {
+        GpuBuddyGuard {
+            inner: self,
+            _guard: self.lock.lock(),
+        }
+    }
+}
+
+#[pinned_drop]
+impl PinnedDrop for GpuBuddyInner {
+    fn drop(self: Pin<&mut Self>) {
+        let guard = self.lock();
+
+        // SAFETY: Per the type invariant, `inner` contains an initialized
+        // allocator. `guard` provides exclusive access.
+        unsafe { bindings::gpu_buddy_fini(guard.as_raw()) };
+    }
+}
+
+// SAFETY: `GpuBuddyInner` can be sent between threads.
+unsafe impl Send for GpuBuddyInner {}
+
+// SAFETY: `GpuBuddyInner` is `Sync` because `GpuBuddyInner::lock`
+// serializes all access to the C allocator, preventing data races.
+unsafe impl Sync for GpuBuddyInner {}
+
+/// Guard that proves the lock is held, enabling access to the allocator.
+///
+/// The `_guard` holds the lock for the duration of this guard's lifetime.
+struct GpuBuddyGuard<'a> {
+    inner: &'a GpuBuddyInner,
+    _guard: MutexGuard<'a, ()>,
+}
+
+impl GpuBuddyGuard<'_> {
+    /// Get a raw pointer to the underlying C `gpu_buddy` structure.
+    fn as_raw(&self) -> *mut bindings::gpu_buddy {
+        self.inner.inner.get()
+    }
+}
+
+/// GPU buddy allocator instance.
+///
+/// This structure wraps the C `gpu_buddy` allocator using reference counting.
+/// The allocator is automatically cleaned up when all references are dropped.
+///
+/// Refer to the module-level documentation for usage examples.
+pub struct GpuBuddy(Arc<GpuBuddyInner>);
+
+impl GpuBuddy {
+    /// Create a new buddy allocator.
+    ///
+    /// The allocator manages a contiguous address space of the given size, with the
+    /// specified minimum allocation unit (chunk_size must be at least 4KB).
+    pub fn new(params: GpuBuddyParams) -> Result<Self> {
+        Arc::pin_init(GpuBuddyInner::new(params), GFP_KERNEL).map(Self)
+    }
+
+    /// Get the base offset for allocations.
+    pub fn base_offset(&self) -> u64 {
+        self.0.params.base_offset
+    }
+
+    /// Get the chunk size (minimum allocation unit).
+    pub fn chunk_size(&self) -> Alignment {
+        self.0.params.chunk_size
+    }
+
+    /// Get the total managed size.
+    pub fn size(&self) -> u64 {
+        self.0.params.size
+    }
+
+    /// Get the available (free) memory in bytes.
+    pub fn avail(&self) -> u64 {
+        let guard = self.0.lock();
+
+        // SAFETY: Per the type invariant, `inner` contains an initialized allocator.
+        // `guard` provides exclusive access.
+        unsafe { (*guard.as_raw()).avail }
+    }
+
+    /// Allocate blocks from the buddy allocator.
+    ///
+    /// Returns a pin-initializer for [`AllocatedBlocks`].
+    pub fn alloc_blocks(
+        &self,
+        mode: GpuBuddyAllocMode,
+        size: u64,
+        min_block_size: Alignment,
+        flags: impl Into<GpuBuddyAllocFlags>,
+    ) -> impl PinInit<AllocatedBlocks, Error> {
+        let buddy_arc = Arc::clone(&self.0);
+        let (start, end) = mode.range();
+        let mode_flags = mode.as_flags();
+        let modifier_flags = flags.into();
+
+        // Create pin-initializer that initializes list and allocates blocks.
+        try_pin_init!(AllocatedBlocks {
+            buddy: buddy_arc,
+            list <- CListHead::new(),
+            _: {
+                // Reject zero-sized or inverted ranges.
+                if let GpuBuddyAllocMode::Range(range) = &mode {
+                    if range.is_empty() {
+                        Err::<(), Error>(EINVAL)?;
+                    }
+                }
+
+                // Lock while allocating to serialize with concurrent frees.
+                let guard = buddy.lock();
+
+                // SAFETY: Per the type invariant, `inner` contains an initialized
+                // allocator. `guard` provides exclusive access.
+                to_result(unsafe {
+                    bindings::gpu_buddy_alloc_blocks(
+                        guard.as_raw(),
+                        start,
+                        end,
+                        size,
+                        min_block_size.as_usize() as u64,
+                        list.as_raw(),
+                        mode_flags | usize::from(modifier_flags),
+                    )
+                })?
+            }
+        })
+    }
+}
+
+/// Allocated blocks from the buddy allocator with automatic cleanup.
+///
+/// This structure owns a list of allocated blocks and ensures they are
+/// automatically freed when dropped. Use `iter()` to iterate over all
+/// allocated blocks.
+///
+/// # Invariants
+///
+/// - `list` is an initialized, valid list head containing allocated blocks.
+#[pin_data(PinnedDrop)]
+pub struct AllocatedBlocks {
+    #[pin]
+    list: CListHead,
+    buddy: Arc<GpuBuddyInner>,
+}
+
+impl AllocatedBlocks {
+    /// Check if the block list is empty.
+    pub fn is_empty(&self) -> bool {
+        // An empty list head points to itself.
+        !self.list.is_linked()
+    }
+
+    /// Iterate over allocated blocks.
+    ///
+    /// Returns an iterator yielding [`AllocatedBlock`] values. Each [`AllocatedBlock`]
+    /// borrows `self` and is only valid for the duration of that borrow.
+    pub fn iter(&self) -> impl Iterator<Item = AllocatedBlock<'_>> + '_ {
+        let head = self.list.as_raw();
+        // SAFETY: Per the type invariant, `list` is an initialized sentinel `list_head`
+        // and is not concurrently modified (we hold a `&self` borrow). The list contains
+        // `gpu_buddy_block` items linked via `__bindgen_anon_1.link`. `Block` is
+        // `#[repr(transparent)]` over `gpu_buddy_block`.
+        let clist = unsafe {
+            clist_create!(
+                head,
+                Block,
+                bindings::gpu_buddy_block,
+                __bindgen_anon_1.link
+            )
+        };
+
+        clist
+            .iter()
+            .map(|this| AllocatedBlock { this, blocks: self })
+    }
+}
+
+#[pinned_drop]
+impl PinnedDrop for AllocatedBlocks {
+    fn drop(self: Pin<&mut Self>) {
+        let guard = self.buddy.lock();
+
+        // SAFETY:
+        // - list is valid per the type's invariants.
+        // - guard provides exclusive access to the allocator.
+        unsafe {
+            bindings::gpu_buddy_free_list(guard.as_raw(), self.list.as_raw(), 0);
+        }
+    }
+}
+
+/// A GPU buddy block.
+///
+/// Transparent wrapper over C `gpu_buddy_block` structure. This type is returned
+/// as references during iteration over [`AllocatedBlocks`].
+///
+/// # Invariants
+///
+/// The inner [`Opaque`] contains a valid, allocated `gpu_buddy_block`.
+#[repr(transparent)]
+struct Block(Opaque<bindings::gpu_buddy_block>);
+
+impl Block {
+    /// Get a raw pointer to the underlying C block.
+    fn as_raw(&self) -> *mut bindings::gpu_buddy_block {
+        self.0.get()
+    }
+
+    /// Get the block's raw offset in the buddy address space (without base offset).
+    fn offset(&self) -> u64 {
+        // SAFETY: `self.as_raw()` is valid per the type's invariants.
+        unsafe { bindings::gpu_buddy_block_offset(self.as_raw()) }
+    }
+
+    /// Get the block order.
+    fn order(&self) -> u32 {
+        // SAFETY: `self.as_raw()` is valid per the type's invariants.
+        unsafe { bindings::gpu_buddy_block_order(self.as_raw()) }
+    }
+}
+
+// SAFETY: `Block` is a wrapper around `gpu_buddy_block` which can be
+// sent across threads safely.
+unsafe impl Send for Block {}
+
+// SAFETY: `Block` is only accessed through shared references after
+// allocation, and thus safe to access concurrently across threads.
+unsafe impl Sync for Block {}
+
+/// A buddy block paired with its owning [`AllocatedBlocks`] context.
+///
+/// Unlike a raw block, which only knows its offset within the buddy address
+/// space, an [`AllocatedBlock`] also has access to the allocator's `base_offset`
+/// and `chunk_size`, enabling it to compute absolute offsets and byte sizes.
+///
+/// Returned by [`AllocatedBlocks::iter()`].
+pub struct AllocatedBlock<'a> {
+    this: &'a Block,
+    blocks: &'a AllocatedBlocks,
+}
+
+impl AllocatedBlock<'_> {
+    /// Get the block's offset in the address space.
+    ///
+    /// Returns the absolute offset including the allocator's base offset.
+    /// This is the actual address to use for accessing the allocated memory.
+    pub fn offset(&self) -> u64 {
+        self.blocks.buddy.params.base_offset + self.this.offset()
+    }
+
+    /// Get the block order (size = chunk_size << order).
+    pub fn order(&self) -> u32 {
+        self.this.order()
+    }
+
+    /// Get the block's size in bytes.
+    pub fn size(&self) -> u64 {
+        (self.blocks.buddy.params.chunk_size.as_usize() as u64) << self.this.order()
+    }
+}
diff --git a/rust/kernel/i2c.rs b/rust/kernel/i2c.rs
index bb5b830f48c3..7b908f0c5a58 100644
--- a/rust/kernel/i2c.rs
+++ b/rust/kernel/i2c.rs
@@ -16,10 +16,11 @@ use crate::{
     error::*,
     of,
     prelude::*,
-    types::{
-        AlwaysRefCounted,
-        Opaque, //
-    }, //
+    sync::aref::{
+        ARef,
+        AlwaysRefCounted, //
+    },
+    types::Opaque, //
 };
 
 use core::{
@@ -31,8 +32,6 @@ use core::{
     }, //
 };
 
-use kernel::types::ARef;
-
 /// An I2C device id table.
 #[repr(transparent)]
 #[derive(Clone, Copy)]
@@ -416,7 +415,7 @@ kernel::impl_device_context_deref!(unsafe { I2cAdapter });
 kernel::impl_device_context_into_aref!(I2cAdapter);
 
 // SAFETY: Instances of `I2cAdapter` are always reference-counted.
-unsafe impl crate::types::AlwaysRefCounted for I2cAdapter {
+unsafe impl AlwaysRefCounted for I2cAdapter {
     fn inc_ref(&self) {
         // SAFETY: The existence of a shared reference guarantees that the refcount is non-zero.
         unsafe { bindings::i2c_get_adapter(self.index()) };
diff --git a/rust/kernel/interop.rs b/rust/kernel/interop.rs
new file mode 100644
index 000000000000..3b371d782a59
--- /dev/null
+++ b/rust/kernel/interop.rs
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Infrastructure for interfacing Rust code with C kernel subsystems.
+//!
+//! This module is intended for low-level, unsafe Rust infrastructure code
+//! that interoperates between Rust and C. It is *not* for use directly in
+//! Rust drivers.
+
+pub mod list;
diff --git a/rust/kernel/interop/list.rs b/rust/kernel/interop/list.rs
new file mode 100644
index 000000000000..54265ea036bb
--- /dev/null
+++ b/rust/kernel/interop/list.rs
@@ -0,0 +1,339 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Rust interface for C doubly circular intrusive linked lists.
+//!
+//! This module provides Rust abstractions for iterating over C `list_head`-based
+//! linked lists. It should only be used for cases where C and Rust code share
+//! direct access to the same linked list through a C interop interface.
+//!
+//! Note: This *must not* be used by Rust components that just need a linked list
+//! primitive. Use [`kernel::list::List`] instead.
+//!
+//! # Examples
+//!
+//! ```
+//! use kernel::{
+//!     bindings,
+//!     interop::list::clist_create,
+//!     types::Opaque,
+//! };
+//! # // Create test list with values (0, 10, 20) - normally done by C code but it is
+//! # // emulated here for doctests using the C bindings.
+//! # use core::mem::MaybeUninit;
+//! #
+//! # /// C struct with embedded `list_head` (typically will be allocated by C code).
+//! # #[repr(C)]
+//! # pub struct SampleItemC {
+//! #     pub value: i32,
+//! #     pub link: bindings::list_head,
+//! # }
+//! #
+//! # let mut head = MaybeUninit::<bindings::list_head>::uninit();
+//! #
+//! # let head = head.as_mut_ptr();
+//! # // SAFETY: `head` and all the items are test objects allocated in this scope.
+//! # unsafe { bindings::INIT_LIST_HEAD(head) };
+//! #
+//! # let mut items = [
+//! #     MaybeUninit::<SampleItemC>::uninit(),
+//! #     MaybeUninit::<SampleItemC>::uninit(),
+//! #     MaybeUninit::<SampleItemC>::uninit(),
+//! # ];
+//! #
+//! # for (i, item) in items.iter_mut().enumerate() {
+//! #     let ptr = item.as_mut_ptr();
+//! #     // SAFETY: `ptr` points to a valid `MaybeUninit<SampleItemC>`.
+//! #     unsafe { (*ptr).value = i as i32 * 10 };
+//! #     // SAFETY: `&raw mut` creates a pointer valid for `INIT_LIST_HEAD`.
+//! #     unsafe { bindings::INIT_LIST_HEAD(&raw mut (*ptr).link) };
+//! #     // SAFETY: `link` was just initialized and `head` is a valid list head.
+//! #     unsafe { bindings::list_add_tail(&mut (*ptr).link, head) };
+//! # }
+//!
+//! /// Rust wrapper for the C struct.
+//! ///
+//! /// The list item struct in this example is defined in C code as:
+//! ///
+//! /// ```c
+//! /// struct SampleItemC {
+//! ///     int value;
+//! ///     struct list_head link;
+//! /// };
+//! /// ```
+//! #[repr(transparent)]
+//! pub struct Item(Opaque<SampleItemC>);
+//!
+//! impl Item {
+//!     pub fn value(&self) -> i32 {
+//!         // SAFETY: `Item` has the same layout as `SampleItemC`.
+//!         unsafe { (*self.0.get()).value }
+//!     }
+//! }
+//!
+//! // Create typed [`CList`] from sentinel head.
+//! // SAFETY: `head` is valid and initialized, items are `SampleItemC` with
+//! // embedded `link` field, and `Item` is `#[repr(transparent)]` over `SampleItemC`.
+//! let list = unsafe { clist_create!(head, Item, SampleItemC, link) };
+//!
+//! // Iterate directly over typed items.
+//! let mut found_0 = false;
+//! let mut found_10 = false;
+//! let mut found_20 = false;
+//!
+//! for item in list.iter() {
+//!     let val = item.value();
+//!     if val == 0 { found_0 = true; }
+//!     if val == 10 { found_10 = true; }
+//!     if val == 20 { found_20 = true; }
+//! }
+//!
+//! assert!(found_0 && found_10 && found_20);
+//! ```
+
+use core::{
+    iter::FusedIterator,
+    marker::PhantomData, //
+};
+
+use crate::{
+    bindings,
+    types::Opaque, //
+};
+
+use pin_init::{
+    pin_data,
+    pin_init,
+    PinInit, //
+};
+
+/// FFI wrapper for a C `list_head` object used in intrusive linked lists.
+///
+/// # Invariants
+///
+/// - The underlying `list_head` is initialized with valid non-`NULL` `next`/`prev` pointers.
+#[pin_data]
+#[repr(transparent)]
+pub struct CListHead {
+    #[pin]
+    inner: Opaque<bindings::list_head>,
+}
+
+impl CListHead {
+    /// Create a `&CListHead` reference from a raw `list_head` pointer.
+    ///
+    /// # Safety
+    ///
+    /// - `ptr` must be a valid pointer to an initialized `list_head` (e.g. via
+    ///   `INIT_LIST_HEAD()`), with valid non-`NULL` `next`/`prev` pointers.
+    /// - `ptr` must remain valid for the lifetime `'a`.
+    /// - The list and all linked `list_head` nodes must not be modified from
+    ///   anywhere for the lifetime `'a`, unless done so via any [`CListHead`] APIs.
+    #[inline]
+    pub unsafe fn from_raw<'a>(ptr: *mut bindings::list_head) -> &'a Self {
+        // SAFETY:
+        // - `CListHead` has the same layout as `list_head`.
+        // - `ptr` is valid and unmodified for `'a` per caller guarantees.
+        unsafe { &*ptr.cast() }
+    }
+
+    /// Get the raw `list_head` pointer.
+    #[inline]
+    pub fn as_raw(&self) -> *mut bindings::list_head {
+        self.inner.get()
+    }
+
+    /// Get the next [`CListHead`] in the list.
+    #[inline]
+    pub fn next(&self) -> &Self {
+        let raw = self.as_raw();
+        // SAFETY:
+        // - `self.as_raw()` is valid and initialized per type invariants.
+        // - The `next` pointer is valid and non-`NULL` per type invariants
+        //   (initialized via `INIT_LIST_HEAD()` or equivalent).
+        unsafe { Self::from_raw((*raw).next) }
+    }
+
+    /// Check if this node is linked in a list (not isolated).
+    #[inline]
+    pub fn is_linked(&self) -> bool {
+        let raw = self.as_raw();
+        // SAFETY: `self.as_raw()` is valid per type invariants.
+        unsafe { (*raw).next != raw && (*raw).prev != raw }
+    }
+
+    /// Returns a pin-initializer for the list head.
+    pub fn new() -> impl PinInit<Self> {
+        pin_init!(Self {
+            // SAFETY: `INIT_LIST_HEAD` initializes `slot` to a valid empty list.
+            inner <- Opaque::ffi_init(|slot| unsafe { bindings::INIT_LIST_HEAD(slot) }),
+        })
+    }
+}
+
+// SAFETY: `list_head` contains no thread-bound state; it only holds
+// `next`/`prev` pointers.
+unsafe impl Send for CListHead {}
+
+// SAFETY: `CListHead` can be shared among threads as modifications are
+// not allowed at the moment.
+unsafe impl Sync for CListHead {}
+
+impl PartialEq for CListHead {
+    #[inline]
+    fn eq(&self, other: &Self) -> bool {
+        core::ptr::eq(self, other)
+    }
+}
+
+impl Eq for CListHead {}
+
+/// Low-level iterator over `list_head` nodes.
+///
+/// An iterator used to iterate over a C intrusive linked list (`list_head`). The caller has to
+/// perform conversion of returned [`CListHead`] to an item (using [`container_of`] or similar).
+///
+/// # Invariants
+///
+/// `current` and `sentinel` are valid references into an initialized linked list.
+struct CListHeadIter<'a> {
+    /// Current position in the list.
+    current: &'a CListHead,
+    /// The sentinel head (used to detect end of iteration).
+    sentinel: &'a CListHead,
+}
+
+impl<'a> Iterator for CListHeadIter<'a> {
+    type Item = &'a CListHead;
+
+    #[inline]
+    fn next(&mut self) -> Option<Self::Item> {
+        // Check if we've reached the sentinel (end of list).
+        if self.current == self.sentinel {
+            return None;
+        }
+
+        let item = self.current;
+        self.current = item.next();
+        Some(item)
+    }
+}
+
+impl<'a> FusedIterator for CListHeadIter<'a> {}
+
+/// A typed C linked list with a sentinel head intended for FFI use-cases where
+/// a C subsystem manages a linked list that Rust code needs to read. Generally
+/// required only for special cases.
+///
+/// A sentinel head [`CListHead`] represents the entire linked list and can be used
+/// for iteration over items of type `T`; it is not associated with a specific item.
+///
+/// The const generic `OFFSET` specifies the byte offset of the `list_head` field within
+/// the struct that `T` wraps.
+///
+/// # Invariants
+///
+/// - The sentinel [`CListHead`] has valid non-`NULL` `next`/`prev` pointers.
+/// - `OFFSET` is the byte offset of the `list_head` field within the struct that `T` wraps.
+/// - All the list's `list_head` nodes have valid non-`NULL` `next`/`prev` pointers.
+#[repr(transparent)]
+pub struct CList<T, const OFFSET: usize>(CListHead, PhantomData<T>);
+
+impl<T, const OFFSET: usize> CList<T, OFFSET> {
+    /// Create a typed [`CList`] reference from a raw sentinel `list_head` pointer.
+    ///
+    /// # Safety
+    ///
+    /// - `ptr` must be a valid pointer to an initialized sentinel `list_head` (e.g. via
+    ///   `INIT_LIST_HEAD()`), with valid non-`NULL` `next`/`prev` pointers.
+    /// - `ptr` must remain valid for the lifetime `'a`.
+    /// - The list and all linked nodes must not be concurrently modified for the lifetime `'a`.
+    /// - The list must contain items where the `list_head` field is at byte offset `OFFSET`.
+    /// - `T` must be `#[repr(transparent)]` over the C struct.
+    #[inline]
+    pub unsafe fn from_raw<'a>(ptr: *mut bindings::list_head) -> &'a Self {
+        // SAFETY:
+        // - `CList` has the same layout as `CListHead` due to `#[repr(transparent)]`.
+        // - Caller guarantees `ptr` is a valid, sentinel `list_head` object.
+        unsafe { &*ptr.cast() }
+    }
+
+    /// Check if the list is empty.
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        !self.0.is_linked()
+    }
+
+    /// Create an iterator over typed items.
+    #[inline]
+    pub fn iter(&self) -> CListIter<'_, T, OFFSET> {
+        let head = &self.0;
+        CListIter {
+            head_iter: CListHeadIter {
+                current: head.next(),
+                sentinel: head,
+            },
+            _phantom: PhantomData,
+        }
+    }
+}
+
+/// High-level iterator over typed list items.
+pub struct CListIter<'a, T, const OFFSET: usize> {
+    head_iter: CListHeadIter<'a>,
+    _phantom: PhantomData<&'a T>,
+}
+
+impl<'a, T, const OFFSET: usize> Iterator for CListIter<'a, T, OFFSET> {
+    type Item = &'a T;
+
+    #[inline]
+    fn next(&mut self) -> Option<Self::Item> {
+        let head = self.head_iter.next()?;
+
+        // Convert to item using `OFFSET`.
+        //
+        // SAFETY: The pointer calculation is valid because `OFFSET` is derived
+        // from `offset_of!` per type invariants.
+        Some(unsafe { &*head.as_raw().byte_sub(OFFSET).cast::<T>() })
+    }
+}
+
+impl<'a, T, const OFFSET: usize> FusedIterator for CListIter<'a, T, OFFSET> {}
+
+/// Create a C doubly-circular linked list interface [`CList`] from a raw `list_head` pointer.
+///
+/// This macro creates a `CList<T, OFFSET>` that can iterate over items of type `$rust_type`
+/// linked via the `$field` field in the underlying C struct `$c_type`.
+///
+/// # Arguments
+///
+/// - `$head`: Raw pointer to the sentinel `list_head` object (`*mut bindings::list_head`).
+/// - `$rust_type`: Each item's Rust wrapper type.
+/// - `$c_type`: Each item's C struct type that contains the embedded `list_head`.
+/// - `$field`: The name of the `list_head` field within the C struct.
+///
+/// # Safety
+///
+/// The caller must ensure:
+///
+/// - `$head` is a valid, initialized sentinel `list_head` (e.g. via `INIT_LIST_HEAD()`)
+///   pointing to a list that is not concurrently modified for the lifetime of the [`CList`].
+/// - The list contains items of type `$c_type` linked via an embedded `$field`.
+/// - `$rust_type` is `#[repr(transparent)]` over `$c_type` or has compatible layout.
+///
+/// # Examples
+///
+/// Refer to the examples in the [`crate::interop::list`] module documentation.
+#[macro_export]
+macro_rules! clist_create {
+    ($head:expr, $rust_type:ty, $c_type:ty, $($field:tt).+) => {{
+        // Compile-time check that field path is a `list_head`.
+        let _: fn(*const $c_type) -> *const $crate::bindings::list_head =
+            |p| &raw const (*p).$($field).+;
+
+        // Calculate offset and create `CList`.
+        const OFFSET: usize = ::core::mem::offset_of!($c_type, $($field).+);
+        $crate::interop::list::CList::<$rust_type, OFFSET>::from_raw($head)
+    }};
+}
+pub use clist_create;
diff --git a/rust/kernel/io.rs b/rust/kernel/io.rs
index c1cca7b438c3..fcc7678fd9e3 100644
--- a/rust/kernel/io.rs
+++ b/rust/kernel/io.rs
@@ -11,10 +11,14 @@ use crate::{
 
 pub mod mem;
 pub mod poll;
+pub mod register;
 pub mod resource;
 
+pub use crate::register;
 pub use resource::Resource;
 
+use register::LocatedRegister;
+
 /// Physical address type.
 ///
 /// This is a type alias to either `u32` or `u64` depending on the config option
@@ -137,130 +141,6 @@ impl<const SIZE: usize> MmioRaw<SIZE> {
 #[repr(transparent)]
 pub struct Mmio<const SIZE: usize = 0>(MmioRaw<SIZE>);
 
-/// Internal helper macros used to invoke C MMIO read functions.
-///
-/// This macro is intended to be used by higher-level MMIO access macros (define_read) and provides
-/// a unified expansion for infallible vs. fallible read semantics. It emits a direct call into the
-/// corresponding C helper and performs the required cast to the Rust return type.
-///
-/// # Parameters
-///
-/// * `$c_fn` – The C function performing the MMIO read.
-/// * `$self` – The I/O backend object.
-/// * `$ty` – The type of the value to be read.
-/// * `$addr` – The MMIO address to read.
-///
-/// This macro does not perform any validation; all invariants must be upheld by the higher-level
-/// abstraction invoking it.
-macro_rules! call_mmio_read {
-    (infallible, $c_fn:ident, $self:ident, $type:ty, $addr:expr) => {
-        // SAFETY: By the type invariant `addr` is a valid address for MMIO operations.
-        unsafe { bindings::$c_fn($addr as *const c_void) as $type }
-    };
-
-    (fallible, $c_fn:ident, $self:ident, $type:ty, $addr:expr) => {{
-        // SAFETY: By the type invariant `addr` is a valid address for MMIO operations.
-        Ok(unsafe { bindings::$c_fn($addr as *const c_void) as $type })
-    }};
-}
-
-/// Internal helper macros used to invoke C MMIO write functions.
-///
-/// This macro is intended to be used by higher-level MMIO access macros (define_write) and provides
-/// a unified expansion for infallible vs. fallible write semantics. It emits a direct call into the
-/// corresponding C helper and performs the required cast to the Rust return type.
-///
-/// # Parameters
-///
-/// * `$c_fn` – The C function performing the MMIO write.
-/// * `$self` – The I/O backend object.
-/// * `$ty` – The type of the written value.
-/// * `$addr` – The MMIO address to write.
-/// * `$value` – The value to write.
-///
-/// This macro does not perform any validation; all invariants must be upheld by the higher-level
-/// abstraction invoking it.
-macro_rules! call_mmio_write {
-    (infallible, $c_fn:ident, $self:ident, $ty:ty, $addr:expr, $value:expr) => {
-        // SAFETY: By the type invariant `addr` is a valid address for MMIO operations.
-        unsafe { bindings::$c_fn($value, $addr as *mut c_void) }
-    };
-
-    (fallible, $c_fn:ident, $self:ident, $ty:ty, $addr:expr, $value:expr) => {{
-        // SAFETY: By the type invariant `addr` is a valid address for MMIO operations.
-        unsafe { bindings::$c_fn($value, $addr as *mut c_void) };
-        Ok(())
-    }};
-}
-
-macro_rules! define_read {
-    (infallible, $(#[$attr:meta])* $vis:vis $name:ident, $call_macro:ident($c_fn:ident) ->
-     $type_name:ty) => {
-        /// Read IO data from a given offset known at compile time.
-        ///
-        /// Bound checks are performed on compile time, hence if the offset is not known at compile
-        /// time, the build will fail.
-        $(#[$attr])*
-        // Always inline to optimize out error path of `io_addr_assert`.
-        #[inline(always)]
-        $vis fn $name(&self, offset: usize) -> $type_name {
-            let addr = self.io_addr_assert::<$type_name>(offset);
-
-            // SAFETY: By the type invariant `addr` is a valid address for IO operations.
-            $call_macro!(infallible, $c_fn, self, $type_name, addr)
-        }
-    };
-
-    (fallible, $(#[$attr:meta])* $vis:vis $try_name:ident, $call_macro:ident($c_fn:ident) ->
-     $type_name:ty) => {
-        /// Read IO data from a given offset.
-        ///
-        /// Bound checks are performed on runtime, it fails if the offset (plus the type size) is
-        /// out of bounds.
-        $(#[$attr])*
-        $vis fn $try_name(&self, offset: usize) -> Result<$type_name> {
-            let addr = self.io_addr::<$type_name>(offset)?;
-
-            // SAFETY: By the type invariant `addr` is a valid address for IO operations.
-            $call_macro!(fallible, $c_fn, self, $type_name, addr)
-        }
-    };
-}
-pub(crate) use define_read;
-
-macro_rules! define_write {
-    (infallible, $(#[$attr:meta])* $vis:vis $name:ident, $call_macro:ident($c_fn:ident) <-
-     $type_name:ty) => {
-        /// Write IO data from a given offset known at compile time.
-        ///
-        /// Bound checks are performed on compile time, hence if the offset is not known at compile
-        /// time, the build will fail.
-        $(#[$attr])*
-        // Always inline to optimize out error path of `io_addr_assert`.
-        #[inline(always)]
-        $vis fn $name(&self, value: $type_name, offset: usize) {
-            let addr = self.io_addr_assert::<$type_name>(offset);
-
-            $call_macro!(infallible, $c_fn, self, $type_name, addr, value);
-        }
-    };
-
-    (fallible, $(#[$attr:meta])* $vis:vis $try_name:ident, $call_macro:ident($c_fn:ident) <-
-     $type_name:ty) => {
-        /// Write IO data from a given offset.
-        ///
-        /// Bound checks are performed on runtime, it fails if the offset (plus the type size) is
-        /// out of bounds.
-        $(#[$attr])*
-        $vis fn $try_name(&self, value: $type_name, offset: usize) -> Result {
-            let addr = self.io_addr::<$type_name>(offset)?;
-
-            $call_macro!(fallible, $c_fn, self, $type_name, addr, value)
-        }
-    };
-}
-pub(crate) use define_write;
-
 /// Checks whether an access of type `U` at the given `offset`
 /// is valid within this region.
 #[inline]
@@ -273,14 +153,74 @@ const fn offset_valid<U>(offset: usize, size: usize) -> bool {
     }
 }
 
-/// Marker trait indicating that an I/O backend supports operations of a certain type.
+/// Trait indicating that an I/O backend supports operations of a certain type and providing an
+/// implementation for these operations.
 ///
 /// Different I/O backends can implement this trait to expose only the operations they support.
 ///
 /// For example, a PCI configuration space may implement `IoCapable<u8>`, `IoCapable<u16>`,
 /// and `IoCapable<u32>`, but not `IoCapable<u64>`, while an MMIO region on a 64-bit
 /// system might implement all four.
-pub trait IoCapable<T> {}
+pub trait IoCapable<T> {
+    /// Performs an I/O read of type `T` at `address` and returns the result.
+    ///
+    /// # Safety
+    ///
+    /// The range `[address..address + size_of::<T>()]` must be within the bounds of `Self`.
+    unsafe fn io_read(&self, address: usize) -> T;
+
+    /// Performs an I/O write of `value` at `address`.
+    ///
+    /// # Safety
+    ///
+    /// The range `[address..address + size_of::<T>()]` must be within the bounds of `Self`.
+    unsafe fn io_write(&self, value: T, address: usize);
+}
+
+/// Describes a given I/O location: its offset, width, and type to convert the raw value from and
+/// into.
+///
+/// This trait is the key abstraction allowing [`Io::read`], [`Io::write`], and [`Io::update`] (and
+/// their fallible [`try_read`](Io::try_read), [`try_write`](Io::try_write) and
+/// [`try_update`](Io::try_update) counterparts) to work uniformly with both raw [`usize`] offsets
+/// (for primitive types like [`u32`]) and typed ones (like those generated by the [`register!`]
+/// macro).
+///
+/// An `IoLoc<T>` carries three pieces of information:
+///
+/// - The offset to access (returned by [`IoLoc::offset`]),
+/// - The width of the access (determined by [`IoLoc::IoType`]),
+/// - The type `T` in which the raw data is returned or provided.
+///
+/// `T` and `IoLoc::IoType` may differ: for instance, a typed register has `T` = the register type
+/// with its bitfields, and `IoType` = its backing primitive (e.g. `u32`).
+pub trait IoLoc<T> {
+    /// Size ([`u8`], [`u16`], etc) of the I/O performed on the returned [`offset`](IoLoc::offset).
+    type IoType: Into<T> + From<T>;
+
+    /// Consumes `self` and returns the offset of this location.
+    fn offset(self) -> usize;
+}
+
+/// Implements [`IoLoc<$ty>`] for [`usize`], allowing [`usize`] to be used as a parameter of
+/// [`Io::read`] and [`Io::write`].
+macro_rules! impl_usize_ioloc {
+    ($($ty:ty),*) => {
+        $(
+            impl IoLoc<$ty> for usize {
+                type IoType = $ty;
+
+                #[inline(always)]
+                fn offset(self) -> usize {
+                    self
+                }
+            }
+        )*
+    }
+}
+
+// Provide the ability to read any primitive type from a [`usize`].
+impl_usize_ioloc!(u8, u16, u32, u64);
 
 /// Types implementing this trait (e.g. MMIO BARs or PCI config regions)
 /// can perform I/O operations on regions of memory.
@@ -322,146 +262,445 @@ pub trait Io {
 
     /// Fallible 8-bit read with runtime bounds check.
     #[inline(always)]
-    fn try_read8(&self, _offset: usize) -> Result<u8>
+    fn try_read8(&self, offset: usize) -> Result<u8>
     where
         Self: IoCapable<u8>,
     {
-        build_error!("Backend does not support fallible 8-bit read")
+        self.try_read(offset)
     }
 
     /// Fallible 16-bit read with runtime bounds check.
     #[inline(always)]
-    fn try_read16(&self, _offset: usize) -> Result<u16>
+    fn try_read16(&self, offset: usize) -> Result<u16>
     where
         Self: IoCapable<u16>,
     {
-        build_error!("Backend does not support fallible 16-bit read")
+        self.try_read(offset)
     }
 
     /// Fallible 32-bit read with runtime bounds check.
     #[inline(always)]
-    fn try_read32(&self, _offset: usize) -> Result<u32>
+    fn try_read32(&self, offset: usize) -> Result<u32>
     where
         Self: IoCapable<u32>,
     {
-        build_error!("Backend does not support fallible 32-bit read")
+        self.try_read(offset)
     }
 
     /// Fallible 64-bit read with runtime bounds check.
     #[inline(always)]
-    fn try_read64(&self, _offset: usize) -> Result<u64>
+    fn try_read64(&self, offset: usize) -> Result<u64>
     where
         Self: IoCapable<u64>,
     {
-        build_error!("Backend does not support fallible 64-bit read")
+        self.try_read(offset)
     }
 
     /// Fallible 8-bit write with runtime bounds check.
     #[inline(always)]
-    fn try_write8(&self, _value: u8, _offset: usize) -> Result
+    fn try_write8(&self, value: u8, offset: usize) -> Result
     where
         Self: IoCapable<u8>,
     {
-        build_error!("Backend does not support fallible 8-bit write")
+        self.try_write(offset, value)
     }
 
     /// Fallible 16-bit write with runtime bounds check.
     #[inline(always)]
-    fn try_write16(&self, _value: u16, _offset: usize) -> Result
+    fn try_write16(&self, value: u16, offset: usize) -> Result
     where
         Self: IoCapable<u16>,
     {
-        build_error!("Backend does not support fallible 16-bit write")
+        self.try_write(offset, value)
     }
 
     /// Fallible 32-bit write with runtime bounds check.
     #[inline(always)]
-    fn try_write32(&self, _value: u32, _offset: usize) -> Result
+    fn try_write32(&self, value: u32, offset: usize) -> Result
     where
         Self: IoCapable<u32>,
     {
-        build_error!("Backend does not support fallible 32-bit write")
+        self.try_write(offset, value)
     }
 
     /// Fallible 64-bit write with runtime bounds check.
     #[inline(always)]
-    fn try_write64(&self, _value: u64, _offset: usize) -> Result
+    fn try_write64(&self, value: u64, offset: usize) -> Result
     where
         Self: IoCapable<u64>,
     {
-        build_error!("Backend does not support fallible 64-bit write")
+        self.try_write(offset, value)
     }
 
     /// Infallible 8-bit read with compile-time bounds check.
     #[inline(always)]
-    fn read8(&self, _offset: usize) -> u8
+    fn read8(&self, offset: usize) -> u8
     where
         Self: IoKnownSize + IoCapable<u8>,
     {
-        build_error!("Backend does not support infallible 8-bit read")
+        self.read(offset)
     }
 
     /// Infallible 16-bit read with compile-time bounds check.
     #[inline(always)]
-    fn read16(&self, _offset: usize) -> u16
+    fn read16(&self, offset: usize) -> u16
     where
         Self: IoKnownSize + IoCapable<u16>,
     {
-        build_error!("Backend does not support infallible 16-bit read")
+        self.read(offset)
     }
 
     /// Infallible 32-bit read with compile-time bounds check.
     #[inline(always)]
-    fn read32(&self, _offset: usize) -> u32
+    fn read32(&self, offset: usize) -> u32
     where
         Self: IoKnownSize + IoCapable<u32>,
     {
-        build_error!("Backend does not support infallible 32-bit read")
+        self.read(offset)
     }
 
     /// Infallible 64-bit read with compile-time bounds check.
     #[inline(always)]
-    fn read64(&self, _offset: usize) -> u64
+    fn read64(&self, offset: usize) -> u64
     where
         Self: IoKnownSize + IoCapable<u64>,
     {
-        build_error!("Backend does not support infallible 64-bit read")
+        self.read(offset)
     }
 
     /// Infallible 8-bit write with compile-time bounds check.
     #[inline(always)]
-    fn write8(&self, _value: u8, _offset: usize)
+    fn write8(&self, value: u8, offset: usize)
     where
         Self: IoKnownSize + IoCapable<u8>,
     {
-        build_error!("Backend does not support infallible 8-bit write")
+        self.write(offset, value)
     }
 
     /// Infallible 16-bit write with compile-time bounds check.
     #[inline(always)]
-    fn write16(&self, _value: u16, _offset: usize)
+    fn write16(&self, value: u16, offset: usize)
     where
         Self: IoKnownSize + IoCapable<u16>,
     {
-        build_error!("Backend does not support infallible 16-bit write")
+        self.write(offset, value)
     }
 
     /// Infallible 32-bit write with compile-time bounds check.
     #[inline(always)]
-    fn write32(&self, _value: u32, _offset: usize)
+    fn write32(&self, value: u32, offset: usize)
     where
         Self: IoKnownSize + IoCapable<u32>,
     {
-        build_error!("Backend does not support infallible 32-bit write")
+        self.write(offset, value)
     }
 
     /// Infallible 64-bit write with compile-time bounds check.
     #[inline(always)]
-    fn write64(&self, _value: u64, _offset: usize)
+    fn write64(&self, value: u64, offset: usize)
     where
         Self: IoKnownSize + IoCapable<u64>,
     {
-        build_error!("Backend does not support infallible 64-bit write")
+        self.write(offset, value)
+    }
+
+    /// Generic fallible read with runtime bounds check.
+    ///
+    /// # Examples
+    ///
+    /// Read a primitive type from an I/O address:
+    ///
+    /// ```no_run
+    /// use kernel::io::{
+    ///     Io,
+    ///     Mmio,
+    /// };
+    ///
+    /// fn do_reads(io: &Mmio) -> Result {
+    ///     // 32-bit read from address `0x10`.
+    ///     let v: u32 = io.try_read(0x10)?;
+    ///
+    ///     // 8-bit read from address `0xfff`.
+    ///     let v: u8 = io.try_read(0xfff)?;
+    ///
+    ///     Ok(())
+    /// }
+    /// ```
+    #[inline(always)]
+    fn try_read<T, L>(&self, location: L) -> Result<T>
+    where
+        L: IoLoc<T>,
+        Self: IoCapable<L::IoType>,
+    {
+        let address = self.io_addr::<L::IoType>(location.offset())?;
+
+        // SAFETY: `address` has been validated by `io_addr`.
+        Ok(unsafe { self.io_read(address) }.into())
+    }
+
+    /// Generic fallible write with runtime bounds check.
+    ///
+    /// # Examples
+    ///
+    /// Write a primitive type to an I/O address:
+    ///
+    /// ```no_run
+    /// use kernel::io::{
+    ///     Io,
+    ///     Mmio,
+    /// };
+    ///
+    /// fn do_writes(io: &Mmio) -> Result {
+    ///     // 32-bit write of value `1` at address `0x10`.
+    ///     io.try_write(0x10, 1u32)?;
+    ///
+    ///     // 8-bit write of value `0xff` at address `0xfff`.
+    ///     io.try_write(0xfff, 0xffu8)?;
+    ///
+    ///     Ok(())
+    /// }
+    /// ```
+    #[inline(always)]
+    fn try_write<T, L>(&self, location: L, value: T) -> Result
+    where
+        L: IoLoc<T>,
+        Self: IoCapable<L::IoType>,
+    {
+        let address = self.io_addr::<L::IoType>(location.offset())?;
+        let io_value = value.into();
+
+        // SAFETY: `address` has been validated by `io_addr`.
+        unsafe { self.io_write(io_value, address) }
+
+        Ok(())
+    }
+
+    /// Generic fallible write of a fully-located register value.
+    ///
+    /// # Examples
+    ///
+    /// Tuples carrying a location and a value can be used with this method:
+    ///
+    /// ```no_run
+    /// use kernel::io::{
+    ///     register,
+    ///     Io,
+    ///     Mmio,
+    /// };
+    ///
+    /// register! {
+    ///     VERSION(u32) @ 0x100 {
+    ///         15:8 major;
+    ///         7:0  minor;
+    ///     }
+    /// }
+    ///
+    /// impl VERSION {
+    ///     fn new(major: u8, minor: u8) -> Self {
+    ///         VERSION::zeroed().with_major(major).with_minor(minor)
+    ///     }
+    /// }
+    ///
+    /// fn do_write_reg(io: &Mmio) -> Result {
+    ///
+    ///     io.try_write_reg(VERSION::new(1, 0))
+    /// }
+    /// ```
+    #[inline(always)]
+    fn try_write_reg<T, L, V>(&self, value: V) -> Result
+    where
+        L: IoLoc<T>,
+        V: LocatedRegister<Location = L, Value = T>,
+        Self: IoCapable<L::IoType>,
+    {
+        let (location, value) = value.into_io_op();
+
+        self.try_write(location, value)
+    }
+
+    /// Generic fallible update with runtime bounds check.
+    ///
+    /// Note: this does not perform any synchronization. The caller is responsible for ensuring
+    /// exclusive access if required.
+    ///
+    /// # Examples
+    ///
+    /// Read the u32 value at address `0x10`, increment it, and store the updated value back:
+    ///
+    /// ```no_run
+    /// use kernel::io::{
+    ///     Io,
+    ///     Mmio,
+    /// };
+    ///
+    /// fn do_update(io: &Mmio<0x1000>) -> Result {
+    ///     io.try_update(0x10, |v: u32| {
+    ///         v + 1
+    ///     })
+    /// }
+    /// ```
+    #[inline(always)]
+    fn try_update<T, L, F>(&self, location: L, f: F) -> Result
+    where
+        L: IoLoc<T>,
+        Self: IoCapable<L::IoType>,
+        F: FnOnce(T) -> T,
+    {
+        let address = self.io_addr::<L::IoType>(location.offset())?;
+
+        // SAFETY: `address` has been validated by `io_addr`.
+        let value: T = unsafe { self.io_read(address) }.into();
+        let io_value = f(value).into();
+
+        // SAFETY: `address` has been validated by `io_addr`.
+        unsafe { self.io_write(io_value, address) }
+
+        Ok(())
+    }
+
+    /// Generic infallible read with compile-time bounds check.
+    ///
+    /// # Examples
+    ///
+    /// Read a primitive type from an I/O address:
+    ///
+    /// ```no_run
+    /// use kernel::io::{
+    ///     Io,
+    ///     Mmio,
+    /// };
+    ///
+    /// fn do_reads(io: &Mmio<0x1000>) {
+    ///     // 32-bit read from address `0x10`.
+    ///     let v: u32 = io.read(0x10);
+    ///
+    ///     // 8-bit read from the top of the I/O space.
+    ///     let v: u8 = io.read(0xfff);
+    /// }
+    /// ```
+    #[inline(always)]
+    fn read<T, L>(&self, location: L) -> T
+    where
+        L: IoLoc<T>,
+        Self: IoKnownSize + IoCapable<L::IoType>,
+    {
+        let address = self.io_addr_assert::<L::IoType>(location.offset());
+
+        // SAFETY: `address` has been validated by `io_addr_assert`.
+        unsafe { self.io_read(address) }.into()
+    }
+
+    /// Generic infallible write with compile-time bounds check.
+    ///
+    /// # Examples
+    ///
+    /// Write a primitive type to an I/O address:
+    ///
+    /// ```no_run
+    /// use kernel::io::{
+    ///     Io,
+    ///     Mmio,
+    /// };
+    ///
+    /// fn do_writes(io: &Mmio<0x1000>) {
+    ///     // 32-bit write of value `1` at address `0x10`.
+    ///     io.write(0x10, 1u32);
+    ///
+    ///     // 8-bit write of value `0xff` at the top of the I/O space.
+    ///     io.write(0xfff, 0xffu8);
+    /// }
+    /// ```
+    #[inline(always)]
+    fn write<T, L>(&self, location: L, value: T)
+    where
+        L: IoLoc<T>,
+        Self: IoKnownSize + IoCapable<L::IoType>,
+    {
+        let address = self.io_addr_assert::<L::IoType>(location.offset());
+        let io_value = value.into();
+
+        // SAFETY: `address` has been validated by `io_addr_assert`.
+        unsafe { self.io_write(io_value, address) }
+    }
+
+    /// Generic infallible write of a fully-located register value.
+    ///
+    /// # Examples
+    ///
+    /// Tuples carrying a location and a value can be used with this method:
+    ///
+    /// ```no_run
+    /// use kernel::io::{
+    ///     register,
+    ///     Io,
+    ///     Mmio,
+    /// };
+    ///
+    /// register! {
+    ///     VERSION(u32) @ 0x100 {
+    ///         15:8 major;
+    ///         7:0  minor;
+    ///     }
+    /// }
+    ///
+    /// impl VERSION {
+    ///     fn new(major: u8, minor: u8) -> Self {
+    ///         VERSION::zeroed().with_major(major).with_minor(minor)
+    ///     }
+    /// }
+    ///
+    /// fn do_write_reg(io: &Mmio<0x1000>) {
+    ///     io.write_reg(VERSION::new(1, 0));
+    /// }
+    /// ```
+    #[inline(always)]
+    fn write_reg<T, L, V>(&self, value: V)
+    where
+        L: IoLoc<T>,
+        V: LocatedRegister<Location = L, Value = T>,
+        Self: IoKnownSize + IoCapable<L::IoType>,
+    {
+        let (location, value) = value.into_io_op();
+
+        self.write(location, value)
+    }
+
+    /// Generic infallible update with compile-time bounds check.
+    ///
+    /// Note: this does not perform any synchronization. The caller is responsible for ensuring
+    /// exclusive access if required.
+    ///
+    /// # Examples
+    ///
+    /// Read the u32 value at address `0x10`, increment it, and store the updated value back:
+    ///
+    /// ```no_run
+    /// use kernel::io::{
+    ///     Io,
+    ///     Mmio,
+    /// };
+    ///
+    /// fn do_update(io: &Mmio<0x1000>) {
+    ///     io.update(0x10, |v: u32| {
+    ///         v + 1
+    ///     })
+    /// }
+    /// ```
+    #[inline(always)]
+    fn update<T, L, F>(&self, location: L, f: F)
+    where
+        L: IoLoc<T>,
+        Self: IoKnownSize + IoCapable<L::IoType> + Sized,
+        F: FnOnce(T) -> T,
+    {
+        let address = self.io_addr_assert::<L::IoType>(location.offset());
+
+        // SAFETY: `address` has been validated by `io_addr_assert`.
+        let value: T = unsafe { self.io_read(address) }.into();
+        let io_value = f(value).into();
+
+        // SAFETY: `address` has been validated by `io_addr_assert`.
+        unsafe { self.io_write(io_value, address) }
     }
 }
 
@@ -487,14 +726,36 @@ pub trait IoKnownSize: Io {
     }
 }
 
-// MMIO regions support 8, 16, and 32-bit accesses.
-impl<const SIZE: usize> IoCapable<u8> for Mmio<SIZE> {}
-impl<const SIZE: usize> IoCapable<u16> for Mmio<SIZE> {}
-impl<const SIZE: usize> IoCapable<u32> for Mmio<SIZE> {}
+/// Implements [`IoCapable`] on `$mmio` for `$ty` using `$read_fn` and `$write_fn`.
+macro_rules! impl_mmio_io_capable {
+    ($mmio:ident, $(#[$attr:meta])* $ty:ty, $read_fn:ident, $write_fn:ident) => {
+        $(#[$attr])*
+        impl<const SIZE: usize> IoCapable<$ty> for $mmio<SIZE> {
+            unsafe fn io_read(&self, address: usize) -> $ty {
+                // SAFETY: By the trait invariant `address` is a valid address for MMIO operations.
+                unsafe { bindings::$read_fn(address as *const c_void) }
+            }
+
+            unsafe fn io_write(&self, value: $ty, address: usize) {
+                // SAFETY: By the trait invariant `address` is a valid address for MMIO operations.
+                unsafe { bindings::$write_fn(value, address as *mut c_void) }
+            }
+        }
+    };
+}
 
+// MMIO regions support 8, 16, and 32-bit accesses.
+impl_mmio_io_capable!(Mmio, u8, readb, writeb);
+impl_mmio_io_capable!(Mmio, u16, readw, writew);
+impl_mmio_io_capable!(Mmio, u32, readl, writel);
 // MMIO regions on 64-bit systems also support 64-bit accesses.
-#[cfg(CONFIG_64BIT)]
-impl<const SIZE: usize> IoCapable<u64> for Mmio<SIZE> {}
+impl_mmio_io_capable!(
+    Mmio,
+    #[cfg(CONFIG_64BIT)]
+    u64,
+    readq,
+    writeq
+);
 
 impl<const SIZE: usize> Io for Mmio<SIZE> {
     /// Returns the base address of this mapping.
@@ -508,46 +769,6 @@ impl<const SIZE: usize> Io for Mmio<SIZE> {
     fn maxsize(&self) -> usize {
         self.0.maxsize()
     }
-
-    define_read!(fallible, try_read8, call_mmio_read(readb) -> u8);
-    define_read!(fallible, try_read16, call_mmio_read(readw) -> u16);
-    define_read!(fallible, try_read32, call_mmio_read(readl) -> u32);
-    define_read!(
-        fallible,
-        #[cfg(CONFIG_64BIT)]
-        try_read64,
-        call_mmio_read(readq) -> u64
-    );
-
-    define_write!(fallible, try_write8, call_mmio_write(writeb) <- u8);
-    define_write!(fallible, try_write16, call_mmio_write(writew) <- u16);
-    define_write!(fallible, try_write32, call_mmio_write(writel) <- u32);
-    define_write!(
-        fallible,
-        #[cfg(CONFIG_64BIT)]
-        try_write64,
-        call_mmio_write(writeq) <- u64
-    );
-
-    define_read!(infallible, read8, call_mmio_read(readb) -> u8);
-    define_read!(infallible, read16, call_mmio_read(readw) -> u16);
-    define_read!(infallible, read32, call_mmio_read(readl) -> u32);
-    define_read!(
-        infallible,
-        #[cfg(CONFIG_64BIT)]
-        read64,
-        call_mmio_read(readq) -> u64
-    );
-
-    define_write!(infallible, write8, call_mmio_write(writeb) <- u8);
-    define_write!(infallible, write16, call_mmio_write(writew) <- u16);
-    define_write!(infallible, write32, call_mmio_write(writel) <- u32);
-    define_write!(
-        infallible,
-        #[cfg(CONFIG_64BIT)]
-        write64,
-        call_mmio_write(writeq) <- u64
-    );
 }
 
 impl<const SIZE: usize> IoKnownSize for Mmio<SIZE> {
@@ -565,44 +786,70 @@ impl<const SIZE: usize> Mmio<SIZE> {
         // SAFETY: `Mmio` is a transparent wrapper around `MmioRaw`.
         unsafe { &*core::ptr::from_ref(raw).cast() }
     }
+}
+
+/// [`Mmio`] wrapper using relaxed accessors.
+///
+/// This type provides an implementation of [`Io`] that uses relaxed I/O MMIO operands instead of
+/// the regular ones.
+///
+/// See [`Mmio::relaxed`] for a usage example.
+#[repr(transparent)]
+pub struct RelaxedMmio<const SIZE: usize = 0>(Mmio<SIZE>);
+
+impl<const SIZE: usize> Io for RelaxedMmio<SIZE> {
+    #[inline]
+    fn addr(&self) -> usize {
+        self.0.addr()
+    }
+
+    #[inline]
+    fn maxsize(&self) -> usize {
+        self.0.maxsize()
+    }
+}
+
+impl<const SIZE: usize> IoKnownSize for RelaxedMmio<SIZE> {
+    const MIN_SIZE: usize = SIZE;
+}
 
-    define_read!(infallible, pub read8_relaxed, call_mmio_read(readb_relaxed) -> u8);
-    define_read!(infallible, pub read16_relaxed, call_mmio_read(readw_relaxed) -> u16);
-    define_read!(infallible, pub read32_relaxed, call_mmio_read(readl_relaxed) -> u32);
-    define_read!(
-        infallible,
-        #[cfg(CONFIG_64BIT)]
-        pub read64_relaxed,
-        call_mmio_read(readq_relaxed) -> u64
-    );
-
-    define_read!(fallible, pub try_read8_relaxed, call_mmio_read(readb_relaxed) -> u8);
-    define_read!(fallible, pub try_read16_relaxed, call_mmio_read(readw_relaxed) -> u16);
-    define_read!(fallible, pub try_read32_relaxed, call_mmio_read(readl_relaxed) -> u32);
-    define_read!(
-        fallible,
-        #[cfg(CONFIG_64BIT)]
-        pub try_read64_relaxed,
-        call_mmio_read(readq_relaxed) -> u64
-    );
-
-    define_write!(infallible, pub write8_relaxed, call_mmio_write(writeb_relaxed) <- u8);
-    define_write!(infallible, pub write16_relaxed, call_mmio_write(writew_relaxed) <- u16);
-    define_write!(infallible, pub write32_relaxed, call_mmio_write(writel_relaxed) <- u32);
-    define_write!(
-        infallible,
-        #[cfg(CONFIG_64BIT)]
-        pub write64_relaxed,
-        call_mmio_write(writeq_relaxed) <- u64
-    );
-
-    define_write!(fallible, pub try_write8_relaxed, call_mmio_write(writeb_relaxed) <- u8);
-    define_write!(fallible, pub try_write16_relaxed, call_mmio_write(writew_relaxed) <- u16);
-    define_write!(fallible, pub try_write32_relaxed, call_mmio_write(writel_relaxed) <- u32);
-    define_write!(
-        fallible,
-        #[cfg(CONFIG_64BIT)]
-        pub try_write64_relaxed,
-        call_mmio_write(writeq_relaxed) <- u64
-    );
+impl<const SIZE: usize> Mmio<SIZE> {
+    /// Returns a [`RelaxedMmio`] reference that performs relaxed I/O operations.
+    ///
+    /// Relaxed accessors do not provide ordering guarantees with respect to DMA or memory accesses
+    /// and can be used when such ordering is not required.
+    ///
+    /// # Examples
+    ///
+    /// ```no_run
+    /// use kernel::io::{
+    ///     Io,
+    ///     Mmio,
+    ///     RelaxedMmio,
+    /// };
+    ///
+    /// fn do_io(io: &Mmio<0x100>) {
+    ///     // The access is performed using `readl_relaxed` instead of `readl`.
+    ///     let v = io.relaxed().read32(0x10);
+    /// }
+    ///
+    /// ```
+    pub fn relaxed(&self) -> &RelaxedMmio<SIZE> {
+        // SAFETY: `RelaxedMmio` is `#[repr(transparent)]` over `Mmio`, so `Mmio<SIZE>` and
+        // `RelaxedMmio<SIZE>` have identical layout.
+        unsafe { core::mem::transmute(self) }
+    }
 }
+
+// MMIO regions support 8, 16, and 32-bit accesses.
+impl_mmio_io_capable!(RelaxedMmio, u8, readb_relaxed, writeb_relaxed);
+impl_mmio_io_capable!(RelaxedMmio, u16, readw_relaxed, writew_relaxed);
+impl_mmio_io_capable!(RelaxedMmio, u32, readl_relaxed, writel_relaxed);
+// MMIO regions on 64-bit systems also support 64-bit accesses.
+impl_mmio_io_capable!(
+    RelaxedMmio,
+    #[cfg(CONFIG_64BIT)]
+    u64,
+    readq_relaxed,
+    writeq_relaxed
+);
diff --git a/rust/kernel/io/mem.rs b/rust/kernel/io/mem.rs
index 620022cff401..7dc78d547f7a 100644
--- a/rust/kernel/io/mem.rs
+++ b/rust/kernel/io/mem.rs
@@ -54,6 +54,7 @@ impl<'a> IoRequest<'a> {
     /// use kernel::{
     ///     bindings,
     ///     device::Core,
+    ///     io::Io,
     ///     of,
     ///     platform,
     /// };
@@ -78,9 +79,9 @@ impl<'a> IoRequest<'a> {
     ///       let io = iomem.access(pdev.as_ref())?;
     ///
     ///       // Read and write a 32-bit value at `offset`.
-    ///       let data = io.read32_relaxed(offset);
+    ///       let data = io.read32(offset);
     ///
-    ///       io.write32_relaxed(data, offset);
+    ///       io.write32(data, offset);
     ///
     ///       # Ok(SampleDriver)
     ///     }
@@ -117,6 +118,7 @@ impl<'a> IoRequest<'a> {
     /// use kernel::{
     ///     bindings,
     ///     device::Core,
+    ///     io::Io,
     ///     of,
     ///     platform,
     /// };
@@ -141,9 +143,9 @@ impl<'a> IoRequest<'a> {
     ///
     ///       let io = iomem.access(pdev.as_ref())?;
     ///
-    ///       let data = io.try_read32_relaxed(offset)?;
+    ///       let data = io.try_read32(offset)?;
     ///
-    ///       io.try_write32_relaxed(data, offset)?;
+    ///       io.try_write32(data, offset)?;
     ///
     ///       # Ok(SampleDriver)
     ///     }
diff --git a/rust/kernel/io/register.rs b/rust/kernel/io/register.rs
new file mode 100644
index 000000000000..abc49926abfe
--- /dev/null
+++ b/rust/kernel/io/register.rs
@@ -0,0 +1,1260 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Macro to define register layout and accessors.
+//!
+//! The [`register!`](kernel::io::register!) macro provides an intuitive and readable syntax for
+//! defining a dedicated type for each register and accessing it using [`Io`](super::Io). Each such
+//! type comes with its own field accessors that can return an error if a field's value is invalid.
+//!
+//! Note: most of the items in this module are public so they can be referenced by the macro, but
+//! most are not to be used directly by users. Outside of the `register!` macro itself, the only
+//! items you might want to import from this module are [`WithBase`] and [`Array`].
+//!
+//! # Simple example
+//!
+//! ```no_run
+//! use kernel::io::register;
+//!
+//! register! {
+//!     /// Basic information about the chip.
+//!     pub BOOT_0(u32) @ 0x00000100 {
+//!         /// Vendor ID.
+//!         15:8 vendor_id;
+//!         /// Major revision of the chip.
+//!         7:4 major_revision;
+//!         /// Minor revision of the chip.
+//!         3:0 minor_revision;
+//!     }
+//! }
+//! ```
+//!
+//! This defines a 32-bit `BOOT_0` type which can be read from or written to offset `0x100` of an
+//! `Io` region, with the described bitfields. For instance, `minor_revision` consists of the 4
+//! least significant bits of the type.
+//!
+//! Fields are instances of [`Bounded`](kernel::num::Bounded) and can be read by calling their
+//! getter method, which is named after them. They also have setter methods prefixed with `with_`
+//! for runtime values and `with_const_` for constant values. All setters return the updated
+//! register value.
+//!
+//! Fields can also be transparently converted from/to an arbitrary type by using the `=>` and
+//! `?=>` syntaxes.
+//!
+//! If present, doc comments above register or fields definitions are added to the relevant item
+//! they document (the register type itself, or the field's setter and getter methods).
+//!
+//! Note that multiple registers can be defined in a single `register!` invocation. This can be
+//! useful to group related registers together.
+//!
+//! Here is how the register defined above can be used in code:
+//!
+//!
+//! ```no_run
+//! use kernel::{
+//!     io::{
+//!         register,
+//!         Io,
+//!         IoLoc,
+//!     },
+//!     num::Bounded,
+//! };
+//! # use kernel::io::Mmio;
+//! # register! {
+//! #     pub BOOT_0(u32) @ 0x00000100 {
+//! #         15:8 vendor_id;
+//! #         7:4 major_revision;
+//! #         3:0 minor_revision;
+//! #     }
+//! # }
+//! # fn test(io: &Mmio<0x1000>) {
+//! # fn obtain_vendor_id() -> u8 { 0xff }
+//!
+//! // Read from the register's defined offset (0x100).
+//! let boot0 = io.read(BOOT_0);
+//! pr_info!("chip revision: {}.{}", boot0.major_revision().get(), boot0.minor_revision().get());
+//!
+//! // Update some fields and write the new value back.
+//! let new_boot0 = boot0
+//!     // Constant values.
+//!     .with_const_major_revision::<3>()
+//!     .with_const_minor_revision::<10>()
+//!     // Runtime value.
+//!     .with_vendor_id(obtain_vendor_id());
+//! io.write_reg(new_boot0);
+//!
+//! // Or, build a new value from zero and write it:
+//! io.write_reg(BOOT_0::zeroed()
+//!     .with_const_major_revision::<3>()
+//!     .with_const_minor_revision::<10>()
+//!     .with_vendor_id(obtain_vendor_id())
+//! );
+//!
+//! // Or, read and update the register in a single step.
+//! io.update(BOOT_0, |r| r
+//!     .with_const_major_revision::<3>()
+//!     .with_const_minor_revision::<10>()
+//!     .with_vendor_id(obtain_vendor_id())
+//! );
+//!
+//! // Constant values can also be built using the const setters.
+//! const V: BOOT_0 = pin_init::zeroed::<BOOT_0>()
+//!     .with_const_major_revision::<3>()
+//!     .with_const_minor_revision::<10>();
+//! # }
+//! ```
+//!
+//! For more extensive documentation about how to define registers, see the
+//! [`register!`](kernel::io::register!) macro.
+
+use core::marker::PhantomData;
+
+use crate::io::IoLoc;
+
+use kernel::build_assert;
+
+/// Trait implemented by all registers.
+pub trait Register: Sized {
+    /// Backing primitive type of the register.
+    type Storage: Into<Self> + From<Self>;
+
+    /// Start offset of the register.
+    ///
+    /// The interpretation of this offset depends on the type of the register.
+    const OFFSET: usize;
+}
+
+/// Trait implemented by registers with a fixed offset.
+pub trait FixedRegister: Register {}
+
+/// Allows `()` to be used as the `location` parameter of [`Io::write`](super::Io::write) when
+/// passing a [`FixedRegister`] value.
+impl<T> IoLoc<T> for ()
+where
+    T: FixedRegister,
+{
+    type IoType = T::Storage;
+
+    #[inline(always)]
+    fn offset(self) -> usize {
+        T::OFFSET
+    }
+}
+
+/// A [`FixedRegister`] carries its location in its type. Thus `FixedRegister` values can be used
+/// as an [`IoLoc`].
+impl<T> IoLoc<T> for T
+where
+    T: FixedRegister,
+{
+    type IoType = T::Storage;
+
+    #[inline(always)]
+    fn offset(self) -> usize {
+        T::OFFSET
+    }
+}
+
+/// Location of a fixed register.
+pub struct FixedRegisterLoc<T: FixedRegister>(PhantomData<T>);
+
+impl<T: FixedRegister> FixedRegisterLoc<T> {
+    /// Returns the location of `T`.
+    #[inline(always)]
+    // We do not implement `Default` so we can be const.
+    #[expect(clippy::new_without_default)]
+    pub const fn new() -> Self {
+        Self(PhantomData)
+    }
+}
+
+impl<T> IoLoc<T> for FixedRegisterLoc<T>
+where
+    T: FixedRegister,
+{
+    type IoType = T::Storage;
+
+    #[inline(always)]
+    fn offset(self) -> usize {
+        T::OFFSET
+    }
+}
+
+/// Trait providing a base address to be added to the offset of a relative register to obtain
+/// its actual offset.
+///
+/// The `T` generic argument is used to distinguish which base to use, in case a type provides
+/// several bases. It is given to the `register!` macro to restrict the use of the register to
+/// implementors of this particular variant.
+pub trait RegisterBase<T> {
+    /// Base address to which register offsets are added.
+    const BASE: usize;
+}
+
+/// Trait implemented by all registers that are relative to a base.
+pub trait WithBase {
+    /// Family of bases applicable to this register.
+    type BaseFamily;
+
+    /// Returns the absolute location of this type when using `B` as its base.
+    #[inline(always)]
+    fn of<B: RegisterBase<Self::BaseFamily>>() -> RelativeRegisterLoc<Self, B>
+    where
+        Self: Register,
+    {
+        RelativeRegisterLoc::new()
+    }
+}
+
+/// Trait implemented by relative registers.
+pub trait RelativeRegister: Register + WithBase {}
+
+/// Location of a relative register.
+///
+/// This can either be an immediately accessible regular [`RelativeRegister`], or a
+/// [`RelativeRegisterArray`] that needs one additional resolution through
+/// [`RelativeRegisterLoc::at`].
+pub struct RelativeRegisterLoc<T: WithBase, B: ?Sized>(PhantomData<T>, PhantomData<B>);
+
+impl<T, B> RelativeRegisterLoc<T, B>
+where
+    T: Register + WithBase,
+    B: RegisterBase<T::BaseFamily> + ?Sized,
+{
+    /// Returns the location of a relative register or register array.
+    #[inline(always)]
+    // We do not implement `Default` so we can be const.
+    #[expect(clippy::new_without_default)]
+    pub const fn new() -> Self {
+        Self(PhantomData, PhantomData)
+    }
+
+    // Returns the absolute offset of the relative register using base `B`.
+    //
+    // This is implemented as a private const method so it can be reused by the [`IoLoc`]
+    // implementations of both [`RelativeRegisterLoc`] and [`RelativeRegisterArrayLoc`].
+    #[inline]
+    const fn offset(self) -> usize {
+        B::BASE + T::OFFSET
+    }
+}
+
+impl<T, B> IoLoc<T> for RelativeRegisterLoc<T, B>
+where
+    T: RelativeRegister,
+    B: RegisterBase<T::BaseFamily> + ?Sized,
+{
+    type IoType = T::Storage;
+
+    #[inline(always)]
+    fn offset(self) -> usize {
+        RelativeRegisterLoc::offset(self)
+    }
+}
+
+/// Trait implemented by arrays of registers.
+pub trait RegisterArray: Register {
+    /// Number of elements in the registers array.
+    const SIZE: usize;
+    /// Number of bytes between the start of elements in the registers array.
+    const STRIDE: usize;
+}
+
+/// Location of an array register.
+pub struct RegisterArrayLoc<T: RegisterArray>(usize, PhantomData<T>);
+
+impl<T: RegisterArray> RegisterArrayLoc<T> {
+    /// Returns the location of register `T` at position `idx`, with build-time validation.
+    #[inline(always)]
+    pub fn new(idx: usize) -> Self {
+        build_assert!(idx < T::SIZE);
+
+        Self(idx, PhantomData)
+    }
+
+    /// Attempts to return the location of register `T` at position `idx`, with runtime validation.
+    #[inline(always)]
+    pub fn try_new(idx: usize) -> Option<Self> {
+        if idx < T::SIZE {
+            Some(Self(idx, PhantomData))
+        } else {
+            None
+        }
+    }
+}
+
+impl<T> IoLoc<T> for RegisterArrayLoc<T>
+where
+    T: RegisterArray,
+{
+    type IoType = T::Storage;
+
+    #[inline(always)]
+    fn offset(self) -> usize {
+        T::OFFSET + self.0 * T::STRIDE
+    }
+}
+
+/// Trait providing location builders for [`RegisterArray`]s.
+pub trait Array {
+    /// Returns the location of the register at position `idx`, with build-time validation.
+    #[inline(always)]
+    fn at(idx: usize) -> RegisterArrayLoc<Self>
+    where
+        Self: RegisterArray,
+    {
+        RegisterArrayLoc::new(idx)
+    }
+
+    /// Returns the location of the register at position `idx`, with runtime validation.
+    #[inline(always)]
+    fn try_at(idx: usize) -> Option<RegisterArrayLoc<Self>>
+    where
+        Self: RegisterArray,
+    {
+        RegisterArrayLoc::try_new(idx)
+    }
+}
+
+/// Trait implemented by arrays of relative registers.
+pub trait RelativeRegisterArray: RegisterArray + WithBase {}
+
+/// Location of a relative array register.
+pub struct RelativeRegisterArrayLoc<
+    T: RelativeRegisterArray,
+    B: RegisterBase<T::BaseFamily> + ?Sized,
+>(RelativeRegisterLoc<T, B>, usize);
+
+impl<T, B> RelativeRegisterArrayLoc<T, B>
+where
+    T: RelativeRegisterArray,
+    B: RegisterBase<T::BaseFamily> + ?Sized,
+{
+    /// Returns the location of register `T` from the base `B` at index `idx`, with build-time
+    /// validation.
+    #[inline(always)]
+    pub fn new(idx: usize) -> Self {
+        build_assert!(idx < T::SIZE);
+
+        Self(RelativeRegisterLoc::new(), idx)
+    }
+
+    /// Attempts to return the location of register `T` from the base `B` at index `idx`, with
+    /// runtime validation.
+    #[inline(always)]
+    pub fn try_new(idx: usize) -> Option<Self> {
+        if idx < T::SIZE {
+            Some(Self(RelativeRegisterLoc::new(), idx))
+        } else {
+            None
+        }
+    }
+}
+
+/// Methods exclusive to [`RelativeRegisterLoc`]s created with a [`RelativeRegisterArray`].
+impl<T, B> RelativeRegisterLoc<T, B>
+where
+    T: RelativeRegisterArray,
+    B: RegisterBase<T::BaseFamily> + ?Sized,
+{
+    /// Returns the location of the register at position `idx`, with build-time validation.
+    #[inline(always)]
+    pub fn at(self, idx: usize) -> RelativeRegisterArrayLoc<T, B> {
+        RelativeRegisterArrayLoc::new(idx)
+    }
+
+    /// Returns the location of the register at position `idx`, with runtime validation.
+    #[inline(always)]
+    pub fn try_at(self, idx: usize) -> Option<RelativeRegisterArrayLoc<T, B>> {
+        RelativeRegisterArrayLoc::try_new(idx)
+    }
+}
+
+impl<T, B> IoLoc<T> for RelativeRegisterArrayLoc<T, B>
+where
+    T: RelativeRegisterArray,
+    B: RegisterBase<T::BaseFamily> + ?Sized,
+{
+    type IoType = T::Storage;
+
+    #[inline(always)]
+    fn offset(self) -> usize {
+        self.0.offset() + self.1 * T::STRIDE
+    }
+}
+
+/// Trait implemented by items that contain both a register value and the absolute I/O location at
+/// which to write it.
+///
+/// Implementors can be used with [`Io::write_reg`](super::Io::write_reg).
+pub trait LocatedRegister {
+    /// Register value to write.
+    type Value: Register;
+    /// Full location information at which to write the value.
+    type Location: IoLoc<Self::Value>;
+
+    /// Consumes `self` and returns a `(location, value)` tuple describing a valid I/O write
+    /// operation.
+    fn into_io_op(self) -> (Self::Location, Self::Value);
+}
+
+impl<T> LocatedRegister for T
+where
+    T: FixedRegister,
+{
+    type Location = FixedRegisterLoc<Self::Value>;
+    type Value = T;
+
+    #[inline(always)]
+    fn into_io_op(self) -> (FixedRegisterLoc<T>, T) {
+        (FixedRegisterLoc::new(), self)
+    }
+}
+
+/// Defines a dedicated type for a register, including getter and setter methods for its fields and
+/// methods to read and write it from an [`Io`](kernel::io::Io) region.
+///
+/// This documentation focuses on how to declare registers. See the [module-level
+/// documentation](mod@kernel::io::register) for examples of how to access them.
+///
+/// There are 4 possible kinds of registers: fixed offset registers, relative registers, arrays of
+/// registers, and relative arrays of registers.
+///
+/// ## Fixed offset registers
+///
+/// These are the simplest kind of registers. Their location is simply an offset inside the I/O
+/// region. For instance:
+///
+/// ```ignore
+/// register! {
+///     pub FIXED_REG(u16) @ 0x80 {
+///         ...
+///     }
+/// }
+/// ```
+///
+/// This creates a 16-bit register named `FIXED_REG` located at offset `0x80` of an I/O region.
+///
+/// These registers' location can be built simply by referencing their name:
+///
+/// ```no_run
+/// use kernel::{
+///     io::{
+///         register,
+///         Io,
+///     },
+/// };
+/// # use kernel::io::Mmio;
+///
+/// register! {
+///     FIXED_REG(u32) @ 0x100 {
+///         16:8 high_byte;
+///         7:0  low_byte;
+///     }
+/// }
+///
+/// # fn test(io: &Mmio<0x1000>) {
+/// let val = io.read(FIXED_REG);
+///
+/// // Write from an already-existing value.
+/// io.write(FIXED_REG, val.with_low_byte(0xff));
+///
+/// // Create a register value from scratch.
+/// let val2 = FIXED_REG::zeroed().with_high_byte(0x80);
+///
+/// // The location of fixed offset registers is already contained in their type. Thus, the
+/// // `location` argument of `Io::write` is technically redundant and can be replaced by `()`.
+/// io.write((), val2);
+///
+/// // Or, the single-argument `Io::write_reg` can be used.
+/// io.write_reg(val2);
+/// # }
+///
+/// ```
+///
+/// It is possible to create an alias of an existing register with new field definitions by using
+/// the `=> ALIAS` syntax. This is useful for cases where a register's interpretation depends on
+/// the context:
+///
+/// ```no_run
+/// use kernel::io::register;
+///
+/// register! {
+///     /// Scratch register.
+///     pub SCRATCH(u32) @ 0x00000200 {
+///         31:0 value;
+///     }
+///
+///     /// Boot status of the firmware.
+///     pub SCRATCH_BOOT_STATUS(u32) => SCRATCH {
+///         0:0 completed;
+///     }
+/// }
+/// ```
+///
+/// In this example, `SCRATCH_BOOT_STATUS` uses the same I/O address as `SCRATCH`, while providing
+/// its own `completed` field.
+///
+/// ## Relative registers
+///
+/// Relative registers can be instantiated several times at a relative offset of a group of bases.
+/// For instance, imagine the following I/O space:
+///
+/// ```text
+///           +-----------------------------+
+///           |             ...             |
+///           |                             |
+///  0x100--->+------------CPU0-------------+
+///           |                             |
+///  0x110--->+-----------------------------+
+///           |           CPU_CTL           |
+///           +-----------------------------+
+///           |             ...             |
+///           |                             |
+///           |                             |
+///  0x200--->+------------CPU1-------------+
+///           |                             |
+///  0x210--->+-----------------------------+
+///           |           CPU_CTL           |
+///           +-----------------------------+
+///           |             ...             |
+///           +-----------------------------+
+/// ```
+///
+/// `CPU0` and `CPU1` both have a `CPU_CTL` register that starts at offset `0x10` of their I/O
+/// space segment. Since both instances of `CPU_CTL` share the same layout, we don't want to define
+/// them twice and would prefer a way to select which one to use from a single definition.
+///
+/// This can be done using the `Base + Offset` syntax when specifying the register's address:
+///
+/// ```ignore
+/// register! {
+///     pub RELATIVE_REG(u32) @ Base + 0x80 {
+///         ...
+///     }
+/// }
+/// ```
+///
+/// This creates a register with an offset of `0x80` from a given base.
+///
+/// `Base` is an arbitrary type (typically a ZST) to be used as a generic parameter of the
+/// [`RegisterBase`] trait to provide the base as a constant, i.e. each type providing a base for
+/// this register needs to implement `RegisterBase<Base>`.
+///
+/// The location of relative registers can be built using the [`WithBase::of`] method to specify
+/// its base. All relative registers implement [`WithBase`].
+///
+/// Here is the above layout translated into code:
+///
+/// ```no_run
+/// use kernel::{
+///     io::{
+///         register,
+///         register::{
+///             RegisterBase,
+///             WithBase,
+///         },
+///         Io,
+///     },
+/// };
+/// # use kernel::io::Mmio;
+///
+/// // Type used to identify the base.
+/// pub struct CpuCtlBase;
+///
+/// // ZST describing `CPU0`.
+/// struct Cpu0;
+/// impl RegisterBase<CpuCtlBase> for Cpu0 {
+///     const BASE: usize = 0x100;
+/// }
+///
+/// // ZST describing `CPU1`.
+/// struct Cpu1;
+/// impl RegisterBase<CpuCtlBase> for Cpu1 {
+///     const BASE: usize = 0x200;
+/// }
+///
+/// // This makes `CPU_CTL` accessible from all implementors of `RegisterBase<CpuCtlBase>`.
+/// register! {
+///     /// CPU core control.
+///     pub CPU_CTL(u32) @ CpuCtlBase + 0x10 {
+///         0:0 start;
+///     }
+/// }
+///
+/// # fn test(io: Mmio<0x1000>) {
+/// // Read the status of `Cpu0`.
+/// let cpu0_started = io.read(CPU_CTL::of::<Cpu0>());
+///
+/// // Stop `Cpu0`.
+/// io.write(WithBase::of::<Cpu0>(), CPU_CTL::zeroed());
+/// # }
+///
+/// // Aliases can also be defined for relative register.
+/// register! {
+///     /// Alias to CPU core control.
+///     pub CPU_CTL_ALIAS(u32) => CpuCtlBase + CPU_CTL {
+///         /// Start the aliased CPU core.
+///         1:1 alias_start;
+///     }
+/// }
+///
+/// # fn test2(io: Mmio<0x1000>) {
+/// // Start the aliased `CPU0`, leaving its other fields untouched.
+/// io.update(CPU_CTL_ALIAS::of::<Cpu0>(), |r| r.with_alias_start(true));
+/// # }
+/// ```
+///
+/// ## Arrays of registers
+///
+/// Some I/O areas contain consecutive registers that share the same field layout. These areas can
+/// be defined as an array of identical registers, allowing them to be accessed by index with
+/// compile-time or runtime bound checking:
+///
+/// ```ignore
+/// register! {
+///     pub REGISTER_ARRAY(u8)[10, stride = 4] @ 0x100 {
+///         ...
+///     }
+/// }
+/// ```
+///
+/// This defines `REGISTER_ARRAY`, an array of 10 byte registers starting at offset `0x100`. Each
+/// register is separated from its neighbor by 4 bytes.
+///
+/// The `stride` parameter is optional; if unspecified, the registers are placed consecutively from
+/// each other.
+///
+/// A location for a register in a register array is built using the [`Array::at`] trait method.
+/// All arrays of registers implement [`Array`].
+///
+/// ```no_run
+/// use kernel::{
+///     io::{
+///         register,
+///         register::Array,
+///         Io,
+///     },
+/// };
+/// # use kernel::io::Mmio;
+/// # fn get_scratch_idx() -> usize {
+/// #   0x15
+/// # }
+///
+/// // Array of 64 consecutive registers with the same layout starting at offset `0x80`.
+/// register! {
+///     /// Scratch registers.
+///     pub SCRATCH(u32)[64] @ 0x00000080 {
+///         31:0 value;
+///     }
+/// }
+///
+/// # fn test(io: &Mmio<0x1000>)
+/// #     -> Result<(), Error>{
+/// // Read scratch register 0, i.e. I/O address `0x80`.
+/// let scratch_0 = io.read(SCRATCH::at(0)).value();
+///
+/// // Write scratch register 15, i.e. I/O address `0x80 + (15 * 4)`.
+/// io.write(Array::at(15), SCRATCH::from(0xffeeaabb));
+///
+/// // This is out of bounds and won't build.
+/// // let scratch_128 = io.read(SCRATCH::at(128)).value();
+///
+/// // Runtime-obtained array index.
+/// let idx = get_scratch_idx();
+/// // Access on a runtime index returns an error if it is out-of-bounds.
+/// let some_scratch = io.read(SCRATCH::try_at(idx).ok_or(EINVAL)?).value();
+///
+/// // Alias to a specific register in an array.
+/// // Here `SCRATCH[8]` is used to convey the firmware exit code.
+/// register! {
+///     /// Firmware exit status code.
+///     pub FIRMWARE_STATUS(u32) => SCRATCH[8] {
+///         7:0 status;
+///     }
+/// }
+///
+/// let status = io.read(FIRMWARE_STATUS).status();
+///
+/// // Non-contiguous register arrays can be defined by adding a stride parameter.
+/// // Here, each of the 16 registers of the array is separated by 8 bytes, meaning that the
+/// // registers of the two declarations below are interleaved.
+/// register! {
+///     /// Scratch registers bank 0.
+///     pub SCRATCH_INTERLEAVED_0(u32)[16, stride = 8] @ 0x000000c0 {
+///         31:0 value;
+///     }
+///
+///     /// Scratch registers bank 1.
+///     pub SCRATCH_INTERLEAVED_1(u32)[16, stride = 8] @ 0x000000c4 {
+///         31:0 value;
+///     }
+/// }
+/// # Ok(())
+/// # }
+/// ```
+///
+/// ## Relative arrays of registers
+///
+/// Combining the two features described in the sections above, arrays of registers accessible from
+/// a base can also be defined:
+///
+/// ```ignore
+/// register! {
+///     pub RELATIVE_REGISTER_ARRAY(u8)[10, stride = 4] @ Base + 0x100 {
+///         ...
+///     }
+/// }
+/// ```
+///
+/// Like relative registers, they implement the [`WithBase`] trait. However the return value of
+/// [`WithBase::of`] cannot be used directly as a location and must be further specified using the
+/// [`at`](RelativeRegisterLoc::at) method.
+///
+/// ```no_run
+/// use kernel::{
+///     io::{
+///         register,
+///         register::{
+///             RegisterBase,
+///             WithBase,
+///         },
+///         Io,
+///     },
+/// };
+/// # use kernel::io::Mmio;
+/// # fn get_scratch_idx() -> usize {
+/// #   0x15
+/// # }
+///
+/// // Type used as parameter of `RegisterBase` to specify the base.
+/// pub struct CpuCtlBase;
+///
+/// // ZST describing `CPU0`.
+/// struct Cpu0;
+/// impl RegisterBase<CpuCtlBase> for Cpu0 {
+///     const BASE: usize = 0x100;
+/// }
+///
+/// // ZST describing `CPU1`.
+/// struct Cpu1;
+/// impl RegisterBase<CpuCtlBase> for Cpu1 {
+///     const BASE: usize = 0x200;
+/// }
+///
+/// // 64 per-cpu scratch registers, arranged as a contiguous array.
+/// register! {
+///     /// Per-CPU scratch registers.
+///     pub CPU_SCRATCH(u32)[64] @ CpuCtlBase + 0x00000080 {
+///         31:0 value;
+///     }
+/// }
+///
+/// # fn test(io: &Mmio<0x1000>) -> Result<(), Error> {
+/// // Read scratch register 0 of CPU0.
+/// let scratch = io.read(CPU_SCRATCH::of::<Cpu0>().at(0));
+///
+/// // Write the retrieved value into scratch register 15 of CPU1.
+/// io.write(WithBase::of::<Cpu1>().at(15), scratch);
+///
+/// // This won't build.
+/// // let cpu0_scratch_128 = io.read(CPU_SCRATCH::of::<Cpu0>().at(128)).value();
+///
+/// // Runtime-obtained array index.
+/// let scratch_idx = get_scratch_idx();
+/// // Access on a runtime index returns an error if it is out-of-bounds.
+/// let cpu0_scratch = io.read(
+///     CPU_SCRATCH::of::<Cpu0>().try_at(scratch_idx).ok_or(EINVAL)?
+/// ).value();
+/// # Ok(())
+/// # }
+///
+/// // Alias to `SCRATCH[8]` used to convey the firmware exit code.
+/// register! {
+///     /// Per-CPU firmware exit status code.
+///     pub CPU_FIRMWARE_STATUS(u32) => CpuCtlBase + CPU_SCRATCH[8] {
+///         7:0 status;
+///     }
+/// }
+///
+/// // Non-contiguous relative register arrays can be defined by adding a stride parameter.
+/// // Here, each of the 16 registers of the array is separated by 8 bytes, meaning that the
+/// // registers of the two declarations below are interleaved.
+/// register! {
+///     /// Scratch registers bank 0.
+///     pub CPU_SCRATCH_INTERLEAVED_0(u32)[16, stride = 8] @ CpuCtlBase + 0x00000d00 {
+///         31:0 value;
+///     }
+///
+///     /// Scratch registers bank 1.
+///     pub CPU_SCRATCH_INTERLEAVED_1(u32)[16, stride = 8] @ CpuCtlBase + 0x00000d04 {
+///         31:0 value;
+///     }
+/// }
+///
+/// # fn test2(io: &Mmio<0x1000>) -> Result<(), Error> {
+/// let cpu0_status = io.read(CPU_FIRMWARE_STATUS::of::<Cpu0>()).status();
+/// # Ok(())
+/// # }
+/// ```
+#[macro_export]
+macro_rules! register {
+    // Entry point for the macro, allowing multiple registers to be defined in one call.
+    // It matches all possible register declaration patterns to dispatch them to corresponding
+    // `@reg` rule that defines a single register.
+    (
+        $(
+            $(#[$attr:meta])* $vis:vis $name:ident ($storage:ty)
+                $([ $size:expr $(, stride = $stride:expr)? ])?
+                $(@ $($base:ident +)? $offset:literal)?
+                $(=> $alias:ident $(+ $alias_offset:ident)? $([$alias_idx:expr])? )?
+            { $($fields:tt)* }
+        )*
+    ) => {
+        $(
+        $crate::register!(
+            @reg $(#[$attr])* $vis $name ($storage) $([$size $(, stride = $stride)?])?
+                $(@ $($base +)? $offset)?
+                $(=> $alias $(+ $alias_offset)? $([$alias_idx])? )?
+            { $($fields)* }
+        );
+        )*
+    };
+
+    // All the rules below are private helpers.
+
+    // Creates a register at a fixed offset of the MMIO space.
+    (
+        @reg $(#[$attr:meta])* $vis:vis $name:ident ($storage:ty) @ $offset:literal
+            { $($fields:tt)* }
+    ) => {
+        $crate::register!(@bitfield $(#[$attr])* $vis struct $name($storage) { $($fields)* });
+        $crate::register!(@io_base $name($storage) @ $offset);
+        $crate::register!(@io_fixed $(#[$attr])* $vis $name($storage));
+    };
+
+    // Creates an alias register of fixed offset register `alias` with its own fields.
+    (
+        @reg $(#[$attr:meta])* $vis:vis $name:ident ($storage:ty) => $alias:ident
+            { $($fields:tt)* }
+    ) => {
+        $crate::register!(@bitfield $(#[$attr])* $vis struct $name($storage) { $($fields)* });
+        $crate::register!(
+            @io_base $name($storage) @
+            <$alias as $crate::io::register::Register>::OFFSET
+        );
+        $crate::register!(@io_fixed $(#[$attr])* $vis $name($storage));
+    };
+
+    // Creates a register at a relative offset from a base address provider.
+    (
+        @reg $(#[$attr:meta])* $vis:vis $name:ident ($storage:ty) @ $base:ident + $offset:literal
+            { $($fields:tt)* }
+    ) => {
+        $crate::register!(@bitfield $(#[$attr])* $vis struct $name($storage) { $($fields)* });
+        $crate::register!(@io_base $name($storage) @ $offset);
+        $crate::register!(@io_relative $vis $name($storage) @ $base);
+    };
+
+    // Creates an alias register of relative offset register `alias` with its own fields.
+    (
+        @reg $(#[$attr:meta])* $vis:vis $name:ident ($storage:ty) => $base:ident + $alias:ident
+            { $($fields:tt)* }
+    ) => {
+        $crate::register!(@bitfield $(#[$attr])* $vis struct $name($storage) { $($fields)* });
+        $crate::register!(
+            @io_base $name($storage) @ <$alias as $crate::io::register::Register>::OFFSET
+        );
+        $crate::register!(@io_relative $vis $name($storage) @ $base);
+    };
+
+    // Creates an array of registers at a fixed offset of the MMIO space.
+    (
+        @reg $(#[$attr:meta])* $vis:vis $name:ident ($storage:ty)
+            [ $size:expr, stride = $stride:expr ] @ $offset:literal { $($fields:tt)* }
+    ) => {
+        ::kernel::static_assert!(::core::mem::size_of::<$storage>() <= $stride);
+
+        $crate::register!(@bitfield $(#[$attr])* $vis struct $name($storage) { $($fields)* });
+        $crate::register!(@io_base $name($storage) @ $offset);
+        $crate::register!(@io_array $vis $name($storage) [ $size, stride = $stride ]);
+    };
+
+    // Shortcut for contiguous array of registers (stride == size of element).
+    (
+        @reg $(#[$attr:meta])* $vis:vis $name:ident ($storage:ty) [ $size:expr ] @ $offset:literal
+            { $($fields:tt)* }
+    ) => {
+        $crate::register!(
+            $(#[$attr])* $vis $name($storage) [ $size, stride = ::core::mem::size_of::<$storage>() ]
+                @ $offset { $($fields)* }
+        );
+    };
+
+    // Creates an alias of register `idx` of array of registers `alias` with its own fields.
+    (
+        @reg $(#[$attr:meta])* $vis:vis $name:ident ($storage:ty) => $alias:ident [ $idx:expr ]
+            { $($fields:tt)* }
+    ) => {
+        ::kernel::static_assert!($idx < <$alias as $crate::io::register::RegisterArray>::SIZE);
+
+        $crate::register!(@bitfield $(#[$attr])* $vis struct $name($storage) { $($fields)* });
+        $crate::register!(
+            @io_base $name($storage) @
+            <$alias as $crate::io::register::Register>::OFFSET
+                + $idx * <$alias as $crate::io::register::RegisterArray>::STRIDE
+        );
+        $crate::register!(@io_fixed $(#[$attr])* $vis $name($storage));
+    };
+
+    // Creates an array of registers at a relative offset from a base address provider.
+    (
+        @reg $(#[$attr:meta])* $vis:vis $name:ident ($storage:ty)
+            [ $size:expr, stride = $stride:expr ]
+            @ $base:ident + $offset:literal { $($fields:tt)* }
+    ) => {
+        ::kernel::static_assert!(::core::mem::size_of::<$storage>() <= $stride);
+
+        $crate::register!(@bitfield $(#[$attr])* $vis struct $name($storage) { $($fields)* });
+        $crate::register!(@io_base $name($storage) @ $offset);
+        $crate::register!(
+            @io_relative_array $vis $name($storage) [ $size, stride = $stride ] @ $base + $offset
+        );
+    };
+
+    // Shortcut for contiguous array of relative registers (stride == size of element).
+    (
+        @reg $(#[$attr:meta])* $vis:vis $name:ident ($storage:ty) [ $size:expr ]
+            @ $base:ident + $offset:literal { $($fields:tt)* }
+    ) => {
+        $crate::register!(
+            $(#[$attr])* $vis $name($storage) [ $size, stride = ::core::mem::size_of::<$storage>() ]
+                @ $base + $offset { $($fields)* }
+        );
+    };
+
+    // Creates an alias of register `idx` of relative array of registers `alias` with its own
+    // fields.
+    (
+        @reg $(#[$attr:meta])* $vis:vis $name:ident ($storage:ty)
+            => $base:ident + $alias:ident [ $idx:expr ] { $($fields:tt)* }
+    ) => {
+        ::kernel::static_assert!($idx < <$alias as $crate::io::register::RegisterArray>::SIZE);
+
+        $crate::register!(@bitfield $(#[$attr])* $vis struct $name($storage) { $($fields)* });
+        $crate::register!(
+            @io_base $name($storage) @
+                <$alias as $crate::io::register::Register>::OFFSET +
+                $idx * <$alias as $crate::io::register::RegisterArray>::STRIDE
+        );
+        $crate::register!(@io_relative $vis $name($storage) @ $base);
+    };
+
+    // Generates the bitfield for the register.
+    //
+    // `#[allow(non_camel_case_types)]` is added since register names typically use
+    // `SCREAMING_CASE`.
+    (
+        @bitfield $(#[$attr:meta])* $vis:vis struct $name:ident($storage:ty) { $($fields:tt)* }
+    ) => {
+        $crate::register!(@bitfield_core
+            #[allow(non_camel_case_types)]
+            $(#[$attr])* $vis $name $storage
+        );
+        $crate::register!(@bitfield_fields $vis $name $storage { $($fields)* });
+    };
+
+    // Implementations shared by all registers types.
+    (@io_base $name:ident($storage:ty) @ $offset:expr) => {
+        impl $crate::io::register::Register for $name {
+            type Storage = $storage;
+
+            const OFFSET: usize = $offset;
+        }
+    };
+
+    // Implementations of fixed registers.
+    (@io_fixed $(#[$attr:meta])* $vis:vis $name:ident ($storage:ty)) => {
+        impl $crate::io::register::FixedRegister for $name {}
+
+        $(#[$attr])*
+        $vis const $name: $crate::io::register::FixedRegisterLoc<$name> =
+            $crate::io::register::FixedRegisterLoc::<$name>::new();
+    };
+
+    // Implementations of relative registers.
+    (@io_relative $vis:vis $name:ident ($storage:ty) @ $base:ident) => {
+        impl $crate::io::register::WithBase for $name {
+            type BaseFamily = $base;
+        }
+
+        impl $crate::io::register::RelativeRegister for $name {}
+    };
+
+    // Implementations of register arrays.
+    (@io_array $vis:vis $name:ident ($storage:ty) [ $size:expr, stride = $stride:expr ]) => {
+        impl $crate::io::register::Array for $name {}
+
+        impl $crate::io::register::RegisterArray for $name {
+            const SIZE: usize = $size;
+            const STRIDE: usize = $stride;
+        }
+    };
+
+    // Implementations of relative array registers.
+    (
+        @io_relative_array $vis:vis $name:ident ($storage:ty) [ $size:expr, stride = $stride:expr ]
+            @ $base:ident + $offset:literal
+    ) => {
+        impl $crate::io::register::WithBase for $name {
+            type BaseFamily = $base;
+        }
+
+        impl $crate::io::register::RegisterArray for $name {
+            const SIZE: usize = $size;
+            const STRIDE: usize = $stride;
+        }
+
+        impl $crate::io::register::RelativeRegisterArray for $name {}
+    };
+
+    // Defines the wrapper `$name` type and its conversions from/to the storage type.
+    (@bitfield_core $(#[$attr:meta])* $vis:vis $name:ident $storage:ty) => {
+        $(#[$attr])*
+        #[repr(transparent)]
+        #[derive(Clone, Copy, PartialEq, Eq)]
+        $vis struct $name {
+            inner: $storage,
+        }
+
+        #[allow(dead_code)]
+        impl $name {
+            /// Creates a bitfield from a raw value.
+            #[inline(always)]
+            $vis const fn from_raw(value: $storage) -> Self {
+                Self{ inner: value }
+            }
+
+            /// Turns this bitfield into its raw value.
+            ///
+            /// This is similar to the [`From`] implementation, but is shorter to invoke in
+            /// most cases.
+            #[inline(always)]
+            $vis const fn into_raw(self) -> $storage {
+                self.inner
+            }
+        }
+
+        // SAFETY: `$storage` is `Zeroable` and `$name` is transparent.
+        unsafe impl ::pin_init::Zeroable for $name {}
+
+        impl ::core::convert::From<$name> for $storage {
+            #[inline(always)]
+            fn from(val: $name) -> $storage {
+                val.into_raw()
+            }
+        }
+
+        impl ::core::convert::From<$storage> for $name {
+            #[inline(always)]
+            fn from(val: $storage) -> $name {
+                Self::from_raw(val)
+            }
+        }
+    };
+
+    // Definitions requiring knowledge of individual fields: private and public field accessors,
+    // and `Debug` implementation.
+    (@bitfield_fields $vis:vis $name:ident $storage:ty {
+        $($(#[doc = $doc:expr])* $hi:literal:$lo:literal $field:ident
+            $(?=> $try_into_type:ty)?
+            $(=> $into_type:ty)?
+        ;
+        )*
+    }
+    ) => {
+        #[allow(dead_code)]
+        impl $name {
+        $(
+        $crate::register!(@private_field_accessors $vis $name $storage : $hi:$lo $field);
+        $crate::register!(
+            @public_field_accessors $(#[doc = $doc])* $vis $name $storage : $hi:$lo $field
+            $(?=> $try_into_type)?
+            $(=> $into_type)?
+        );
+        )*
+        }
+
+        $crate::register!(@debug $name { $($field;)* });
+    };
+
+    // Private field accessors working with the exact `Bounded` type for the field.
+    (
+        @private_field_accessors $vis:vis $name:ident $storage:ty : $hi:tt:$lo:tt $field:ident
+    ) => {
+        ::kernel::macros::paste!(
+        $vis const [<$field:upper _RANGE>]: ::core::ops::RangeInclusive<u8> = $lo..=$hi;
+        $vis const [<$field:upper _MASK>]: $storage =
+            ((((1 << $hi) - 1) << 1) + 1) - ((1 << $lo) - 1);
+        $vis const [<$field:upper _SHIFT>]: u32 = $lo;
+        );
+
+        ::kernel::macros::paste!(
+        fn [<__ $field>](self) ->
+            ::kernel::num::Bounded<$storage, { $hi + 1 - $lo }> {
+            // Left shift to align the field's MSB with the storage MSB.
+            const ALIGN_TOP: u32 = $storage::BITS - ($hi + 1);
+            // Right shift to move the top-aligned field to bit 0 of the storage.
+            const ALIGN_BOTTOM: u32 = ALIGN_TOP + $lo;
+
+            // Extract the field using two shifts. `Bounded::shr` produces the correctly-sized
+            // output type.
+            let val = ::kernel::num::Bounded::<$storage, { $storage::BITS }>::from(
+                self.inner << ALIGN_TOP
+            );
+            val.shr::<ALIGN_BOTTOM, { $hi + 1 - $lo } >()
+        }
+
+        const fn [<__with_ $field>](
+            mut self,
+            value: ::kernel::num::Bounded<$storage, { $hi + 1 - $lo }>,
+        ) -> Self
+        {
+            const MASK: $storage = <$name>::[<$field:upper _MASK>];
+            const SHIFT: u32 = <$name>::[<$field:upper _SHIFT>];
+
+            let value = value.get() << SHIFT;
+            self.inner = (self.inner & !MASK) | value;
+
+            self
+        }
+        );
+    };
+
+    // Public accessors for fields infallibly (`=>`) converted to a type.
+    (
+        @public_field_accessors $(#[doc = $doc:expr])* $vis:vis $name:ident $storage:ty :
+            $hi:literal:$lo:literal $field:ident => $into_type:ty
+    ) => {
+        ::kernel::macros::paste!(
+
+        $(#[doc = $doc])*
+        #[doc = "Returns the value of this field."]
+        #[inline(always)]
+        $vis fn $field(self) -> $into_type
+        {
+            self.[<__ $field>]().into()
+        }
+
+        $(#[doc = $doc])*
+        #[doc = "Sets this field to the given `value`."]
+        #[inline(always)]
+        $vis fn [<with_ $field>](self, value: $into_type) -> Self
+        {
+            self.[<__with_ $field>](value.into())
+        }
+
+        );
+    };
+
+    // Public accessors for fields fallibly (`?=>`) converted to a type.
+    (
+        @public_field_accessors $(#[doc = $doc:expr])* $vis:vis $name:ident $storage:ty :
+            $hi:tt:$lo:tt $field:ident ?=> $try_into_type:ty
+    ) => {
+        ::kernel::macros::paste!(
+
+        $(#[doc = $doc])*
+        #[doc = "Returns the value of this field."]
+        #[inline(always)]
+        $vis fn $field(self) ->
+            Result<
+                $try_into_type,
+                <$try_into_type as ::core::convert::TryFrom<
+                    ::kernel::num::Bounded<$storage, { $hi + 1 - $lo }>
+                >>::Error
+            >
+        {
+            self.[<__ $field>]().try_into()
+        }
+
+        $(#[doc = $doc])*
+        #[doc = "Sets this field to the given `value`."]
+        #[inline(always)]
+        $vis fn [<with_ $field>](self, value: $try_into_type) -> Self
+        {
+            self.[<__with_ $field>](value.into())
+        }
+
+        );
+    };
+
+    // Public accessors for fields not converted to a type.
+    (
+        @public_field_accessors $(#[doc = $doc:expr])* $vis:vis $name:ident $storage:ty :
+            $hi:tt:$lo:tt $field:ident
+    ) => {
+        ::kernel::macros::paste!(
+
+        $(#[doc = $doc])*
+        #[doc = "Returns the value of this field."]
+        #[inline(always)]
+        $vis fn $field(self) ->
+            ::kernel::num::Bounded<$storage, { $hi + 1 - $lo }>
+        {
+            self.[<__ $field>]()
+        }
+
+        $(#[doc = $doc])*
+        #[doc = "Sets this field to the compile-time constant `VALUE`."]
+        #[inline(always)]
+        $vis const fn [<with_const_ $field>]<const VALUE: $storage>(self) -> Self {
+            self.[<__with_ $field>](
+                ::kernel::num::Bounded::<$storage, { $hi + 1 - $lo }>::new::<VALUE>()
+            )
+        }
+
+        $(#[doc = $doc])*
+        #[doc = "Sets this field to the given `value`."]
+        #[inline(always)]
+        $vis fn [<with_ $field>]<T>(
+            self,
+            value: T,
+        ) -> Self
+            where T: Into<::kernel::num::Bounded<$storage, { $hi + 1 - $lo }>>,
+        {
+            self.[<__with_ $field>](value.into())
+        }
+
+        $(#[doc = $doc])*
+        #[doc = "Tries to set this field to `value`, returning an error if it is out of range."]
+        #[inline(always)]
+        $vis fn [<try_with_ $field>]<T>(
+            self,
+            value: T,
+        ) -> ::kernel::error::Result<Self>
+            where T: ::kernel::num::TryIntoBounded<$storage, { $hi + 1 - $lo }>,
+        {
+            Ok(
+                self.[<__with_ $field>](
+                    value.try_into_bounded().ok_or(::kernel::error::code::EOVERFLOW)?
+                )
+            )
+        }
+
+        );
+    };
+
+    // `Debug` implementation.
+    (@debug $name:ident { $($field:ident;)* }) => {
+        impl ::kernel::fmt::Debug for $name {
+            fn fmt(&self, f: &mut ::kernel::fmt::Formatter<'_>) -> ::kernel::fmt::Result {
+                f.debug_struct(stringify!($name))
+                    .field("<raw>", &::kernel::prelude::fmt!("{:#x}", self.inner))
+                $(
+                    .field(stringify!($field), &self.$field())
+                )*
+                    .finish()
+            }
+        }
+    };
+}
diff --git a/rust/kernel/irq/request.rs b/rust/kernel/irq/request.rs
index 7a36f790593e..f425fe12f7c8 100644
--- a/rust/kernel/irq/request.rs
+++ b/rust/kernel/irq/request.rs
@@ -27,7 +27,7 @@ pub enum IrqReturn {
 }
 
 /// Callbacks for an IRQ handler.
-pub trait Handler: Sync {
+pub trait Handler: Sync + 'static {
     /// The hard IRQ handler.
     ///
     /// This is executed in interrupt context, hence all corresponding
@@ -45,7 +45,7 @@ impl<T: ?Sized + Handler + Send> Handler for Arc<T> {
     }
 }
 
-impl<T: ?Sized + Handler, A: Allocator> Handler for Box<T, A> {
+impl<T: ?Sized + Handler, A: Allocator + 'static> Handler for Box<T, A> {
     fn handle(&self, device: &Device<Bound>) -> IrqReturn {
         T::handle(self, device)
     }
@@ -181,7 +181,7 @@ impl<'a> IrqRequest<'a> {
 ///
 /// * We own an irq handler whose cookie is a pointer to `Self`.
 #[pin_data]
-pub struct Registration<T: Handler + 'static> {
+pub struct Registration<T: Handler> {
     #[pin]
     inner: Devres<RegistrationInner>,
 
@@ -194,7 +194,7 @@ pub struct Registration<T: Handler + 'static> {
     _pin: PhantomPinned,
 }
 
-impl<T: Handler + 'static> Registration<T> {
+impl<T: Handler> Registration<T> {
     /// Registers the IRQ handler with the system for the given IRQ number.
     pub fn new<'a>(
         request: IrqRequest<'a>,
@@ -260,10 +260,7 @@ impl<T: Handler + 'static> Registration<T> {
 /// # Safety
 ///
 /// This function should be only used as the callback in `request_irq`.
-unsafe extern "C" fn handle_irq_callback<T: Handler + 'static>(
-    _irq: i32,
-    ptr: *mut c_void,
-) -> c_uint {
+unsafe extern "C" fn handle_irq_callback<T: Handler>(_irq: i32, ptr: *mut c_void) -> c_uint {
     // SAFETY: `ptr` is a pointer to `Registration<T>` set in `Registration::new`
     let registration = unsafe { &*(ptr as *const Registration<T>) };
     // SAFETY: The irq callback is removed before the device is unbound, so the fact that the irq
@@ -287,7 +284,7 @@ pub enum ThreadedIrqReturn {
 }
 
 /// Callbacks for a threaded IRQ handler.
-pub trait ThreadedHandler: Sync {
+pub trait ThreadedHandler: Sync + 'static {
     /// The hard IRQ handler.
     ///
     /// This is executed in interrupt context, hence all corresponding
@@ -318,7 +315,7 @@ impl<T: ?Sized + ThreadedHandler + Send> ThreadedHandler for Arc<T> {
     }
 }
 
-impl<T: ?Sized + ThreadedHandler, A: Allocator> ThreadedHandler for Box<T, A> {
+impl<T: ?Sized + ThreadedHandler, A: Allocator + 'static> ThreadedHandler for Box<T, A> {
     fn handle(&self, device: &Device<Bound>) -> ThreadedIrqReturn {
         T::handle(self, device)
     }
@@ -401,7 +398,7 @@ impl<T: ?Sized + ThreadedHandler, A: Allocator> ThreadedHandler for Box<T, A> {
 ///
 /// * We own an irq handler whose cookie is a pointer to `Self`.
 #[pin_data]
-pub struct ThreadedRegistration<T: ThreadedHandler + 'static> {
+pub struct ThreadedRegistration<T: ThreadedHandler> {
     #[pin]
     inner: Devres<RegistrationInner>,
 
@@ -414,7 +411,7 @@ pub struct ThreadedRegistration<T: ThreadedHandler + 'static> {
     _pin: PhantomPinned,
 }
 
-impl<T: ThreadedHandler + 'static> ThreadedRegistration<T> {
+impl<T: ThreadedHandler> ThreadedRegistration<T> {
     /// Registers the IRQ handler with the system for the given IRQ number.
     pub fn new<'a>(
         request: IrqRequest<'a>,
@@ -481,7 +478,7 @@ impl<T: ThreadedHandler + 'static> ThreadedRegistration<T> {
 /// # Safety
 ///
 /// This function should be only used as the callback in `request_threaded_irq`.
-unsafe extern "C" fn handle_threaded_irq_callback<T: ThreadedHandler + 'static>(
+unsafe extern "C" fn handle_threaded_irq_callback<T: ThreadedHandler>(
     _irq: i32,
     ptr: *mut c_void,
 ) -> c_uint {
@@ -497,10 +494,7 @@ unsafe extern "C" fn handle_threaded_irq_callback<T: ThreadedHandler + 'static>(
 /// # Safety
 ///
 /// This function should be only used as the callback in `request_threaded_irq`.
-unsafe extern "C" fn thread_fn_callback<T: ThreadedHandler + 'static>(
-    _irq: i32,
-    ptr: *mut c_void,
-) -> c_uint {
+unsafe extern "C" fn thread_fn_callback<T: ThreadedHandler>(_irq: i32, ptr: *mut c_void) -> c_uint {
     // SAFETY: `ptr` is a pointer to `ThreadedRegistration<T>` set in `ThreadedRegistration::new`
     let registration = unsafe { &*(ptr as *const ThreadedRegistration<T>) };
     // SAFETY: The irq callback is removed before the device is unbound, so the fact that the irq
diff --git a/rust/kernel/kunit.rs b/rust/kernel/kunit.rs
index f93f24a60bdd..a1edf7491579 100644
--- a/rust/kernel/kunit.rs
+++ b/rust/kernel/kunit.rs
@@ -14,6 +14,10 @@ use crate::prelude::*;
 /// Public but hidden since it should only be used from KUnit generated code.
 #[doc(hidden)]
 pub fn err(args: fmt::Arguments<'_>) {
+    // `args` is unused if `CONFIG_PRINTK` is not set - this avoids a build-time warning.
+    #[cfg(not(CONFIG_PRINTK))]
+    let _ = args;
+
     // SAFETY: The format string is null-terminated and the `%pA` specifier matches the argument we
     // are passing.
     #[cfg(CONFIG_PRINTK)]
@@ -30,6 +34,10 @@ pub fn err(args: fmt::Arguments<'_>) {
 /// Public but hidden since it should only be used from KUnit generated code.
 #[doc(hidden)]
 pub fn info(args: fmt::Arguments<'_>) {
+    // `args` is unused if `CONFIG_PRINTK` is not set - this avoids a build-time warning.
+    #[cfg(not(CONFIG_PRINTK))]
+    let _ = args;
+
     // SAFETY: The format string is null-terminated and the `%pA` specifier matches the argument we
     // are passing.
     #[cfg(CONFIG_PRINTK)]
diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs
index 3da92f18f4ee..b72b2fbe046d 100644
--- a/rust/kernel/lib.rs
+++ b/rust/kernel/lib.rs
@@ -16,41 +16,17 @@
 // Please see https://github.com/Rust-for-Linux/linux/issues/2 for details on
 // the unstable features in use.
 //
-// Stable since Rust 1.79.0.
-#![feature(generic_nonzero)]
-#![feature(inline_const)]
-#![feature(pointer_is_aligned)]
-//
-// Stable since Rust 1.80.0.
-#![feature(slice_flatten)]
-//
-// Stable since Rust 1.81.0.
-#![feature(lint_reasons)]
-//
-// Stable since Rust 1.82.0.
-#![feature(raw_ref_op)]
-//
-// Stable since Rust 1.83.0.
-#![feature(const_maybe_uninit_as_mut_ptr)]
-#![feature(const_mut_refs)]
-#![feature(const_option)]
-#![feature(const_ptr_write)]
-#![feature(const_refs_to_cell)]
+// Stable since Rust 1.89.0.
+#![feature(generic_arg_infer)]
 //
 // Expected to become stable.
 #![feature(arbitrary_self_types)]
+#![feature(derive_coerce_pointee)]
 //
 // To be determined.
 #![feature(used_with_arg)]
 //
-// `feature(derive_coerce_pointee)` is expected to become stable. Before Rust
-// 1.84.0, it did not exist, so enable the predecessor features.
-#![cfg_attr(CONFIG_RUSTC_HAS_COERCE_POINTEE, feature(derive_coerce_pointee))]
-#![cfg_attr(not(CONFIG_RUSTC_HAS_COERCE_POINTEE), feature(coerce_unsized))]
-#![cfg_attr(not(CONFIG_RUSTC_HAS_COERCE_POINTEE), feature(dispatch_from_dyn))]
-#![cfg_attr(not(CONFIG_RUSTC_HAS_COERCE_POINTEE), feature(unsize))]
-//
-// `feature(file_with_nul)` is expected to become stable. Before Rust 1.89.0, it did not exist, so
+// `feature(file_with_nul)` is stable since Rust 1.92.0. Before Rust 1.89.0, it did not exist, so
 // enable it conditionally.
 #![cfg_attr(CONFIG_RUSTC_HAS_FILE_WITH_NUL, feature(file_with_nul))]
 
@@ -73,7 +49,6 @@ pub mod bits;
 #[cfg(CONFIG_BLOCK)]
 pub mod block;
 pub mod bug;
-#[doc(hidden)]
 pub mod build_assert;
 pub mod clk;
 #[cfg(CONFIG_CONFIGFS_FS)]
@@ -97,12 +72,15 @@ pub mod faux;
 pub mod firmware;
 pub mod fmt;
 pub mod fs;
+#[cfg(CONFIG_GPU_BUDDY = "y")]
+pub mod gpu;
 #[cfg(CONFIG_I2C = "y")]
 pub mod i2c;
 pub mod id_pool;
 #[doc(hidden)]
 pub mod impl_flags;
 pub mod init;
+pub mod interop;
 pub mod io;
 pub mod ioctl;
 pub mod iommu;
@@ -141,10 +119,8 @@ pub mod scatterlist;
 pub mod security;
 pub mod seq_file;
 pub mod sizes;
-pub mod slice;
 #[cfg(CONFIG_SOC_BUS)]
 pub mod soc;
-mod static_assert;
 #[doc(hidden)]
 pub mod std_vendor;
 pub mod str;
diff --git a/rust/kernel/list.rs b/rust/kernel/list.rs
index 8349ff32fc37..406e3a028c55 100644
--- a/rust/kernel/list.rs
+++ b/rust/kernel/list.rs
@@ -12,15 +12,31 @@ use core::ptr;
 use pin_init::PinInit;
 
 mod impl_list_item_mod;
+#[doc(inline)]
 pub use self::impl_list_item_mod::{
-    impl_has_list_links, impl_has_list_links_self_ptr, impl_list_item, HasListLinks, HasSelfPtr,
+    impl_has_list_links,
+    impl_has_list_links_self_ptr,
+    impl_list_item,
+    HasListLinks,
+    HasSelfPtr, //
 };
 
 mod arc;
-pub use self::arc::{impl_list_arc_safe, AtomicTracker, ListArc, ListArcSafe, TryNewListArc};
+#[doc(inline)]
+pub use self::arc::{
+    impl_list_arc_safe,
+    AtomicTracker,
+    ListArc,
+    ListArcSafe,
+    TryNewListArc, //
+};
 
 mod arc_field;
-pub use self::arc_field::{define_list_arc_field_getter, ListArcField};
+#[doc(inline)]
+pub use self::arc_field::{
+    define_list_arc_field_getter,
+    ListArcField, //
+};
 
 /// A linked list.
 ///
diff --git a/rust/kernel/list/arc.rs b/rust/kernel/list/arc.rs
index 2282f33913ee..209b1173c826 100644
--- a/rust/kernel/list/arc.rs
+++ b/rust/kernel/list/arc.rs
@@ -6,7 +6,7 @@
 
 use crate::alloc::{AllocError, Flags};
 use crate::prelude::*;
-use crate::sync::atomic::{ordering, Atomic};
+use crate::sync::atomic::{ordering, AtomicFlag};
 use crate::sync::{Arc, ArcBorrow, UniqueArc};
 use core::marker::PhantomPinned;
 use core::ops::Deref;
@@ -82,6 +82,7 @@ pub unsafe trait TryNewListArc<const ID: u64 = 0>: ListArcSafe<ID> {
 /// [`AtomicTracker`]. However, it is also possible to defer the tracking to another struct
 /// using also using this macro.
 #[macro_export]
+#[doc(hidden)]
 macro_rules! impl_list_arc_safe {
     (impl$({$($generics:tt)*})? ListArcSafe<$num:tt> for $t:ty { untracked; } $($rest:tt)*) => {
         impl$(<$($generics)*>)? $crate::list::ListArcSafe<$num> for $t {
@@ -159,7 +160,7 @@ pub use impl_list_arc_safe;
 ///
 /// [`List`]: crate::list::List
 #[repr(transparent)]
-#[cfg_attr(CONFIG_RUSTC_HAS_COERCE_POINTEE, derive(core::marker::CoercePointee))]
+#[derive(core::marker::CoercePointee)]
 pub struct ListArc<T, const ID: u64 = 0>
 where
     T: ListArcSafe<ID> + ?Sized,
@@ -442,26 +443,6 @@ where
     }
 }
 
-// This is to allow coercion from `ListArc<T>` to `ListArc<U>` if `T` can be converted to the
-// dynamically-sized type (DST) `U`.
-#[cfg(not(CONFIG_RUSTC_HAS_COERCE_POINTEE))]
-impl<T, U, const ID: u64> core::ops::CoerceUnsized<ListArc<U, ID>> for ListArc<T, ID>
-where
-    T: ListArcSafe<ID> + core::marker::Unsize<U> + ?Sized,
-    U: ListArcSafe<ID> + ?Sized,
-{
-}
-
-// This is to allow `ListArc<U>` to be dispatched on when `ListArc<T>` can be coerced into
-// `ListArc<U>`.
-#[cfg(not(CONFIG_RUSTC_HAS_COERCE_POINTEE))]
-impl<T, U, const ID: u64> core::ops::DispatchFromDyn<ListArc<U, ID>> for ListArc<T, ID>
-where
-    T: ListArcSafe<ID> + core::marker::Unsize<U> + ?Sized,
-    U: ListArcSafe<ID> + ?Sized,
-{
-}
-
 /// A utility for tracking whether a [`ListArc`] exists using an atomic.
 ///
 /// # Invariants
@@ -469,7 +450,7 @@ where
 /// If the boolean is `false`, then there is no [`ListArc`] for this value.
 #[repr(transparent)]
 pub struct AtomicTracker<const ID: u64 = 0> {
-    inner: Atomic<bool>,
+    inner: AtomicFlag,
     // This value needs to be pinned to justify the INVARIANT: comment in `AtomicTracker::new`.
     _pin: PhantomPinned,
 }
@@ -480,12 +461,12 @@ impl<const ID: u64> AtomicTracker<ID> {
         // INVARIANT: Pin-init initializers can't be used on an existing `Arc`, so this value will
         // not be constructed in an `Arc` that already has a `ListArc`.
         Self {
-            inner: Atomic::new(false),
+            inner: AtomicFlag::new(false),
             _pin: PhantomPinned,
         }
     }
 
-    fn project_inner(self: Pin<&mut Self>) -> &mut Atomic<bool> {
+    fn project_inner(self: Pin<&mut Self>) -> &mut AtomicFlag {
         // SAFETY: The `inner` field is not structurally pinned, so we may obtain a mutable
         // reference to it even if we only have a pinned reference to `self`.
         unsafe { &mut Pin::into_inner_unchecked(self).inner }
diff --git a/rust/kernel/list/arc_field.rs b/rust/kernel/list/arc_field.rs
index c4b9dd503982..2ad8aea55993 100644
--- a/rust/kernel/list/arc_field.rs
+++ b/rust/kernel/list/arc_field.rs
@@ -66,6 +66,7 @@ impl<T, const ID: u64> ListArcField<T, ID> {
 
 /// Defines getters for a [`ListArcField`].
 #[macro_export]
+#[doc(hidden)]
 macro_rules! define_list_arc_field_getter {
     ($pub:vis fn $name:ident(&self $(<$id:tt>)?) -> &$typ:ty { $field:ident }
      $($rest:tt)*
diff --git a/rust/kernel/list/impl_list_item_mod.rs b/rust/kernel/list/impl_list_item_mod.rs
index ee53d0387e63..5a3eac9f3cf0 100644
--- a/rust/kernel/list/impl_list_item_mod.rs
+++ b/rust/kernel/list/impl_list_item_mod.rs
@@ -29,6 +29,7 @@ pub unsafe trait HasListLinks<const ID: u64 = 0> {
 
 /// Implements the [`HasListLinks`] trait for the given type.
 #[macro_export]
+#[doc(hidden)]
 macro_rules! impl_has_list_links {
     ($(impl$({$($generics:tt)*})?
        HasListLinks$(<$id:tt>)?
@@ -74,6 +75,7 @@ where
 
 /// Implements the [`HasListLinks`] and [`HasSelfPtr`] traits for the given type.
 #[macro_export]
+#[doc(hidden)]
 macro_rules! impl_has_list_links_self_ptr {
     ($(impl$({$($generics:tt)*})?
        HasSelfPtr<$item_type:ty $(, $id:tt)?>
@@ -181,6 +183,7 @@ pub use impl_has_list_links_self_ptr;
 /// }
 /// ```
 #[macro_export]
+#[doc(hidden)]
 macro_rules! impl_list_item {
     (
         $(impl$({$($generics:tt)*})? ListItem<$num:tt> for $self:ty {
diff --git a/rust/kernel/mm/virt.rs b/rust/kernel/mm/virt.rs
index da21d65ccd20..63eb730b0b05 100644
--- a/rust/kernel/mm/virt.rs
+++ b/rust/kernel/mm/virt.rs
@@ -113,7 +113,7 @@ impl VmaRef {
     /// kernel goes further in freeing unused page tables, but for the purposes of this operation
     /// we must only assume that the leaf level is cleared.
     #[inline]
-    pub fn zap_page_range_single(&self, address: usize, size: usize) {
+    pub fn zap_vma_range(&self, address: usize, size: usize) {
         let (end, did_overflow) = address.overflowing_add(size);
         if did_overflow || address < self.start() || self.end() < end {
             // TODO: call WARN_ONCE once Rust version of it is added
@@ -123,9 +123,7 @@ impl VmaRef {
         // SAFETY: By the type invariants, the caller has read access to this VMA, which is
         // sufficient for this method call. This method has no requirements on the vma flags. The
         // address range is checked to be within the vma.
-        unsafe {
-            bindings::zap_page_range_single(self.as_ptr(), address, size, core::ptr::null_mut())
-        };
+        unsafe { bindings::zap_vma_range(self.as_ptr(), address, size) };
     }
 
     /// If the [`VM_MIXEDMAP`] flag is set, returns a [`VmaMixedMap`] to this VMA, otherwise
diff --git a/rust/kernel/num/bounded.rs b/rust/kernel/num/bounded.rs
index fa81acbdc8c2..f9f90d6ec482 100644
--- a/rust/kernel/num/bounded.rs
+++ b/rust/kernel/num/bounded.rs
@@ -255,9 +255,7 @@ macro_rules! impl_const_new {
             /// ```
             pub const fn new<const VALUE: $type>() -> Self {
                 // Statically assert that `VALUE` fits within the set number of bits.
-                const {
-                    assert!(fits_within!(VALUE, $type, N));
-                }
+                const_assert!(fits_within!(VALUE, $type, N));
 
                 // SAFETY: `fits_within` confirmed that `VALUE` can be represented within
                 // `N` bits.
@@ -287,12 +285,10 @@ where
     /// The caller must ensure that `value` can be represented within `N` bits.
     const unsafe fn __new(value: T) -> Self {
         // Enforce the type invariants.
-        const {
-            // `N` cannot be zero.
-            assert!(N != 0);
-            // The backing type is at least as large as `N` bits.
-            assert!(N <= T::BITS);
-        }
+        // `N` cannot be zero.
+        const_assert!(N != 0);
+        // The backing type is at least as large as `N` bits.
+        const_assert!(N <= T::BITS);
 
         // INVARIANT: The caller ensures `value` fits within `N` bits.
         Self(value)
@@ -379,6 +375,9 @@ where
 
     /// Returns the wrapped value as the backing type.
     ///
+    /// This is similar to the [`Deref`] implementation, but doesn't enforce the size invariant of
+    /// the [`Bounded`], which might produce slightly less optimal code.
+    ///
     /// # Examples
     ///
     /// ```
@@ -387,8 +386,8 @@ where
     /// let v = Bounded::<u32, 4>::new::<7>();
     /// assert_eq!(v.get(), 7u32);
     /// ```
-    pub fn get(self) -> T {
-        *self.deref()
+    pub const fn get(self) -> T {
+        self.0
     }
 
     /// Increases the number of bits usable for `self`.
@@ -406,12 +405,10 @@ where
     /// assert_eq!(larger_v, v);
     /// ```
     pub const fn extend<const M: u32>(self) -> Bounded<T, M> {
-        const {
-            assert!(
-                M >= N,
-                "Requested number of bits is less than the current representation."
-            );
-        }
+        const_assert!(
+            M >= N,
+            "Requested number of bits is less than the current representation."
+        );
 
         // SAFETY: The value did fit within `N` bits, so it will all the more fit within
         // the larger `M` bits.
@@ -473,6 +470,48 @@ where
         // `N` bits, and with the same signedness.
         unsafe { Bounded::__new(value) }
     }
+
+    /// Right-shifts `self` by `SHIFT` and returns the result as a `Bounded<_, RES>`, where `RES >=
+    /// N - SHIFT`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use kernel::num::Bounded;
+    ///
+    /// let v = Bounded::<u32, 16>::new::<0xff00>();
+    /// let v_shifted: Bounded::<u32, 8> = v.shr::<8, _>();
+    ///
+    /// assert_eq!(v_shifted.get(), 0xff);
+    /// ```
+    pub fn shr<const SHIFT: u32, const RES: u32>(self) -> Bounded<T, RES> {
+        const { assert!(RES + SHIFT >= N) }
+
+        // SAFETY: We shift the value right by `SHIFT`, reducing the number of bits needed to
+        // represent the shifted value by as much, and just asserted that `RES >= N - SHIFT`.
+        unsafe { Bounded::__new(self.0 >> SHIFT) }
+    }
+
+    /// Left-shifts `self` by `SHIFT` and returns the result as a `Bounded<_, RES>`, where `RES >=
+    /// N + SHIFT`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use kernel::num::Bounded;
+    ///
+    /// let v = Bounded::<u32, 8>::new::<0xff>();
+    /// let v_shifted: Bounded::<u32, 16> = v.shl::<8, _>();
+    ///
+    /// assert_eq!(v_shifted.get(), 0xff00);
+    /// ```
+    pub fn shl<const SHIFT: u32, const RES: u32>(self) -> Bounded<T, RES> {
+        const { assert!(RES >= N + SHIFT) }
+
+        // SAFETY: We shift the value left by `SHIFT`, augmenting the number of bits needed to
+        // represent the shifted value by as much, and just asserted that `RES >= N + SHIFT`.
+        unsafe { Bounded::__new(self.0 << SHIFT) }
+    }
 }
 
 impl<T, const N: u32> Deref for Bounded<T, N>
@@ -1059,3 +1098,24 @@ where
         unsafe { Self::__new(T::from(value)) }
     }
 }
+
+impl<T> Bounded<T, 1>
+where
+    T: Integer + Zeroable,
+{
+    /// Converts this [`Bounded`] into a [`bool`].
+    ///
+    /// This is a shorter way of writing `bool::from(self)`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use kernel::num::Bounded;
+    ///
+    /// assert_eq!(Bounded::<u8, 1>::new::<0>().into_bool(), false);
+    /// assert_eq!(Bounded::<u8, 1>::new::<1>().into_bool(), true);
+    /// ```
+    pub fn into_bool(self) -> bool {
+        self.into()
+    }
+}
diff --git a/rust/kernel/pci/io.rs b/rust/kernel/pci/io.rs
index 6ca4cf75594c..ae78676c927f 100644
--- a/rust/kernel/pci/io.rs
+++ b/rust/kernel/pci/io.rs
@@ -8,8 +8,6 @@ use crate::{
     device,
     devres::Devres,
     io::{
-        define_read,
-        define_write,
         Io,
         IoCapable,
         IoKnownSize,
@@ -85,67 +83,41 @@ pub struct ConfigSpace<'a, S: ConfigSpaceKind = Extended> {
     _marker: PhantomData<S>,
 }
 
-/// Internal helper macros used to invoke C PCI configuration space read functions.
-///
-/// This macro is intended to be used by higher-level PCI configuration space access macros
-/// (define_read) and provides a unified expansion for infallible vs. fallible read semantics. It
-/// emits a direct call into the corresponding C helper and performs the required cast to the Rust
-/// return type.
-///
-/// # Parameters
-///
-/// * `$c_fn` – The C function performing the PCI configuration space write.
-/// * `$self` – The I/O backend object.
-/// * `$ty` – The type of the value to read.
-/// * `$addr` – The PCI configuration space offset to read.
-///
-/// This macro does not perform any validation; all invariants must be upheld by the higher-level
-/// abstraction invoking it.
-macro_rules! call_config_read {
-    (infallible, $c_fn:ident, $self:ident, $ty:ty, $addr:expr) => {{
-        let mut val: $ty = 0;
-        // SAFETY: By the type invariant `$self.pdev` is a valid address.
-        // CAST: The offset is cast to `i32` because the C functions expect a 32-bit signed offset
-        // parameter. PCI configuration space size is at most 4096 bytes, so the value always fits
-        // within `i32` without truncation or sign change.
-        // Return value from C function is ignored in infallible accessors.
-        let _ret = unsafe { bindings::$c_fn($self.pdev.as_raw(), $addr as i32, &mut val) };
-        val
-    }};
-}
+/// Implements [`IoCapable`] on [`ConfigSpace`] for `$ty` using `$read_fn` and `$write_fn`.
+macro_rules! impl_config_space_io_capable {
+    ($ty:ty, $read_fn:ident, $write_fn:ident) => {
+        impl<'a, S: ConfigSpaceKind> IoCapable<$ty> for ConfigSpace<'a, S> {
+            unsafe fn io_read(&self, address: usize) -> $ty {
+                let mut val: $ty = 0;
+
+                // Return value from C function is ignored in infallible accessors.
+                let _ret =
+                    // SAFETY: By the type invariant `self.pdev` is a valid address.
+                    // CAST: The offset is cast to `i32` because the C functions expect a 32-bit
+                    // signed offset parameter. PCI configuration space size is at most 4096 bytes,
+                    // so the value always fits within `i32` without truncation or sign change.
+                    unsafe { bindings::$read_fn(self.pdev.as_raw(), address as i32, &mut val) };
+
+                val
+            }
 
-/// Internal helper macros used to invoke C PCI configuration space write functions.
-///
-/// This macro is intended to be used by higher-level PCI configuration space access macros
-/// (define_write) and provides a unified expansion for infallible vs. fallible read semantics. It
-/// emits a direct call into the corresponding C helper and performs the required cast to the Rust
-/// return type.
-///
-/// # Parameters
-///
-/// * `$c_fn` – The C function performing the PCI configuration space write.
-/// * `$self` – The I/O backend object.
-/// * `$ty` – The type of the written value.
-/// * `$addr` – The configuration space offset to write.
-/// * `$value` – The value to write.
-///
-/// This macro does not perform any validation; all invariants must be upheld by the higher-level
-/// abstraction invoking it.
-macro_rules! call_config_write {
-    (infallible, $c_fn:ident, $self:ident, $ty:ty, $addr:expr, $value:expr) => {
-        // SAFETY: By the type invariant `$self.pdev` is a valid address.
-        // CAST: The offset is cast to `i32` because the C functions expect a 32-bit signed offset
-        // parameter. PCI configuration space size is at most 4096 bytes, so the value always fits
-        // within `i32` without truncation or sign change.
-        // Return value from C function is ignored in infallible accessors.
-        let _ret = unsafe { bindings::$c_fn($self.pdev.as_raw(), $addr as i32, $value) };
+            unsafe fn io_write(&self, value: $ty, address: usize) {
+                // Return value from C function is ignored in infallible accessors.
+                let _ret =
+                    // SAFETY: By the type invariant `self.pdev` is a valid address.
+                    // CAST: The offset is cast to `i32` because the C functions expect a 32-bit
+                    // signed offset parameter. PCI configuration space size is at most 4096 bytes,
+                    // so the value always fits within `i32` without truncation or sign change.
+                    unsafe { bindings::$write_fn(self.pdev.as_raw(), address as i32, value) };
+            }
+        }
     };
 }
 
 // PCI configuration space supports 8, 16, and 32-bit accesses.
-impl<'a, S: ConfigSpaceKind> IoCapable<u8> for ConfigSpace<'a, S> {}
-impl<'a, S: ConfigSpaceKind> IoCapable<u16> for ConfigSpace<'a, S> {}
-impl<'a, S: ConfigSpaceKind> IoCapable<u32> for ConfigSpace<'a, S> {}
+impl_config_space_io_capable!(u8, pci_read_config_byte, pci_write_config_byte);
+impl_config_space_io_capable!(u16, pci_read_config_word, pci_write_config_word);
+impl_config_space_io_capable!(u32, pci_read_config_dword, pci_write_config_dword);
 
 impl<'a, S: ConfigSpaceKind> Io for ConfigSpace<'a, S> {
     /// Returns the base address of the I/O region. It is always 0 for configuration space.
@@ -159,17 +131,6 @@ impl<'a, S: ConfigSpaceKind> Io for ConfigSpace<'a, S> {
     fn maxsize(&self) -> usize {
         self.pdev.cfg_size().into_raw()
     }
-
-    // PCI configuration space does not support fallible operations.
-    // The default implementations from the Io trait are not used.
-
-    define_read!(infallible, read8, call_config_read(pci_read_config_byte) -> u8);
-    define_read!(infallible, read16, call_config_read(pci_read_config_word) -> u16);
-    define_read!(infallible, read32, call_config_read(pci_read_config_dword) -> u32);
-
-    define_write!(infallible, write8, call_config_write(pci_write_config_byte) <- u8);
-    define_write!(infallible, write16, call_config_write(pci_write_config_word) <- u16);
-    define_write!(infallible, write32, call_config_write(pci_write_config_dword) <- u32);
 }
 
 impl<'a, S: ConfigSpaceKind> IoKnownSize for ConfigSpace<'a, S> {
diff --git a/rust/kernel/prelude.rs b/rust/kernel/prelude.rs
index 2877e3f7b6d3..44edf72a4a24 100644
--- a/rust/kernel/prelude.rs
+++ b/rust/kernel/prelude.rs
@@ -13,43 +13,97 @@
 
 #[doc(no_inline)]
 pub use core::{
-    mem::{align_of, align_of_val, size_of, size_of_val},
-    pin::Pin,
+    mem::{
+        align_of,
+        align_of_val,
+        size_of,
+        size_of_val, //
+    },
+    pin::Pin, //
 };
 
 pub use ::ffi::{
-    c_char, c_int, c_long, c_longlong, c_schar, c_short, c_uchar, c_uint, c_ulong, c_ulonglong,
-    c_ushort, c_void, CStr,
+    c_char,
+    c_int,
+    c_long,
+    c_longlong,
+    c_schar,
+    c_short,
+    c_uchar,
+    c_uint,
+    c_ulong,
+    c_ulonglong,
+    c_ushort,
+    c_void,
+    CStr, //
 };
 
-pub use crate::alloc::{flags::*, Box, KBox, KVBox, KVVec, KVec, VBox, VVec, Vec};
-
 #[doc(no_inline)]
-pub use macros::{export, fmt, kunit_tests, module, vtable};
+pub use macros::{
+    export,
+    fmt,
+    kunit_tests,
+    module,
+    vtable, //
+};
 
-pub use pin_init::{init, pin_data, pin_init, pinned_drop, InPlaceWrite, Init, PinInit, Zeroable};
+pub use pin_init::{
+    init,
+    pin_data,
+    pin_init,
+    pinned_drop,
+    InPlaceWrite,
+    Init,
+    PinInit,
+    Zeroable, //
+};
 
-pub use super::{build_assert, build_error};
+pub use super::{
+    alloc::{
+        flags::*,
+        Box,
+        KBox,
+        KVBox,
+        KVVec,
+        KVec,
+        VBox,
+        VVec,
+        Vec, //
+    },
+    build_assert,
+    build_error,
+    const_assert,
+    current,
+    dev_alert,
+    dev_crit,
+    dev_dbg,
+    dev_emerg,
+    dev_err,
+    dev_info,
+    dev_notice,
+    dev_warn,
+    error::{
+        code::*,
+        Error,
+        Result, //
+    },
+    init::InPlaceInit,
+    pr_alert,
+    pr_crit,
+    pr_debug,
+    pr_emerg,
+    pr_err,
+    pr_info,
+    pr_notice,
+    pr_warn,
+    static_assert,
+    str::CStrExt as _,
+    try_init,
+    try_pin_init,
+    uaccess::UserPtr,
+    ThisModule, //
+};
 
 // `super::std_vendor` is hidden, which makes the macro inline for some reason.
 #[doc(no_inline)]
 pub use super::dbg;
-pub use super::{dev_alert, dev_crit, dev_dbg, dev_emerg, dev_err, dev_info, dev_notice, dev_warn};
-pub use super::{pr_alert, pr_crit, pr_debug, pr_emerg, pr_err, pr_info, pr_notice, pr_warn};
-
-pub use super::{try_init, try_pin_init};
-
-pub use super::static_assert;
-
-pub use super::error::{code::*, Error, Result};
-
-pub use super::{str::CStrExt as _, ThisModule};
-
-pub use super::init::InPlaceInit;
-
-pub use super::current;
-
-pub use super::uaccess::UserPtr;
-
-#[cfg(not(CONFIG_RUSTC_HAS_SLICE_AS_FLATTENED))]
-pub use super::slice::AsFlattened;
diff --git a/rust/kernel/ptr.rs b/rust/kernel/ptr.rs
index 5b6a382637fe..3f3e529e9f58 100644
--- a/rust/kernel/ptr.rs
+++ b/rust/kernel/ptr.rs
@@ -2,9 +2,17 @@
 
 //! Types and functions to work with pointers and addresses.
 
-use core::mem::align_of;
+pub mod projection;
+pub use crate::project_pointer as project;
+
+use core::mem::{
+    align_of,
+    size_of, //
+};
 use core::num::NonZero;
 
+use crate::const_assert;
+
 /// Type representing an alignment, which is always a power of two.
 ///
 /// It is used to validate that a given value is a valid alignment, and to perform masking and
@@ -38,12 +46,10 @@ impl Alignment {
     /// ```
     #[inline(always)]
     pub const fn new<const ALIGN: usize>() -> Self {
-        const {
-            assert!(
-                ALIGN.is_power_of_two(),
-                "Provided alignment is not a power of two."
-            );
-        }
+        const_assert!(
+            ALIGN.is_power_of_two(),
+            "Provided alignment is not a power of two."
+        );
 
         // INVARIANT: `align` is a power of two.
         // SAFETY: `align` is a power of two, and thus non-zero.
@@ -81,7 +87,6 @@ impl Alignment {
     /// This is equivalent to [`align_of`], but with the return value provided as an [`Alignment`].
     #[inline(always)]
     pub const fn of<T>() -> Self {
-        #![allow(clippy::incompatible_msrv)]
         // This cannot panic since alignments are always powers of two.
         //
         // We unfortunately cannot use `new` as it would require the `generic_const_exprs` feature.
@@ -225,3 +230,54 @@ macro_rules! impl_alignable_uint {
 }
 
 impl_alignable_uint!(u8, u16, u32, u64, usize);
+
+/// Trait to represent compile-time known size information.
+///
+/// This is a generalization of [`size_of`] that works for dynamically sized types.
+pub trait KnownSize {
+    /// Get the size of an object of this type in bytes, with the metadata of the given pointer.
+    fn size(p: *const Self) -> usize;
+}
+
+impl<T> KnownSize for T {
+    #[inline(always)]
+    fn size(_: *const Self) -> usize {
+        size_of::<T>()
+    }
+}
+
+impl<T> KnownSize for [T] {
+    #[inline(always)]
+    fn size(p: *const Self) -> usize {
+        p.len() * size_of::<T>()
+    }
+}
+
+/// Aligns `value` up to `align`.
+///
+/// This is the const-compatible equivalent of [`Alignable::align_up`].
+///
+/// Returns [`None`] on overflow.
+///
+/// # Examples
+///
+/// ```
+/// use kernel::{
+///     ptr::{
+///         const_align_up,
+///         Alignment, //
+///     },
+///     sizes::SZ_4K, //
+/// };
+///
+/// assert_eq!(const_align_up(0x4f, Alignment::new::<16>()), Some(0x50));
+/// assert_eq!(const_align_up(0x40, Alignment::new::<16>()), Some(0x40));
+/// assert_eq!(const_align_up(1, Alignment::new::<SZ_4K>()), Some(SZ_4K));
+/// ```
+#[inline(always)]
+pub const fn const_align_up(value: usize, align: Alignment) -> Option<usize> {
+    match value.checked_add(align.as_usize() - 1) {
+        Some(v) => Some(v & align.mask()),
+        None => None,
+    }
+}
diff --git a/rust/kernel/ptr/projection.rs b/rust/kernel/ptr/projection.rs
new file mode 100644
index 000000000000..140ea8e21617
--- /dev/null
+++ b/rust/kernel/ptr/projection.rs
@@ -0,0 +1,305 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Infrastructure for handling projections.
+
+use core::{
+    mem::MaybeUninit,
+    ops::Deref, //
+};
+
+use crate::prelude::*;
+
+/// Error raised when a projection is attempted on an array or slice out of bounds.
+pub struct OutOfBound;
+
+impl From<OutOfBound> for Error {
+    #[inline(always)]
+    fn from(_: OutOfBound) -> Self {
+        ERANGE
+    }
+}
+
+/// A helper trait to perform index projection.
+///
+/// This is similar to [`core::slice::SliceIndex`], but operates on raw pointers safely and
+/// fallibly.
+///
+/// # Safety
+///
+/// The implementation of `index` and `get` (if [`Some`] is returned) must ensure that, if provided
+/// input pointer `slice` and returned pointer `output`, then:
+/// - `output` has the same provenance as `slice`;
+/// - `output.byte_offset_from(slice)` is between 0 to
+///   `KnownSize::size(slice) - KnownSize::size(output)`.
+///
+/// This means that if the input pointer is valid, then pointer returned by `get` or `index` is
+/// also valid.
+#[diagnostic::on_unimplemented(message = "`{Self}` cannot be used to index `{T}`")]
+#[doc(hidden)]
+pub unsafe trait ProjectIndex<T: ?Sized>: Sized {
+    type Output: ?Sized;
+
+    /// Returns an index-projected pointer, if in bounds.
+    fn get(self, slice: *mut T) -> Option<*mut Self::Output>;
+
+    /// Returns an index-projected pointer; fail the build if it cannot be proved to be in bounds.
+    #[inline(always)]
+    fn index(self, slice: *mut T) -> *mut Self::Output {
+        Self::get(self, slice).unwrap_or_else(|| build_error!())
+    }
+}
+
+// Forward array impl to slice impl.
+//
+// SAFETY: Safety requirement guaranteed by the forwarded impl.
+unsafe impl<T, I, const N: usize> ProjectIndex<[T; N]> for I
+where
+    I: ProjectIndex<[T]>,
+{
+    type Output = <I as ProjectIndex<[T]>>::Output;
+
+    #[inline(always)]
+    fn get(self, slice: *mut [T; N]) -> Option<*mut Self::Output> {
+        <I as ProjectIndex<[T]>>::get(self, slice)
+    }
+
+    #[inline(always)]
+    fn index(self, slice: *mut [T; N]) -> *mut Self::Output {
+        <I as ProjectIndex<[T]>>::index(self, slice)
+    }
+}
+
+// SAFETY: `get`-returned pointer has the same provenance as `slice` and the offset is checked to
+// not exceed the required bound.
+unsafe impl<T> ProjectIndex<[T]> for usize {
+    type Output = T;
+
+    #[inline(always)]
+    fn get(self, slice: *mut [T]) -> Option<*mut T> {
+        if self >= slice.len() {
+            None
+        } else {
+            Some(slice.cast::<T>().wrapping_add(self))
+        }
+    }
+}
+
+// SAFETY: `get`-returned pointer has the same provenance as `slice` and the offset is checked to
+// not exceed the required bound.
+unsafe impl<T> ProjectIndex<[T]> for core::ops::Range<usize> {
+    type Output = [T];
+
+    #[inline(always)]
+    fn get(self, slice: *mut [T]) -> Option<*mut [T]> {
+        let new_len = self.end.checked_sub(self.start)?;
+        if self.end > slice.len() {
+            return None;
+        }
+        Some(core::ptr::slice_from_raw_parts_mut(
+            slice.cast::<T>().wrapping_add(self.start),
+            new_len,
+        ))
+    }
+}
+
+// SAFETY: Safety requirement guaranteed by the forwarded impl.
+unsafe impl<T> ProjectIndex<[T]> for core::ops::RangeTo<usize> {
+    type Output = [T];
+
+    #[inline(always)]
+    fn get(self, slice: *mut [T]) -> Option<*mut [T]> {
+        (0..self.end).get(slice)
+    }
+}
+
+// SAFETY: Safety requirement guaranteed by the forwarded impl.
+unsafe impl<T> ProjectIndex<[T]> for core::ops::RangeFrom<usize> {
+    type Output = [T];
+
+    #[inline(always)]
+    fn get(self, slice: *mut [T]) -> Option<*mut [T]> {
+        (self.start..slice.len()).get(slice)
+    }
+}
+
+// SAFETY: `get` returned the pointer as is, so it always has the same provenance and offset of 0.
+unsafe impl<T> ProjectIndex<[T]> for core::ops::RangeFull {
+    type Output = [T];
+
+    #[inline(always)]
+    fn get(self, slice: *mut [T]) -> Option<*mut [T]> {
+        Some(slice)
+    }
+}
+
+/// A helper trait to perform field projection.
+///
+/// This trait has a `DEREF` generic parameter so it can be implemented twice for types that
+/// implement [`Deref`]. This will cause an ambiguity error and thus block [`Deref`] types being
+/// used as base of projection, as they can inject unsoundness. Users therefore must not specify
+/// `DEREF` and should always leave it to be inferred.
+///
+/// # Safety
+///
+/// `proj` may only invoke `f` with a valid allocation, as the documentation of [`Self::proj`]
+/// describes.
+#[doc(hidden)]
+pub unsafe trait ProjectField<const DEREF: bool> {
+    /// Project a pointer to a type to a pointer of a field.
+    ///
+    /// `f` may only be invoked with a valid allocation so it can safely obtain raw pointers to
+    /// fields using `&raw mut`.
+    ///
+    /// This is needed because `base` might not point to a valid allocation, while `&raw mut`
+    /// requires pointers to be in bounds of a valid allocation.
+    ///
+    /// # Safety
+    ///
+    /// `f` must return a pointer in bounds of the provided pointer.
+    unsafe fn proj<F>(base: *mut Self, f: impl FnOnce(*mut Self) -> *mut F) -> *mut F;
+}
+
+// NOTE: in theory, this API should work for `T: ?Sized` and `F: ?Sized`, too. However, we cannot
+// currently support that as we need to obtain a valid allocation that `&raw const` can operate on.
+//
+// SAFETY: `proj` invokes `f` with valid allocation.
+unsafe impl<T> ProjectField<false> for T {
+    #[inline(always)]
+    unsafe fn proj<F>(base: *mut Self, f: impl FnOnce(*mut Self) -> *mut F) -> *mut F {
+        // Create a valid allocation to start projection, as `base` is not necessarily so. The
+        // memory is never actually used so it will be optimized out, so it should work even for
+        // very large `T` (`memoffset` crate also relies on this). To be extra certain, we also
+        // annotate `f` closure with `#[inline(always)]` in the macro.
+        let mut place = MaybeUninit::uninit();
+        let place_base = place.as_mut_ptr();
+        let field = f(place_base);
+        // SAFETY: `field` is in bounds from `base` per safety requirement.
+        let offset = unsafe { field.byte_offset_from(place_base) };
+        // Use `wrapping_byte_offset` as `base` does not need to be of valid allocation.
+        base.wrapping_byte_offset(offset).cast()
+    }
+}
+
+// SAFETY: Vacuously satisfied.
+unsafe impl<T: Deref> ProjectField<true> for T {
+    #[inline(always)]
+    unsafe fn proj<F>(_: *mut Self, _: impl FnOnce(*mut Self) -> *mut F) -> *mut F {
+        build_error!("this function is a guard against `Deref` impl and is never invoked");
+    }
+}
+
+/// Create a projection from a raw pointer.
+///
+/// The projected pointer is within the memory region marked by the input pointer. There is no
+/// requirement that the input raw pointer needs to be valid, so this macro may be used for
+/// projecting pointers outside normal address space, e.g. I/O pointers. However, if the input
+/// pointer is valid, the projected pointer is also valid.
+///
+/// Supported projections include field projections and index projections.
+/// It is not allowed to project into types that implement custom [`Deref`] or
+/// [`Index`](core::ops::Index).
+///
+/// The macro has basic syntax of `kernel::ptr::project!(ptr, projection)`, where `ptr` is an
+/// expression that evaluates to a raw pointer which serves as the base of projection. `projection`
+/// can be a projection expression of form `.field` (normally identifier, or numeral in case of
+/// tuple structs) or of form `[index]`.
+///
+/// If a mutable pointer is needed, the macro input can be prefixed with the `mut` keyword, i.e.
+/// `kernel::ptr::project!(mut ptr, projection)`. By default, a const pointer is created.
+///
+/// `ptr::project!` macro can perform both fallible indexing and build-time checked indexing.
+/// `[index]` form performs build-time bounds checking; if compiler fails to prove `[index]` is in
+/// bounds, compilation will fail. `[index]?` can be used to perform runtime bounds checking;
+/// `OutOfBound` error is raised via `?` if the index is out of bounds.
+///
+/// # Examples
+///
+/// Field projections are performed with `.field_name`:
+///
+/// ```
+/// struct MyStruct { field: u32, }
+/// let ptr: *const MyStruct = core::ptr::dangling();
+/// let field_ptr: *const u32 = kernel::ptr::project!(ptr, .field);
+///
+/// struct MyTupleStruct(u32, u32);
+///
+/// fn proj(ptr: *const MyTupleStruct) {
+///     let field_ptr: *const u32 = kernel::ptr::project!(ptr, .1);
+/// }
+/// ```
+///
+/// Index projections are performed with `[index]`:
+///
+/// ```
+/// fn proj(ptr: *const [u8; 32]) -> Result {
+///     let field_ptr: *const u8 = kernel::ptr::project!(ptr, [1]);
+///     // The following invocation, if uncommented, would fail the build.
+///     //
+///     // kernel::ptr::project!(ptr, [128]);
+///
+///     // This will raise an `OutOfBound` error (which is convertible to `ERANGE`).
+///     kernel::ptr::project!(ptr, [128]?);
+///     Ok(())
+/// }
+/// ```
+///
+/// If you need to match on the error instead of propagate, put the invocation inside a closure:
+///
+/// ```
+/// let ptr: *const [u8; 32] = core::ptr::dangling();
+/// let field_ptr: Result<*const u8> = (|| -> Result<_> {
+///     Ok(kernel::ptr::project!(ptr, [128]?))
+/// })();
+/// assert!(field_ptr.is_err());
+/// ```
+///
+/// For mutable pointers, put `mut` as the first token in macro invocation.
+///
+/// ```
+/// let ptr: *mut [(u8, u16); 32] = core::ptr::dangling_mut();
+/// let field_ptr: *mut u16 = kernel::ptr::project!(mut ptr, [1].1);
+/// ```
+#[macro_export]
+macro_rules! project_pointer {
+    (@gen $ptr:ident, ) => {};
+    // Field projection. `$field` needs to be `tt` to support tuple index like `.0`.
+    (@gen $ptr:ident, .$field:tt $($rest:tt)*) => {
+        // SAFETY: The provided closure always returns an in-bounds pointer.
+        let $ptr = unsafe {
+            $crate::ptr::projection::ProjectField::proj($ptr, #[inline(always)] |ptr| {
+                // Check unaligned field. Not all users (e.g. DMA) can handle unaligned
+                // projections.
+                if false {
+                    let _ = &(*ptr).$field;
+                }
+                // SAFETY: `$field` is in bounds, and no implicit `Deref` is possible (if the
+                // type implements `Deref`, Rust cannot infer the generic parameter `DEREF`).
+                &raw mut (*ptr).$field
+            })
+        };
+        $crate::ptr::project!(@gen $ptr, $($rest)*)
+    };
+    // Fallible index projection.
+    (@gen $ptr:ident, [$index:expr]? $($rest:tt)*) => {
+        let $ptr = $crate::ptr::projection::ProjectIndex::get($index, $ptr)
+            .ok_or($crate::ptr::projection::OutOfBound)?;
+        $crate::ptr::project!(@gen $ptr, $($rest)*)
+    };
+    // Build-time checked index projection.
+    (@gen $ptr:ident, [$index:expr] $($rest:tt)*) => {
+        let $ptr = $crate::ptr::projection::ProjectIndex::index($index, $ptr);
+        $crate::ptr::project!(@gen $ptr, $($rest)*)
+    };
+    (mut $ptr:expr, $($proj:tt)*) => {{
+        let ptr: *mut _ = $ptr;
+        $crate::ptr::project!(@gen ptr, $($proj)*);
+        ptr
+    }};
+    ($ptr:expr, $($proj:tt)*) => {{
+        let ptr = <*const _>::cast_mut($ptr);
+        // We currently always project using mutable pointer, as it is not decided whether `&raw
+        // const` allows the resulting pointer to be mutated (see documentation of `addr_of!`).
+        $crate::ptr::project!(@gen ptr, $($proj)*);
+        ptr.cast_const()
+    }};
+}
diff --git a/rust/kernel/regulator.rs b/rust/kernel/regulator.rs
index 4f7837c7e53a..41e730cedc81 100644
--- a/rust/kernel/regulator.rs
+++ b/rust/kernel/regulator.rs
@@ -23,7 +23,10 @@ use crate::{
     prelude::*,
 };
 
-use core::{marker::PhantomData, mem::ManuallyDrop, ptr::NonNull};
+use core::{
+    marker::PhantomData,
+    mem::ManuallyDrop, //
+};
 
 mod private {
     pub trait Sealed {}
@@ -229,15 +232,17 @@ pub fn devm_enable_optional(dev: &Device<Bound>, name: &CStr) -> Result {
 ///
 /// # Invariants
 ///
-/// - `inner` is a non-null wrapper over a pointer to a `struct
-///   regulator` obtained from [`regulator_get()`].
+/// - `inner` is a pointer obtained from a successful call to
+///   [`regulator_get()`]. It is treated as an opaque token that may only be
+///   accessed using C API methods (e.g., it may be `NULL` if the C API returns
+///   `NULL`).
 ///
 /// [`regulator_get()`]: https://docs.kernel.org/driver-api/regulator.html#c.regulator_get
 pub struct Regulator<State>
 where
     State: RegulatorState,
 {
-    inner: NonNull<bindings::regulator>,
+    inner: *mut bindings::regulator,
     _phantom: PhantomData<State>,
 }
 
@@ -249,7 +254,7 @@ impl<T: RegulatorState> Regulator<T> {
         // SAFETY: Safe as per the type invariants of `Regulator`.
         to_result(unsafe {
             bindings::regulator_set_voltage(
-                self.inner.as_ptr(),
+                self.inner,
                 min_voltage.as_microvolts(),
                 max_voltage.as_microvolts(),
             )
@@ -259,7 +264,7 @@ impl<T: RegulatorState> Regulator<T> {
     /// Gets the current voltage of the regulator.
     pub fn get_voltage(&self) -> Result<Voltage> {
         // SAFETY: Safe as per the type invariants of `Regulator`.
-        let voltage = unsafe { bindings::regulator_get_voltage(self.inner.as_ptr()) };
+        let voltage = unsafe { bindings::regulator_get_voltage(self.inner) };
 
         to_result(voltage).map(|()| Voltage::from_microvolts(voltage))
     }
@@ -270,10 +275,8 @@ impl<T: RegulatorState> Regulator<T> {
             // received from the C code.
             from_err_ptr(unsafe { bindings::regulator_get(dev.as_raw(), name.as_char_ptr()) })?;
 
-        // SAFETY: We can safely trust `inner` to be a pointer to a valid
-        // regulator if `ERR_PTR` was not returned.
-        let inner = unsafe { NonNull::new_unchecked(inner) };
-
+        // INVARIANT: `inner` is a pointer obtained from `regulator_get()`, and
+        // the call was successful.
         Ok(Self {
             inner,
             _phantom: PhantomData,
@@ -282,12 +285,12 @@ impl<T: RegulatorState> Regulator<T> {
 
     fn enable_internal(&self) -> Result {
         // SAFETY: Safe as per the type invariants of `Regulator`.
-        to_result(unsafe { bindings::regulator_enable(self.inner.as_ptr()) })
+        to_result(unsafe { bindings::regulator_enable(self.inner) })
     }
 
     fn disable_internal(&self) -> Result {
         // SAFETY: Safe as per the type invariants of `Regulator`.
-        to_result(unsafe { bindings::regulator_disable(self.inner.as_ptr()) })
+        to_result(unsafe { bindings::regulator_disable(self.inner) })
     }
 }
 
@@ -349,7 +352,7 @@ impl<T: IsEnabled> Regulator<T> {
     /// Checks if the regulator is enabled.
     pub fn is_enabled(&self) -> bool {
         // SAFETY: Safe as per the type invariants of `Regulator`.
-        unsafe { bindings::regulator_is_enabled(self.inner.as_ptr()) != 0 }
+        unsafe { bindings::regulator_is_enabled(self.inner) != 0 }
     }
 }
 
@@ -359,11 +362,11 @@ impl<T: RegulatorState> Drop for Regulator<T> {
             // SAFETY: By the type invariants, we know that `self` owns a
             // reference on the enabled refcount, so it is safe to relinquish it
             // now.
-            unsafe { bindings::regulator_disable(self.inner.as_ptr()) };
+            unsafe { bindings::regulator_disable(self.inner) };
         }
         // SAFETY: By the type invariants, we know that `self` owns a reference,
         // so it is safe to relinquish it now.
-        unsafe { bindings::regulator_put(self.inner.as_ptr()) };
+        unsafe { bindings::regulator_put(self.inner) };
     }
 }
 
diff --git a/rust/kernel/sizes.rs b/rust/kernel/sizes.rs
index 661e680d9330..521b2b38bfe7 100644
--- a/rust/kernel/sizes.rs
+++ b/rust/kernel/sizes.rs
@@ -3,48 +3,132 @@
 //! Commonly used sizes.
 //!
 //! C headers: [`include/linux/sizes.h`](srctree/include/linux/sizes.h).
+//!
+//! The top-level `SZ_*` constants are [`usize`]-typed, for use in kernel page
+//! arithmetic and similar CPU-side work.
+//!
+//! The [`SizeConstants`] trait provides the same constants as associated constants
+//! on [`u32`], [`u64`], and [`usize`], for use in device address spaces where
+//! the address width depends on the hardware. Device drivers frequently need
+//! these constants as [`u64`] (or [`u32`]) rather than [`usize`], because
+//! device address spaces are sized independently of the CPU pointer width.
+//!
+//! # Examples
+//!
+//! ```
+//! use kernel::{
+//!     page::PAGE_SIZE,
+//!     sizes::{
+//!         SizeConstants,
+//!         SZ_1M, //
+//!     }, //
+//! };
+//!
+//! // Module-level constants continue to work without a type qualifier.
+//! let num_pages_in_1m = SZ_1M / PAGE_SIZE;
+//!
+//! // Trait associated constants require a type qualifier.
+//! let heap_size = 14 * u64::SZ_1M;
+//! let small = u32::SZ_4K;
+//! ```
+
+macro_rules! define_sizes {
+    ($($type:ty),* $(,)?) => {
+        define_sizes!(@internal [$($type),*]
+            /// `0x0000_0400`.
+            SZ_1K,
+            /// `0x0000_0800`.
+            SZ_2K,
+            /// `0x0000_1000`.
+            SZ_4K,
+            /// `0x0000_2000`.
+            SZ_8K,
+            /// `0x0000_4000`.
+            SZ_16K,
+            /// `0x0000_8000`.
+            SZ_32K,
+            /// `0x0001_0000`.
+            SZ_64K,
+            /// `0x0002_0000`.
+            SZ_128K,
+            /// `0x0004_0000`.
+            SZ_256K,
+            /// `0x0008_0000`.
+            SZ_512K,
+            /// `0x0010_0000`.
+            SZ_1M,
+            /// `0x0020_0000`.
+            SZ_2M,
+            /// `0x0040_0000`.
+            SZ_4M,
+            /// `0x0080_0000`.
+            SZ_8M,
+            /// `0x0100_0000`.
+            SZ_16M,
+            /// `0x0200_0000`.
+            SZ_32M,
+            /// `0x0400_0000`.
+            SZ_64M,
+            /// `0x0800_0000`.
+            SZ_128M,
+            /// `0x1000_0000`.
+            SZ_256M,
+            /// `0x2000_0000`.
+            SZ_512M,
+            /// `0x4000_0000`.
+            SZ_1G,
+            /// `0x8000_0000`.
+            SZ_2G,
+        );
+    };
+
+    (@internal [$($type:ty),*] $($names_and_metas:tt)*) => {
+        define_sizes!(@consts_and_trait $($names_and_metas)*);
+        define_sizes!(@impls [$($type),*] $($names_and_metas)*);
+    };
+
+    (@consts_and_trait $($(#[$meta:meta])* $name:ident,)*) => {
+        $(
+            $(#[$meta])*
+            pub const $name: usize = bindings::$name as usize;
+        )*
+
+        /// Size constants for device address spaces.
+        ///
+        /// Implemented for [`u32`], [`u64`], and [`usize`] so drivers can
+        /// choose the width that matches their hardware. All `SZ_*` values fit
+        /// in a [`u32`], so all implementations are lossless.
+        ///
+        /// # Examples
+        ///
+        /// ```
+        /// use kernel::sizes::SizeConstants;
+        ///
+        /// let gpu_heap = 14 * u64::SZ_1M;
+        /// let mmio_window = u32::SZ_16M;
+        /// ```
+        pub trait SizeConstants {
+            $(
+                $(#[$meta])*
+                const $name: Self;
+            )*
+        }
+    };
+
+    (@impls [] $($(#[$meta:meta])* $name:ident,)*) => {};
+
+    (@impls [$first:ty $(, $rest:ty)*] $($(#[$meta:meta])* $name:ident,)*) => {
+        impl SizeConstants for $first {
+            $(
+                const $name: Self = {
+                    assert!((self::$name as u128) <= (<$first>::MAX as u128));
+                    self::$name as $first
+                };
+            )*
+        }
+
+        define_sizes!(@impls [$($rest),*] $($(#[$meta])* $name,)*);
+    };
+}
 
-/// 0x00000400
-pub const SZ_1K: usize = bindings::SZ_1K as usize;
-/// 0x00000800
-pub const SZ_2K: usize = bindings::SZ_2K as usize;
-/// 0x00001000
-pub const SZ_4K: usize = bindings::SZ_4K as usize;
-/// 0x00002000
-pub const SZ_8K: usize = bindings::SZ_8K as usize;
-/// 0x00004000
-pub const SZ_16K: usize = bindings::SZ_16K as usize;
-/// 0x00008000
-pub const SZ_32K: usize = bindings::SZ_32K as usize;
-/// 0x00010000
-pub const SZ_64K: usize = bindings::SZ_64K as usize;
-/// 0x00020000
-pub const SZ_128K: usize = bindings::SZ_128K as usize;
-/// 0x00040000
-pub const SZ_256K: usize = bindings::SZ_256K as usize;
-/// 0x00080000
-pub const SZ_512K: usize = bindings::SZ_512K as usize;
-/// 0x00100000
-pub const SZ_1M: usize = bindings::SZ_1M as usize;
-/// 0x00200000
-pub const SZ_2M: usize = bindings::SZ_2M as usize;
-/// 0x00400000
-pub const SZ_4M: usize = bindings::SZ_4M as usize;
-/// 0x00800000
-pub const SZ_8M: usize = bindings::SZ_8M as usize;
-/// 0x01000000
-pub const SZ_16M: usize = bindings::SZ_16M as usize;
-/// 0x02000000
-pub const SZ_32M: usize = bindings::SZ_32M as usize;
-/// 0x04000000
-pub const SZ_64M: usize = bindings::SZ_64M as usize;
-/// 0x08000000
-pub const SZ_128M: usize = bindings::SZ_128M as usize;
-/// 0x10000000
-pub const SZ_256M: usize = bindings::SZ_256M as usize;
-/// 0x20000000
-pub const SZ_512M: usize = bindings::SZ_512M as usize;
-/// 0x40000000
-pub const SZ_1G: usize = bindings::SZ_1G as usize;
-/// 0x80000000
-pub const SZ_2G: usize = bindings::SZ_2G as usize;
+define_sizes!(u32, u64, usize);
diff --git a/rust/kernel/slice.rs b/rust/kernel/slice.rs
deleted file mode 100644
index ca2cde135061..000000000000
--- a/rust/kernel/slice.rs
+++ /dev/null
@@ -1,49 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-//! Additional (and temporary) slice helpers.
-
-/// Extension trait providing a portable version of [`as_flattened`] and
-/// [`as_flattened_mut`].
-///
-/// In Rust 1.80, the previously unstable `slice::flatten` family of methods
-/// have been stabilized and renamed from `flatten` to `as_flattened`.
-///
-/// This creates an issue for as long as the MSRV is < 1.80, as the same functionality is provided
-/// by different methods depending on the compiler version.
-///
-/// This extension trait solves this by abstracting `as_flatten` and calling the correct method
-/// depending on the Rust version.
-///
-/// This trait can be removed once the MSRV passes 1.80.
-///
-/// [`as_flattened`]: https://doc.rust-lang.org/std/primitive.slice.html#method.as_flattened
-/// [`as_flattened_mut`]: https://doc.rust-lang.org/std/primitive.slice.html#method.as_flattened_mut
-#[cfg(not(CONFIG_RUSTC_HAS_SLICE_AS_FLATTENED))]
-pub trait AsFlattened<T> {
-    /// Takes a `&[[T; N]]` and flattens it to a `&[T]`.
-    ///
-    /// This is an portable layer on top of [`as_flattened`]; see its documentation for details.
-    ///
-    /// [`as_flattened`]: https://doc.rust-lang.org/std/primitive.slice.html#method.as_flattened
-    fn as_flattened(&self) -> &[T];
-
-    /// Takes a `&mut [[T; N]]` and flattens it to a `&mut [T]`.
-    ///
-    /// This is an portable layer on top of [`as_flattened_mut`]; see its documentation for details.
-    ///
-    /// [`as_flattened_mut`]: https://doc.rust-lang.org/std/primitive.slice.html#method.as_flattened_mut
-    fn as_flattened_mut(&mut self) -> &mut [T];
-}
-
-#[cfg(not(CONFIG_RUSTC_HAS_SLICE_AS_FLATTENED))]
-impl<T, const N: usize> AsFlattened<T> for [[T; N]] {
-    #[allow(clippy::incompatible_msrv)]
-    fn as_flattened(&self) -> &[T] {
-        self.flatten()
-    }
-
-    #[allow(clippy::incompatible_msrv)]
-    fn as_flattened_mut(&mut self) -> &mut [T] {
-        self.flatten_mut()
-    }
-}
diff --git a/rust/kernel/static_assert.rs b/rust/kernel/static_assert.rs
deleted file mode 100644
index a57ba14315a0..000000000000
--- a/rust/kernel/static_assert.rs
+++ /dev/null
@@ -1,39 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-//! Static assert.
-
-/// Static assert (i.e. compile-time assert).
-///
-/// Similar to C11 [`_Static_assert`] and C++11 [`static_assert`].
-///
-/// An optional panic message can be supplied after the expression.
-/// Currently only a string literal without formatting is supported
-/// due to constness limitations of the [`assert!`] macro.
-///
-/// The feature may be added to Rust in the future: see [RFC 2790].
-///
-/// [`_Static_assert`]: https://en.cppreference.com/w/c/language/_Static_assert
-/// [`static_assert`]: https://en.cppreference.com/w/cpp/language/static_assert
-/// [RFC 2790]: https://github.com/rust-lang/rfcs/issues/2790
-///
-/// # Examples
-///
-/// ```
-/// static_assert!(42 > 24);
-/// static_assert!(core::mem::size_of::<u8>() == 1);
-///
-/// const X: &[u8] = b"bar";
-/// static_assert!(X[1] == b'a');
-///
-/// const fn f(x: i32) -> i32 {
-///     x + 2
-/// }
-/// static_assert!(f(40) == 42);
-/// static_assert!(f(40) == 42, "f(x) must add 2 to the given input.");
-/// ```
-#[macro_export]
-macro_rules! static_assert {
-    ($condition:expr $(,$arg:literal)?) => {
-        const _: () = ::core::assert!($condition $(,$arg)?);
-    };
-}
diff --git a/rust/kernel/str.rs b/rust/kernel/str.rs
index fa87779d2253..8311d91549e1 100644
--- a/rust/kernel/str.rs
+++ b/rust/kernel/str.rs
@@ -189,6 +189,7 @@ macro_rules! b_str {
 //
 // - error[E0379]: functions in trait impls cannot be declared const
 #[inline]
+#[expect(clippy::disallowed_methods, reason = "internal implementation")]
 pub const fn as_char_ptr_in_const_context(c_str: &CStr) -> *const c_char {
     c_str.as_ptr().cast()
 }
@@ -319,6 +320,7 @@ unsafe fn to_bytes_mut(s: &mut CStr) -> &mut [u8] {
 
 impl CStrExt for CStr {
     #[inline]
+    #[expect(clippy::disallowed_methods, reason = "internal implementation")]
     unsafe fn from_char_ptr<'a>(ptr: *const c_char) -> &'a Self {
         // SAFETY: The safety preconditions are the same as for `CStr::from_ptr`.
         unsafe { CStr::from_ptr(ptr.cast()) }
@@ -334,6 +336,7 @@ impl CStrExt for CStr {
     }
 
     #[inline]
+    #[expect(clippy::disallowed_methods, reason = "internal implementation")]
     fn as_char_ptr(&self) -> *const c_char {
         self.as_ptr().cast()
     }
@@ -376,19 +379,32 @@ impl AsRef<BStr> for CStr {
     }
 }
 
-/// Creates a new [`CStr`] from a string literal.
+/// Creates a new [`CStr`] at compile time.
 ///
-/// The string literal should not contain any `NUL` bytes.
+/// Rust supports C string literals since Rust 1.77, and they should be used instead of this macro
+/// where possible. This macro exists to allow static *non-literal* C strings to be created at
+/// compile time. This is most often used in other macros.
+///
+/// # Panics
+///
+/// This macro panics if the operand contains an interior `NUL` byte.
 ///
 /// # Examples
 ///
 /// ```
 /// # use kernel::c_str;
 /// # use kernel::str::CStr;
-/// const MY_CSTR: &CStr = c_str!("My awesome CStr!");
+/// // This is allowed, but `c"literal"` should be preferred for literals.
+/// const BAD: &CStr = c_str!("literal");
+///
+/// // `c_str!` is still needed for static non-literal C strings.
+/// const GOOD: &CStr = c_str!(concat!(file!(), ":", line!(), ": My CStr!"));
 /// ```
 #[macro_export]
 macro_rules! c_str {
+    // NB: We could write `($str:lit) => compile_error!("use a C string literal instead");` here but
+    // that would trigger when the literal is at the top of several macro expansions. That would be
+    // too limiting to macro authors.
     ($str:expr) => {{
         const S: &str = concat!($str, "\0");
         const C: &$crate::str::CStr = match $crate::str::CStr::from_bytes_with_nul(S.as_bytes()) {
@@ -664,13 +680,13 @@ impl fmt::Write for Formatter<'_> {
 ///
 /// * The first byte of `buffer` is always zero.
 /// * The length of `buffer` is at least 1.
-pub(crate) struct NullTerminatedFormatter<'a> {
+pub struct NullTerminatedFormatter<'a> {
     buffer: &'a mut [u8],
 }
 
 impl<'a> NullTerminatedFormatter<'a> {
     /// Create a new [`Self`] instance.
-    pub(crate) fn new(buffer: &'a mut [u8]) -> Option<NullTerminatedFormatter<'a>> {
+    pub fn new(buffer: &'a mut [u8]) -> Option<NullTerminatedFormatter<'a>> {
         *(buffer.first_mut()?) = 0;
 
         // INVARIANT:
@@ -828,7 +844,10 @@ impl CString {
         f.write_str("\0")?;
 
         // SAFETY: The number of bytes that can be written to `f` is bounded by `size`, which is
-        // `buf`'s capacity. The contents of the buffer have been initialised by writes to `f`.
+        // `buf`'s capacity. The `Formatter` is created with `size` as its limit, and the `?`
+        // operators on `write_fmt` and `write_str` above ensure that if writing exceeds this
+        // limit, an error is returned early. The contents of the buffer have been initialised
+        // by writes to `f`.
         unsafe { buf.inc_len(f.bytes_written()) };
 
         // Check that there are no `NUL` bytes before the end.
diff --git a/rust/kernel/sync/arc.rs b/rust/kernel/sync/arc.rs
index 921e19333b89..18d6c0d62ce0 100644
--- a/rust/kernel/sync/arc.rs
+++ b/rust/kernel/sync/arc.rs
@@ -128,7 +128,7 @@ mod std_vendor;
 /// # Ok::<(), Error>(())
 /// ```
 #[repr(transparent)]
-#[cfg_attr(CONFIG_RUSTC_HAS_COERCE_POINTEE, derive(core::marker::CoercePointee))]
+#[derive(core::marker::CoercePointee)]
 pub struct Arc<T: ?Sized> {
     ptr: NonNull<ArcInner<T>>,
     // NB: this informs dropck that objects of type `ArcInner<T>` may be used in `<Arc<T> as
@@ -182,15 +182,6 @@ impl<T: ?Sized> ArcInner<T> {
     }
 }
 
-// This is to allow coercion from `Arc<T>` to `Arc<U>` if `T` can be converted to the
-// dynamically-sized type (DST) `U`.
-#[cfg(not(CONFIG_RUSTC_HAS_COERCE_POINTEE))]
-impl<T: ?Sized + core::marker::Unsize<U>, U: ?Sized> core::ops::CoerceUnsized<Arc<U>> for Arc<T> {}
-
-// This is to allow `Arc<U>` to be dispatched on when `Arc<T>` can be coerced into `Arc<U>`.
-#[cfg(not(CONFIG_RUSTC_HAS_COERCE_POINTEE))]
-impl<T: ?Sized + core::marker::Unsize<U>, U: ?Sized> core::ops::DispatchFromDyn<Arc<U>> for Arc<T> {}
-
 // SAFETY: It is safe to send `Arc<T>` to another thread when the underlying `T` is `Sync` because
 // it effectively means sharing `&T` (which is safe because `T` is `Sync`); additionally, it needs
 // `T` to be `Send` because any thread that has an `Arc<T>` may ultimately access `T` using a
@@ -547,20 +538,12 @@ impl<T: ?Sized> From<Pin<UniqueArc<T>>> for Arc<T> {
 /// # Ok::<(), Error>(())
 /// ```
 #[repr(transparent)]
-#[cfg_attr(CONFIG_RUSTC_HAS_COERCE_POINTEE, derive(core::marker::CoercePointee))]
+#[derive(core::marker::CoercePointee)]
 pub struct ArcBorrow<'a, T: ?Sized + 'a> {
     inner: NonNull<ArcInner<T>>,
     _p: PhantomData<&'a ()>,
 }
 
-// This is to allow `ArcBorrow<U>` to be dispatched on when `ArcBorrow<T>` can be coerced into
-// `ArcBorrow<U>`.
-#[cfg(not(CONFIG_RUSTC_HAS_COERCE_POINTEE))]
-impl<T: ?Sized + core::marker::Unsize<U>, U: ?Sized> core::ops::DispatchFromDyn<ArcBorrow<'_, U>>
-    for ArcBorrow<'_, T>
-{
-}
-
 impl<T: ?Sized> Clone for ArcBorrow<'_, T> {
     fn clone(&self) -> Self {
         *self
diff --git a/rust/kernel/sync/atomic.rs b/rust/kernel/sync/atomic.rs
index 4aebeacb961a..9cd009d57e35 100644
--- a/rust/kernel/sync/atomic.rs
+++ b/rust/kernel/sync/atomic.rs
@@ -51,6 +51,10 @@ use ordering::OrderingType;
 #[repr(transparent)]
 pub struct Atomic<T: AtomicType>(AtomicRepr<T::Repr>);
 
+// SAFETY: `Atomic<T>` is safe to transfer between execution contexts because of the safety
+// requirement of `AtomicType`.
+unsafe impl<T: AtomicType> Send for Atomic<T> {}
+
 // SAFETY: `Atomic<T>` is safe to share among execution contexts because all accesses are atomic.
 unsafe impl<T: AtomicType> Sync for Atomic<T> {}
 
@@ -68,6 +72,11 @@ unsafe impl<T: AtomicType> Sync for Atomic<T> {}
 ///
 /// - [`Self`] must have the same size and alignment as [`Self::Repr`].
 /// - [`Self`] must be [round-trip transmutable] to  [`Self::Repr`].
+/// - [`Self`] must be safe to transfer between execution contexts, if it's [`Send`], this is
+///   automatically satisfied. The exception is pointer types that are even though marked as
+///   `!Send` (e.g. raw pointers and [`NonNull<T>`]) but requiring `unsafe` to do anything
+///   meaningful on them. This is because transferring pointer values between execution contexts is
+///   safe as long as the actual `unsafe` dereferencing is justified.
 ///
 /// Note that this is more relaxed than requiring the bi-directional transmutability (i.e.
 /// [`transmute()`] is always sound between `U` and `T`) because of the support for atomic
@@ -108,7 +117,8 @@ unsafe impl<T: AtomicType> Sync for Atomic<T> {}
 /// [`transmute()`]: core::mem::transmute
 /// [round-trip transmutable]: AtomicType#round-trip-transmutability
 /// [Examples]: AtomicType#examples
-pub unsafe trait AtomicType: Sized + Send + Copy {
+/// [`NonNull<T>`]: core::ptr::NonNull
+pub unsafe trait AtomicType: Sized + Copy {
     /// The backing atomic implementation type.
     type Repr: AtomicImpl;
 }
@@ -204,10 +214,7 @@ impl<T: AtomicType> Atomic<T> {
     /// // no data race.
     /// unsafe { Atomic::from_ptr(foo_a_ptr) }.store(2, Release);
     /// ```
-    pub unsafe fn from_ptr<'a>(ptr: *mut T) -> &'a Self
-    where
-        T: Sync,
-    {
+    pub unsafe fn from_ptr<'a>(ptr: *mut T) -> &'a Self {
         // CAST: `T` and `Atomic<T>` have the same size, alignment and bit validity.
         // SAFETY: Per function safety requirement, `ptr` is a valid pointer and the object will
         // live long enough. It's safe to return a `&Atomic<T>` because function safety requirement
@@ -235,6 +242,17 @@ impl<T: AtomicType> Atomic<T> {
     /// Returns a mutable reference to the underlying atomic `T`.
     ///
     /// This is safe because the mutable reference of the atomic `T` guarantees exclusive access.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use kernel::sync::atomic::{Atomic, Relaxed};
+    ///
+    /// let mut atomic_val = Atomic::new(0u32);
+    /// let val_mut = atomic_val.get_mut();
+    /// *val_mut = 101;
+    /// assert_eq!(101, atomic_val.load(Relaxed));
+    /// ```
     pub fn get_mut(&mut self) -> &mut T {
         // CAST: `T` and `T::Repr` has the same size and alignment per the safety requirement of
         // `AtomicType`, and per the type invariants `self.0` is a valid `T`, therefore the casting
@@ -527,16 +545,14 @@ where
     /// use kernel::sync::atomic::{Atomic, Acquire, Full, Relaxed};
     ///
     /// let x = Atomic::new(42);
-    ///
     /// assert_eq!(42, x.load(Relaxed));
-    ///
-    /// assert_eq!(54, { x.fetch_add(12, Acquire); x.load(Relaxed) });
+    /// assert_eq!(42, x.fetch_add(12, Acquire));
+    /// assert_eq!(54, x.load(Relaxed));
     ///
     /// let x = Atomic::new(42);
-    ///
     /// assert_eq!(42, x.load(Relaxed));
-    ///
-    /// assert_eq!(54, { x.fetch_add(12, Full); x.load(Relaxed) } );
+    /// assert_eq!(42, x.fetch_add(12, Full));
+    /// assert_eq!(54, x.load(Relaxed));
     /// ```
     #[inline(always)]
     pub fn fetch_add<Rhs, Ordering: ordering::Ordering>(&self, v: Rhs, _: Ordering) -> T
@@ -559,4 +575,276 @@ where
         // SAFETY: `ret` comes from reading `self.0`, which is a valid `T` per type invariants.
         unsafe { from_repr(ret) }
     }
+
+    /// Atomic fetch and subtract.
+    ///
+    /// Atomically updates `*self` to `(*self).wrapping_sub(v)`, and returns the value of `*self`
+    /// before the update.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use kernel::sync::atomic::{Atomic, Acquire, Full, Relaxed};
+    ///
+    /// let x = Atomic::new(42);
+    /// assert_eq!(42, x.load(Relaxed));
+    /// assert_eq!(42, x.fetch_sub(12, Acquire));
+    /// assert_eq!(30, x.load(Relaxed));
+    ///
+    /// let x = Atomic::new(42);
+    /// assert_eq!(42, x.load(Relaxed));
+    /// assert_eq!(42, x.fetch_sub(12, Full));
+    /// assert_eq!(30, x.load(Relaxed));
+    /// ```
+    #[inline(always)]
+    pub fn fetch_sub<Rhs, Ordering: ordering::Ordering>(&self, v: Rhs, _: Ordering) -> T
+    where
+        // Types that support addition also support subtraction.
+        T: AtomicAdd<Rhs>,
+    {
+        let v = T::rhs_into_delta(v);
+
+        // INVARIANT: `self.0` is a valid `T` after `atomic_fetch_sub*()` due to safety requirement
+        // of `AtomicAdd`.
+        let ret = {
+            match Ordering::TYPE {
+                OrderingType::Full => T::Repr::atomic_fetch_sub(&self.0, v),
+                OrderingType::Acquire => T::Repr::atomic_fetch_sub_acquire(&self.0, v),
+                OrderingType::Release => T::Repr::atomic_fetch_sub_release(&self.0, v),
+                OrderingType::Relaxed => T::Repr::atomic_fetch_sub_relaxed(&self.0, v),
+            }
+        };
+
+        // SAFETY: `ret` comes from reading `self.0`, which is a valid `T` per type invariants.
+        unsafe { from_repr(ret) }
+    }
+}
+
+#[cfg(any(CONFIG_X86_64, CONFIG_UML, CONFIG_ARM, CONFIG_ARM64))]
+#[repr(C)]
+#[derive(Clone, Copy)]
+struct Flag {
+    bool_field: bool,
+}
+
+/// # Invariants
+///
+/// `padding` must be all zeroes.
+#[cfg(not(any(CONFIG_X86_64, CONFIG_UML, CONFIG_ARM, CONFIG_ARM64)))]
+#[repr(C, align(4))]
+#[derive(Clone, Copy)]
+struct Flag {
+    #[cfg(target_endian = "big")]
+    padding: [u8; 3],
+    bool_field: bool,
+    #[cfg(target_endian = "little")]
+    padding: [u8; 3],
+}
+
+impl Flag {
+    #[inline(always)]
+    const fn new(b: bool) -> Self {
+        // INVARIANT: `padding` is all zeroes.
+        Self {
+            bool_field: b,
+            #[cfg(not(any(CONFIG_X86_64, CONFIG_UML, CONFIG_ARM, CONFIG_ARM64)))]
+            padding: [0; 3],
+        }
+    }
+}
+
+// SAFETY: `Flag` and `Repr` have the same size and alignment, and `Flag` is round-trip
+// transmutable to the selected representation (`i8` or `i32`).
+unsafe impl AtomicType for Flag {
+    #[cfg(any(CONFIG_X86_64, CONFIG_UML, CONFIG_ARM, CONFIG_ARM64))]
+    type Repr = i8;
+    #[cfg(not(any(CONFIG_X86_64, CONFIG_UML, CONFIG_ARM, CONFIG_ARM64)))]
+    type Repr = i32;
+}
+
+/// An atomic flag type intended to be backed by performance-optimal integer type.
+///
+/// The backing integer type is an implementation detail; it may vary by architecture and change
+/// in the future.
+///
+/// [`AtomicFlag`] is generally preferable to [`Atomic<bool>`] when you need read-modify-write
+/// (RMW) operations (e.g. [`Atomic::xchg()`]/[`Atomic::cmpxchg()`]) or when [`Atomic<bool>`] does
+/// not save memory due to padding. On some architectures that do not support byte-sized atomic
+/// RMW operations, RMW operations on [`Atomic<bool>`] are slower.
+///
+/// If you only use [`Atomic::load()`]/[`Atomic::store()`], [`Atomic<bool>`] is fine.
+///
+/// # Examples
+///
+/// ```
+/// use kernel::sync::atomic::{AtomicFlag, Relaxed};
+///
+/// let flag = AtomicFlag::new(false);
+/// assert_eq!(false, flag.load(Relaxed));
+/// flag.store(true, Relaxed);
+/// assert_eq!(true, flag.load(Relaxed));
+/// ```
+pub struct AtomicFlag(Atomic<Flag>);
+
+impl AtomicFlag {
+    /// Creates a new atomic flag.
+    #[inline(always)]
+    pub const fn new(b: bool) -> Self {
+        Self(Atomic::new(Flag::new(b)))
+    }
+
+    /// Returns a mutable reference to the underlying flag as a [`bool`].
+    ///
+    /// This is safe because the mutable reference of the atomic flag guarantees exclusive access.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use kernel::sync::atomic::{AtomicFlag, Relaxed};
+    ///
+    /// let mut atomic_flag = AtomicFlag::new(false);
+    /// assert_eq!(false, atomic_flag.load(Relaxed));
+    /// *atomic_flag.get_mut() = true;
+    /// assert_eq!(true, atomic_flag.load(Relaxed));
+    /// ```
+    #[inline(always)]
+    pub fn get_mut(&mut self) -> &mut bool {
+        &mut self.0.get_mut().bool_field
+    }
+
+    /// Loads the value from the atomic flag.
+    #[inline(always)]
+    pub fn load<Ordering: ordering::AcquireOrRelaxed>(&self, o: Ordering) -> bool {
+        self.0.load(o).bool_field
+    }
+
+    /// Stores a value to the atomic flag.
+    #[inline(always)]
+    pub fn store<Ordering: ordering::ReleaseOrRelaxed>(&self, v: bool, o: Ordering) {
+        self.0.store(Flag::new(v), o);
+    }
+
+    /// Stores a value to the atomic flag and returns the previous value.
+    #[inline(always)]
+    pub fn xchg<Ordering: ordering::Ordering>(&self, new: bool, o: Ordering) -> bool {
+        self.0.xchg(Flag::new(new), o).bool_field
+    }
+
+    /// Store a value to the atomic flag if the current value is equal to `old`.
+    #[inline(always)]
+    pub fn cmpxchg<Ordering: ordering::Ordering>(
+        &self,
+        old: bool,
+        new: bool,
+        o: Ordering,
+    ) -> Result<bool, bool> {
+        match self.0.cmpxchg(Flag::new(old), Flag::new(new), o) {
+            Ok(_) => Ok(old),
+            Err(f) => Err(f.bool_field),
+        }
+    }
+}
+
+/// Atomic load over raw pointers.
+///
+/// This function provides a short-cut of `Atomic::from_ptr().load(..)`, and can be used to work
+/// with C side on synchronizations:
+///
+/// - `atomic_load(.., Relaxed)` maps to `READ_ONCE()` when used for inter-thread communication.
+/// - `atomic_load(.., Acquire)` maps to `smp_load_acquire()`.
+///
+/// # Safety
+///
+/// - `ptr` is a valid pointer to `T` and aligned to `align_of::<T>()`.
+/// - If there is a concurrent store from kernel (C or Rust), it has to be atomic.
+#[doc(alias("READ_ONCE", "smp_load_acquire"))]
+#[inline(always)]
+pub unsafe fn atomic_load<T: AtomicType, Ordering: ordering::AcquireOrRelaxed>(
+    ptr: *mut T,
+    o: Ordering,
+) -> T
+where
+    T::Repr: AtomicBasicOps,
+{
+    // SAFETY: Per the function safety requirement, `ptr` is valid and aligned to
+    // `align_of::<T>()`, and all concurrent stores from kernel are atomic, hence no data race per
+    // LKMM.
+    unsafe { Atomic::from_ptr(ptr) }.load(o)
+}
+
+/// Atomic store over raw pointers.
+///
+/// This function provides a short-cut of `Atomic::from_ptr().load(..)`, and can be used to work
+/// with C side on synchronizations:
+///
+/// - `atomic_store(.., Relaxed)` maps to `WRITE_ONCE()` when used for inter-thread communication.
+/// - `atomic_load(.., Release)` maps to `smp_store_release()`.
+///
+/// # Safety
+///
+/// - `ptr` is a valid pointer to `T` and aligned to `align_of::<T>()`.
+/// - If there is a concurrent access from kernel (C or Rust), it has to be atomic.
+#[doc(alias("WRITE_ONCE", "smp_store_release"))]
+#[inline(always)]
+pub unsafe fn atomic_store<T: AtomicType, Ordering: ordering::ReleaseOrRelaxed>(
+    ptr: *mut T,
+    v: T,
+    o: Ordering,
+) where
+    T::Repr: AtomicBasicOps,
+{
+    // SAFETY: Per the function safety requirement, `ptr` is valid and aligned to
+    // `align_of::<T>()`, and all concurrent accesses from kernel are atomic, hence no data race
+    // per LKMM.
+    unsafe { Atomic::from_ptr(ptr) }.store(v, o);
+}
+
+/// Atomic exchange over raw pointers.
+///
+/// This function provides a short-cut of `Atomic::from_ptr().xchg(..)`, and can be used to work
+/// with C side on synchronizations.
+///
+/// # Safety
+///
+/// - `ptr` is a valid pointer to `T` and aligned to `align_of::<T>()`.
+/// - If there is a concurrent access from kernel (C or Rust), it has to be atomic.
+#[inline(always)]
+pub unsafe fn xchg<T: AtomicType, Ordering: ordering::Ordering>(
+    ptr: *mut T,
+    new: T,
+    o: Ordering,
+) -> T
+where
+    T::Repr: AtomicExchangeOps,
+{
+    // SAFETY: Per the function safety requirement, `ptr` is valid and aligned to
+    // `align_of::<T>()`, and all concurrent accesses from kernel are atomic, hence no data race
+    // per LKMM.
+    unsafe { Atomic::from_ptr(ptr) }.xchg(new, o)
+}
+
+/// Atomic compare and exchange over raw pointers.
+///
+/// This function provides a short-cut of `Atomic::from_ptr().cmpxchg(..)`, and can be used to work
+/// with C side on synchronizations.
+///
+/// # Safety
+///
+/// - `ptr` is a valid pointer to `T` and aligned to `align_of::<T>()`.
+/// - If there is a concurrent access from kernel (C or Rust), it has to be atomic.
+#[doc(alias("try_cmpxchg"))]
+#[inline(always)]
+pub unsafe fn cmpxchg<T: AtomicType, Ordering: ordering::Ordering>(
+    ptr: *mut T,
+    old: T,
+    new: T,
+    o: Ordering,
+) -> Result<T, T>
+where
+    T::Repr: AtomicExchangeOps,
+{
+    // SAFETY: Per the function safety requirement, `ptr` is valid and aligned to
+    // `align_of::<T>()`, and all concurrent accesses from kernel are atomic, hence no data race
+    // per LKMM.
+    unsafe { Atomic::from_ptr(ptr) }.cmpxchg(old, new, o)
 }
diff --git a/rust/kernel/sync/atomic/internal.rs b/rust/kernel/sync/atomic/internal.rs
index 0dac58bca2b3..ad810c2172ec 100644
--- a/rust/kernel/sync/atomic/internal.rs
+++ b/rust/kernel/sync/atomic/internal.rs
@@ -7,6 +7,7 @@
 use crate::bindings;
 use crate::macros::paste;
 use core::cell::UnsafeCell;
+use ffi::c_void;
 
 mod private {
     /// Sealed trait marker to disable customized impls on atomic implementation traits.
@@ -14,10 +15,11 @@ mod private {
 }
 
 // The C side supports atomic primitives only for `i32` and `i64` (`atomic_t` and `atomic64_t`),
-// while the Rust side also layers provides atomic support for `i8` and `i16`
-// on top of lower-level C primitives.
+// while the Rust side also provides atomic support for `i8`, `i16` and `*const c_void` on top of
+// lower-level C primitives.
 impl private::Sealed for i8 {}
 impl private::Sealed for i16 {}
+impl private::Sealed for *const c_void {}
 impl private::Sealed for i32 {}
 impl private::Sealed for i64 {}
 
@@ -26,10 +28,10 @@ impl private::Sealed for i64 {}
 /// This trait is sealed, and only types that map directly to the C side atomics
 /// or can be implemented with lower-level C primitives are allowed to implement this:
 ///
-/// - `i8` and `i16` are implemented with lower-level C primitives.
+/// - `i8`, `i16` and `*const c_void` are implemented with lower-level C primitives.
 /// - `i32` map to `atomic_t`
 /// - `i64` map to `atomic64_t`
-pub trait AtomicImpl: Sized + Send + Copy + private::Sealed {
+pub trait AtomicImpl: Sized + Copy + private::Sealed {
     /// The type of the delta in arithmetic or logical operations.
     ///
     /// For example, in `atomic_add(ptr, v)`, it's the type of `v`. Usually it's the same type of
@@ -37,20 +39,31 @@ pub trait AtomicImpl: Sized + Send + Copy + private::Sealed {
     type Delta;
 }
 
-// The current helpers of load/store uses `{WRITE,READ}_ONCE()` hence the atomicity is only
-// guaranteed against read-modify-write operations if the architecture supports native atomic RmW.
-#[cfg(CONFIG_ARCH_SUPPORTS_ATOMIC_RMW)]
+// The current helpers of load/store of atomic `i8`, `i16` and pointers use `{WRITE,READ}_ONCE()`
+// hence the atomicity is only guaranteed against read-modify-write operations if the architecture
+// supports native atomic RmW.
+//
+// In the future when a CONFIG_ARCH_SUPPORTS_ATOMIC_RMW=n architecture plans to support Rust, the
+// load/store helpers that guarantee atomicity against RmW operations (usually via a lock) need to
+// be added.
+crate::static_assert!(
+    cfg!(CONFIG_ARCH_SUPPORTS_ATOMIC_RMW),
+    "The current implementation of atomic i8/i16/ptr relies on the architecure being \
+    ARCH_SUPPORTS_ATOMIC_RMW"
+);
+
 impl AtomicImpl for i8 {
     type Delta = Self;
 }
 
-// The current helpers of load/store uses `{WRITE,READ}_ONCE()` hence the atomicity is only
-// guaranteed against read-modify-write operations if the architecture supports native atomic RmW.
-#[cfg(CONFIG_ARCH_SUPPORTS_ATOMIC_RMW)]
 impl AtomicImpl for i16 {
     type Delta = Self;
 }
 
+impl AtomicImpl for *const c_void {
+    type Delta = isize;
+}
+
 // `atomic_t` implements atomic operations on `i32`.
 impl AtomicImpl for i32 {
     type Delta = Self;
@@ -262,7 +275,7 @@ macro_rules! declare_and_impl_atomic_methods {
 }
 
 declare_and_impl_atomic_methods!(
-    [ i8 => atomic_i8, i16 => atomic_i16, i32 => atomic, i64 => atomic64 ]
+    [ i8 => atomic_i8, i16 => atomic_i16, *const c_void => atomic_ptr, i32 => atomic, i64 => atomic64 ]
     /// Basic atomic operations
     pub trait AtomicBasicOps {
         /// Atomic read (load).
@@ -280,7 +293,7 @@ declare_and_impl_atomic_methods!(
 );
 
 declare_and_impl_atomic_methods!(
-    [ i8 => atomic_i8, i16 => atomic_i16, i32 => atomic, i64 => atomic64 ]
+    [ i8 => atomic_i8, i16 => atomic_i16, *const c_void => atomic_ptr, i32 => atomic, i64 => atomic64 ]
     /// Exchange and compare-and-exchange atomic operations
     pub trait AtomicExchangeOps {
         /// Atomic exchange.
@@ -324,7 +337,12 @@ declare_and_impl_atomic_methods!(
         /// Atomically updates `*a` to `(*a).wrapping_add(v)`, and returns the value of `*a`
         /// before the update.
         fn fetch_add[acquire, release, relaxed](a: &AtomicRepr<Self>, v: Self::Delta) -> Self {
-            // SAFETY: `a.as_ptr()` is valid and properly aligned.
+            // SAFETY: `a.as_ptr()` guarantees the returned pointer is valid and properly aligned.
+            unsafe { bindings::#call(v, a.as_ptr().cast()) }
+        }
+
+        fn fetch_sub[acquire, release, relaxed](a: &AtomicRepr<Self>, v: Self::Delta) -> Self {
+            // SAFETY: `a.as_ptr()` guarantees the returned pointer is valid and properly aligned.
             unsafe { bindings::#call(v, a.as_ptr().cast()) }
         }
     }
diff --git a/rust/kernel/sync/atomic/predefine.rs b/rust/kernel/sync/atomic/predefine.rs
index 67a0406d3ea4..1d53834fcb12 100644
--- a/rust/kernel/sync/atomic/predefine.rs
+++ b/rust/kernel/sync/atomic/predefine.rs
@@ -4,6 +4,7 @@
 
 use crate::static_assert;
 use core::mem::{align_of, size_of};
+use ffi::c_void;
 
 // Ensure size and alignment requirements are checked.
 static_assert!(size_of::<bool>() == size_of::<i8>());
@@ -28,6 +29,26 @@ unsafe impl super::AtomicType for i16 {
     type Repr = i16;
 }
 
+// SAFETY:
+//
+// - `*mut T` has the same size and alignment with `*const c_void`, and is round-trip
+//   transmutable to `*const c_void`.
+// - `*mut T` is safe to transfer between execution contexts. See the safety requirement of
+//   [`AtomicType`].
+unsafe impl<T: Sized> super::AtomicType for *mut T {
+    type Repr = *const c_void;
+}
+
+// SAFETY:
+//
+// - `*const T` has the same size and alignment with `*const c_void`, and is round-trip
+//   transmutable to `*const c_void`.
+// - `*const T` is safe to transfer between execution contexts. See the safety requirement of
+//   [`AtomicType`].
+unsafe impl<T: Sized> super::AtomicType for *const T {
+    type Repr = *const c_void;
+}
+
 // SAFETY: `i32` has the same size and alignment with itself, and is round-trip transmutable to
 // itself.
 unsafe impl super::AtomicType for i32 {
@@ -157,6 +178,14 @@ mod tests {
 
             assert_eq!(v, x.load(Relaxed));
         });
+
+        for_each_type!(42 in [i8, i16, i32, i64, u32, u64, isize, usize] |v| {
+            let x = Atomic::new(v);
+            let ptr = x.as_ptr();
+
+            // SAFETY: `ptr` is a valid pointer and no concurrent access.
+            assert_eq!(v, unsafe { atomic_load(ptr, Relaxed) });
+        });
     }
 
     #[test]
@@ -167,6 +196,17 @@ mod tests {
             x.store(v, Release);
             assert_eq!(v, x.load(Acquire));
         });
+
+        for_each_type!(42 in [i8, i16, i32, i64, u32, u64, isize, usize] |v| {
+            let x = Atomic::new(0);
+            let ptr = x.as_ptr();
+
+            // SAFETY: `ptr` is a valid pointer and no concurrent access.
+            unsafe { atomic_store(ptr, v, Release) };
+
+            // SAFETY: `ptr` is a valid pointer and no concurrent access.
+            assert_eq!(v, unsafe { atomic_load(ptr, Acquire) });
+        });
     }
 
     #[test]
@@ -180,6 +220,18 @@ mod tests {
             assert_eq!(old, x.xchg(new, Full));
             assert_eq!(new, x.load(Relaxed));
         });
+
+        for_each_type!(42 in [i8, i16, i32, i64, u32, u64, isize, usize] |v| {
+            let x = Atomic::new(v);
+            let ptr = x.as_ptr();
+
+            let old = v;
+            let new = v + 1;
+
+            // SAFETY: `ptr` is a valid pointer and no concurrent access.
+            assert_eq!(old, unsafe { xchg(ptr, new, Full) });
+            assert_eq!(new, x.load(Relaxed));
+        });
     }
 
     #[test]
@@ -195,6 +247,21 @@ mod tests {
             assert_eq!(Ok(old), x.cmpxchg(old, new, Relaxed));
             assert_eq!(new, x.load(Relaxed));
         });
+
+        for_each_type!(42 in [i8, i16, i32, i64, u32, u64, isize, usize] |v| {
+            let x = Atomic::new(v);
+            let ptr = x.as_ptr();
+
+            let old = v;
+            let new = v + 1;
+
+            // SAFETY: `ptr` is a valid pointer and no concurrent access.
+            assert_eq!(Err(old), unsafe { cmpxchg(ptr, new, new, Full) });
+            assert_eq!(old, x.load(Relaxed));
+            // SAFETY: `ptr` is a valid pointer and no concurrent access.
+            assert_eq!(Ok(old), unsafe { cmpxchg(ptr, old, new, Relaxed) });
+            assert_eq!(new, x.load(Relaxed));
+        });
     }
 
     #[test]
@@ -226,4 +293,46 @@ mod tests {
         assert_eq!(false, x.load(Relaxed));
         assert_eq!(Ok(false), x.cmpxchg(false, true, Full));
     }
+
+    #[test]
+    fn atomic_ptr_tests() {
+        let mut v = 42;
+        let mut u = 43;
+        let x = Atomic::new(&raw mut v);
+
+        assert_eq!(x.load(Acquire), &raw mut v);
+        assert_eq!(x.cmpxchg(&raw mut u, &raw mut u, Relaxed), Err(&raw mut v));
+        assert_eq!(x.cmpxchg(&raw mut v, &raw mut u, Relaxed), Ok(&raw mut v));
+        assert_eq!(x.load(Relaxed), &raw mut u);
+
+        let x = Atomic::new(&raw const v);
+
+        assert_eq!(x.load(Acquire), &raw const v);
+        assert_eq!(
+            x.cmpxchg(&raw const u, &raw const u, Relaxed),
+            Err(&raw const v)
+        );
+        assert_eq!(
+            x.cmpxchg(&raw const v, &raw const u, Relaxed),
+            Ok(&raw const v)
+        );
+        assert_eq!(x.load(Relaxed), &raw const u);
+    }
+
+    #[test]
+    fn atomic_flag_tests() {
+        let mut flag = AtomicFlag::new(false);
+
+        assert_eq!(false, flag.load(Relaxed));
+
+        *flag.get_mut() = true;
+        assert_eq!(true, flag.load(Relaxed));
+
+        assert_eq!(true, flag.xchg(false, Relaxed));
+        assert_eq!(false, flag.load(Relaxed));
+
+        *flag.get_mut() = true;
+        assert_eq!(Ok(true), flag.cmpxchg(true, false, Full));
+        assert_eq!(false, flag.load(Relaxed));
+    }
 }
diff --git a/rust/kernel/task.rs b/rust/kernel/task.rs
index cc907fb531bc..049c8a4d45d8 100644
--- a/rust/kernel/task.rs
+++ b/rust/kernel/task.rs
@@ -6,16 +6,15 @@
 
 use crate::{
     bindings,
-    ffi::{c_int, c_long, c_uint},
     mm::MmWithUser,
     pid_namespace::PidNamespace,
+    prelude::*,
     sync::aref::ARef,
     types::{NotThreadSafe, Opaque},
 };
 use core::{
-    cmp::{Eq, PartialEq},
     ops::Deref,
-    ptr,
+    ptr, //
 };
 
 /// A sentinel value used for infinite timeouts.
@@ -419,7 +418,7 @@ pub fn might_sleep() {
         let file = kernel::file_from_location(loc);
 
         // SAFETY: `file.as_ptr()` is valid for reading and guaranteed to be nul-terminated.
-        unsafe { crate::bindings::__might_sleep(file.as_ptr().cast(), loc.line() as i32) }
+        unsafe { crate::bindings::__might_sleep(file.as_char_ptr(), loc.line() as i32) }
     }
 
     // SAFETY: Always safe to call.
diff --git a/rust/kernel/time.rs b/rust/kernel/time.rs
index 6ea98dfcd027..363e93cbb139 100644
--- a/rust/kernel/time.rs
+++ b/rust/kernel/time.rs
@@ -60,7 +60,13 @@ pub fn msecs_to_jiffies(msecs: Msecs) -> Jiffies {
 /// cases the user of the clock has to decide which clock is best suited for the
 /// purpose. In most scenarios clock [`Monotonic`] is the best choice as it
 /// provides a accurate monotonic notion of time (leap second smearing ignored).
-pub trait ClockSource {
+///
+/// # Safety
+///
+/// Implementers must ensure that `ktime_get()` returns a value in the inclusive range
+/// `0..=KTIME_MAX` (i.e., greater than or equal to 0 and less than or equal to
+/// `KTIME_MAX`, where `KTIME_MAX` equals `i64::MAX`).
+pub unsafe trait ClockSource {
     /// The kernel clock ID associated with this clock source.
     ///
     /// This constant corresponds to the C side `clockid_t` value.
@@ -68,7 +74,7 @@ pub trait ClockSource {
 
     /// Get the current time from the clock source.
     ///
-    /// The function must return a value in the range from 0 to `KTIME_MAX`.
+    /// The function must return a value in the range `0..=KTIME_MAX`.
     fn ktime_get() -> bindings::ktime_t;
 }
 
@@ -85,7 +91,9 @@ pub trait ClockSource {
 /// count time that the system is suspended.
 pub struct Monotonic;
 
-impl ClockSource for Monotonic {
+// SAFETY: The kernel's `ktime_get()` is guaranteed to return a value
+// in `0..=KTIME_MAX`.
+unsafe impl ClockSource for Monotonic {
     const ID: bindings::clockid_t = bindings::CLOCK_MONOTONIC as bindings::clockid_t;
 
     fn ktime_get() -> bindings::ktime_t {
@@ -110,7 +118,9 @@ impl ClockSource for Monotonic {
 /// the clock will experience discontinuity around leap second adjustment.
 pub struct RealTime;
 
-impl ClockSource for RealTime {
+// SAFETY: The kernel's `ktime_get_real()` is guaranteed to return a value
+// in `0..=KTIME_MAX`.
+unsafe impl ClockSource for RealTime {
     const ID: bindings::clockid_t = bindings::CLOCK_REALTIME as bindings::clockid_t;
 
     fn ktime_get() -> bindings::ktime_t {
@@ -128,7 +138,9 @@ impl ClockSource for RealTime {
 /// discontinuities if the time is changed using settimeofday(2) or similar.
 pub struct BootTime;
 
-impl ClockSource for BootTime {
+// SAFETY: The kernel's `ktime_get_boottime()` is guaranteed to return a value
+// in `0..=KTIME_MAX`.
+unsafe impl ClockSource for BootTime {
     const ID: bindings::clockid_t = bindings::CLOCK_BOOTTIME as bindings::clockid_t;
 
     fn ktime_get() -> bindings::ktime_t {
@@ -150,7 +162,9 @@ impl ClockSource for BootTime {
 /// The acronym TAI refers to International Atomic Time.
 pub struct Tai;
 
-impl ClockSource for Tai {
+// SAFETY: The kernel's `ktime_get_clocktai()` is guaranteed to return a value
+// in `0..=KTIME_MAX`.
+unsafe impl ClockSource for Tai {
     const ID: bindings::clockid_t = bindings::CLOCK_TAI as bindings::clockid_t;
 
     fn ktime_get() -> bindings::ktime_t {
@@ -363,6 +377,12 @@ impl Delta {
     /// A span of time equal to zero.
     pub const ZERO: Self = Self { nanos: 0 };
 
+    /// Create a new [`Delta`] from a number of nanoseconds.
+    #[inline]
+    pub const fn from_nanos(nanos: i64) -> Self {
+        Self { nanos }
+    }
+
     /// Create a new [`Delta`] from a number of microseconds.
     ///
     /// The `micros` can range from -9_223_372_036_854_775 to 9_223_372_036_854_775.
diff --git a/rust/kernel/time/hrtimer.rs b/rust/kernel/time/hrtimer.rs
index 856d2d929a00..2d7f1131a813 100644
--- a/rust/kernel/time/hrtimer.rs
+++ b/rust/kernel/time/hrtimer.rs
@@ -66,6 +66,342 @@
 //!
 //! A `restart` operation on a timer in the **stopped** state is equivalent to a
 //! `start` operation.
+//!
+//! When a type implements both `HrTimerPointer` and `Clone`, it is possible to
+//! issue the `start` operation while the timer is in the **started** state. In
+//! this case the `start` operation is equivalent to the `restart` operation.
+//!
+//! # Examples
+//!
+//! ## Using an intrusive timer living in a [`Box`]
+//!
+//! ```
+//! # use kernel::{
+//! #     alloc::flags,
+//! #     impl_has_hr_timer,
+//! #     prelude::*,
+//! #     sync::{
+//! #         atomic::{ordering, Atomic},
+//! #         completion::Completion,
+//! #         Arc,
+//! #     },
+//! #     time::{
+//! #         hrtimer::{
+//! #             RelativeMode, HrTimer, HrTimerCallback, HrTimerPointer,
+//! #             HrTimerRestart, HrTimerCallbackContext
+//! #         },
+//! #         Delta, Monotonic,
+//! #     },
+//! # };
+//!
+//! #[pin_data]
+//! struct Shared {
+//!     #[pin]
+//!     flag: Atomic<u64>,
+//!     #[pin]
+//!     cond: Completion,
+//! }
+//!
+//! impl Shared {
+//!     fn new() -> impl PinInit<Self> {
+//!         pin_init!(Self {
+//!             flag <- Atomic::new(0),
+//!             cond <- Completion::new(),
+//!         })
+//!     }
+//! }
+//!
+//! #[pin_data]
+//! struct BoxIntrusiveHrTimer {
+//!     #[pin]
+//!     timer: HrTimer<Self>,
+//!     shared: Arc<Shared>,
+//! }
+//!
+//! impl BoxIntrusiveHrTimer {
+//!     fn new() -> impl PinInit<Self, kernel::error::Error> {
+//!         try_pin_init!(Self {
+//!             timer <- HrTimer::new(),
+//!             shared: Arc::pin_init(Shared::new(), flags::GFP_KERNEL)?,
+//!         })
+//!     }
+//! }
+//!
+//! impl HrTimerCallback for BoxIntrusiveHrTimer {
+//!     type Pointer<'a> = Pin<KBox<Self>>;
+//!
+//!     fn run(this: Pin<&mut Self>, _ctx: HrTimerCallbackContext<'_, Self>) -> HrTimerRestart {
+//!         pr_info!("Timer called\n");
+//!
+//!         let flag = this.shared.flag.fetch_add(1, ordering::Full);
+//!         this.shared.cond.complete_all();
+//!
+//!         if flag == 4 {
+//!             HrTimerRestart::NoRestart
+//!         } else {
+//!             HrTimerRestart::Restart
+//!         }
+//!     }
+//! }
+//!
+//! impl_has_hr_timer! {
+//!     impl HasHrTimer<Self> for BoxIntrusiveHrTimer {
+//!         mode: RelativeMode<Monotonic>, field: self.timer
+//!     }
+//! }
+//!
+//! let has_timer = Box::pin_init(BoxIntrusiveHrTimer::new(), GFP_KERNEL)?;
+//! let shared = has_timer.shared.clone();
+//! let _handle = has_timer.start(Delta::from_micros(200));
+//!
+//! while shared.flag.load(ordering::Relaxed) != 5 {
+//!     shared.cond.wait_for_completion();
+//! }
+//!
+//! pr_info!("Counted to 5\n");
+//! # Ok::<(), kernel::error::Error>(())
+//! ```
+//!
+//! ## Using an intrusive timer in an [`Arc`]
+//!
+//! ```
+//! # use kernel::{
+//! #     alloc::flags,
+//! #     impl_has_hr_timer,
+//! #     prelude::*,
+//! #     sync::{
+//! #         atomic::{ordering, Atomic},
+//! #         completion::Completion,
+//! #         Arc, ArcBorrow,
+//! #     },
+//! #     time::{
+//! #         hrtimer::{
+//! #             RelativeMode, HrTimer, HrTimerCallback, HrTimerPointer, HrTimerRestart,
+//! #             HasHrTimer, HrTimerCallbackContext
+//! #         },
+//! #         Delta, Monotonic,
+//! #     },
+//! # };
+//!
+//! #[pin_data]
+//! struct ArcIntrusiveHrTimer {
+//!     #[pin]
+//!     timer: HrTimer<Self>,
+//!     #[pin]
+//!     flag: Atomic<u64>,
+//!     #[pin]
+//!     cond: Completion,
+//! }
+//!
+//! impl ArcIntrusiveHrTimer {
+//!     fn new() -> impl PinInit<Self> {
+//!         pin_init!(Self {
+//!             timer <- HrTimer::new(),
+//!             flag <- Atomic::new(0),
+//!             cond <- Completion::new(),
+//!         })
+//!     }
+//! }
+//!
+//! impl HrTimerCallback for ArcIntrusiveHrTimer {
+//!     type Pointer<'a> = Arc<Self>;
+//!
+//!     fn run(
+//!         this: ArcBorrow<'_, Self>,
+//!         _ctx: HrTimerCallbackContext<'_, Self>,
+//!     ) -> HrTimerRestart {
+//!         pr_info!("Timer called\n");
+//!
+//!         let flag = this.flag.fetch_add(1, ordering::Full);
+//!         this.cond.complete_all();
+//!
+//!         if flag == 4 {
+//!             HrTimerRestart::NoRestart
+//!         } else {
+//!             HrTimerRestart::Restart
+//!         }
+//!     }
+//! }
+//!
+//! impl_has_hr_timer! {
+//!     impl HasHrTimer<Self> for ArcIntrusiveHrTimer {
+//!         mode: RelativeMode<Monotonic>, field: self.timer
+//!     }
+//! }
+//!
+//! let has_timer = Arc::pin_init(ArcIntrusiveHrTimer::new(), GFP_KERNEL)?;
+//! let _handle = has_timer.clone().start(Delta::from_micros(200));
+//!
+//! while has_timer.flag.load(ordering::Relaxed) != 5 {
+//!     has_timer.cond.wait_for_completion();
+//! }
+//!
+//! pr_info!("Counted to 5\n");
+//! # Ok::<(), kernel::error::Error>(())
+//! ```
+//!
+//! ## Using a stack-based timer
+//!
+//! ```
+//! # use kernel::{
+//! #     impl_has_hr_timer,
+//! #     prelude::*,
+//! #     sync::{
+//! #         atomic::{ordering, Atomic},
+//! #         completion::Completion,
+//! #     },
+//! #     time::{
+//! #         hrtimer::{
+//! #             ScopedHrTimerPointer, HrTimer, HrTimerCallback, HrTimerPointer, HrTimerRestart,
+//! #             HasHrTimer, RelativeMode, HrTimerCallbackContext
+//! #         },
+//! #         Delta, Monotonic,
+//! #     },
+//! # };
+//! # use pin_init::stack_pin_init;
+//!
+//! #[pin_data]
+//! struct IntrusiveHrTimer {
+//!     #[pin]
+//!     timer: HrTimer<Self>,
+//!     #[pin]
+//!     flag: Atomic<u64>,
+//!     #[pin]
+//!     cond: Completion,
+//! }
+//!
+//! impl IntrusiveHrTimer {
+//!     fn new() -> impl PinInit<Self> {
+//!         pin_init!(Self {
+//!             timer <- HrTimer::new(),
+//!             flag <- Atomic::new(0),
+//!             cond <- Completion::new(),
+//!         })
+//!     }
+//! }
+//!
+//! impl HrTimerCallback for IntrusiveHrTimer {
+//!     type Pointer<'a> = Pin<&'a Self>;
+//!
+//!     fn run(this: Pin<&Self>, _ctx: HrTimerCallbackContext<'_, Self>) -> HrTimerRestart {
+//!         pr_info!("Timer called\n");
+//!
+//!         this.flag.store(1, ordering::Release);
+//!         this.cond.complete_all();
+//!
+//!         HrTimerRestart::NoRestart
+//!     }
+//! }
+//!
+//! impl_has_hr_timer! {
+//!     impl HasHrTimer<Self> for IntrusiveHrTimer {
+//!         mode: RelativeMode<Monotonic>, field: self.timer
+//!     }
+//! }
+//!
+//! stack_pin_init!( let has_timer = IntrusiveHrTimer::new() );
+//! has_timer.as_ref().start_scoped(Delta::from_micros(200), || {
+//!     while has_timer.flag.load(ordering::Relaxed) != 1 {
+//!         has_timer.cond.wait_for_completion();
+//!     }
+//! });
+//!
+//! pr_info!("Flag raised\n");
+//! # Ok::<(), kernel::error::Error>(())
+//! ```
+//!
+//! ## Using a mutable stack-based timer
+//!
+//! ```
+//! # use kernel::{
+//! #     alloc::flags,
+//! #     impl_has_hr_timer,
+//! #     prelude::*,
+//! #     sync::{
+//! #         atomic::{ordering, Atomic},
+//! #         completion::Completion,
+//! #         Arc,
+//! #     },
+//! #     time::{
+//! #         hrtimer::{
+//! #             ScopedHrTimerPointer, HrTimer, HrTimerCallback, HrTimerPointer, HrTimerRestart,
+//! #             HasHrTimer, RelativeMode, HrTimerCallbackContext
+//! #         },
+//! #         Delta, Monotonic,
+//! #     },
+//! # };
+//! # use pin_init::stack_try_pin_init;
+//!
+//! #[pin_data]
+//! struct Shared {
+//!     #[pin]
+//!     flag: Atomic<u64>,
+//!     #[pin]
+//!     cond: Completion,
+//! }
+//!
+//! impl Shared {
+//!     fn new() -> impl PinInit<Self> {
+//!         pin_init!(Self {
+//!             flag <- Atomic::new(0),
+//!             cond <- Completion::new(),
+//!         })
+//!     }
+//! }
+//!
+//! #[pin_data]
+//! struct IntrusiveHrTimer {
+//!     #[pin]
+//!     timer: HrTimer<Self>,
+//!     shared: Arc<Shared>,
+//! }
+//!
+//! impl IntrusiveHrTimer {
+//!     fn new() -> impl PinInit<Self, kernel::error::Error> {
+//!         try_pin_init!(Self {
+//!             timer <- HrTimer::new(),
+//!             shared: Arc::pin_init(Shared::new(), flags::GFP_KERNEL)?,
+//!         })
+//!     }
+//! }
+//!
+//! impl HrTimerCallback for IntrusiveHrTimer {
+//!     type Pointer<'a> = Pin<&'a mut Self>;
+//!
+//!     fn run(this: Pin<&mut Self>, _ctx: HrTimerCallbackContext<'_, Self>) -> HrTimerRestart {
+//!         pr_info!("Timer called\n");
+//!
+//!         let flag = this.shared.flag.fetch_add(1, ordering::Full);
+//!         this.shared.cond.complete_all();
+//!
+//!         if flag == 4 {
+//!             HrTimerRestart::NoRestart
+//!         } else {
+//!             HrTimerRestart::Restart
+//!         }
+//!     }
+//! }
+//!
+//! impl_has_hr_timer! {
+//!     impl HasHrTimer<Self> for IntrusiveHrTimer {
+//!         mode: RelativeMode<Monotonic>, field: self.timer
+//!     }
+//! }
+//!
+//! stack_try_pin_init!( let has_timer =? IntrusiveHrTimer::new() );
+//! let shared = has_timer.shared.clone();
+//!
+//! has_timer.as_mut().start_scoped(Delta::from_micros(200), || {
+//!     while shared.flag.load(ordering::Relaxed) != 5 {
+//!         shared.cond.wait_for_completion();
+//!     }
+//! });
+//!
+//! pr_info!("Counted to 5\n");
+//! # Ok::<(), kernel::error::Error>(())
+//! ```
+//!
+//! [`Arc`]: kernel::sync::Arc
 
 use super::{ClockSource, Delta, Instant};
 use crate::{prelude::*, types::Opaque};
diff --git a/rust/kernel/transmute.rs b/rust/kernel/transmute.rs
index 5711580c9f9b..654b5ede2fe2 100644
--- a/rust/kernel/transmute.rs
+++ b/rust/kernel/transmute.rs
@@ -49,7 +49,6 @@ pub unsafe trait FromBytes {
         let slice_ptr = bytes.as_ptr().cast::<Self>();
         let size = size_of::<Self>();
 
-        #[allow(clippy::incompatible_msrv)]
         if bytes.len() == size && slice_ptr.is_aligned() {
             // SAFETY: Size and alignment were just checked.
             unsafe { Some(&*slice_ptr) }
@@ -67,16 +66,9 @@ pub unsafe trait FromBytes {
     where
         Self: Sized,
     {
-        if bytes.len() < size_of::<Self>() {
-            None
-        } else {
-            // PANIC: We checked that `bytes.len() >= size_of::<Self>`, thus `split_at` cannot
-            // panic.
-            // TODO: replace with `split_at_checked` once the MSRV is >= 1.80.
-            let (prefix, remainder) = bytes.split_at(size_of::<Self>());
+        let (prefix, remainder) = bytes.split_at_checked(size_of::<Self>())?;
 
-            Self::from_bytes(prefix).map(|s| (s, remainder))
-        }
+        Self::from_bytes(prefix).map(|s| (s, remainder))
     }
 
     /// Converts a mutable slice of bytes to a reference to `Self`.
@@ -92,7 +84,6 @@ pub unsafe trait FromBytes {
         let slice_ptr = bytes.as_mut_ptr().cast::<Self>();
         let size = size_of::<Self>();
 
-        #[allow(clippy::incompatible_msrv)]
         if bytes.len() == size && slice_ptr.is_aligned() {
             // SAFETY: Size and alignment were just checked.
             unsafe { Some(&mut *slice_ptr) }
@@ -110,16 +101,9 @@ pub unsafe trait FromBytes {
     where
         Self: AsBytes + Sized,
     {
-        if bytes.len() < size_of::<Self>() {
-            None
-        } else {
-            // PANIC: We checked that `bytes.len() >= size_of::<Self>`, thus `split_at_mut` cannot
-            // panic.
-            // TODO: replace with `split_at_mut_checked` once the MSRV is >= 1.80.
-            let (prefix, remainder) = bytes.split_at_mut(size_of::<Self>());
+        let (prefix, remainder) = bytes.split_at_mut_checked(size_of::<Self>())?;
 
-            Self::from_bytes_mut(prefix).map(|s| (s, remainder))
-        }
+        Self::from_bytes_mut(prefix).map(|s| (s, remainder))
     }
 
     /// Creates an owned instance of `Self` by copying `bytes`.
@@ -149,16 +133,9 @@ pub unsafe trait FromBytes {
     where
         Self: Sized,
     {
-        if bytes.len() < size_of::<Self>() {
-            None
-        } else {
-            // PANIC: We checked that `bytes.len() >= size_of::<Self>`, thus `split_at` cannot
-            // panic.
-            // TODO: replace with `split_at_checked` once the MSRV is >= 1.80.
-            let (prefix, remainder) = bytes.split_at(size_of::<Self>());
+        let (prefix, remainder) = bytes.split_at_checked(size_of::<Self>())?;
 
-            Self::from_bytes_copy(prefix).map(|s| (s, remainder))
-        }
+        Self::from_bytes_copy(prefix).map(|s| (s, remainder))
     }
 }
 
diff --git a/rust/kernel/types.rs b/rust/kernel/types.rs
index 9c5e7dbf1632..4329d3c2c2e5 100644
--- a/rust/kernel/types.rs
+++ b/rust/kernel/types.rs
@@ -11,8 +11,6 @@ use core::{
 };
 use pin_init::{PinInit, Wrapper, Zeroable};
 
-pub use crate::sync::aref::{ARef, AlwaysRefCounted};
-
 /// Used to transfer ownership to and from foreign (non-Rust) languages.
 ///
 /// Ownership is transferred from Rust to a foreign language by calling [`Self::into_foreign`] and
diff --git a/rust/kernel/uaccess.rs b/rust/kernel/uaccess.rs
index f989539a31b4..6c9c1cce3c63 100644
--- a/rust/kernel/uaccess.rs
+++ b/rust/kernel/uaccess.rs
@@ -7,10 +7,12 @@
 use crate::{
     alloc::{Allocator, Flags},
     bindings,
+    dma::Coherent,
     error::Result,
     ffi::{c_char, c_void},
     fs::file,
     prelude::*,
+    ptr::KnownSize,
     transmute::{AsBytes, FromBytes},
 };
 use core::mem::{size_of, MaybeUninit};
@@ -459,20 +461,19 @@ impl UserSliceWriter {
         self.length == 0
     }
 
-    /// Writes raw data to this user pointer from a kernel buffer.
+    /// Low-level write from a raw pointer.
     ///
-    /// Fails with [`EFAULT`] if the write happens on a bad address, or if the write goes out of
-    /// bounds of this [`UserSliceWriter`]. This call may modify the associated userspace slice even
-    /// if it returns an error.
-    pub fn write_slice(&mut self, data: &[u8]) -> Result {
-        let len = data.len();
-        let data_ptr = data.as_ptr().cast::<c_void>();
+    /// # Safety
+    ///
+    /// The caller must ensure that `from` is valid for reads of `len` bytes.
+    unsafe fn write_raw(&mut self, from: *const u8, len: usize) -> Result {
         if len > self.length {
             return Err(EFAULT);
         }
-        // SAFETY: `data_ptr` points into an immutable slice of length `len`, so we may read
-        // that many bytes from it.
-        let res = unsafe { bindings::copy_to_user(self.ptr.as_mut_ptr(), data_ptr, len) };
+
+        // SAFETY: Caller guarantees `from` is valid for `len` bytes (see this function's
+        // safety contract).
+        let res = unsafe { bindings::copy_to_user(self.ptr.as_mut_ptr(), from.cast(), len) };
         if res != 0 {
             return Err(EFAULT);
         }
@@ -481,6 +482,76 @@ impl UserSliceWriter {
         Ok(())
     }
 
+    /// Writes raw data to this user pointer from a kernel buffer.
+    ///
+    /// Fails with [`EFAULT`] if the write happens on a bad address, or if the write goes out of
+    /// bounds of this [`UserSliceWriter`]. This call may modify the associated userspace slice even
+    /// if it returns an error.
+    pub fn write_slice(&mut self, data: &[u8]) -> Result {
+        // SAFETY: `data` is a valid slice, so `data.as_ptr()` is valid for
+        // reading `data.len()` bytes.
+        unsafe { self.write_raw(data.as_ptr(), data.len()) }
+    }
+
+    /// Writes raw data to this user pointer from a DMA coherent allocation.
+    ///
+    /// Copies `count` bytes from `alloc` starting from `offset` into this userspace slice.
+    ///
+    /// # Errors
+    ///
+    /// - [`EOVERFLOW`]: `offset + count` overflows.
+    /// - [`ERANGE`]: `offset + count` exceeds the size of `alloc`, or `count` exceeds the
+    ///   size of the user-space buffer.
+    /// - [`EFAULT`]: the write hits a bad address or goes out of bounds of this
+    ///   [`UserSliceWriter`].
+    ///
+    /// This call may modify the associated userspace slice even if it returns an error.
+    ///
+    /// Note: The memory may be concurrently modified by hardware (e.g., DMA). In such cases,
+    /// the copied data may be inconsistent, but this does not cause undefined behavior.
+    ///
+    /// # Example
+    ///
+    /// Copy the first 256 bytes of a DMA coherent allocation into a userspace buffer:
+    ///
+    /// ```no_run
+    /// use kernel::uaccess::UserSliceWriter;
+    /// use kernel::dma::Coherent;
+    ///
+    /// fn copy_dma_to_user(
+    ///     mut writer: UserSliceWriter,
+    ///     alloc: &Coherent<[u8]>,
+    /// ) -> Result {
+    ///     writer.write_dma(alloc, 0, 256)
+    /// }
+    /// ```
+    pub fn write_dma<T: KnownSize + AsBytes + ?Sized>(
+        &mut self,
+        alloc: &Coherent<T>,
+        offset: usize,
+        count: usize,
+    ) -> Result {
+        let len = alloc.size();
+        if offset.checked_add(count).ok_or(EOVERFLOW)? > len {
+            return Err(ERANGE);
+        }
+
+        if count > self.len() {
+            return Err(ERANGE);
+        }
+
+        // SAFETY: `as_ptr()` returns a valid pointer to a memory region of `count()` bytes, as
+        // guaranteed by the `Coherent` invariants. The check above ensures `offset + count <= len`.
+        let src_ptr = unsafe { alloc.as_ptr().cast::<u8>().add(offset) };
+
+        // Note: Use `write_raw` instead of `write_slice` because the allocation is coherent
+        // memory that hardware may modify (e.g., DMA); we cannot form a `&[u8]` slice over
+        // such volatile memory.
+        //
+        // SAFETY: `src_ptr` points into the allocation and is valid for `count` bytes (see above).
+        unsafe { self.write_raw(src_ptr, count) }
+    }
+
     /// Writes raw data to this user pointer from a kernel buffer partially.
     ///
     /// This is the same as [`Self::write_slice`] but considers the given `offset` into `data` and
diff --git a/rust/kernel/usb.rs b/rust/kernel/usb.rs
index 0e1b9a88f4f1..9c17a672cd27 100644
--- a/rust/kernel/usb.rs
+++ b/rust/kernel/usb.rs
@@ -18,10 +18,8 @@ use crate::{
         to_result, //
     },
     prelude::*,
-    types::{
-        AlwaysRefCounted,
-        Opaque, //
-    },
+    sync::aref::AlwaysRefCounted,
+    types::Opaque,
     ThisModule, //
 };
 use core::{
diff --git a/rust/kernel/workqueue.rs b/rust/kernel/workqueue.rs
index 706e833e9702..7e253b6f299c 100644
--- a/rust/kernel/workqueue.rs
+++ b/rust/kernel/workqueue.rs
@@ -189,12 +189,18 @@ use crate::{
     alloc::{AllocError, Flags},
     container_of,
     prelude::*,
-    sync::Arc,
-    sync::LockClassKey,
+    sync::{
+        aref::{
+            ARef,
+            AlwaysRefCounted, //
+        },
+        Arc,
+        LockClassKey, //
+    },
     time::Jiffies,
     types::Opaque,
 };
-use core::marker::PhantomData;
+use core::{marker::PhantomData, ptr::NonNull};
 
 /// Creates a [`Work`] initialiser with the given name and a newly-created lock class.
 #[macro_export]
@@ -425,10 +431,11 @@ pub unsafe trait RawDelayedWorkItem<const ID: u64>: RawWorkItem<ID> {}
 
 /// Defines the method that should be called directly when a work item is executed.
 ///
-/// This trait is implemented by `Pin<KBox<T>>` and [`Arc<T>`], and is mainly intended to be
-/// implemented for smart pointer types. For your own structs, you would implement [`WorkItem`]
-/// instead. The [`run`] method on this trait will usually just perform the appropriate
-/// `container_of` translation and then call into the [`run`][WorkItem::run] method from the
+/// This trait is implemented by `Pin<KBox<T>>`, [`Arc<T>`] and [`ARef<T>`], and
+/// is mainly intended to be implemented for smart pointer types. For your own
+/// structs, you would implement [`WorkItem`] instead. The [`run`] method on
+/// this trait will usually just perform the appropriate `container_of`
+/// translation and then call into the [`run`][WorkItem::run] method from the
 /// [`WorkItem`] trait.
 ///
 /// This trait is used when the `work_struct` field is defined using the [`Work`] helper.
@@ -934,6 +941,89 @@ where
 {
 }
 
+// SAFETY: Like the `Arc<T>` implementation, the `__enqueue` implementation for
+// `ARef<T>` obtains a `work_struct` from the `Work` field using
+// `T::raw_get_work`, so the same safety reasoning applies:
+//
+//   - `__enqueue` gets the `work_struct` from the `Work` field, using `T::raw_get_work`.
+//   - The only safe way to create a `Work` object is through `Work::new`.
+//   - `Work::new` makes sure that `T::Pointer::run` is passed to `init_work_with_key`.
+//   - Finally `Work` and `RawWorkItem` guarantee that the correct `Work` field
+//     will be used because of the ID const generic bound. This makes sure that `T::raw_get_work`
+//     uses the correct offset for the `Work` field, and `Work::new` picks the correct
+//     implementation of `WorkItemPointer` for `ARef<T>`.
+unsafe impl<T, const ID: u64> WorkItemPointer<ID> for ARef<T>
+where
+    T: AlwaysRefCounted,
+    T: WorkItem<ID, Pointer = Self>,
+    T: HasWork<T, ID>,
+{
+    unsafe extern "C" fn run(ptr: *mut bindings::work_struct) {
+        // The `__enqueue` method always uses a `work_struct` stored in a `Work<T, ID>`.
+        let ptr = ptr.cast::<Work<T, ID>>();
+
+        // SAFETY: This computes the pointer that `__enqueue` got from
+        // `ARef::into_raw`.
+        let ptr = unsafe { T::work_container_of(ptr) };
+
+        // SAFETY: The safety contract of `work_container_of` ensures that it
+        // returns a valid non-null pointer.
+        let ptr = unsafe { NonNull::new_unchecked(ptr) };
+
+        // SAFETY: This pointer comes from `ARef::into_raw` and we've been given
+        // back ownership.
+        let aref = unsafe { ARef::from_raw(ptr) };
+
+        T::run(aref)
+    }
+}
+
+// SAFETY: The `work_struct` raw pointer is guaranteed to be valid for the duration of the call to
+// the closure because we get it from an `ARef`, which means that the ref count will be at least 1,
+// and we don't drop the `ARef` ourselves. If `queue_work_on` returns true, it is further guaranteed
+// to be valid until a call to the function pointer in `work_struct` because we leak the memory it
+// points to, and only reclaim it if the closure returns false, or in `WorkItemPointer::run`, which
+// is what the function pointer in the `work_struct` must be pointing to, according to the safety
+// requirements of `WorkItemPointer`.
+unsafe impl<T, const ID: u64> RawWorkItem<ID> for ARef<T>
+where
+    T: AlwaysRefCounted,
+    T: WorkItem<ID, Pointer = Self>,
+    T: HasWork<T, ID>,
+{
+    type EnqueueOutput = Result<(), Self>;
+
+    unsafe fn __enqueue<F>(self, queue_work_on: F) -> Self::EnqueueOutput
+    where
+        F: FnOnce(*mut bindings::work_struct) -> bool,
+    {
+        let ptr = ARef::into_raw(self);
+
+        // SAFETY: Pointers from ARef::into_raw are valid and non-null.
+        let work_ptr = unsafe { T::raw_get_work(ptr.as_ptr()) };
+        // SAFETY: `raw_get_work` returns a pointer to a valid value.
+        let work_ptr = unsafe { Work::raw_get(work_ptr) };
+
+        if queue_work_on(work_ptr) {
+            Ok(())
+        } else {
+            // SAFETY: The work queue has not taken ownership of the pointer.
+            Err(unsafe { ARef::from_raw(ptr) })
+        }
+    }
+}
+
+// SAFETY: By the safety requirements of `HasDelayedWork`, the `work_struct` returned by methods in
+// `HasWork` provides a `work_struct` that is the `work` field of a `delayed_work`, and the rest of
+// the `delayed_work` has the same access rules as its `work` field.
+unsafe impl<T, const ID: u64> RawDelayedWorkItem<ID> for ARef<T>
+where
+    T: WorkItem<ID, Pointer = Self>,
+    T: HasDelayedWork<T, ID>,
+    T: AlwaysRefCounted,
+{
+}
+
 /// Returns the system work queue (`system_wq`).
 ///
 /// It is the one used by `schedule[_delayed]_work[_on]()`. Multi-CPU multi-threaded. There are
diff --git a/rust/kernel/xarray.rs b/rust/kernel/xarray.rs
index a49d6db28845..46e5f43223fe 100644
--- a/rust/kernel/xarray.rs
+++ b/rust/kernel/xarray.rs
@@ -172,6 +172,7 @@ pub struct StoreError<T> {
 }
 
 impl<T> From<StoreError<T>> for Error {
+    #[inline]
     fn from(value: StoreError<T>) -> Self {
         value.error
     }