// SPDX-License-Identifier: GPL-2.0

//! GPU buddy allocator bindings.
//!
//! C header: [`include/linux/gpu_buddy.h`](srctree/include/linux/gpu_buddy.h)
//!
//! This module provides Rust abstractions over the Linux kernel's GPU buddy
//! allocator, which implements a binary buddy memory allocator.
//!
//! The buddy allocator manages a contiguous address space and allocates blocks
//! in power-of-two sizes, useful for GPU physical memory management.
//!
//! # Examples
//!
//! Create a buddy allocator and perform a basic allocation:
//!
//! ```
//! use kernel::{
//!     gpu::buddy::{
//!         GpuBuddy,
//!         GpuBuddyAllocFlags,
//!         GpuBuddyAllocMode,
//!         GpuBuddyParams, //
//!     },
//!     prelude::*,
//!     ptr::Alignment,
//!     sizes::*, //
//! };
//!
//! // Create a 1GB buddy allocator with 4KB minimum chunk size.
//! let buddy = GpuBuddy::new(GpuBuddyParams {
//!     base_offset: 0,
//!     size: SZ_1G as u64,
//!     chunk_size: Alignment::new::<SZ_4K>(),
//! })?;
//!
//! assert_eq!(buddy.size(), SZ_1G as u64);
//! assert_eq!(buddy.chunk_size(), Alignment::new::<SZ_4K>());
//! let initial_free = buddy.avail();
//!
//! // Allocate 16MB. Block lands at the top of the address range.
//! let allocated = KBox::pin_init(
//!     buddy.alloc_blocks(
//!         GpuBuddyAllocMode::Simple,
//!         SZ_16M as u64,
//!         Alignment::new::<SZ_16M>(),
//!         GpuBuddyAllocFlags::default(),
//!     ),
//!     GFP_KERNEL,
//! )?;
//! assert_eq!(buddy.avail(), initial_free - SZ_16M as u64);
//!
//! let block = allocated.iter().next().expect("expected one block");
//! assert_eq!(block.offset(), (SZ_1G - SZ_16M) as u64);
//! assert_eq!(block.order(), 12); // 2^12 pages = 16MB
//! assert_eq!(block.size(), SZ_16M as u64);
//! assert_eq!(allocated.iter().count(), 1);
//!
//! // Dropping the allocation returns the range to the buddy allocator.
//! drop(allocated);
//! assert_eq!(buddy.avail(), initial_free);
//! # Ok::<(), Error>(())
//! ```
//!
//! Top-down allocation allocates from the highest addresses:
//!
//! ```
//! # use kernel::{
//! #     gpu::buddy::{GpuBuddy, GpuBuddyAllocMode, GpuBuddyAllocFlags, GpuBuddyParams},
//! #     prelude::*,
//! #     ptr::Alignment,
//! #     sizes::*, //
//! # };
//! # let buddy = GpuBuddy::new(GpuBuddyParams {
//! #     base_offset: 0,
//! #     size: SZ_1G as u64,
//! #     chunk_size: Alignment::new::<SZ_4K>(),
//! # })?;
//! # let initial_free = buddy.avail();
//! let topdown = KBox::pin_init(
//!     buddy.alloc_blocks(
//!         GpuBuddyAllocMode::TopDown,
//!         SZ_16M as u64,
//!         Alignment::new::<SZ_16M>(),
//!         GpuBuddyAllocFlags::default(),
//!     ),
//!     GFP_KERNEL,
//! )?;
//! assert_eq!(buddy.avail(), initial_free - SZ_16M as u64);
//!
//! let block = topdown.iter().next().expect("expected one block");
//! assert_eq!(block.offset(), (SZ_1G - SZ_16M) as u64);
//! assert_eq!(block.order(), 12);
//! assert_eq!(block.size(), SZ_16M as u64);
//!
//! // Dropping the allocation returns the range to the buddy allocator.
//! drop(topdown);
//! assert_eq!(buddy.avail(), initial_free);
//! # Ok::<(), Error>(())
//! ```
//!
//! Non-contiguous allocation can fill fragmented memory by returning multiple
//! blocks:
//!
//! ```
//! # use kernel::{
//! #     gpu::buddy::{
//! #         GpuBuddy, GpuBuddyAllocFlags, GpuBuddyAllocMode, GpuBuddyParams,
//! #     },
//! #     prelude::*,
//! #     ptr::Alignment,
//! #     sizes::*, //
//! # };
//! # let buddy = GpuBuddy::new(GpuBuddyParams {
//! #     base_offset: 0,
//! #     size: SZ_1G as u64,
//! #     chunk_size: Alignment::new::<SZ_4K>(),
//! # })?;
//! # let initial_free = buddy.avail();
//! // Create fragmentation by allocating 4MB blocks at [0,4M) and [8M,12M).
//! let frag1 = KBox::pin_init(
//!     buddy.alloc_blocks(
//!         GpuBuddyAllocMode::Range(0..SZ_4M as u64),
//!         SZ_4M as u64,
//!         Alignment::new::<SZ_4M>(),
//!         GpuBuddyAllocFlags::default(),
//!     ),
//!     GFP_KERNEL,
//! )?;
//! assert_eq!(buddy.avail(), initial_free - SZ_4M as u64);
//!
//! let frag2 = KBox::pin_init(
//!     buddy.alloc_blocks(
//!         GpuBuddyAllocMode::Range(SZ_8M as u64..(SZ_8M + SZ_4M) as u64),
//!         SZ_4M as u64,
//!         Alignment::new::<SZ_4M>(),
//!         GpuBuddyAllocFlags::default(),
//!     ),
//!     GFP_KERNEL,
//! )?;
//! assert_eq!(buddy.avail(), initial_free - SZ_8M as u64);
//!
//! // Allocate 8MB; this returns 2 blocks from the holes.
//! let fragmented = KBox::pin_init(
//!     buddy.alloc_blocks(
//!         GpuBuddyAllocMode::Range(0..SZ_16M as u64),
//!         SZ_8M as u64,
//!         Alignment::new::<SZ_4M>(),
//!         GpuBuddyAllocFlags::default(),
//!     ),
//!     GFP_KERNEL,
//! )?;
//! assert_eq!(buddy.avail(), initial_free - SZ_16M as u64);
//!
//! let (mut count, mut total) = (0u32, 0u64);
//! for block in fragmented.iter() {
//!     assert_eq!(block.size(), SZ_4M as u64);
//!     total += block.size();
//!     count += 1;
//! }
//! assert_eq!(total, SZ_8M as u64);
//! assert_eq!(count, 2);
//! # Ok::<(), Error>(())
//! ```
//!
//! Contiguous allocation fails when only fragmented space is available:
//!
//! ```
//! # use kernel::{
//! #     gpu::buddy::{
//! #         GpuBuddy, GpuBuddyAllocFlag, GpuBuddyAllocFlags, GpuBuddyAllocMode, GpuBuddyParams,
//! #     },
//! #     prelude::*,
//! #     ptr::Alignment,
//! #     sizes::*, //
//! # };
//! // Create a small 16MB buddy allocator with fragmented memory.
//! let small = GpuBuddy::new(GpuBuddyParams {
//!     base_offset: 0,
//!     size: SZ_16M as u64,
//!     chunk_size: Alignment::new::<SZ_4K>(),
//! })?;
//!
//! let _hole1 = KBox::pin_init(
//!     small.alloc_blocks(
//!         GpuBuddyAllocMode::Range(0..SZ_4M as u64),
//!         SZ_4M as u64,
//!         Alignment::new::<SZ_4M>(),
//!         GpuBuddyAllocFlags::default(),
//!     ),
//!     GFP_KERNEL,
//! )?;
//!
//! let _hole2 = KBox::pin_init(
//!     small.alloc_blocks(
//!         GpuBuddyAllocMode::Range(SZ_8M as u64..(SZ_8M + SZ_4M) as u64),
//!         SZ_4M as u64,
//!         Alignment::new::<SZ_4M>(),
//!         GpuBuddyAllocFlags::default(),
//!     ),
//!     GFP_KERNEL,
//! )?;
//!
//! // 8MB contiguous should fail: only two non-contiguous 4MB holes exist.
//! let result = KBox::pin_init(
//!     small.alloc_blocks(
//!         GpuBuddyAllocMode::Simple,
//!         SZ_8M as u64,
//!         Alignment::new::<SZ_4M>(),
//!         GpuBuddyAllocFlag::Contiguous,
//!     ),
//!     GFP_KERNEL,
//! );
assert!(result.is_err()); //! # Ok::<(), Error>(()) //! ``` use core::ops::Range; use crate::{ bindings, clist_create, error::to_result, interop::list::CListHead, new_mutex, prelude::*, ptr::Alignment, sync::{ lock::mutex::MutexGuard, Arc, Mutex, // }, types::Opaque, // }; /// Allocation mode for the GPU buddy allocator. /// /// The mode determines the primary allocation strategy. Modes are mutually /// exclusive: an allocation is either simple, range-constrained, or top-down. /// /// Orthogonal modifier flags (e.g., contiguous, clear) are specified separately /// via [`GpuBuddyAllocFlags`]. #[derive(Clone, Debug, PartialEq, Eq)] pub enum GpuBuddyAllocMode { /// Simple allocation without constraints. Simple, /// Range-based allocation within the given address range. Range(Range), /// Allocate from top of address space downward. TopDown, } impl GpuBuddyAllocMode { /// Returns the C flags corresponding to the allocation mode. fn as_flags(&self) -> usize { match self { Self::Simple => 0, Self::Range(_) => bindings::GPU_BUDDY_RANGE_ALLOCATION, Self::TopDown => bindings::GPU_BUDDY_TOPDOWN_ALLOCATION, } } /// Extracts the range start/end, defaulting to `(0, 0)` for non-range modes. fn range(&self) -> (u64, u64) { match self { Self::Range(range) => (range.start, range.end), _ => (0, 0), } } } crate::impl_flags!( /// Modifier flags for GPU buddy allocation. /// /// These flags can be combined with any [`GpuBuddyAllocMode`] to control /// additional allocation behavior. #[derive(Clone, Copy, Default, PartialEq, Eq)] pub struct GpuBuddyAllocFlags(usize); /// Individual modifier flag for GPU buddy allocation. #[derive(Clone, Copy, PartialEq, Eq)] pub enum GpuBuddyAllocFlag { /// Allocate physically contiguous blocks. Contiguous = bindings::GPU_BUDDY_CONTIGUOUS_ALLOCATION, /// Request allocation from cleared (zeroed) memory. Clear = bindings::GPU_BUDDY_CLEAR_ALLOCATION, /// Disable trimming of partially used blocks. 
TrimDisable = bindings::GPU_BUDDY_TRIM_DISABLE, } ); /// Parameters for creating a GPU buddy allocator. pub struct GpuBuddyParams { /// Base offset (in bytes) where the managed memory region starts. /// Allocations will be offset by this value. pub base_offset: u64, /// Total size (in bytes) of the address space managed by the allocator. pub size: u64, /// Minimum allocation unit / chunk size; must be >= 4KB. pub chunk_size: Alignment, } /// Inner structure holding the actual buddy allocator. /// /// # Synchronization /// /// The C `gpu_buddy` API requires synchronization (see `include/linux/gpu_buddy.h`). /// Internal locking ensures all allocator and free operations are properly /// synchronized, preventing races between concurrent allocations and the /// freeing that occurs when [`AllocatedBlocks`] is dropped. /// /// # Invariants /// /// The inner [`Opaque`] contains an initialized buddy allocator. #[pin_data(PinnedDrop)] struct GpuBuddyInner { #[pin] inner: Opaque, // TODO: Replace `Mutex<()>` with `Mutex>` once `Mutex::new()` // accepts `impl PinInit`. #[pin] lock: Mutex<()>, /// Cached creation parameters (do not change after init). params: GpuBuddyParams, } impl GpuBuddyInner { /// Create a pin-initializer for the buddy allocator. fn new(params: GpuBuddyParams) -> impl PinInit { let size = params.size; let chunk_size = params.chunk_size; // INVARIANT: `gpu_buddy_init` returns 0 on success, at which point the // `gpu_buddy` structure is initialized and ready for use with all // `gpu_buddy_*` APIs. `try_pin_init!` only completes if all fields succeed, // so the invariant holds when construction finishes. try_pin_init!(Self { inner <- Opaque::try_ffi_init(|ptr| { // SAFETY: `ptr` points to valid uninitialized memory from the pin-init // infrastructure. `gpu_buddy_init` will initialize the structure. 
to_result(unsafe { bindings::gpu_buddy_init(ptr, size, chunk_size.as_usize() as u64) }) }), lock <- new_mutex!(()), params, }) } /// Lock the mutex and return a guard for accessing the allocator. fn lock(&self) -> GpuBuddyGuard<'_> { GpuBuddyGuard { inner: self, _guard: self.lock.lock(), } } } #[pinned_drop] impl PinnedDrop for GpuBuddyInner { fn drop(self: Pin<&mut Self>) { let guard = self.lock(); // SAFETY: Per the type invariant, `inner` contains an initialized // allocator. `guard` provides exclusive access. unsafe { bindings::gpu_buddy_fini(guard.as_raw()) }; } } // SAFETY: `GpuBuddyInner` can be sent between threads. unsafe impl Send for GpuBuddyInner {} // SAFETY: `GpuBuddyInner` is `Sync` because `GpuBuddyInner::lock` // serializes all access to the C allocator, preventing data races. unsafe impl Sync for GpuBuddyInner {} /// Guard that proves the lock is held, enabling access to the allocator. /// /// The `_guard` holds the lock for the duration of this guard's lifetime. struct GpuBuddyGuard<'a> { inner: &'a GpuBuddyInner, _guard: MutexGuard<'a, ()>, } impl GpuBuddyGuard<'_> { /// Get a raw pointer to the underlying C `gpu_buddy` structure. fn as_raw(&self) -> *mut bindings::gpu_buddy { self.inner.inner.get() } } /// GPU buddy allocator instance. /// /// This structure wraps the C `gpu_buddy` allocator using reference counting. /// The allocator is automatically cleaned up when all references are dropped. /// /// Refer to the module-level documentation for usage examples. pub struct GpuBuddy(Arc); impl GpuBuddy { /// Create a new buddy allocator. /// /// The allocator manages a contiguous address space of the given size, with the /// specified minimum allocation unit (chunk_size must be at least 4KB). pub fn new(params: GpuBuddyParams) -> Result { Arc::pin_init(GpuBuddyInner::new(params), GFP_KERNEL).map(Self) } /// Get the base offset for allocations. 
pub fn base_offset(&self) -> u64 { self.0.params.base_offset } /// Get the chunk size (minimum allocation unit). pub fn chunk_size(&self) -> Alignment { self.0.params.chunk_size } /// Get the total managed size. pub fn size(&self) -> u64 { self.0.params.size } /// Get the available (free) memory in bytes. pub fn avail(&self) -> u64 { let guard = self.0.lock(); // SAFETY: Per the type invariant, `inner` contains an initialized allocator. // `guard` provides exclusive access. unsafe { (*guard.as_raw()).avail } } /// Allocate blocks from the buddy allocator. /// /// Returns a pin-initializer for [`AllocatedBlocks`]. pub fn alloc_blocks( &self, mode: GpuBuddyAllocMode, size: u64, min_block_size: Alignment, flags: impl Into, ) -> impl PinInit { let buddy_arc = Arc::clone(&self.0); let (start, end) = mode.range(); let mode_flags = mode.as_flags(); let modifier_flags = flags.into(); // Create pin-initializer that initializes list and allocates blocks. try_pin_init!(AllocatedBlocks { buddy: buddy_arc, list <- CListHead::new(), _: { // Reject zero-sized or inverted ranges. if let GpuBuddyAllocMode::Range(range) = &mode { if range.is_empty() { Err::<(), Error>(EINVAL)?; } } // Lock while allocating to serialize with concurrent frees. let guard = buddy.lock(); // SAFETY: Per the type invariant, `inner` contains an initialized // allocator. `guard` provides exclusive access. to_result(unsafe { bindings::gpu_buddy_alloc_blocks( guard.as_raw(), start, end, size, min_block_size.as_usize() as u64, list.as_raw(), mode_flags | usize::from(modifier_flags), ) })? } }) } } /// Allocated blocks from the buddy allocator with automatic cleanup. /// /// This structure owns a list of allocated blocks and ensures they are /// automatically freed when dropped. Use `iter()` to iterate over all /// allocated blocks. /// /// # Invariants /// /// - `list` is an initialized, valid list head containing allocated blocks. 
#[pin_data(PinnedDrop)] pub struct AllocatedBlocks { #[pin] list: CListHead, buddy: Arc, } impl AllocatedBlocks { /// Check if the block list is empty. pub fn is_empty(&self) -> bool { // An empty list head points to itself. !self.list.is_linked() } /// Iterate over allocated blocks. /// /// Returns an iterator yielding [`AllocatedBlock`] values. Each [`AllocatedBlock`] /// borrows `self` and is only valid for the duration of that borrow. pub fn iter(&self) -> impl Iterator> + '_ { let head = self.list.as_raw(); // SAFETY: Per the type invariant, `list` is an initialized sentinel `list_head` // and is not concurrently modified (we hold a `&self` borrow). The list contains // `gpu_buddy_block` items linked via `__bindgen_anon_1.link`. `Block` is // `#[repr(transparent)]` over `gpu_buddy_block`. let clist = unsafe { clist_create!( head, Block, bindings::gpu_buddy_block, __bindgen_anon_1.link ) }; clist .iter() .map(|this| AllocatedBlock { this, blocks: self }) } } #[pinned_drop] impl PinnedDrop for AllocatedBlocks { fn drop(self: Pin<&mut Self>) { let guard = self.buddy.lock(); // SAFETY: // - list is valid per the type's invariants. // - guard provides exclusive access to the allocator. unsafe { bindings::gpu_buddy_free_list(guard.as_raw(), self.list.as_raw(), 0); } } } /// A GPU buddy block. /// /// Transparent wrapper over C `gpu_buddy_block` structure. This type is returned /// as references during iteration over [`AllocatedBlocks`]. /// /// # Invariants /// /// The inner [`Opaque`] contains a valid, allocated `gpu_buddy_block`. #[repr(transparent)] struct Block(Opaque); impl Block { /// Get a raw pointer to the underlying C block. fn as_raw(&self) -> *mut bindings::gpu_buddy_block { self.0.get() } /// Get the block's raw offset in the buddy address space (without base offset). fn offset(&self) -> u64 { // SAFETY: `self.as_raw()` is valid per the type's invariants. unsafe { bindings::gpu_buddy_block_offset(self.as_raw()) } } /// Get the block order. 
fn order(&self) -> u32 { // SAFETY: `self.as_raw()` is valid per the type's invariants. unsafe { bindings::gpu_buddy_block_order(self.as_raw()) } } } // SAFETY: `Block` is a wrapper around `gpu_buddy_block` which can be // sent across threads safely. unsafe impl Send for Block {} // SAFETY: `Block` is only accessed through shared references after // allocation, and thus safe to access concurrently across threads. unsafe impl Sync for Block {} /// A buddy block paired with its owning [`AllocatedBlocks`] context. /// /// Unlike a raw block, which only knows its offset within the buddy address /// space, an [`AllocatedBlock`] also has access to the allocator's `base_offset` /// and `chunk_size`, enabling it to compute absolute offsets and byte sizes. /// /// Returned by [`AllocatedBlocks::iter()`]. pub struct AllocatedBlock<'a> { this: &'a Block, blocks: &'a AllocatedBlocks, } impl AllocatedBlock<'_> { /// Get the block's offset in the address space. /// /// Returns the absolute offset including the allocator's base offset. /// This is the actual address to use for accessing the allocated memory. pub fn offset(&self) -> u64 { self.blocks.buddy.params.base_offset + self.this.offset() } /// Get the block order (size = chunk_size << order). pub fn order(&self) -> u32 { self.this.order() } /// Get the block's size in bytes. pub fn size(&self) -> u64 { (self.blocks.buddy.params.chunk_size.as_usize() as u64) << self.this.order() } }