Skip to main content

MultiGpuCoordinator

Struct MultiGpuCoordinator 

pub struct MultiGpuCoordinator {
    config: MultiGpuConfig,
    devices: RwLock<RawRwLock, Vec<DeviceInfo>>,
    kernel_device_map: RwLock<RawRwLock, HashMap<KernelId, usize>>,
    device_kernel_counts: RwLock<RawRwLock, Vec<AtomicUsize>>,
    round_robin_counter: AtomicUsize,
    total_kernels: AtomicU64,
    custom_selector: RwLock<RawRwLock, Option<Arc<dyn Fn(&[DeviceStatus], &LaunchOptions) -> usize + Send + Sync>>>,
    topology: RwLock<RawRwLock, Option<GpuTopology>>,
}
Expand description

Multi-GPU coordinator for managing kernels across devices.

Fields§

§config: MultiGpuConfig
§devices: RwLock<RawRwLock, Vec<DeviceInfo>>
§kernel_device_map: RwLock<RawRwLock, HashMap<KernelId, usize>>
§device_kernel_counts: RwLock<RawRwLock, Vec<AtomicUsize>>
§round_robin_counter: AtomicUsize
§total_kernels: AtomicU64
§custom_selector: RwLock<RawRwLock, Option<Arc<dyn Fn(&[DeviceStatus], &LaunchOptions) -> usize + Send + Sync>>>
§topology: RwLock<RawRwLock, Option<GpuTopology>>

Implementations§

§

impl MultiGpuCoordinator

pub fn new(config: MultiGpuConfig) -> Arc<MultiGpuCoordinator>

Create a new multi-GPU coordinator.

pub fn register_device(&self, device: DeviceInfo)

Register a device with the coordinator.

pub fn unregister_device(&self, index: usize) -> DeviceUnregisterResult

Unregister a device and plan kernel migrations.

This method:

  1. Identifies all kernels on the device being removed
  2. Finds target devices for each kernel using load balancing
  3. Creates migration plans for kernels that can be moved
  4. Marks orphaned kernels that have no migration target
  5. Updates internal routing tables

The caller is responsible for executing the actual migrations using KernelMigrator with the returned KernelMigrationPlan entries.

pub fn devices(&self) -> Vec<DeviceInfo>

Get all registered devices.

pub fn device(&self, index: usize) -> Option<DeviceInfo>

Get device info by index.

pub fn device_count(&self) -> usize

Get number of devices.

pub fn select_device( &self, options: &LaunchOptions, ) -> Result<usize, RingKernelError>

Select a device for launching a kernel.

pub fn assign_kernel(&self, kernel_id: KernelId, device_index: usize)

Assign a kernel to a device.

pub fn remove_kernel(&self, kernel_id: &KernelId)

Remove a kernel assignment.

pub fn get_kernel_device(&self, kernel_id: &KernelId) -> Option<usize>

Get the index of the device a kernel is assigned to, if any.

pub fn kernels_on_device(&self, device_index: usize) -> Vec<KernelId>

Get all kernels on a device.

pub fn get_all_status(&self) -> Vec<DeviceStatus>

Get status of all devices.

pub fn get_device_status(&self, device_index: usize) -> Option<DeviceStatus>

Get status of a specific device.

pub fn set_custom_selector<F>(&self, selector: F)
where F: Fn(&[DeviceStatus], &LaunchOptions) -> usize + Send + Sync + 'static,

Set custom device selector.

pub fn stats(&self) -> MultiGpuStats

Get coordinator statistics.

pub fn can_p2p(&self, device_a: usize, device_b: usize) -> bool

Check if P2P is available between two devices.

pub fn update_device_memory(&self, device_index: usize, available_memory: u64)

Update device memory info.

pub fn discover_topology(&self) -> GpuTopology

Discover the GPU topology (falls back to estimates if probing is not available).

pub fn topology(&self) -> GpuTopology

Get the current topology (discovers it first if it is not cached).

pub fn set_topology(&self, topology: GpuTopology)

Set custom topology (for testing or manual configuration).

pub fn select_device_for_k2k( &self, source_kernel: &KernelId, ) -> Result<usize, RingKernelError>

Get the best device for communicating with a source kernel.

pub fn request_migration( &self, kernel_id: &KernelId, target_device: usize, ) -> Result<MigrationRequest, RingKernelError>

Request to migrate a kernel to another device.

pub fn complete_migration( &self, request: &MigrationRequest, ) -> Result<(), RingKernelError>

Complete a migration (updates internal mappings).

Auto Trait Implementations§

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
§

impl<T> ArchivePointee for T

§

type ArchivedMetadata = ()

The archived version of the pointer metadata for this type.
§

fn pointer_metadata( _: &<T as ArchivePointee>::ArchivedMetadata, ) -> <T as Pointee>::Metadata

Converts some archived metadata to the pointer metadata for itself.
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
§

impl<F, W, T, D> Deserialize<With<T, W>, D> for F
where W: DeserializeWith<F, T, D>, D: Fallible + ?Sized, F: ?Sized,

§

fn deserialize( &self, deserializer: &mut D, ) -> Result<With<T, W>, <D as Fallible>::Error>

Deserializes using the given deserializer
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

§

impl<T> Instrument for T

§

fn instrument(self, span: Span) -> Instrumented<Self>

Instruments this type with the provided Span, returning an Instrumented wrapper. Read more
§

fn in_current_span(self) -> Instrumented<Self>

Instruments this type with the current Span, returning an Instrumented wrapper. Read more
Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T> IntoEither for T

Source§

fn into_either(self, into_left: bool) -> Either<Self, Self>

Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
§

impl<T> LayoutRaw for T

§

fn layout_raw(_: <T as Pointee>::Metadata) -> Result<Layout, LayoutError>

Gets the layout of the type.
§

impl<T> Pointable for T

§

const ALIGN: usize

The alignment of pointer.
§

type Init = T

The type for initializers.
§

unsafe fn init(init: <T as Pointable>::Init) -> usize

Initializes an object with the given initializer. Read more
§

unsafe fn deref<'a>(ptr: usize) -> &'a T

Dereferences the given pointer. Read more
§

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

Mutably dereferences the given pointer. Read more
§

unsafe fn drop(ptr: usize)

Drops the object pointed to by the given pointer. Read more
§

impl<T> Pointee for T

§

type Metadata = ()

The type for metadata in pointers and references to Self.
Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
§

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

§

fn vzip(self) -> V

§

impl<T> WithSubscriber for T

§

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

Attaches the provided Subscriber to this type, returning a WithDispatch wrapper. Read more
§

fn with_current_subscriber(self) -> WithDispatch<Self>

Attaches the current default Subscriber to this type, returning a WithDispatch wrapper. Read more