CheckpointManager

Struct CheckpointManager 

pub struct CheckpointManager {
    config: CheckpointConfig,
    storage: Box<dyn CheckpointStorage>,
    actors: HashMap<u32, (String, String)>,
    last_snapshot: HashMap<u32, Instant>,
    pending: HashMap<u64, PendingSnapshot>,
    next_request_id: u64,
    checkpoint_history: HashMap<u32, Vec<String>>,
    total_completed: u64,
    total_failed: u64,
}
Expand description

Manages periodic checkpointing for persistent GPU actors.

The CheckpointManager orchestrates the checkpoint lifecycle:

  1. Periodically determines when a snapshot is due
  2. Issues SnapshotRequests (caller sends as H2K commands)
  3. Processes SnapshotResponses (caller feeds from K2H responses)
  4. Persists completed checkpoints to storage
  5. Enforces retention policy (deletes old checkpoints)

§Usage

use ringkernel_core::checkpoint::{CheckpointConfig, CheckpointManager};
use std::time::Duration;

let config = CheckpointConfig::new(Duration::from_secs(10))
    .with_max_snapshots(3)
    .with_storage_path("/tmp/checkpoints");

let mut manager = CheckpointManager::new(config);
manager.register_actor(0, "wave_sim_0", "fdtd_3d");

// In your poll loop:
for request in manager.poll_due_snapshots() {
    // Send as H2K SnapshotActor command
    h2k_queue.send(H2KMessage::snapshot_actor(
        request.request_id,
        request.actor_slot,
        request.buffer_offset,
    ));
}

// When K2H SnapshotComplete arrives:
manager.complete_snapshot(SnapshotResponse { ... })?;

Fields§

§config: CheckpointConfig
§storage: Box<dyn CheckpointStorage>
§actors: HashMap<u32, (String, String)>
§last_snapshot: HashMap<u32, Instant>
§pending: HashMap<u64, PendingSnapshot>
§next_request_id: u64
§checkpoint_history: HashMap<u32, Vec<String>>
§total_completed: u64
§total_failed: u64

Implementations§

§

impl CheckpointManager

pub fn new(config: CheckpointConfig) -> CheckpointManager

Create a new checkpoint manager with file storage at the configured path.

pub fn with_storage( config: CheckpointConfig, storage: Box<dyn CheckpointStorage>, ) -> CheckpointManager

Create a checkpoint manager with a custom storage backend.

pub fn register_actor( &mut self, actor_slot: u32, kernel_id: impl Into<String>, kernel_type: impl Into<String>, )

Register an actor for periodic checkpointing.

pub fn unregister_actor(&mut self, actor_slot: u32)

Unregister an actor from checkpointing.

pub fn is_enabled(&self) -> bool

Check if checkpointing is enabled.

pub fn config(&self) -> &CheckpointConfig

Get the checkpoint configuration.

pub fn pending_count(&self) -> usize

Get the number of pending snapshot requests.

pub fn total_completed(&self) -> u64

Get total completed snapshots.

pub fn total_failed(&self) -> u64

Get total failed snapshots.

pub fn poll_due_snapshots(&mut self) -> Vec<SnapshotRequest>

Poll for actors that are due for a snapshot.

Returns a list of SnapshotRequests that should be sent to the device as H2K SnapshotActor commands.

Each actor is only requested once per interval, and only if no prior request for that actor is still pending.

pub fn complete_snapshot( &mut self, response: SnapshotResponse, ) -> Result<Option<String>, RingKernelError>

Process a completed snapshot response from the device.

If the snapshot succeeded, the data is persisted to storage and the retention policy is enforced.

Returns the checkpoint name on success.

pub fn request_snapshot(&mut self, actor_slot: u32) -> Option<SnapshotRequest>

Manually request a snapshot for a specific actor, bypassing the interval timer.

This is useful for on-demand snapshots (e.g., before a risky operation) or in tests. Returns None if the actor is not registered.

pub fn cancel_pending(&mut self, request_id: u64) -> bool

Cancel a pending snapshot request.

Returns true if the request was found and cancelled.

pub fn cancel_all_pending(&mut self)

Cancel all pending snapshot requests.

pub fn load_latest( &self, actor_slot: u32, ) -> Result<Option<Checkpoint>, RingKernelError>

Load the most recent checkpoint for an actor.

pub fn list_checkpoints( &self, actor_slot: u32, ) -> Result<Vec<String>, RingKernelError>

List all checkpoint names for an actor.

pub fn storage(&self) -> &dyn CheckpointStorage

Get a reference to the storage backend.

Auto Trait Implementations§

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
§

impl<T> ArchivePointee for T

§

type ArchivedMetadata = ()

The archived version of the pointer metadata for this type.
§

fn pointer_metadata( _: &<T as ArchivePointee>::ArchivedMetadata, ) -> <T as Pointee>::Metadata

Converts some archived metadata to the pointer metadata for itself.
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
§

impl<F, W, T, D> Deserialize<With<T, W>, D> for F
where W: DeserializeWith<F, T, D>, D: Fallible + ?Sized, F: ?Sized,

§

fn deserialize( &self, deserializer: &mut D, ) -> Result<With<T, W>, <D as Fallible>::Error>

Deserializes using the given deserializer
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

§

impl<T> Instrument for T

§

fn instrument(self, span: Span) -> Instrumented<Self>

Instruments this type with the provided Span, returning an Instrumented wrapper. Read more
§

fn in_current_span(self) -> Instrumented<Self>

Instruments this type with the current Span, returning an Instrumented wrapper. Read more
Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T> IntoEither for T

Source§

fn into_either(self, into_left: bool) -> Either<Self, Self>

Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
§

impl<T> LayoutRaw for T

§

fn layout_raw(_: <T as Pointee>::Metadata) -> Result<Layout, LayoutError>

Gets the layout of the type.
§

impl<T> Pointable for T

§

const ALIGN: usize

The alignment of pointer.
§

type Init = T

The type for initializers.
§

unsafe fn init(init: <T as Pointable>::Init) -> usize

Initializes a value with the given initializer. Read more
§

unsafe fn deref<'a>(ptr: usize) -> &'a T

Dereferences the given pointer. Read more
§

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

Mutably dereferences the given pointer. Read more
§

unsafe fn drop(ptr: usize)

Drops the object pointed to by the given pointer. Read more
§

impl<T> Pointee for T

§

type Metadata = ()

The type for metadata in pointers and references to Self.
Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
§

impl<T> WithSubscriber for T

§

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

Attaches the provided Subscriber to this type, returning a WithDispatch wrapper. Read more
§

fn with_current_subscriber(self) -> WithDispatch<Self>

Attaches the current default Subscriber to this type, returning a WithDispatch wrapper. Read more