1use thiserror::Error;
4
/// Shorthand for [`std::result::Result`] with the error type fixed to
/// [`RingKernelError`].
pub type Result<T> = std::result::Result<T, RingKernelError>;
7
8#[derive(Error, Debug)]
10pub enum RingKernelError {
11 #[error("kernel not found: {0}")]
14 KernelNotFound(String),
15
16 #[error("kernel already active: {0}")]
18 KernelAlreadyActive(String),
19
20 #[error("kernel not active: {0}")]
22 KernelNotActive(String),
23
24 #[error("kernel already terminated: {0}")]
26 KernelTerminated(String),
27
28 #[error("invalid state transition from {from:?} to {to:?}")]
30 InvalidStateTransition {
31 from: String,
33 to: String,
35 },
36
37 #[error("invalid state: expected {expected}, got {actual}")]
39 InvalidState {
40 expected: String,
42 actual: String,
44 },
45
46 #[error("kernel launch failed: {0}")]
48 LaunchFailed(String),
49
50 #[error("kernel compilation failed: {0}")]
52 CompilationError(String),
53
54 #[error("queue full: capacity {capacity}, attempted to enqueue message")]
57 QueueFull {
58 capacity: usize,
60 },
61
62 #[error("queue empty")]
64 QueueEmpty,
65
66 #[error("serialization error: {0}")]
68 SerializationError(String),
69
70 #[error("deserialization error: {0}")]
72 DeserializationError(String),
73
74 #[error("message validation failed: {0}")]
76 ValidationError(String),
77
78 #[error("message too large: {size} bytes (max: {max} bytes)")]
80 MessageTooLarge {
81 size: usize,
83 max: usize,
85 },
86
87 #[error("message timeout after {0:?}")]
89 Timeout(std::time::Duration),
90
91 #[error("GPU memory allocation failed: {size} bytes - {reason}")]
94 AllocationFailed {
95 size: usize,
97 reason: String,
99 },
100
101 #[error("host memory allocation failed: {size} bytes")]
103 HostAllocationFailed {
104 size: usize,
106 },
107
108 #[error("memory transfer failed: {0}")]
110 TransferFailed(String),
111
112 #[error("invalid alignment: expected {expected}, got {actual}")]
114 InvalidAlignment {
115 expected: usize,
117 actual: usize,
119 },
120
121 #[error("out of GPU memory: requested {requested} bytes, available {available} bytes")]
123 OutOfMemory {
124 requested: usize,
126 available: usize,
128 },
129
130 #[error("memory pool exhausted")]
132 PoolExhausted,
133
134 #[error("invalid index: {0}")]
136 InvalidIndex(usize),
137
138 #[error("memory error: {0}")]
140 MemoryError(String),
141
142 #[error("backend not available: {0}")]
145 BackendUnavailable(String),
146
147 #[error("backend initialization failed: {0}")]
149 BackendInitFailed(String),
150
151 #[error("no GPU device found")]
153 NoDeviceFound,
154
155 #[error("device selection failed: {0}")]
157 DeviceSelectionFailed(String),
158
159 #[error("backend error: {0}")]
161 BackendError(String),
162
163 #[error("deadlock detected")]
166 DeadlockDetected,
167
168 #[error("lock poisoned")]
170 LockPoisoned,
171
172 #[error("channel closed")]
174 ChannelClosed,
175
176 #[error("clock skew too large: {skew_ms}ms (max: {max_ms}ms)")]
179 ClockSkew {
180 skew_ms: u64,
182 max_ms: u64,
184 },
185
186 #[error("invalid timestamp")]
188 InvalidTimestamp,
189
190 #[error("K2K error: {0}")]
193 K2KError(String),
194
195 #[error("K2K destination not found: {0}")]
197 K2KDestinationNotFound(String),
198
199 #[error("K2K delivery failed: {0}")]
201 K2KDeliveryFailed(String),
202
203 #[error("pub/sub error: {0}")]
206 PubSubError(String),
207
208 #[error("topic not found: {0}")]
210 TopicNotFound(String),
211
212 #[error("subscription error: {0}")]
214 SubscriptionError(String),
215
216 #[error("multi-GPU error: {0}")]
219 MultiGpuError(String),
220
221 #[error("device not available: {0}")]
223 DeviceNotAvailable(String),
224
225 #[error("cross-device transfer failed: {0}")]
227 CrossDeviceTransferFailed(String),
228
229 #[error("telemetry error: {0}")]
232 TelemetryError(String),
233
234 #[error("metrics collection failed: {0}")]
236 MetricsCollectionFailed(String),
237
238 #[error("invalid configuration: {0}")]
241 InvalidConfig(String),
242
243 #[error("missing configuration: {0}")]
245 MissingConfig(String),
246
247 #[error("I/O error: {0}")]
250 IoError(#[from] std::io::Error),
251
252 #[error("internal error: {0}")]
255 Internal(String),
256
257 #[error("feature not supported: {0}")]
259 NotSupported(String),
260
261 #[error("operation cancelled")]
263 Cancelled,
264}
265
266impl RingKernelError {
267 pub fn is_recoverable(&self) -> bool {
269 matches!(
270 self,
271 RingKernelError::QueueFull { .. }
272 | RingKernelError::QueueEmpty
273 | RingKernelError::Timeout(_)
274 | RingKernelError::PoolExhausted
275 )
276 }
277
278 pub fn is_resource_error(&self) -> bool {
280 matches!(
281 self,
282 RingKernelError::AllocationFailed { .. }
283 | RingKernelError::HostAllocationFailed { .. }
284 | RingKernelError::OutOfMemory { .. }
285 | RingKernelError::PoolExhausted
286 )
287 }
288
289 pub fn is_fatal(&self) -> bool {
291 matches!(
292 self,
293 RingKernelError::BackendInitFailed(_)
294 | RingKernelError::NoDeviceFound
295 | RingKernelError::LockPoisoned
296 | RingKernelError::Internal(_)
297 )
298 }
299}
300
#[cfg(test)]
mod tests {
    use super::*;

    /// `Display` output must match the `#[error(...)]` templates exactly.
    #[test]
    fn test_error_display() {
        let err = RingKernelError::KernelNotFound("test_kernel".to_string());
        assert_eq!(err.to_string(), "kernel not found: test_kernel");

        let err = RingKernelError::QueueFull { capacity: 1024 };
        assert_eq!(
            err.to_string(),
            "queue full: capacity 1024, attempted to enqueue message"
        );

        let err = RingKernelError::MessageTooLarge {
            size: 2048,
            max: 1024,
        };
        assert_eq!(
            err.to_string(),
            "message too large: 2048 bytes (max: 1024 bytes)"
        );

        // The duration is interpolated with `{:?}`, so it carries its unit suffix.
        let err = RingKernelError::Timeout(std::time::Duration::from_millis(250));
        assert_eq!(err.to_string(), "message timeout after 250ms");

        // `from` / `to` are interpolated with `{:?}`, so the strings appear quoted.
        let err = RingKernelError::InvalidStateTransition {
            from: "Active".to_string(),
            to: "Launched".to_string(),
        };
        assert_eq!(
            err.to_string(),
            "invalid state transition from \"Active\" to \"Launched\""
        );
    }

    /// Each predicate accepts a representative member of its category.
    #[test]
    fn test_error_classification() {
        assert!(RingKernelError::QueueFull { capacity: 1024 }.is_recoverable());
        assert!(RingKernelError::OutOfMemory {
            requested: 1000,
            available: 100
        }
        .is_resource_error());
        assert!(RingKernelError::LockPoisoned.is_fatal());
    }

    /// Predicates must also reject variants outside their category; without
    /// these checks an always-`true` implementation would pass.
    #[test]
    fn test_error_classification_negative_and_overlap() {
        let fatal = RingKernelError::LockPoisoned;
        assert!(!fatal.is_recoverable());
        assert!(!fatal.is_resource_error());

        let recoverable = RingKernelError::QueueEmpty;
        assert!(!recoverable.is_resource_error());
        assert!(!recoverable.is_fatal());

        let resource = RingKernelError::HostAllocationFailed { size: 64 };
        assert!(!resource.is_recoverable());
        assert!(!resource.is_fatal());

        // `PoolExhausted` is the one variant listed by two predicates.
        let pool = RingKernelError::PoolExhausted;
        assert!(pool.is_recoverable());
        assert!(pool.is_resource_error());
        assert!(!pool.is_fatal());

        // A variant that none of the predicates list.
        let uncategorized = RingKernelError::KernelNotFound("k".to_string());
        assert!(!uncategorized.is_recoverable());
        assert!(!uncategorized.is_resource_error());
        assert!(!uncategorized.is_fatal());
    }

    /// `#[from]` on `IoError` provides `From<std::io::Error>` and exposes the
    /// wrapped error as the source.
    #[test]
    fn test_io_error_conversion() {
        let io = std::io::Error::new(std::io::ErrorKind::NotFound, "missing file");
        let err = RingKernelError::from(io);

        assert!(matches!(err, RingKernelError::IoError(_)));
        assert_eq!(err.to_string(), "I/O error: missing file");
        assert!(std::error::Error::source(&err).is_some());
    }
}