1#![warn(missing_docs)]
33
34#[cfg(feature = "cooperative")]
35pub mod cooperative;
36#[cfg(feature = "cuda")]
37mod device;
38#[cfg(feature = "cuda")]
39pub mod driver_api;
40#[cfg(feature = "cuda")]
41pub mod k2k_gpu;
42#[cfg(feature = "cuda")]
43mod kernel;
44#[cfg(feature = "cuda")]
45mod memory;
46#[cfg(feature = "cuda")]
47pub mod persistent;
48#[cfg(feature = "cuda")]
49mod runtime;
50#[cfg(feature = "cuda")]
51mod stencil;
52
53#[cfg(feature = "cuda")]
54pub use device::CudaDevice;
55#[cfg(feature = "cuda")]
56pub use kernel::CudaKernel;
57#[cfg(feature = "cuda")]
58pub use memory::{CudaBuffer, CudaControlBlock, CudaMemoryPool, CudaMessageQueue};
59#[cfg(feature = "cuda")]
60pub use runtime::CudaRuntime;
61#[cfg(feature = "cuda")]
62pub use stencil::{CompiledStencilKernel, LaunchConfig, StencilKernelLoader};
63
/// Re-exports the CUDA memory types under a dedicated module path.
#[cfg(feature = "cuda")]
pub mod memory_exports {
    pub use super::memory::CudaBuffer;
    pub use super::memory::CudaControlBlock;
    pub use super::memory::CudaMemoryPool;
    pub use super::memory::CudaMessageQueue;
}
69
70#[cfg(not(feature = "cuda"))]
72mod stub {
73 use async_trait::async_trait;
74 use ringkernel_core::error::{Result, RingKernelError};
75 use ringkernel_core::runtime::{
76 Backend, KernelHandle, KernelId, LaunchOptions, RingKernelRuntime, RuntimeMetrics,
77 };
78
79 pub struct CudaRuntime;
81
82 impl CudaRuntime {
83 pub async fn new() -> Result<Self> {
85 Err(RingKernelError::BackendUnavailable(
86 "CUDA feature not enabled".to_string(),
87 ))
88 }
89 }
90
91 #[async_trait]
92 impl RingKernelRuntime for CudaRuntime {
93 fn backend(&self) -> Backend {
94 Backend::Cuda
95 }
96
97 fn is_backend_available(&self, _backend: Backend) -> bool {
98 false
99 }
100
101 async fn launch(&self, _kernel_id: &str, _options: LaunchOptions) -> Result<KernelHandle> {
102 Err(RingKernelError::BackendUnavailable("CUDA".to_string()))
103 }
104
105 fn get_kernel(&self, _kernel_id: &KernelId) -> Option<KernelHandle> {
106 None
107 }
108
109 fn list_kernels(&self) -> Vec<KernelId> {
110 vec![]
111 }
112
113 fn metrics(&self) -> RuntimeMetrics {
114 RuntimeMetrics::default()
115 }
116
117 async fn shutdown(&self) -> Result<()> {
118 Ok(())
119 }
120 }
121}
122
123#[cfg(not(feature = "cuda"))]
124pub use stub::CudaRuntime;
125
/// Reports whether at least one CUDA device can be used.
///
/// Without the `cuda` feature this is always `false`. With it, the driver is
/// asked for its device count; a driver error, a count of zero, or a panic
/// raised during the query all yield `false`.
pub fn is_cuda_available() -> bool {
    #[cfg(not(feature = "cuda"))]
    {
        false
    }
    #[cfg(feature = "cuda")]
    {
        // The whole query (including dropping its result) stays inside
        // `catch_unwind`, so a panic there surfaces as "not available".
        let probe = std::panic::catch_unwind(|| {
            matches!(cudarc::driver::CudaContext::device_count(), Ok(found) if found > 0)
        });
        matches!(probe, Ok(true))
    }
}
150
/// Returns the number of CUDA devices visible to the driver.
///
/// Without the `cuda` feature this is always `0`. With it, the driver is
/// queried for its device count; a driver error, a panic raised during the
/// query, or a count that does not fit in `usize` (for example a negative
/// value) all yield `0`.
pub fn cuda_device_count() -> usize {
    #[cfg(feature = "cuda")]
    {
        std::panic::catch_unwind(|| {
            cudarc::driver::CudaContext::device_count()
                .ok()
                // Checked conversion: an `as` cast would turn a negative
                // count into a huge bogus device count.
                .and_then(|count| usize::try_from(count).ok())
                .unwrap_or(0)
        })
        .unwrap_or(0)
    }
    #[cfg(not(feature = "cuda"))]
    {
        0
    }
}
168
/// PTX source for a minimal `ring_kernel_main` entry point.
///
/// The module declares PTX ISA `.version 8.0`, `.target sm_89` and 64-bit
/// addressing. The entry takes four `u64` parameters (`control_block_ptr`,
/// `input_queue_ptr`, `output_queue_ptr`, `shared_state_ptr`); only
/// `control_block_ptr` is read. The body stores the 32-bit value `1` at byte
/// offset 8 from that pointer and returns — the embedded PTX comment
/// describes this store as marking the kernel terminated.
pub const RING_KERNEL_PTX_TEMPLATE: &str = r#"
.version 8.0
.target sm_89
.address_size 64

.visible .entry ring_kernel_main(
 .param .u64 control_block_ptr,
 .param .u64 input_queue_ptr,
 .param .u64 output_queue_ptr,
 .param .u64 shared_state_ptr
) {
 .reg .u64 %cb_ptr;
 .reg .u32 %one;

 // Load control block pointer
 ld.param.u64 %cb_ptr, [control_block_ptr];

 // Mark as terminated immediately (offset 8)
 mov.u32 %one, 1;
 st.global.u32 [%cb_ptr + 8], %one;

 ret;
}
"#;