Crate Structure

Workspace Organization

RustCompute/
├── Cargo.toml                    # Workspace manifest
├── README.md
├── CLAUDE.md                     # AI assistant guidance
├── LICENSE-MIT / LICENSE-APACHE
│
├── crates/
│   ├── ringkernel/               # Main facade crate (re-exports)
│   │   ├── Cargo.toml            # Dependencies + 11 examples
│   │   └── src/lib.rs
│   │
│   ├── ringkernel-core/          # Core traits and types
│   │   ├── Cargo.toml
│   │   └── src/
│   │       ├── lib.rs
│   │       ├── message.rs        # RingMessage trait, priority constants
│   │       ├── queue.rs          # MessageQueue trait
│   │       ├── runtime.rs        # RingKernel, KernelHandle, LaunchOptions
│   │       ├── context.rs        # RingContext struct
│   │       ├── control.rs        # ControlBlock struct
│   │       ├── telemetry.rs      # TelemetryBuffer, MetricsCollector
│   │       ├── pubsub.rs         # PubSubBroker, Topic wildcards
│   │       ├── hlc.rs            # HlcTimestamp, HlcClock
│   │       └── error.rs          # Error types
│   │
│   ├── ringkernel-derive/        # Proc macros (working)
│   │   └── src/lib.rs
│   │
│   ├── ringkernel-cpu/           # CPU backend (working)
│   │   └── src/
│   │       ├── lib.rs
│   │       └── runtime.rs
│   │
│   ├── ringkernel-cuda/          # CUDA backend (working)
│   │   ├── Cargo.toml
│   │   ├── src/
│   │   │   ├── lib.rs
│   │   │   ├── runtime.rs        # CudaRuntime implementation
│   │   │   └── ptx.rs            # PTX template for persistent kernels
│   │   └── tests/
│   │       └── gpu_execution_verify.rs  # GPU execution verification
│   │
│   ├── ringkernel-metal/         # Metal backend (scaffolded)
│   │   └── src/lib.rs
│   │
│   ├── ringkernel-wgpu/          # WebGPU backend (working)
│   │   └── src/lib.rs
│   │
│   ├── ringkernel-codegen/       # Kernel code generation
│   │   └── src/lib.rs
│   │
│   ├── ringkernel-cuda-codegen/  # Rust-to-CUDA transpiler
│   │   └── src/
│   │       ├── lib.rs            # Public API (3 kernel types)
│   │       ├── transpiler.rs     # Core transpilation engine
│   │       ├── intrinsics.rs     # GPU intrinsic mappings (40+)
│   │       ├── stencil.rs        # Stencil kernel support
│   │       ├── types.rs          # Type mapping (Rust → CUDA)
│   │       ├── dsl.rs            # DSL functions (block_idx_x, etc.)
│   │       ├── validation.rs     # Code validation with modes
│   │       ├── loops.rs          # Loop transpilation (for/while/loop)
│   │       ├── shared.rs         # Shared memory (__shared__)
│   │       ├── ring_kernel.rs    # Ring kernel generation
│   │       └── handler.rs        # Handler function integration
│   │
│   ├── ringkernel-wgpu-codegen/  # Rust-to-WGSL transpiler
│   │
│   ├── ringkernel-ecosystem/     # Integration utilities
│   │   └── src/lib.rs
│   │
│   ├── ringkernel-audio-fft/     # Example: GPU audio processing
│   │   └── src/lib.rs
│   │
│   ├── ringkernel-wavesim/       # Example: 2D wave simulation
│   │   └── src/
│   │       ├── lib.rs
│   │       ├── simulation/       # Grid, kernels, backends
│   │       └── gui/              # Interactive visualization
│   │
│   ├── ringkernel-wavesim3d/     # Showcase: 3D wave simulation
│   │   └── src/
│   │       ├── lib.rs
│   │       ├── simulation/       # 3D FDTD, actor backend, physics
│   │       ├── audio/            # Binaural audio, sources, virtual head
│   │       ├── visualization/    # Volume renderer, slices, camera
│   │       └── gui/              # Controls panel
│   │
│   ├── ringkernel-txmon/         # Showcase: Transaction monitoring
│   │   └── src/
│   │       ├── lib.rs
│   │       ├── types/            # Transaction, CustomerProfile, Alert
│   │       ├── factory/          # Transaction generator
│   │       ├── monitoring/       # Rule engine
│   │       ├── gui/              # Real-time fraud detection UI
│   │       ├── cuda/             # GPU backends (batch, ring, stencil)
│   │       └── bin/
│   │           ├── txmon.rs      # Main GUI binary
│   │           └── benchmark.rs  # GPU benchmark
│   │
│   ├── ringkernel-accnet/        # Showcase: Accounting network analytics
│   │   └── src/
│   │       ├── lib.rs
│   │       └── ...               # Network analysis, fraud detection
│   │
│   └── ringkernel-procint/       # Showcase: Process intelligence
│       └── src/
│           ├── lib.rs
│           └── ...               # DFG mining, pattern detection
│
├── examples/                     # 20+ working examples
│   ├── basic/
│   │   ├── hello_kernel.rs       # Runtime, lifecycle, suspend/resume
│   │   ├── kernel_states.rs      # State machine, multi-kernel
│   │   └── wgpu_hello.rs         # WebGPU backend
│   ├── messaging/
│   │   ├── request_response.rs   # Correlation IDs, priorities
│   │   ├── pub_sub.rs            # Topic wildcards, QoS
│   │   └── kernel_to_kernel.rs   # K2K direct messaging
│   ├── cuda-codegen/             # CUDA code generation examples
│   │   ├── global_kernel.rs      # SAXPY, halo exchange, array init
│   │   ├── stencil_kernel.rs     # FDTD wave, heat diffusion, GridPos
│   │   └── ring_kernel.rs        # Persistent kernels, HLC, K2K
│   ├── web-api/
│   │   └── axum_api.rs           # REST API integration
│   ├── data-processing/
│   │   └── batch_processor.rs    # Data pipelines
│   ├── monitoring/
│   │   └── telemetry.rs          # Metrics, alerts
│   ├── ecosystem/
│   │   ├── grpc_server.rs        # gRPC patterns
│   │   ├── config_management.rs  # TOML, env vars
│   │   └── ml_pipeline.rs        # ML inference
│   ├── macros/
│   │   └── derive_example.rs     # RingMessage derive macro
│   └── advanced/
│       ├── multi_gpu.rs          # Load balancing
│       └── educational_modes.rs  # WaveSim parallel computing modes
│
├── docs/                         # Architecture documentation
│   ├── 01-architecture-overview.md
│   ├── 02-crate-structure.md
│   └── ...
│
└── benches/
    └── serialization.rs

Crate Dependencies

                    ┌──────────────────┐
                    │   ringkernel     │  (facade - re-exports all)
                    └────────┬─────────┘
                             │
           ┌─────────────────┼─────────────────┐
           │                 │                 │
           ▼                 ▼                 ▼
    ┌─────────────┐  ┌─────────────┐  ┌─────────────┐
    │ ringkernel- │  │ ringkernel- │  │ ringkernel- │
    │    cpu      │  │    cuda     │  │  ecosystem  │
    │  (working)  │  │  (working)  │  │             │
    └──────┬──────┘  └──────┬──────┘  └──────┬──────┘
           │                │                 │
           └────────────────┼─────────────────┘
                            │
                            ▼
                    ┌──────────────────┐
                    │ ringkernel-core  │
                    │ (traits, types)  │
                    └────────┬─────────┘
                             │
              ┌──────────────┼──────────────┐
              │              │              │
              ▼              ▼              ▼
       ┌───────────┐  ┌───────────┐  ┌───────────┐
       │ ringkernel│  │ ringkernel│  │   rkyv    │
       │  -derive  │  │ -codegen  │  │ (zero-copy│
       │ (working) │  │           │  │  serde)   │
       └───────────┘  └───────────┘  └───────────┘
                            │
                            ▼
                    ┌───────────────┐
                    │ ringkernel-   │
                    │ cuda-codegen  │
                    │ (transpiler)  │
                    └───────┬───────┘
                            │
                            ▼
                    ┌───────────────┐
                    │ ringkernel-   │
                    │    txmon      │
                    │ (showcase)    │
                    └───────────────┘

Crate Descriptions

| Crate | Status | Description |
|-------|--------|-------------|
| ringkernel | Working | Main facade, re-exports everything |
| ringkernel-core | Working | Core traits: RingKernel, KernelHandle, HLC, PubSub, K2K |
| ringkernel-cpu | Working | CPU backend for development/testing |
| ringkernel-cuda | Working | NVIDIA CUDA backend with PTX kernels |
| ringkernel-metal | Scaffolded | Apple Metal backend (API defined) |
| ringkernel-wgpu | Working | WebGPU cross-platform backend |
| ringkernel-derive | Working | Proc macros for message/kernel definitions |
| ringkernel-codegen | In Development | GPU kernel code generation |
| ringkernel-cuda-codegen | Working | Rust-to-CUDA transpiler for GPU kernels |
| ringkernel-wgpu-codegen | Working | Rust-to-WGSL transpiler for GPU kernels |
| ringkernel-ecosystem | Working | Integration utilities |
| ringkernel-audio-fft | Working | Example: GPU audio FFT processing |
| ringkernel-wavesim | Working | Example: 2D wave simulation with FDTD |
| ringkernel-wavesim3d | Working | Showcase: 3D wave simulation with binaural audio |
| ringkernel-txmon | Working | Showcase: GPU-accelerated transaction monitoring |
| ringkernel-accnet | Working | Showcase: Accounting network analytics |
| ringkernel-procint | Working | Showcase: Process intelligence with DFG mining |

Cargo.toml (Workspace Root)

[workspace]
resolver = "2"
members = [
    "crates/ringkernel",
    "crates/ringkernel-core",
    "crates/ringkernel-derive",
    "crates/ringkernel-cuda",
    "crates/ringkernel-metal",
    "crates/ringkernel-wgpu",
    "crates/ringkernel-cpu",
    "crates/ringkernel-codegen",
]

[workspace.package]
version = "0.1.2"
edition = "2021"
rust-version = "1.75"
license = "MIT OR Apache-2.0"
repository = "https://github.com/example/ringkernel"
keywords = ["gpu", "cuda", "actor", "hpc", "compute"]
categories = ["concurrency", "asynchronous", "science"]

[workspace.dependencies]
# Async runtime
tokio = { version = "1.35", features = ["rt-multi-thread", "sync", "macros"] }
async-trait = "0.1"
futures = "0.3"

# Serialization (zero-copy)
rkyv = { version = "0.7", features = ["validation", "strict"] }
zerocopy = { version = "0.7", features = ["derive"] }
bytemuck = { version = "1.14", features = ["derive"] }

# Error handling
thiserror = "1.0"
anyhow = "1.0"

# Logging
tracing = "0.1"
tracing-subscriber = "0.3"

# GPU backends
# NOTE: Cargo rejects `optional = true` inside [workspace.dependencies].
# A member crate that wants one of these to be optional declares it in its own
# manifest, e.g. `cudarc = { workspace = true, optional = true }`.
cudarc = "0.10"                                        # CUDA
metal = "0.27"                                         # Metal
wgpu = "0.19"                                          # WebGPU
ash = "0.37"                                           # Vulkan

# Proc macros
syn = { version = "2.0", features = ["full", "parsing"] }
quote = "1.0"
proc-macro2 = "1.0"

# Testing
criterion = "0.5"
proptest = "1.4"

Feature Flags

# crates/ringkernel/Cargo.toml
[features]
default = ["cpu"]

# Backends (enable as needed)
cpu = ["ringkernel-cpu"]       # Always available
cuda = ["ringkernel-cuda"]     # Requires NVIDIA GPU + CUDA toolkit

# All backends
full = ["cpu", "cuda"]

# Built-in features (always available in ringkernel-core)
# - telemetry: TelemetryPipeline, MetricsCollector
# - pubsub: PubSubBroker, Topic wildcards
# - hlc: HlcTimestamp, HlcClock

Enabling CUDA

[dependencies]
ringkernel = { version = "0.1", features = ["cuda"] }

Requires:

  • NVIDIA GPU with compute capability 3.5+
  • CUDA toolkit installed
  • CUDA_PATH environment variable (or /usr/local/cuda)

Platform-Specific Compilation

# crates/ringkernel-cuda/Cargo.toml
[target.'cfg(any(target_os = "linux", target_os = "windows"))'.dependencies]
cudarc = { workspace = true }

# crates/ringkernel-metal/Cargo.toml
[target.'cfg(any(target_os = "macos", target_os = "ios"))'.dependencies]
metal = { workspace = true }

Build Scripts

CUDA Backend (build.rs)

// crates/ringkernel-cuda/build.rs
/// Build script entry point for the CUDA backend.
///
/// Emits Cargo directives that:
/// - re-run the script when `src/kernels/` or the `CUDA_PATH` variable changes,
/// - add the CUDA toolkit's library directory to the native link search path,
/// - link against the `cuda` and `nvrtc` libraries.
///
/// The toolkit root is taken from `CUDA_PATH`, falling back to
/// `/usr/local/cuda` when the variable is unset or not valid Unicode.
fn main() {
    println!("cargo:rerun-if-changed=src/kernels/");
    // The search path below is derived from CUDA_PATH, so a changed value must
    // invalidate the cached build-script output.
    println!("cargo:rerun-if-env-changed=CUDA_PATH");

    // Find CUDA installation
    let cuda_path = std::env::var("CUDA_PATH")
        .unwrap_or_else(|_| "/usr/local/cuda".to_string());

    // A build script is compiled for the host, so `cfg!(target_os = ...)` would
    // describe the wrong platform when cross-compiling; Cargo publishes the
    // real target through CARGO_CFG_TARGET_OS instead.
    let target_os = std::env::var("CARGO_CFG_TARGET_OS").unwrap_or_default();

    // The Windows toolkit keeps its import libraries under `lib/x64`;
    // every other target keeps the original `lib64` directory.
    let lib_dir = if target_os == "windows" {
        "lib/x64"
    } else {
        "lib64"
    };

    println!("cargo:rustc-link-search=native={}/{}", cuda_path, lib_dir);
    println!("cargo:rustc-link-lib=cuda");
    println!("cargo:rustc-link-lib=nvrtc");

    // Compile PTX at build time (optional)
    #[cfg(feature = "precompile-ptx")]
    compile_ptx_kernels();
}

Next: Core Abstractions