Testing and Benchmarking
This module covers implementing comprehensive tests and performance benchmarks for DotCompute contributions.
Test Organization
Directory Structure
tests/
├── Unit/ # Component tests
│ ├── DotCompute.Abstractions.Tests/
│ ├── DotCompute.Memory.Tests/
│ └── DotCompute.Generators.Tests/
├── Integration/ # Cross-component tests
│ └── DotCompute.Integration.Tests/
├── Hardware/ # GPU-specific tests
│ ├── DotCompute.Hardware.Cuda.Tests/
│ └── DotCompute.Hardware.Metal.Tests/
└── Shared/ # Test utilities
└── DotCompute.TestUtilities/
Test Categories
// Unit test - no hardware required
[Fact]
[Trait("Category", "Unit")]
public void KernelDefinition_ValidatesParameters()
{
// ...
}
// Integration test - may need backends
[Fact]
[Trait("Category", "Integration")]
public async Task ComputeService_ExecutesKernel()
{
// ...
}
// Hardware test - requires specific GPU
[SkippableFact]
[Trait("Category", "Hardware")]
[Trait("Backend", "CUDA")]
public async Task CudaBackend_ExecutesKernel()
{
Skip.IfNot(CudaBackend.IsAvailable(), "CUDA not available");
// ...
}
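With these traits in place, test runs can be scoped from the command line: dotnet test --filter "Category=Unit" executes only the fast, hardware-free tests, while a filter such as "Category=Hardware&Backend=CUDA" selects the GPU suites.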
Unit Testing
Testing Abstractions
public class KernelDefinitionTests
{
[Fact]
public void Constructor_SetsProperties()
{
var definition = new KernelDefinition
{
Name = "TestKernel",
Parameters = new[]
{
new ParameterDefinition("input", typeof(float[]), ParameterDirection.Input),
new ParameterDefinition("output", typeof(float[]), ParameterDirection.Output)
}
};
Assert.Equal("TestKernel", definition.Name);
Assert.Equal(2, definition.Parameters.Length);
Assert.Equal(ParameterDirection.Input, definition.Parameters[0].Direction);
}
[Theory]
[InlineData(null)]
[InlineData("")]
[InlineData(" ")]
public void Constructor_ThrowsForInvalidName(string? name)
{
Assert.Throws<ArgumentException>(() => new KernelDefinition { Name = name! });
}
}
Testing Memory Management
public class MemoryPoolTests
{
[Fact]
public void Rent_ReturnsBuffer()
{
var pool = new MemoryPool(poolSize: 1024 * 1024);
var buffer = pool.Rent<float>(1000);
Assert.NotNull(buffer);
Assert.Equal(1000, buffer.Length);
}
[Fact]
public void Return_AddsToPool()
{
var pool = new MemoryPool(poolSize: 1024 * 1024);
var buffer = pool.Rent<float>(1000);
pool.Return(buffer);
var stats = pool.GetStatistics();
Assert.Equal(1, stats.PooledBuffers);
}
[Fact]
public void Rent_ReusesReturnedBuffer()
{
var pool = new MemoryPool(poolSize: 1024 * 1024);
var buffer1 = pool.Rent<float>(1000);
pool.Return(buffer1);
var buffer2 = pool.Rent<float>(1000);
Assert.Same(buffer1, buffer2); // Same instance reused
}
}
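A further test worth adding to the class above guards against cross-size reuse. This is a sketch that relies only on the Rent/Return API already shown and assumes the pool never hands back a smaller pooled buffer for a larger request:
[Fact]
public void Rent_DoesNotReuseSmallerBuffer()
{
    var pool = new MemoryPool(poolSize: 1024 * 1024);
    var small = pool.Rent<float>(1000);
    pool.Return(small);
    // Requesting more elements than the returned buffer holds must not
    // alias the smaller pooled instance.
    var large = pool.Rent<float>(4000);
    Assert.NotSame(small, large);
    Assert.Equal(4000, large.Length);
}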
Testing Source Generators
public class KernelGeneratorTests
{
[Fact]
public void Generator_ProducesValidCode()
{
var source = @"
using DotCompute.Generators.Kernel.Attributes;
namespace Test;
public static partial class Kernels
{
[Kernel]
public static void Add(ReadOnlySpan<float> a, ReadOnlySpan<float> b, Span<float> result)
{
int idx = Kernel.ThreadId.X;
if (idx < result.Length) result[idx] = a[idx] + b[idx];
}
}";
var result = RunGenerator(source);
// No errors
Assert.Empty(result.Diagnostics.Where(d => d.Severity == DiagnosticSeverity.Error));
// Generated source exists
var generated = result.Results
    .SelectMany(r => r.GeneratedSources)
    .FirstOrDefault(s => s.HintName.Contains("Add"));
Assert.False(string.IsNullOrEmpty(generated.HintName), "No generated source for 'Add' found");
// Contains expected elements
var code = generated.SourceText.ToString();
Assert.Contains("AddDefinition", code);
Assert.Contains("KernelDefinition", code);
}
private static GeneratorDriverRunResult RunGenerator(string source)
{
var syntaxTree = CSharpSyntaxTree.ParseText(source);
var compilation = CSharpCompilation.Create("Test",
new[] { syntaxTree },
GetReferences(),
new CSharpCompilationOptions(OutputKind.DynamicallyLinkedLibrary));
var generator = new KernelSourceGenerator();
return CSharpGeneratorDriver.Create(generator)
.RunGenerators(compilation)
.GetRunResult();
}
}
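Beyond string assertions, it is worth checking that the generator's output itself compiles. Here is a sketch of an additional test for the class above, using Roslyn's RunGeneratorsAndUpdateCompilation; it assumes GetReferences() supplies the DotCompute assemblies and that the usings below cover the kernel body:
[Fact]
public void Generator_OutputCompiles()
{
    var source = @"
using System;
using DotCompute.Generators.Kernel.Attributes;
namespace Test;
public static partial class Kernels
{
    [Kernel]
    public static void Scale(ReadOnlySpan<float> input, Span<float> output)
    {
        int idx = Kernel.ThreadId.X;
        if (idx < output.Length) output[idx] = input[idx] * 2f;
    }
}";
    var compilation = CSharpCompilation.Create("Test",
        new[] { CSharpSyntaxTree.ParseText(source) },
        GetReferences(),
        new CSharpCompilationOptions(OutputKind.DynamicallyLinkedLibrary));

    var driver = CSharpGeneratorDriver.Create(new KernelSourceGenerator());
    driver.RunGeneratorsAndUpdateCompilation(compilation, out var updated, out _);

    // The original source plus the generated partials must compile without errors.
    Assert.Empty(updated.GetDiagnostics().Where(d => d.Severity == DiagnosticSeverity.Error));
}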
Integration Testing
Testing Compute Service
public class ComputeServiceIntegrationTests
{
private readonly IComputeOrchestrator _orchestrator;
public ComputeServiceIntegrationTests()
{
var services = new ServiceCollection();
services.AddDotComputeRuntime();
var provider = services.BuildServiceProvider();
_orchestrator = provider.GetRequiredService<IComputeOrchestrator>();
}
[Fact]
public void GetAvailableBackends_ReturnsAtLeastOne()
{
var backends = _orchestrator.GetAvailableBackends();
Assert.NotEmpty(backends);
Assert.Contains(backends, b => b.Type == BackendType.CPU);
}
[Fact]
public async Task ExecuteKernel_CompletesSuccessfully()
{
using var buffer = _orchestrator.CreateBuffer<float>(100);
var data = new float[100];
Array.Fill(data, 1.0f);
await buffer.CopyFromAsync(data);
await _orchestrator.ExecuteKernelAsync(
TestKernels.DoubleValues,
new KernelConfig { BlockSize = 32, GridSize = 4 },
buffer);
await buffer.CopyToAsync(data);
Assert.All(data, d => Assert.Equal(2.0f, d));
}
}
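A cheap addition to this class is a transfer round-trip check that isolates CopyFromAsync/CopyToAsync from kernel execution; this sketch uses only the buffer APIs already shown:
[Fact]
public async Task Buffer_RoundTripsDataUnchanged()
{
    using var buffer = _orchestrator.CreateBuffer<float>(256);
    var input = Enumerable.Range(0, 256).Select(i => (float)i).ToArray();

    await buffer.CopyFromAsync(input);

    var output = new float[256];
    await buffer.CopyToAsync(output);

    // Host -> device -> host must be lossless for plain float data.
    Assert.Equal(input, output);
}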
Testing Ring Kernels
public class RingKernelIntegrationTests
{
[Fact]
public async Task RingKernel_ProcessesMessages()
{
var services = new ServiceCollection();
services.AddDotComputeRuntime();
var provider = services.BuildServiceProvider();
var ringService = provider.GetRequiredService<IRingKernelService>();
// Launch ring kernel
var kernel = await ringService.LaunchAsync<TestRequest, TestResponse>(
EchoKernel.Process,
new RingKernelLaunchOptions { QueueCapacity = 64 });
try
{
// Send messages
for (int i = 0; i < 10; i++)
{
await kernel.SendAsync(new TestRequest { Value = i });
}
// Receive responses
var responses = new List<TestResponse>();
for (int i = 0; i < 10; i++)
{
var response = await kernel.ReceiveAsync(TimeSpan.FromSeconds(5));
responses.Add(response);
}
Assert.Equal(10, responses.Count);
Assert.All(responses, r => Assert.True(r.Processed));
}
finally
{
await kernel.TerminateAsync();
}
}
}
Hardware Testing
Skippable Tests
public class CudaHardwareTests
{
[SkippableFact]
[Trait("Category", "Hardware")]
public async Task CudaKernel_ExecutesOnGpu()
{
Skip.IfNot(CudaBackend.IsAvailable(), "CUDA not available");
var services = new ServiceCollection();
services.AddDotComputeRuntime();
var provider = services.BuildServiceProvider();
var orchestrator = provider.GetRequiredService<IComputeOrchestrator>();
Assert.Equal(BackendType.CUDA, orchestrator.ActiveBackend.Type);
// Test GPU execution
using var buffer = orchestrator.CreateBuffer<float>(1000);
await orchestrator.ExecuteKernelAsync(
    TestKernels.VectorAdd,
    new KernelConfig { BlockSize = 256, GridSize = 4 },
    buffer, buffer, buffer);
await orchestrator.SynchronizeAsync();
}
[SkippableTheory]
[Trait("Category", "Hardware")]
[InlineData(1000)]
[InlineData(100000)]
[InlineData(10000000)]
public async Task CudaKernel_HandlesVariousSizes(int size)
{
Skip.IfNot(CudaBackend.IsAvailable(), "CUDA not available");
var service = GetCudaService();
using var buffer = service.CreateBuffer<float>(size);
var config = new KernelConfig
{
BlockSize = 256,
GridSize = (size + 255) / 256
};
await service.ExecuteKernelAsync(TestKernels.Identity, config, buffer);
}
}
GPU Memory Tests
public class GpuMemoryTests
{
[SkippableFact]
[Trait("Category", "Hardware")]
public async Task LargeAllocation_Succeeds()
{
Skip.IfNot(CudaBackend.IsAvailable(), "CUDA not available");
var service = GetCudaService();
var deviceInfo = service.ActiveBackend.DeviceInfo;
// Allocate 50% of GPU memory
var sizeBytes = deviceInfo.TotalMemory / 2;
var elements = (int)(sizeBytes / sizeof(float));
using var buffer = service.CreateBuffer<float>(elements);
Assert.Equal(elements, buffer.Length);
}
[SkippableFact]
[Trait("Category", "Hardware")]
public void OutOfMemory_ThrowsException()
{
Skip.IfNot(CudaBackend.IsAvailable(), "CUDA not available");
var service = GetCudaService();
// Try to allocate more than GPU memory
Assert.Throws<OutOfMemoryException>(() =>
service.CreateBuffer<float>(int.MaxValue / 2));
}
}
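The hardware tests above call a GetCudaService() helper that is not shown. One possible implementation, as a sketch, assumes the runtime selects the CUDA backend automatically when it is available (which the Skip guards already ensure):
private static IComputeOrchestrator GetCudaService()
{
    // Build a throwaway container; the Skip.IfNot guard guarantees CUDA is present,
    // so backend selection should resolve to the CUDA backend.
    var services = new ServiceCollection();
    services.AddDotComputeRuntime();
    var provider = services.BuildServiceProvider();
    return provider.GetRequiredService<IComputeOrchestrator>();
}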
Benchmarking
BenchmarkDotNet Setup
[SimpleJob(RuntimeMoniker.Net90)]
[MemoryDiagnoser]
[RankColumn]
public class KernelBenchmarks
{
private ServiceProvider _provider = null!;
private IComputeOrchestrator _orchestrator = null!;
private float[] _buffer = null!;
[Params(1024, 1024 * 1024, 10 * 1024 * 1024)]
public int Size { get; set; }
[GlobalSetup]
public void Setup()
{
var services = new ServiceCollection();
services.AddDotComputeRuntime();
_provider = services.BuildServiceProvider();
_orchestrator = _provider.GetRequiredService<IComputeOrchestrator>();
_buffer = new float[Size];
}
[GlobalCleanup]
public void Cleanup()
{
    // Plain managed arrays need no disposal; release the DI container instead.
    _provider.Dispose();
}
[Benchmark(Baseline = true)]
public async Task VectorAdd()
{
await _orchestrator.ExecuteKernelAsync(
TestKernels.VectorAdd,
new KernelConfig { BlockSize = 256, GridSize = (Size + 255) / 256 },
_buffer, _buffer, _buffer);
await _orchestrator.SynchronizeAsync();
}
[Benchmark]
public async Task VectorMultiply()
{
await _orchestrator.ExecuteKernelAsync(
TestKernels.VectorMultiply,
new KernelConfig { BlockSize = 256, GridSize = (Size + 255) / 256 },
_buffer, _buffer, _buffer);
await _orchestrator.SynchronizeAsync();
}
}
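BenchmarkDotNet needs an entry point to discover and run these classes. A minimal Program for the benchmarks project (assuming it references BenchmarkDotNet and the benchmark classes above) can delegate to BenchmarkSwitcher so the --filter and --exporters arguments below work:
using BenchmarkDotNet.Running;

public static class Program
{
    public static void Main(string[] args) =>
        // Discovers every benchmark class in the assembly and honours
        // command-line arguments such as --filter and --exporters.
        BenchmarkSwitcher.FromAssembly(typeof(KernelBenchmarks).Assembly).Run(args);
}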
Running Benchmarks
# Run all benchmarks
dotnet run -c Release --project benchmarks/DotCompute.Benchmarks
# Run specific benchmark
dotnet run -c Release --project benchmarks/DotCompute.Benchmarks -- --filter "*VectorAdd*"
# Export results
dotnet run -c Release --project benchmarks/DotCompute.Benchmarks -- --exporters json html
Custom Performance Tests
public class PerformanceTests
{
[Fact]
[Trait("Category", "Performance")]
public async Task KernelLaunch_UnderTargetLatency()
{
var service = GetService();
using var buffer = service.CreateBuffer<float>(1000);
var config = new KernelConfig { BlockSize = 32, GridSize = 32 };
// Warmup
for (int i = 0; i < 100; i++)
{
await service.ExecuteKernelAsync(TestKernels.Identity, config, buffer);
}
await service.SynchronizeAsync();
// Measure
var sw = Stopwatch.StartNew();
const int iterations = 1000;
for (int i = 0; i < iterations; i++)
{
await service.ExecuteKernelAsync(TestKernels.Identity, config, buffer);
}
await service.SynchronizeAsync();
var avgMs = sw.Elapsed.TotalMilliseconds / iterations;
// Assert under target (e.g., 0.1ms per launch)
Assert.True(avgMs < 0.1, $"Average launch time {avgMs:F3}ms exceeds target");
}
[Fact]
[Trait("Category", "Performance")]
public async Task MemoryTransfer_MeetsBandwidthTarget()
{
var service = GetService();
const int sizeBytes = 100 * 1024 * 1024; // 100 MB
var elements = sizeBytes / sizeof(float);
var data = new float[elements];
using var buffer = service.CreateBuffer<float>(elements);
var sw = Stopwatch.StartNew();
await buffer.CopyFromAsync(data);
sw.Stop();
var bandwidthGBps = (sizeBytes / 1e9) / (sw.Elapsed.TotalSeconds);
// Assert meets minimum bandwidth (e.g., 5 GB/s)
Assert.True(bandwidthGBps > 5.0,
$"Transfer bandwidth {bandwidthGBps:F1} GB/s below target");
}
}
Test Utilities
Test Fixtures
public class ComputeServiceFixture : IAsyncLifetime
{
public IServiceProvider Provider { get; private set; } = null!;
public IComputeOrchestrator Orchestrator { get; private set; } = null!;
public async Task InitializeAsync()
{
var services = new ServiceCollection();
services.AddDotComputeRuntime();
services.AddLogging(b => b.AddConsole().SetMinimumLevel(LogLevel.Debug));
Provider = services.BuildServiceProvider();
Orchestrator = Provider.GetRequiredService<IComputeOrchestrator>();
// Warmup
using var buffer = Orchestrator.CreateBuffer<float>(100);
await Orchestrator.ExecuteKernelAsync(
TestKernels.Identity,
new KernelConfig { BlockSize = 32, GridSize = 4 },
buffer);
}
public Task DisposeAsync()
{
if (Provider is IDisposable disposable)
disposable.Dispose();
return Task.CompletedTask;
}
}
public class ComputeTests : IClassFixture<ComputeServiceFixture>
{
private readonly ComputeServiceFixture _fixture;
public ComputeTests(ComputeServiceFixture fixture)
{
_fixture = fixture;
}
[Fact]
public async Task Test()
{
var orchestrator = _fixture.Orchestrator;
// ...
}
}
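If several test classes need the same expensive setup, xUnit collection fixtures share a single ComputeServiceFixture instance across classes:
[CollectionDefinition("Compute")]
public class ComputeCollection : ICollectionFixture<ComputeServiceFixture>
{
    // Marker class: never instantiated, only carries the collection definition.
}

[Collection("Compute")]
public class MoreComputeTests
{
    private readonly ComputeServiceFixture _fixture;

    public MoreComputeTests(ComputeServiceFixture fixture) => _fixture = fixture;

    [Fact]
    public void Orchestrator_IsInitialized() => Assert.NotNull(_fixture.Orchestrator);
}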
Test Helpers
public static class TestHelpers
{
public static float[] GenerateRandomData(int size, int seed = 42)
{
var random = new Random(seed);
return Enumerable.Range(0, size)
.Select(_ => (float)random.NextDouble())
.ToArray();
}
public static void AssertArraysEqual(float[] expected, float[] actual, float tolerance = 1e-5f)
{
Assert.Equal(expected.Length, actual.Length);
for (int i = 0; i < expected.Length; i++)
{
Assert.True(
Math.Abs(expected[i] - actual[i]) < tolerance,
$"Arrays differ at index {i}: expected {expected[i]}, got {actual[i]}");
}
}
}
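These helpers combine naturally with a CPU reference computation. In the sketch below, the "actual" values are produced by an equivalent CPU expression purely to illustrate the tolerance check; in a real test they would be copied back from the kernel under test:
[Fact]
public void VectorAdd_MatchesCpuReference()
{
    var a = TestHelpers.GenerateRandomData(1_000);
    var b = TestHelpers.GenerateRandomData(1_000, seed: 7);

    // Reference result computed on the CPU.
    var expected = a.Zip(b, (x, y) => x + y).ToArray();

    // Stand-in for results copied back from the kernel under test.
    var actual = a.Select((x, i) => x + b[i]).ToArray();

    TestHelpers.AssertArraysEqual(expected, actual);
}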
Exercises
Exercise 1: Unit Test Suite
Write comprehensive unit tests for a new kernel attribute.
Exercise 2: Hardware Test
Create a hardware test that validates correct execution on GPU.
Exercise 3: Performance Benchmark
Create a benchmark comparing two kernel implementations.
Key Takeaways
- Categorize tests for selective execution
- Use SkippableFact for hardware-dependent tests
- Test edge cases - empty arrays, large sizes, invalid inputs
- Benchmark with warmup for accurate measurements
- Use fixtures to share expensive setup
Path Complete
Congratulations! You've completed the Contributor Learning Path.
What you learned:
- DotCompute's four-layer architecture
- Building source generators
- Creating Roslyn analyzers
- Implementing comprehensive tests
Next steps:
- Review open issues on GitHub
- Join discussions on design decisions
- Submit your first PR!
Further Reading
- CLAUDE.md - Development guidelines
- Benchmarking Guide - Detailed methodology
- Architecture Overview - System design