Table of Contents

Class MetalAlertsManager

Namespace
DotCompute.Backends.Metal.Telemetry
Assembly
DotCompute.Backends.Metal.dll

Threshold monitoring and alerting system for the Metal backend.

public sealed class MetalAlertsManager : IDisposable
Inheritance
object → MetalAlertsManager
Implements
IDisposable
Inherited Members
Extension Methods

Constructors

MetalAlertsManager(ILogger<MetalAlertsManager>, MetalAlertsOptions)

public MetalAlertsManager(ILogger<MetalAlertsManager> logger, MetalAlertsOptions options)

Parameters

logger ILogger<MetalAlertsManager>
options MetalAlertsOptions

Properties

ActiveAlerts

Gets all currently active alerts

public IReadOnlyList<Alert> ActiveAlerts { get; }

Property Value

IReadOnlyList<Alert>

Methods

CheckErrorRate(MetalError)

Checks for high error rates and potentially triggers an alert

public void CheckErrorRate(MetalError error)

Parameters

error MetalError

CheckHighGpuUtilization(double)

Checks for high GPU utilization and potentially triggers an alert

public void CheckHighGpuUtilization(double utilizationPercentage)

Parameters

utilizationPercentage double

CheckHighMemoryPressure(MemoryPressureLevel, double)

Checks for high memory pressure and potentially triggers an alert

public void CheckHighMemoryPressure(MemoryPressureLevel level, double percentage)

Parameters

level MemoryPressureLevel
percentage double

CheckHighMemoryUtilization(double)

Checks for high memory utilization and potentially triggers an alert

public void CheckHighMemoryUtilization(double utilizationPercentage)

Parameters

utilizationPercentage double

CheckHighResourceUtilization(ResourceType, double)

Checks for high resource utilization and potentially triggers an alert

public void CheckHighResourceUtilization(ResourceType resourceType, double utilizationPercentage)

Parameters

resourceType ResourceType
utilizationPercentage double

CheckKernelExecutionFailure(string, TimeSpan)

Checks for kernel execution failure and potentially triggers an alert

public void CheckKernelExecutionFailure(string kernelName, TimeSpan duration)

Parameters

kernelName string
duration TimeSpan

CheckMemoryAllocationFailure(long)

Checks for memory allocation failure and potentially triggers an alert

public void CheckMemoryAllocationFailure(long sizeBytes)

Parameters

sizeBytes long

CheckSlowOperation(string, TimeSpan)

Checks for slow operations and potentially triggers an alert

public void CheckSlowOperation(string operationName, TimeSpan duration)

Parameters

operationName string
duration TimeSpan

Dispose()

Performs application-defined tasks associated with freeing, releasing, or resetting unmanaged resources.

public void Dispose()

EvaluateActiveAlerts(MetalTelemetrySnapshot)

Evaluates active alerts against current telemetry data

public void EvaluateActiveAlerts(MetalTelemetrySnapshot snapshot)

Parameters

snapshot MetalTelemetrySnapshot