# Abeshith's picture
# Added Monitoring Stages
# b53ee19
# Prometheus-style alerting rules: groups -> rules -> alert definitions.
groups:
  # `interval` sets how often the rules in this group are evaluated.
  - name: model_performance
    interval: 1m
    rules:
      # Fires when the per-second rate (5m window) of requests whose status
      # matches 5xx stays above 0.05 for 5 minutes, evaluated per series.
      # NOTE(review): this is an absolute rate in requests/sec, not a ratio
      # of errors to total requests - confirm 0.05 req/s is the intended
      # threshold.
      - alert: HighErrorRate
        expr: rate(http_requests_total{status=~"5.."}[5m]) > 0.05
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "High error rate detected"
          description: "Error rate is {{ $value }} requests/sec"

      # Fires when the 0.95 quantile estimated from the
      # prediction_duration_seconds histogram buckets (5m rate) stays above 2
      # for 5 minutes. There is no aggregation around rate(), so the quantile
      # is computed separately for each label set.
      - alert: ModelLatencyHigh
        expr: histogram_quantile(0.95, rate(prediction_duration_seconds_bucket[5m])) > 2
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Model prediction latency is high"
          description: "95th percentile latency is {{ $value }}s"

      # Fires when data_drift_detected equals 1 continuously for 10 minutes.
      # NOTE(review): presumably a 0/1 flag exported by a drift-monitoring
      # job - confirm against the exporter.
      - alert: DataDriftDetected
        expr: data_drift_detected == 1
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: "Data drift detected in model inputs"
          description: "Drift has been detected in feature distributions"