File size: 1,186 Bytes
a0e85b1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 |
# Alerting rules for the `hopcroft_alerts` group.
# Each rule must hold for its `for:` duration before the alert fires.
groups:
  - name: hopcroft_alerts
    rules:
      # Fires when `up == 0` has held for a target for 1 minute.
      - alert: ServiceDown
        expr: up == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "Service {{ $labels.instance }} is down"
          description: >-
            The job {{ $labels.job }} has been down for more than 1 minute.

      # Ratio of 5xx responses to all responses over 5m rate windows.
      # Both sides aggregate `by (instance)`: a bare sum() drops every label,
      # which would leave `{{ $labels.instance }}` in the summary empty.
      - alert: HighErrorRate
        expr: |
          sum by (instance) (rate(hopcroft_requests_total{http_status=~"5.."}[5m]))
            /
          sum by (instance) (rate(hopcroft_requests_total[5m])) > 0.1
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High error rate on {{ $labels.instance }}"
          description: >-
            Error rate is above 10% for the last 5 minutes
            (current value: {{ $value | printf "%.2f" }}).

      # 95th-percentile request duration per endpoint, computed from the
      # histogram buckets; `le` must be kept for histogram_quantile to work.
      - alert: SlowRequests
        expr: |
          histogram_quantile(
            0.95,
            sum by (le, endpoint) (rate(hopcroft_request_duration_seconds_bucket[5m]))
          ) > 2
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Slow requests on {{ $labels.endpoint }}"
          description: >-
            95th percentile of request latency is above 2s
            (current value: {{ $value | printf "%.2f" }}s).
|