voiceforge-universal/deploy/monitoring/prometheus-rules.yaml
creator-o1
Initial commit: Complete VoiceForge Enterprise Speech AI Platform
d00203b
# ServiceMonitor for the VoiceForge backend.
# Scrapes /metrics on the "http" port of every Service labelled
# app=voiceforge-backend in the "voiceforge" namespace.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: voiceforge-backend
  namespace: monitoring
  labels:
    app: voiceforge-backend
    # NOTE(review): presumably matches the serviceMonitorSelector of the
    # Prometheus instance that should pick this up - confirm.
    release: prometheus
spec:
  selector:
    matchLabels:
      app: voiceforge-backend
  # The monitor lives in "monitoring" but its targets live in "voiceforge".
  namespaceSelector:
    matchNames:
      - voiceforge
  # Copy the Service's `app` label onto every scraped series. The alert
  # expressions in this file filter on {app="voiceforge-backend"}; without
  # this, the label is only present if the application emits it itself.
  # The Service is guaranteed to carry `app` because the selector above
  # requires it.
  targetLabels:
    - app
  endpoints:
    - port: http
      path: /metrics
      interval: 30s
      # Must stay below `interval`.
      scrapeTimeout: 10s
---
# Alerting rules for the VoiceForge platform: API error rate and latency,
# pod restarts, and container CPU / memory pressure.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: voiceforge-alerts
  namespace: monitoring
  labels:
    app: voiceforge
    # NOTE(review): presumably matches the ruleSelector of the Prometheus
    # instance that should load these rules - confirm.
    release: prometheus
spec:
  groups:
    - name: voiceforge.rules
      rules:
        # Share of requests answered with a 5xx status over the last 5 minutes,
        # across all backend series. Fires above 5% sustained for 5 minutes.
        - alert: HighErrorRate
          expr: |-
            sum(rate(http_requests_total{app="voiceforge-backend",status=~"5.."}[5m]))
              /
            sum(rate(http_requests_total{app="voiceforge-backend"}[5m]))
              > 0.05
          for: 5m
          labels:
            severity: critical
          annotations:
            summary: "High error rate on VoiceForge API"
            description: "Error rate is {{ $value | humanizePercentage }} (threshold: 5%)"

        # API-wide p95 request latency. Bucket rates are summed by `le` before
        # taking the quantile so that a single p95 is computed for the whole
        # API (consistent with HighErrorRate) instead of one per scraped series.
        - alert: HighLatency
          expr: |-
            histogram_quantile(
              0.95,
              sum by (le) (rate(http_request_duration_seconds_bucket{app="voiceforge-backend"}[5m]))
            ) > 2
          for: 5m
          labels:
            severity: warning
          annotations:
            summary: "High latency on VoiceForge API"
            description: "p95 latency is {{ $value }}s (threshold: 2s)"

        # More than 3 container restarts within the last hour in the
        # "voiceforge" namespace; one alert per matching series.
        - alert: PodRestarts
          expr: increase(kube_pod_container_status_restarts_total{namespace="voiceforge"}[1h]) > 3
          for: 5m
          labels:
            severity: warning
          annotations:
            summary: "Pod restarts detected"
            description: "Pod {{ $labels.pod }} has restarted {{ $value }} times in the last hour"

        # container_cpu_usage_seconds_total is a cumulative counter, so it must
        # be wrapped in rate() to obtain CPU seconds consumed per second
        # (i.e. cores in use). The threshold 0.8 therefore means an average of
        # 0.8 cores across the matched series, and humanizePercentage renders
        # it as a percentage of one core.
        - alert: HighCPU
          expr: avg(rate(container_cpu_usage_seconds_total{pod=~"voiceforge-.*"}[5m])) > 0.8
          for: 10m
          labels:
            severity: warning
          annotations:
            summary: "High CPU usage on VoiceForge pods"
            description: "Average CPU usage is {{ $value | humanizePercentage }}"

        # Ratio of average memory usage to average memory limit across
        # voiceforge-* pods. Fires above 90% sustained for 10 minutes.
        # NOTE(review): series without a configured memory limit may skew the
        # denominator - confirm every VoiceForge container sets a limit.
        - alert: HighMemory
          expr: |-
            avg(container_memory_usage_bytes{pod=~"voiceforge-.*"})
              /
            avg(container_spec_memory_limit_bytes{pod=~"voiceforge-.*"})
              > 0.9
          for: 10m
          labels:
            severity: critical
          annotations:
            summary: "High memory usage on VoiceForge pods"
            description: "Memory usage is at {{ $value | humanizePercentage }}"