Spaces:
Sleeping
Sleeping
# ServiceMonitor for the VoiceForge backend.
# Declares a scrape target for the Prometheus Operator: Services labelled
# app=voiceforge-backend in the "voiceforge" namespace are scraped on their
# "http" port at /metrics.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: voiceforge-backend
  # The monitor object lives in "monitoring"; the Services it selects live in
  # the namespace listed under spec.namespaceSelector.
  namespace: monitoring
  labels:
    app: voiceforge-backend
    # NOTE(review): presumably matched by the Prometheus resource's
    # serviceMonitorSelector - confirm against the Prometheus installation.
    release: prometheus
spec:
  # Label selector applied to Service objects.
  selector:
    matchLabels:
      app: voiceforge-backend
  # Restrict Service discovery to these namespaces.
  namespaceSelector:
    matchNames:
      - voiceforge
  endpoints:
    # "port" is the *name* of the Service port, not a port number.
    - port: http
      path: /metrics
      interval: 30s
      # Kept below the interval so a slow scrape cannot overlap the next one.
      scrapeTimeout: 10s
---
# PrometheusRule defining the alerting rules for VoiceForge.
# Contains one rule group ("voiceforge.rules") with alerts for HTTP error rate,
# request latency, pod restarts, CPU usage and memory usage.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: voiceforge-alerts
  namespace: monitoring
  labels:
    app: voiceforge
    # NOTE(review): presumably matched by the Prometheus resource's
    # ruleSelector - confirm against the Prometheus installation.
    release: prometheus
spec:
  groups:
    - name: voiceforge.rules
      rules:
        # Fraction of backend requests answered with a 5xx status over the
        # last 5 minutes; fires when it stays above 5% for 5 minutes.
        - alert: HighErrorRate
          expr: sum(rate(http_requests_total{app="voiceforge-backend",status=~"5.."}[5m])) / sum(rate(http_requests_total{app="voiceforge-backend"}[5m])) > 0.05
          for: 5m
          labels:
            severity: critical
          annotations:
            summary: "High error rate on VoiceForge API"
            description: "Error rate is {{ $value | humanizePercentage }} (threshold: 5%)"

        # 95th-percentile request duration from the histogram buckets; fires
        # when it stays above 2 seconds for 5 minutes.
        # NOTE(review): the buckets are not aggregated (no "sum by (le)"), so
        # the quantile is evaluated separately for each label combination -
        # confirm this per-series behaviour is intended.
        - alert: HighLatency
          expr: histogram_quantile(0.95, rate(http_request_duration_seconds_bucket{app="voiceforge-backend"}[5m])) > 2
          for: 5m
          labels:
            severity: warning
          annotations:
            summary: "High latency on VoiceForge API"
            description: "p95 latency is {{ $value }}s (threshold: 2s)"

        # More than 3 container restarts within the last hour in the
        # "voiceforge" namespace.
        - alert: PodRestarts
          expr: increase(kube_pod_container_status_restarts_total{namespace="voiceforge"}[1h]) > 3
          for: 5m
          labels:
            severity: warning
          annotations:
            summary: "Pod restarts detected"
            description: "Pod {{ $labels.pod }} has restarted {{ $value }} times in the last hour"

        # container_cpu_usage_seconds_total is a cumulative counter of CPU
        # seconds, so it must be wrapped in rate() to obtain CPU cores in use;
        # comparing the raw counter to 0.8 would be meaningless.
        # The resulting value is in cores, so the percentage in the description
        # is relative to one full core.
        # NOTE(review): this is not relative to the pods' CPU limits - confirm
        # that 0.8 cores is the intended threshold.
        - alert: HighCPU
          expr: avg(rate(container_cpu_usage_seconds_total{pod=~"voiceforge-.*"}[5m])) > 0.8
          for: 10m
          labels:
            severity: warning
          annotations:
            summary: "High CPU usage on VoiceForge pods"
            description: "Average CPU usage is {{ $value | humanizePercentage }}"

        # Ratio of average memory usage to average memory limit across
        # matching pods; fires when it stays above 90% for 10 minutes.
        # NOTE(review): series without a configured memory limit may report a
        # limit of 0 and skew the denominator - verify all matched containers
        # have limits set.
        - alert: HighMemory
          expr: avg(container_memory_usage_bytes{pod=~"voiceforge-.*"}) / avg(container_spec_memory_limit_bytes{pod=~"voiceforge-.*"}) > 0.9
          for: 10m
          labels:
            severity: critical
          annotations:
            summary: "High memory usage on VoiceForge pods"
            description: "Memory usage is at {{ $value | humanizePercentage }}"