File size: 2,541 Bytes
7c19d46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# =============================================================================
# Grafana Dashboard — Platform Overview
# =============================================================================

apiVersion: v1
kind: ConfigMap
metadata:
  name: platform-overview-dashboard
  namespace: monitoring
  labels:
    # Selector label for dashboard discovery.
    # NOTE(review): assumes a Grafana dashboard sidecar in this cluster watches
    # for ConfigMaps labelled grafana_dashboard=1 — confirm against the Grafana
    # deployment's sidecar settings.
    grafana_dashboard: "1"
data:
  # The value below is a literal block scalar containing JSON. JSON has no
  # comment syntax, so every note about the dashboard lives out here.
  #
  # The JSON root must be the dashboard model itself (title/tags/panels at the
  # top level). A {"dashboard": {...}} envelope is the request-body shape of
  # Grafana's HTTP API; file-based provisioning looks for "title" at the root
  # and fails to load a wrapped document.
  #
  # Panels, by gridPos (x, y, w, h):
  #   Request Rate (req/s)    x=0  y=0  w=12 h=8   per-service request rate
  #   Error Rate (%)          x=12 y=0  w=12 h=8   5xx share of requests, per service
  #   P95 Latency             x=0  y=8  w=12 h=8   0.95 quantile from duration buckets
  #   Pod Status              x=12 y=8  w=6  h=8   kube_pod_status_phase summed by phase
  #   CPU Usage by Namespace  x=18 y=8  w=6  h=8   container CPU rate by namespace
  #   Security Alerts         x=0  y=16 w=12 h=8   alert list, ALERTS{team="security"}
  #
  # All rate() expressions use a 5m window.
  platform-overview.json: |
    {
      "title": "Platform Overview",
      "tags": ["platform", "overview"],
      "panels": [
        {
          "title": "Request Rate (req/s)",
          "type": "timeseries",
          "gridPos": {"h": 8, "w": 12, "x": 0, "y": 0},
          "targets": [{
            "expr": "sum(rate(http_requests_total[5m])) by (service)",
            "legendFormat": "{{service}}"
          }]
        },
        {
          "title": "Error Rate (%)",
          "type": "timeseries",
          "gridPos": {"h": 8, "w": 12, "x": 12, "y": 0},
          "targets": [{
            "expr": "sum(rate(http_requests_total{code=~\"5..\"}[5m])) by (service) / sum(rate(http_requests_total[5m])) by (service) * 100",
            "legendFormat": "{{service}}"
          }]
        },
        {
          "title": "P95 Latency",
          "type": "timeseries",
          "gridPos": {"h": 8, "w": 12, "x": 0, "y": 8},
          "targets": [{
            "expr": "histogram_quantile(0.95, sum(rate(http_request_duration_seconds_bucket[5m])) by (le, service))",
            "legendFormat": "{{service}}"
          }]
        },
        {
          "title": "Pod Status",
          "type": "stat",
          "gridPos": {"h": 8, "w": 6, "x": 12, "y": 8},
          "targets": [{
            "expr": "sum(kube_pod_status_phase) by (phase)",
            "legendFormat": "{{phase}}"
          }]
        },
        {
          "title": "CPU Usage by Namespace",
          "type": "timeseries",
          "gridPos": {"h": 8, "w": 6, "x": 18, "y": 8},
          "targets": [{
            "expr": "sum(rate(container_cpu_usage_seconds_total[5m])) by (namespace)",
            "legendFormat": "{{namespace}}"
          }]
        },
        {
          "title": "Security Alerts",
          "type": "alertlist",
          "gridPos": {"h": 8, "w": 12, "x": 0, "y": 16},
          "options": {
            "show": "current"
          },
          "targets": [{
            "expr": "ALERTS{team=\"security\"}"
          }]
        }
      ]
    }