{
  "name": "Deploy verifier",
  "objective": "Watch a rollout after deployment, compare live signals against release expectations, and escalate quickly when anomalies appear.",
  "trigger": {
    "type": "event",
    "cadence_or_event": "Deployment starts, canary advances, feature flag flips, or release tag is created; then poll every 5-15 minutes during the rollout window."
  },
  "intake": {
    "sources": ["release notes", "dashboards", "logs", "traces", "synthetic checks", "feature flag state"],
    "selection_rule": "Compare only signals named in the release expectations and rollback criteria against the pre-rollout baseline."
  },
  "workspace": {
    "isolation": "Read-only access to observability systems; no production mutation surface.",
    "allowed_actions": ["read metrics, logs, and traces", "write rollout status updates", "create issues"],
    "disallowed_actions": ["rollback", "config change", "traffic shifting", "database action", "incident declaration"]
  },
  "context": {
    "required_files": ["release notes", "rollout checklist"],
    "runtime_sources": ["baseline metric window", "current dashboards", "synthetic check results", "feature flag state"]
  },
  "agents": [
    {
      "role": "Observer",
      "responsibility": "Gather metrics, logs, traces, and check status on the polling cadence."
    },
    {
      "role": "Comparator",
      "responsibility": "Compare observed signals against release expectations, thresholds, and baseline."
    },
    {
      "role": "Reporter",
      "responsibility": "Write concise rollout status: stable, degraded, blocked, or unknown."
    }
  ],
  "verification": {
    "gates": ["synthetic checks pass", "error rate, latency, and saturation stay within thresholds", "no new dominant failure mode in logs or traces", "deployment phase matches expectations"],
    "receipts": ["dashboards checked", "baseline window", "anomalies with evidence links", "status decisions with timestamps"]
  },
  "state": {
    "artifacts": ["rollout status report", "checked-signal log"],
    "update_rule": "Record release version, rollout phase, signals checked, anomalies, and decisions after every polling interval."
  },
  "budget": {
    "max_retries": 2,
    "max_runtime_minutes": 120
  },
  "escalation": {
    "conditions": ["threshold breach", "missing telemetry", "migration errors", "customer-impacting regression", "rollback criteria met"],
    "destination": "On-call channel with evidence links and the rollback checklist"
  },
  "exit": {
    "success": "The rollout window completes with all signals stable.",
    "stop_without_success": "The deploy is rolled back, paused, or handed to an incident owner."
  }
}