petter2025 committed on
Commit
5176a1c
·
verified ·
1 Parent(s): ca25698

Update demo/scenarios.py

Browse files
Files changed (1) hide show
  1. demo/scenarios.py +132 -12
demo/scenarios.py CHANGED
@@ -1,5 +1,5 @@
1
  """
2
- Incident scenarios for the demo
3
  """
4
 
5
  INCIDENT_SCENARIOS = {
@@ -11,44 +11,164 @@ INCIDENT_SCENARIOS = {
11
  "cache_hit_rate": 18.5,
12
  "database_load": 92,
13
  "response_time_ms": 1850,
14
- "affected_users": 45000
 
15
  },
16
  "business_impact": {
17
  "revenue_loss_per_hour": 8500,
18
  "sla_violation": True,
19
- "customer_sat_change": -40
 
 
 
 
 
 
 
 
 
 
20
  }
21
  },
 
22
  "Database Connection Pool Exhaustion": {
23
- "description": "Database connection pool exhausted causing API timeouts",
24
  "severity": "HIGH",
25
- "component": "database",
26
  "metrics": {
27
  "active_connections": 98,
28
  "max_connections": 100,
29
  "api_latency_ms": 2450,
30
- "error_rate": 15.2
 
 
31
  },
32
  "business_impact": {
33
  "revenue_loss_per_hour": 4200,
34
- "affected_services": 3,
35
- "sla_violation": True
 
 
 
 
 
 
 
 
 
 
36
  }
37
  },
38
- "Memory Leak in Production": {
39
- "description": "Java service memory leak causing gradual degradation",
 
40
  "severity": "HIGH",
41
- "component": "java_service",
42
  "metrics": {
43
  "memory_usage": 96,
44
  "gc_pause_time_ms": 4500,
45
  "error_rate": 28.5,
46
- "restart_frequency_per_hour": 12
 
47
  },
48
  "business_impact": {
49
  "revenue_loss_per_hour": 5500,
50
  "session_loss": 8500,
 
51
  "support_tickets_increase": 300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  }
53
  }
54
  }
 
1
  """
2
+ Incident scenarios for the demo - EXPANDED VERSION
3
  """
4
 
5
  INCIDENT_SCENARIOS = {
 
11
  "cache_hit_rate": 18.5,
12
  "database_load": 92,
13
  "response_time_ms": 1850,
14
+ "affected_users": 45000,
15
+ "eviction_rate_per_sec": 125
16
  },
17
  "business_impact": {
18
  "revenue_loss_per_hour": 8500,
19
  "sla_violation": True,
20
+ "customer_sat_change": -40,
21
+ "affected_services": ["API Gateway", "User Service", "Payment"]
22
+ },
23
+ "roi_data": {
24
+ "hourly_revenue_loss": 8500,
25
+ "manual_recovery_hours": 1.0,
26
+ "enterprise_recovery_hours": 0.2,
27
+ "engineers_required": 4,
28
+ "engineer_hourly_rate": 150,
29
+ "estimated_monthly_occurrences": 2,
30
+ "enterprise_savings_percentage": 0.85
31
  }
32
  },
33
+
34
  "Database Connection Pool Exhaustion": {
35
+ "description": "PostgreSQL connection pool exhausted causing API timeouts",
36
  "severity": "HIGH",
37
+ "component": "postgresql_database",
38
  "metrics": {
39
  "active_connections": 98,
40
  "max_connections": 100,
41
  "api_latency_ms": 2450,
42
+ "error_rate": 15.2,
43
+ "queue_depth": 1250,
44
+ "connection_wait_seconds": 45
45
  },
46
  "business_impact": {
47
  "revenue_loss_per_hour": 4200,
48
+ "affected_services": ["API Gateway", "User Service", "Payment Service"],
49
+ "sla_violation": True,
50
+ "partner_api_impact": 3
51
+ },
52
+ "roi_data": {
53
+ "hourly_revenue_loss": 4200,
54
+ "manual_recovery_hours": 0.75,
55
+ "enterprise_recovery_hours": 0.13,
56
+ "engineers_required": 2,
57
+ "engineer_hourly_rate": 150,
58
+ "estimated_monthly_occurrences": 3,
59
+ "enterprise_savings_percentage": 0.82
60
  }
61
  },
62
+
63
+ "Kubernetes Memory Leak": {
64
+ "description": "Java microservice memory leak causing pod restarts",
65
  "severity": "HIGH",
66
+ "component": "java_payment_service",
67
  "metrics": {
68
  "memory_usage": 96,
69
  "gc_pause_time_ms": 4500,
70
  "error_rate": 28.5,
71
+ "restart_frequency_per_hour": 12,
72
+ "heap_fragmentation": 42
73
  },
74
  "business_impact": {
75
  "revenue_loss_per_hour": 5500,
76
  "session_loss": 8500,
77
+ "payment_failures_percentage": 3.2,
78
  "support_tickets_increase": 300
79
+ },
80
+ "roi_data": {
81
+ "hourly_revenue_loss": 5500,
82
+ "manual_recovery_hours": 1.5,
83
+ "enterprise_recovery_hours": 0.25,
84
+ "engineers_required": 3,
85
+ "engineer_hourly_rate": 150,
86
+ "estimated_monthly_occurrences": 1,
87
+ "enterprise_savings_percentage": 0.79
88
+ }
89
+ },
90
+
91
+ "API Rate Limit Storm": {
92
+ "description": "Third-party API rate limiting causing cascading failures",
93
+ "severity": "MEDIUM",
94
+ "component": "external_api_gateway",
95
+ "metrics": {
96
+ "rate_limit_hits_percentage": 95,
97
+ "error_rate": 42.8,
98
+ "retry_storm": True,
99
+ "cascade_effect_services": 3,
100
+ "queue_backlog": 8500
101
+ },
102
+ "business_impact": {
103
+ "revenue_loss_per_hour": 3800,
104
+ "partner_sla_breach": True,
105
+ "data_sync_delay_hours": 4,
106
+ "customer_reports_delay_hours": 6
107
+ },
108
+ "roi_data": {
109
+ "hourly_revenue_loss": 3800,
110
+ "manual_recovery_hours": 1.25,
111
+ "enterprise_recovery_hours": 0.17,
112
+ "engineers_required": 3,
113
+ "engineer_hourly_rate": 150,
114
+ "estimated_monthly_occurrences": 4,
115
+ "enterprise_savings_percentage": 0.85
116
+ }
117
+ },
118
+
119
+ "Network Partition": {
120
+ "description": "Network partition causing split-brain in distributed database",
121
+ "severity": "CRITICAL",
122
+ "component": "distributed_database",
123
+ "metrics": {
124
+ "partition_detected": True,
125
+ "write_conflicts": 1250,
126
+ "data_inconsistency_percentage": 8.5,
127
+ "replication_lag_seconds": 45,
128
+ "quorum_lost": True
129
+ },
130
+ "business_impact": {
131
+ "revenue_loss_per_hour": 12000,
132
+ "data_corruption_risk": True,
133
+ "recovery_complexity": "HIGH",
134
+ "compliance_violation": True
135
+ },
136
+ "roi_data": {
137
+ "hourly_revenue_loss": 12000,
138
+ "manual_recovery_hours": 2.0,
139
+ "enterprise_recovery_hours": 0.3,
140
+ "engineers_required": 5,
141
+ "engineer_hourly_rate": 150,
142
+ "estimated_monthly_occurrences": 0.5,
143
+ "enterprise_savings_percentage": 0.88
144
+ }
145
+ },
146
+
147
+ "Storage I/O Saturation": {
148
+ "description": "Storage system I/O saturation causing application timeouts",
149
+ "severity": "HIGH",
150
+ "component": "storage_cluster",
151
+ "metrics": {
152
+ "io_utilization": 98,
153
+ "latency_ms": 450,
154
+ "throughput_mbps": 1250,
155
+ "queue_depth": 850,
156
+ "error_rate": 8.5
157
+ },
158
+ "business_impact": {
159
+ "revenue_loss_per_hour": 6800,
160
+ "data_processing_delay_hours": 3,
161
+ "analytics_backlog": True,
162
+ "reporting_failure": True
163
+ },
164
+ "roi_data": {
165
+ "hourly_revenue_loss": 6800,
166
+ "manual_recovery_hours": 1.75,
167
+ "enterprise_recovery_hours": 0.22,
168
+ "engineers_required": 3,
169
+ "engineer_hourly_rate": 150,
170
+ "estimated_monthly_occurrences": 1.5,
171
+ "enterprise_savings_percentage": 0.83
172
  }
173
  }
174
  }