petter2025 committed on
Commit
bdff4ef
·
verified ·
1 Parent(s): 45b5bdb

Create scenarios.yaml

Browse files
Files changed (1) hide show
  1. config/scenarios.yaml +150 -0
config/scenarios.yaml ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Cache Miss Storm:
2
+ description: "Redis cluster experiencing 80% cache miss rate causing database overload"
3
+ severity: "CRITICAL"
4
+ component: "redis_cache"
5
+ metrics:
6
+ cache_hit_rate: 18.5
7
+ database_load: 92
8
+ response_time_ms: 1850
9
+ affected_users: 45000
10
+ eviction_rate_per_sec: 125
11
+ business_impact:
12
+ revenue_loss_per_hour: 8500
13
+ sla_violation: true
14
+ customer_sat_change: -40
15
+ affected_services:
16
+ - "API Gateway"
17
+ - "User Service"
18
+ - "Payment Service"
19
+ roi_data:
20
+ hourly_revenue_loss: 8500
21
+ manual_recovery_hours: 1.0
22
+ enterprise_recovery_hours: 0.2
23
+ engineers_required: 4
24
+ engineer_hourly_rate: 150
25
+ estimated_monthly_occurrences: 2
26
+ enterprise_savings_percentage: 0.85
27
+
28
+ Database Connection Pool Exhaustion:
29
+ description: "PostgreSQL connection pool exhausted causing API timeouts"
30
+ severity: "HIGH"
31
+ component: "postgresql_database"
32
+ metrics:
33
+ active_connections: 98
34
+ max_connections: 100
35
+ api_latency_ms: 2450
36
+ error_rate: 15.2
37
+ queue_depth: 1250
38
+ connection_wait_seconds: 45
39
+ business_impact:
40
+ revenue_loss_per_hour: 4200
41
+ affected_services:
42
+ - "API Gateway"
43
+ - "User Service"
44
+ - "Payment Service"
45
+ sla_violation: true
46
+ partner_api_impact: 3
47
+ roi_data:
48
+ hourly_revenue_loss: 4200
49
+ manual_recovery_hours: 0.75
50
+ enterprise_recovery_hours: 0.13
51
+ engineers_required: 2
52
+ engineer_hourly_rate: 150
53
+ estimated_monthly_occurrences: 3
54
+ enterprise_savings_percentage: 0.82
55
+
56
+ Kubernetes Memory Leak:
57
+ description: "Java microservice memory leak causing pod restarts"
58
+ severity: "HIGH"
59
+ component: "java_payment_service"
60
+ metrics:
61
+ memory_usage: 96
62
+ gc_pause_time_ms: 4500
63
+ error_rate: 28.5
64
+ restart_frequency_per_hour: 12
65
+ heap_fragmentation: 42
66
+ business_impact:
67
+ revenue_loss_per_hour: 5500
68
+ session_loss: 8500
69
+ payment_failures_percentage: 3.2
70
+ support_tickets_increase: 300
71
+ roi_data:
72
+ hourly_revenue_loss: 5500
73
+ manual_recovery_hours: 1.5
74
+ enterprise_recovery_hours: 0.25
75
+ engineers_required: 3
76
+ engineer_hourly_rate: 150
77
+ estimated_monthly_occurrences: 1
78
+ enterprise_savings_percentage: 0.79
79
+
80
+ API Rate Limit Storm:
81
+ description: "Third-party API rate limiting causing cascading failures"
82
+ severity: "MEDIUM"
83
+ component: "external_api_gateway"
84
+ metrics:
85
+ rate_limit_hits_percentage: 95
86
+ error_rate: 42.8
87
+ retry_storm: true
88
+ cascade_effect_services: 3
89
+ queue_backlog: 8500
90
+ business_impact:
91
+ revenue_loss_per_hour: 3800
92
+ partner_sla_breach: true
93
+ data_sync_delay_hours: 4
94
+ customer_reports_delay_hours: 6
95
+ roi_data:
96
+ hourly_revenue_loss: 3800
97
+ manual_recovery_hours: 1.25
98
+ enterprise_recovery_hours: 0.17
99
+ engineers_required: 3
100
+ engineer_hourly_rate: 150
101
+ estimated_monthly_occurrences: 4
102
+ enterprise_savings_percentage: 0.85
103
+
104
+ Network Partition:
105
+ description: "Network partition causing split-brain in distributed database"
106
+ severity: "CRITICAL"
107
+ component: "distributed_database"
108
+ metrics:
109
+ partition_detected: true
110
+ write_conflicts: 1250
111
+ data_inconsistency_percentage: 8.5
112
+ replication_lag_seconds: 45
113
+ quorum_lost: true
114
+ business_impact:
115
+ revenue_loss_per_hour: 12000
116
+ data_corruption_risk: true
117
+ recovery_complexity: "HIGH"
118
+ compliance_violation: true
119
+ roi_data:
120
+ hourly_revenue_loss: 12000
121
+ manual_recovery_hours: 2.0
122
+ enterprise_recovery_hours: 0.3
123
+ engineers_required: 5
124
+ engineer_hourly_rate: 150
125
+ estimated_monthly_occurrences: 0.5
126
+ enterprise_savings_percentage: 0.88
127
+
128
+ Storage I/O Saturation:
129
+ description: "Storage system I/O saturation causing application timeouts"
130
+ severity: "HIGH"
131
+ component: "storage_cluster"
132
+ metrics:
133
+ io_utilization: 98
134
+ latency_ms: 450
135
+ throughput_mbps: 1250
136
+ queue_depth: 850
137
+ error_rate: 8.5
138
+ business_impact:
139
+ revenue_loss_per_hour: 6800
140
+ data_processing_delay_hours: 3
141
+ analytics_backlog: true
142
+ reporting_failure: true
143
+ roi_data:
144
+ hourly_revenue_loss: 6800
145
+ manual_recovery_hours: 1.75
146
+ enterprise_recovery_hours: 0.22
147
+ engineers_required: 3
148
+ engineer_hourly_rate: 150
149
+ estimated_monthly_occurrences: 1.5
150
+ enterprise_savings_percentage: 0.83