Spaces:
Runtime error
Runtime error
"""
Incident scenarios for the demo - EXPANDED VERSION
"""
# Catalogue of demo incident scenarios, keyed by the scenario's display name.
#
# Every entry has the same six top-level keys:
#   description      - one-sentence summary of the incident
#   severity         - "CRITICAL", "HIGH" or "MEDIUM"
#   component        - identifier of the affected component
#   metrics          - scenario-specific technical readings (keys differ per scenario;
#                      units are spelled out in the key name where one is given)
#   business_impact  - scenario-specific business figures; always includes
#                      "revenue_loss_per_hour"
#   roi_data         - fixed set of seven inputs, identical keys in every scenario
#
# Notes for maintainers:
#   * roi_data["hourly_revenue_loss"] repeats business_impact["revenue_loss_per_hour"]
#     in every scenario; change both together so they do not drift apart.
#   * roi_data["enterprise_savings_percentage"] holds a fraction (0.79 - 0.88 here),
#     not a 0-100 percentage, despite its name.
#   * roi_data["estimated_monthly_occurrences"] may be fractional (e.g. 0.5, 1.5).
#   * Monetary amounts carry no currency marker - presumably USD; TODO confirm.
INCIDENT_SCENARIOS = {
    "Cache Miss Storm": {
        "description": "Redis cluster experiencing 80% cache miss rate causing database overload",
        "severity": "CRITICAL",
        "component": "redis_cache",
        "metrics": {
            "cache_hit_rate": 18.5,
            "database_load": 92,
            "response_time_ms": 1850,
            "affected_users": 45000,
            "eviction_rate_per_sec": 125,
        },
        "business_impact": {
            "revenue_loss_per_hour": 8500,
            "sla_violation": True,
            "customer_sat_change": -40,
            # Service names match the spelling used by the other scenarios.
            "affected_services": ["API Gateway", "User Service", "Payment Service"],
        },
        "roi_data": {
            "hourly_revenue_loss": 8500,
            "manual_recovery_hours": 1.0,
            "enterprise_recovery_hours": 0.2,
            "engineers_required": 4,
            "engineer_hourly_rate": 150,
            "estimated_monthly_occurrences": 2,
            "enterprise_savings_percentage": 0.85,
        },
    },
    "Database Connection Pool Exhaustion": {
        "description": "PostgreSQL connection pool exhausted causing API timeouts",
        "severity": "HIGH",
        "component": "postgresql_database",
        "metrics": {
            "active_connections": 98,
            "max_connections": 100,
            "api_latency_ms": 2450,
            "error_rate": 15.2,
            "queue_depth": 1250,
            "connection_wait_seconds": 45,
        },
        "business_impact": {
            "revenue_loss_per_hour": 4200,
            "affected_services": ["API Gateway", "User Service", "Payment Service"],
            "sla_violation": True,
            "partner_api_impact": 3,
        },
        "roi_data": {
            "hourly_revenue_loss": 4200,
            "manual_recovery_hours": 0.75,
            "enterprise_recovery_hours": 0.13,
            "engineers_required": 2,
            "engineer_hourly_rate": 150,
            "estimated_monthly_occurrences": 3,
            "enterprise_savings_percentage": 0.82,
        },
    },
    "Kubernetes Memory Leak": {
        "description": "Java microservice memory leak causing pod restarts",
        "severity": "HIGH",
        "component": "java_payment_service",
        "metrics": {
            "memory_usage": 96,
            "gc_pause_time_ms": 4500,
            "error_rate": 28.5,
            "restart_frequency_per_hour": 12,
            "heap_fragmentation": 42,
        },
        "business_impact": {
            "revenue_loss_per_hour": 5500,
            "session_loss": 8500,
            "payment_failures_percentage": 3.2,
            "support_tickets_increase": 300,
        },
        "roi_data": {
            "hourly_revenue_loss": 5500,
            "manual_recovery_hours": 1.5,
            "enterprise_recovery_hours": 0.25,
            "engineers_required": 3,
            "engineer_hourly_rate": 150,
            "estimated_monthly_occurrences": 1,
            "enterprise_savings_percentage": 0.79,
        },
    },
    "API Rate Limit Storm": {
        "description": "Third-party API rate limiting causing cascading failures",
        "severity": "MEDIUM",
        "component": "external_api_gateway",
        "metrics": {
            "rate_limit_hits_percentage": 95,
            "error_rate": 42.8,
            "retry_storm": True,
            "cascade_effect_services": 3,
            "queue_backlog": 8500,
        },
        "business_impact": {
            "revenue_loss_per_hour": 3800,
            "partner_sla_breach": True,
            "data_sync_delay_hours": 4,
            "customer_reports_delay_hours": 6,
        },
        "roi_data": {
            "hourly_revenue_loss": 3800,
            "manual_recovery_hours": 1.25,
            "enterprise_recovery_hours": 0.17,
            "engineers_required": 3,
            "engineer_hourly_rate": 150,
            "estimated_monthly_occurrences": 4,
            "enterprise_savings_percentage": 0.85,
        },
    },
    "Network Partition": {
        "description": "Network partition causing split-brain in distributed database",
        "severity": "CRITICAL",
        "component": "distributed_database",
        "metrics": {
            "partition_detected": True,
            "write_conflicts": 1250,
            "data_inconsistency_percentage": 8.5,
            "replication_lag_seconds": 45,
            "quorum_lost": True,
        },
        "business_impact": {
            "revenue_loss_per_hour": 12000,
            "data_corruption_risk": True,
            "recovery_complexity": "HIGH",
            "compliance_violation": True,
        },
        "roi_data": {
            "hourly_revenue_loss": 12000,
            "manual_recovery_hours": 2.0,
            "enterprise_recovery_hours": 0.3,
            "engineers_required": 5,
            "engineer_hourly_rate": 150,
            "estimated_monthly_occurrences": 0.5,
            "enterprise_savings_percentage": 0.88,
        },
    },
    "Storage I/O Saturation": {
        "description": "Storage system I/O saturation causing application timeouts",
        "severity": "HIGH",
        "component": "storage_cluster",
        "metrics": {
            "io_utilization": 98,
            "latency_ms": 450,
            "throughput_mbps": 1250,
            "queue_depth": 850,
            "error_rate": 8.5,
        },
        "business_impact": {
            "revenue_loss_per_hour": 6800,
            "data_processing_delay_hours": 3,
            "analytics_backlog": True,
            "reporting_failure": True,
        },
        "roi_data": {
            "hourly_revenue_loss": 6800,
            "manual_recovery_hours": 1.75,
            "enterprise_recovery_hours": 0.22,
            "engineers_required": 3,
            "engineer_hourly_rate": 150,
            "estimated_monthly_occurrences": 1.5,
            "enterprise_savings_percentage": 0.83,
        },
    },
}