Spaces:

A-R-F
/

Agentic-Reliability-Framework-API

Running

App Files Files Community

petter2025 committed on Dec 30, 2025

Commit

f456223

verified ·

1 Parent(s): 651e9d6

Update core/data_models.py

Browse files

Files changed (1) hide show

core/data_models.py +206 -7

core/data_models.py CHANGED Viewed

@@ -1,10 +1,10 @@
 """
-Pythonic data models integrated with actual ARF OSS package
 """
 from dataclasses import dataclass, asdict
 from enum import Enum
-from typing import Dict, List, Optional, Any, Tuple
 import datetime
 # Import from actual ARF OSS package
@@ -22,11 +22,46 @@ except ImportError:
     # Fallback mock classes for demo
     class HealingIntent:
         def __init__(self, **kwargs):
-            self.data = kwargs
     class OSSMCPClient:
-        def analyze(self, *args, **kwargs):
-            return {"status": "OSS Analysis Complete"}
 class IncidentSeverity(Enum):
     """Enum for incident severity levels"""
@@ -66,7 +101,6 @@ class OSSAnalysis:
     @classmethod
     def from_arf_analysis(cls, arf_result: Dict, scenario_name: str) -> 'OSSAnalysis':
         """Create from actual ARF analysis result"""
-        # This would be connected to actual ARF OSS analysis
         recommendations = arf_result.get("recommendations", [
             "Increase resource allocation",
             "Implement monitoring",
@@ -142,4 +176,169 @@ class DemoStep:
     action: str
     message: str
     icon: str = "🎯"
-    arf_integration: bool = False  # Whether this step uses actual ARF

 """
+Pythonic data models for the ARF demo
 """
 from dataclasses import dataclass, asdict
 from enum import Enum
+from typing import Dict, List, Optional, Any
 import datetime
 # Import from actual ARF OSS package
     # Fallback mock classes for demo
     class HealingIntent:
         def __init__(self, **kwargs):
+            self.intent_type = kwargs.get("intent_type", "scale_out")
+            self.parameters = kwargs.get("parameters", {})
+        def to_dict(self):
+            return {
+                "intent_type": self.intent_type,
+                "parameters": self.parameters,
+                "created_at": datetime.datetime.now().isoformat()
+            }
+    def create_scale_out_intent(resource_type: str, scale_factor: float = 2.0):
+        return HealingIntent(
+            intent_type="scale_out",
+            parameters={
+                "resource_type": resource_type,
+                "scale_factor": scale_factor,
+                "action": "Increase capacity"
+            }
+        )
     class OSSMCPClient:
+        def __init__(self):
+            self.mode = "advisory"
+        def analyze_incident(self, metrics: Dict, pattern: str = "") -> Dict:
+            return {
+                "status": "analysis_complete",
+                "recommendations": [
+                    "Increase resource allocation",
+                    "Implement monitoring",
+                    "Add circuit breakers",
+                    "Optimize configuration"
+                ],
+                "confidence": 0.92,
+                "pattern_matched": pattern,
+                "healing_intent": {
+                    "type": "scale_out",
+                    "requires_execution": True
+                }
+            }
 class IncidentSeverity(Enum):
     """Enum for incident severity levels"""
     @classmethod
     def from_arf_analysis(cls, arf_result: Dict, scenario_name: str) -> 'OSSAnalysis':
         """Create from actual ARF analysis result"""
         recommendations = arf_result.get("recommendations", [
             "Increase resource allocation",
             "Implement monitoring",
     action: str
     message: str
     icon: str = "🎯"
+    arf_integration: bool = False
+# ===========================================
+# INCIDENT DATABASE
+# ===========================================
+class IncidentDatabase:
+    """Database of incident scenarios for the demo"""
+    @staticmethod
+    def get_scenarios() -> Dict[str, IncidentScenario]:
+        """Get all incident scenarios"""
+        cache_miss = IncidentScenario(
+            name="Cache Miss Storm",
+            severity=IncidentSeverity.CRITICAL,
+            metrics={
+                "Cache Hit Rate": "18.5% (Critical)",
+                "Database Load": "92% (Overloaded)",
+                "Response Time": "1850ms (Slow)",
+                "Affected Users": "45,000",
+                "Eviction Rate": "125/sec"
+            },
+            impact={
+                "Revenue Loss": "$8,500/hour",
+                "Page Load Time": "+300%",
+                "Users Impacted": "45,000",
+                "SLA Violation": "Yes",
+                "Customer Satisfaction": "-40%"
+            },
+            arf_pattern="cache_miss_storm",
+            oss_analysis=OSSAnalysis(
+                status="✅ Analysis Complete",
+                recommendations=[
+                    "Increase Redis cache memory allocation by 2x",
+                    "Implement cache warming strategy with predictive loading",
+                    "Optimize key patterns and implement TTL adjustments",
+                    "Add circuit breaker for graceful database fallback",
+                    "Deploy monitoring for cache hit rate trends"
+                ],
+                estimated_time="60-90 minutes",
+                engineers_needed="2-3 SREs + 1 DBA",
+                manual_effort="High",
+                confidence_score=0.92,
+                healing_intent={
+                    "type": "scale_out",
+                    "resource": "cache",
+                    "scale_factor": 2.0
+                }
+            ),
+            enterprise_results=EnterpriseResults(
+                actions_completed=[
+                    "✅ Auto-scaled Redis cluster: 4GB → 8GB",
+                    "✅ Deployed intelligent cache warming service",
+                    "✅ Optimized 12 key patterns with ML recommendations",
+                    "✅ Implemented circuit breaker with 95% success rate",
+                    "✅ Validated recovery with automated testing"
+                ],
+                metrics_improvement={
+                    "Cache Hit Rate": "18.5% → 72%",
+                    "Response Time": "1850ms → 450ms",
+                    "Database Load": "92% → 45%",
+                    "Throughput": "1250 → 2450 req/sec"
+                },
+                business_impact={
+                    "Recovery Time": "60 min → 12 min",
+                    "Cost Saved": "$7,200",
+                    "Users Impacted": "45,000 → 0",
+                    "Revenue Protected": "$1,700",
+                    "MTTR Improvement": "80% reduction"
+                },
+                approval_required=True,
+                execution_time="8 minutes"
+            )
+        )
+        db_exhaustion = IncidentScenario(
+            name="Database Connection Pool Exhaustion",
+            severity=IncidentSeverity.HIGH,
+            metrics={
+                "Active Connections": "98/100 (Critical)",
+                "API Latency": "2450ms",
+                "Error Rate": "15.2%",
+                "Queue Depth": "1250",
+                "Connection Wait Time": "45s"
+            },
+            impact={
+                "Revenue Loss": "$4,200/hour",
+                "Affected Services": "API Gateway, User Service, Payment Service",
+                "SLA Violation": "Yes",
+                "Partner Impact": "3 external APIs"
+            },
+            arf_pattern="db_connection_exhaustion",
+            oss_analysis=OSSAnalysis(
+                status="✅ Analysis Complete",
+                recommendations=[
+                    "Increase connection pool size from 100 to 200",
+                    "Add connection timeout (30s)",
+                    "Implement leak detection",
+                    "Add connection health checks",
+                    "Optimize query patterns"
+                ],
+                estimated_time="45-60 minutes",
+                engineers_needed="1-2 DBAs",
+                manual_effort="Medium-High",
+                confidence_score=0.88
+            )
+        )
+        memory_leak = IncidentScenario(
+            name="Memory Leak in Production",
+            severity=IncidentSeverity.HIGH,
+            metrics={
+                "Memory Usage": "96% (Critical)",
+                "GC Pause Time": "4500ms",
+                "Error Rate": "28.5%",
+                "Restart Frequency": "12/hour",
+                "Heap Fragmentation": "42%"
+            },
+            impact={
+                "Revenue Loss": "$5,500/hour",
+                "Session Loss": "8,500 users",
+                "Customer Impact": "High",
+                "Support Tickets": "+300%"
+            },
+            arf_pattern="memory_leak_java",
+            oss_analysis=OSSAnalysis(
+                status="✅ Analysis Complete",
+                recommendations=[
+                    "Increase JVM heap size from 4GB to 8GB",
+                    "Implement memory leak detection with profiling",
+                    "Add proactive health checks",
+                    "Schedule rolling restart with zero downtime",
+                    "Deploy memory monitoring dashboard"
+                ],
+                estimated_time="75-90 minutes",
+                engineers_needed="2 Java SREs",
+                manual_effort="High",
+                confidence_score=0.85
+            )
+        )
+        api_rate_limit = IncidentScenario(
+            name="API Rate Limit Exceeded",
+            severity=IncidentSeverity.MEDIUM,
+            metrics={
+                "429 Error Rate": "42.5%",
+                "Successful Requests": "58.3%",
+                "API Latency": "120ms",
+                "Queue Depth": "1250",
+                "Client Satisfaction": "65/100"
+            },
+            impact={
+                "Revenue Loss": "$1,800/hour",
+                "Affected Partners": "8",
+                "Partner SLA Violations": "3",
+                "Business Impact": "Medium"
+            },
+            arf_pattern="api_rate_limit"
+        )
+        return {
+            "Cache Miss Storm": cache_miss,
+            "Database Connection Pool Exhaustion": db_exhaustion,
+            "Memory Leak in Production": memory_leak,
+            "API Rate Limit Exceeded": api_rate_limit
+        }