Spaces:
Runtime error
Runtime error
Update engine.py
Browse files
engine.py
CHANGED
|
@@ -11,7 +11,7 @@ import numpy as np
|
|
| 11 |
from typing import Optional, Dict, Any, List
|
| 12 |
|
| 13 |
from agentic_reliability_framework.core.models.event import ReliabilityEvent, EventSeverity, HealingAction
|
| 14 |
-
from policy_engine import PolicyEngine
|
| 15 |
from agentic_reliability_framework.runtime.analytics.anomaly import AdvancedAnomalyDetector
|
| 16 |
from agentic_reliability_framework.runtime.analytics.predictive import BusinessImpactCalculator
|
| 17 |
from agentic_reliability_framework.runtime.orchestration.manager import OrchestrationManager
|
|
@@ -41,6 +41,11 @@ class ThreadSafeEventStore:
|
|
| 41 |
|
| 42 |
|
| 43 |
class EnhancedReliabilityEngine:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
def __init__(self, orchestrator: Optional[OrchestrationManager] = None,
|
| 45 |
policy_engine: Optional[PolicyEngine] = None,
|
| 46 |
event_store: Optional[ThreadSafeEventStore] = None,
|
|
@@ -66,6 +71,20 @@ class EnhancedReliabilityEngine:
|
|
| 66 |
async def process_event_enhanced(self, component: str, latency: float, error_rate: float,
|
| 67 |
throughput: float = 1000, cpu_util: Optional[float] = None,
|
| 68 |
memory_util: Optional[float] = None) -> Dict[str, Any]:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
logger.info(f"Processing event for {component}: latency={latency}ms, error_rate={error_rate*100:.1f}%")
|
| 70 |
from agentic_reliability_framework.core.models.event import validate_component_id
|
| 71 |
is_valid, error_msg = validate_component_id(component)
|
|
@@ -157,6 +176,10 @@ class EnhancedReliabilityEngine:
|
|
| 157 |
return result
|
| 158 |
|
| 159 |
async def enhance_with_claude(self, event: ReliabilityEvent, agent_results: Dict[str, Any]) -> Dict[str, Any]:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
context_parts = []
|
| 161 |
context_parts.append("INCIDENT SUMMARY:")
|
| 162 |
context_parts.append(f"Component: {event.component}")
|
|
|
|
| 11 |
from typing import Optional, Dict, Any, List
|
| 12 |
|
| 13 |
from agentic_reliability_framework.core.models.event import ReliabilityEvent, EventSeverity, HealingAction
|
| 14 |
+
from policy_engine import PolicyEngine # local patched version (see Dockerfile)
|
| 15 |
from agentic_reliability_framework.runtime.analytics.anomaly import AdvancedAnomalyDetector
|
| 16 |
from agentic_reliability_framework.runtime.analytics.predictive import BusinessImpactCalculator
|
| 17 |
from agentic_reliability_framework.runtime.orchestration.manager import OrchestrationManager
|
|
|
|
| 41 |
|
| 42 |
|
| 43 |
class EnhancedReliabilityEngine:
|
| 44 |
+
"""
|
| 45 |
+
Main engine for processing infrastructure events.
|
| 46 |
+
Orchestrates agents, policy evaluation, risk scoring, and optional Claude enhancement.
|
| 47 |
+
"""
|
| 48 |
+
|
| 49 |
def __init__(self, orchestrator: Optional[OrchestrationManager] = None,
|
| 50 |
policy_engine: Optional[PolicyEngine] = None,
|
| 51 |
event_store: Optional[ThreadSafeEventStore] = None,
|
|
|
|
| 71 |
async def process_event_enhanced(self, component: str, latency: float, error_rate: float,
|
| 72 |
throughput: float = 1000, cpu_util: Optional[float] = None,
|
| 73 |
memory_util: Optional[float] = None) -> Dict[str, Any]:
|
| 74 |
+
"""
|
| 75 |
+
Process a single telemetry event and return analysis results.
|
| 76 |
+
|
| 77 |
+
Args:
|
| 78 |
+
component: Name of the component (e.g., "api-service").
|
| 79 |
+
latency: P99 latency in milliseconds.
|
| 80 |
+
error_rate: Error rate between 0 and 1.
|
| 81 |
+
throughput: Requests per second.
|
| 82 |
+
cpu_util: CPU utilization (0-1), optional.
|
| 83 |
+
memory_util: Memory utilization (0-1), optional.
|
| 84 |
+
|
| 85 |
+
Returns:
|
| 86 |
+
Dictionary containing analysis results.
|
| 87 |
+
"""
|
| 88 |
logger.info(f"Processing event for {component}: latency={latency}ms, error_rate={error_rate*100:.1f}%")
|
| 89 |
from agentic_reliability_framework.core.models.event import validate_component_id
|
| 90 |
is_valid, error_msg = validate_component_id(component)
|
|
|
|
| 176 |
return result
|
| 177 |
|
| 178 |
async def enhance_with_claude(self, event: ReliabilityEvent, agent_results: Dict[str, Any]) -> Dict[str, Any]:
|
| 179 |
+
"""
|
| 180 |
+
Enhance agent results with a Claude-generated executive summary.
|
| 181 |
+
Falls back gracefully if Claude is unavailable.
|
| 182 |
+
"""
|
| 183 |
context_parts = []
|
| 184 |
context_parts.append("INCIDENT SUMMARY:")
|
| 185 |
context_parts.append(f"Component: {event.component}")
|