| """ |
| True ARF OSS v3.3.7 - Real Implementation |
| Production-grade multi-agent AI for reliability monitoring (Advisory only) |
| |
| Core Agents: |
| 1. Detection Agent: Anomaly detection and incident identification |
| 2. Recall Agent: RAG-based memory for similar incidents |
| 3. Decision Agent: Healing intent generation with confidence scoring |
| |
| OSS Edition: Apache 2.0 Licensed, Advisory mode only |
| """ |
|
|
| import asyncio |
| import logging |
| import time |
| import uuid |
| from typing import Dict, Any, List, Optional, Tuple |
| from dataclasses import dataclass, field |
| from datetime import datetime |
| import numpy as np |
|
|
| logger = logging.getLogger(__name__) |
|
|
| |
| |
| |
|
|
@dataclass
class TelemetryPoint:
    """One metric sample reported for a component.

    Attributes:
        timestamp: Time of the sample as a float (presumably epoch seconds;
            confirm against the telemetry producer).
        metric: Name of the metric, e.g. "error_rate" or "latency_p99".
        value: Observed value of the metric.
        component: Name of the component the sample belongs to.
    """

    timestamp: float
    metric: str
    value: float
    component: str
|
|
@dataclass
class Anomaly:
    """A single anomaly reported by the Detection Agent.

    Attributes:
        id: Unique identifier of the anomaly.
        component: Component in which the anomaly was observed.
        metric: Metric that triggered the anomaly.
        value: Observed metric value.
        expected_range: (low, high) bounds the value was expected to stay in.
        confidence: Confidence score attached to the detection.
        severity: Severity label such as "low", "medium", "high", "critical".
        timestamp: Creation time; defaults to ``time.time()`` at construction.
    """

    id: str
    component: str
    metric: str
    value: float
    expected_range: Tuple[float, float]
    confidence: float
    severity: str
    # default_factory so each instance gets its own creation time.
    timestamp: float = field(default_factory=time.time)
| |
@dataclass
class Incident:
    """Incident representation for RAG memory.

    Attributes:
        id: Unique identifier of the incident.
        component: Component the incident belongs to.
        anomaly: The anomaly that opened the incident.
        telemetry: Telemetry points attached to the incident.
        context: Free-form context; ``to_vector`` reads the optional
            "error_rate" and "latency_p99" keys.
        timestamp: Creation time; defaults to ``time.time()``.
        resolved: Whether a resolution has been recorded.
        resolution: Name of the recorded resolution action, if any.
    """

    id: str
    component: str
    anomaly: Anomaly
    telemetry: List[TelemetryPoint]
    context: Dict[str, Any]
    timestamp: float = field(default_factory=time.time)
    resolved: bool = False
    resolution: Optional[str] = None

    def to_vector(self) -> List[float]:
        """Convert the incident to a 7-element vector for similarity search.

        Layout: [component bucket, severity weight, anomaly confidence,
        telemetry mean, telemetry std, error_rate, scaled latency_p99].

        Returns:
            The feature vector as a list of numbers.
        """
        # Function-scope import keeps this block self-contained.
        import zlib

        # Built-in hash() on str is salted per process (PYTHONHASHSEED), which
        # made the component feature differ between runs. crc32 is stable.
        component_bucket = zlib.crc32(self.component.encode("utf-8")) % 1000 / 1000.0

        severity_map = {"low": 0.1, "medium": 0.3, "high": 0.7, "critical": 1.0}
        severity_weight = severity_map.get(self.anomaly.severity, 0.5)

        if self.telemetry:
            values = [point.value for point in self.telemetry]
            # float() keeps NumPy scalar types out of the returned list.
            mean_value = float(np.mean(values))
            std_value = float(np.std(values)) if len(values) > 1 else 0.0
        else:
            mean_value = 0.0
            std_value = 0.0

        error_rate = self.context.get("error_rate", 0.0)

        if "latency_p99" in self.context:
            # Divide by 1000 and cap at 1.0 so latency stays on a 0..1 scale.
            latency_feature = min(self.context["latency_p99"] / 1000.0, 1.0)
        else:
            latency_feature = 0.0

        return [
            component_bucket,
            severity_weight,
            self.anomaly.confidence,
            mean_value,
            std_value,
            error_rate,
            latency_feature,
        ]
|
|
| |
| |
| |
|
|
class DetectionAgent:
    """
    Detection Agent - Identifies anomalies in telemetry data

    The most recent sample of every metric is compared against the static
    ``warning`` / ``critical`` levels in ``self.thresholds``. When two or more
    metrics breach their thresholds in the same call, an additional
    "correlated" anomaly is reported.

    Attributes:
        config: Configuration dict given at construction (stored only).
        detection_history: Every anomaly returned by ``analyze_telemetry``.
        telemetry_buffer: Recent points of anomalous metrics, keyed by
            "component:metric" and capped at 100 points per key.
        thresholds: Per-metric ``{"warning": x, "critical": y}`` levels.
    """

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        """Create the agent with empty history and the built-in thresholds.

        Args:
            config: Optional configuration dict; stored on ``self.config``.
        """
        self.config = config or {}
        self.detection_history: List[Anomaly] = []
        self.telemetry_buffer: Dict[str, List[TelemetryPoint]] = {}

        # A metric whose critical level is BELOW its warning level (throughput)
        # is treated as lower-is-worse by _classify_value.
        self.thresholds = {
            "error_rate": {"warning": 0.01, "critical": 0.05},
            "latency_p99": {"warning": 200, "critical": 500},
            "cpu_util": {"warning": 0.8, "critical": 0.95},
            "memory_util": {"warning": 0.85, "critical": 0.95},
            "throughput": {"warning": 0.7, "critical": 0.3},
        }

        logger.info("Detection Agent initialized")

    def _classify_value(
        self, metric: str, value: float
    ) -> Optional[Tuple[str, float, Tuple[float, float]]]:
        """Classify one metric value against its thresholds.

        The breach direction is inferred from the threshold pair: when
        ``critical >= warning`` larger values are worse, otherwise smaller
        values are worse. Previously every metric was compared with ``>=``,
        which flagged healthy throughput as critical and missed real drops.

        Args:
            metric: Metric name to look up in ``self.thresholds``.
            value: Value to classify.

        Returns:
            ``(severity, confidence, expected_range)`` when the value breaches
            a threshold, or ``None`` when the metric has no thresholds or the
            value is within the normal range.
        """
        threshold = self.thresholds.get(metric)
        if threshold is None:
            return None

        warning = threshold["warning"]
        critical = threshold["critical"]

        if critical >= warning:
            # Higher is worse: measure how far the value is above each level.
            beyond_critical = value - critical
            beyond_warning = value - warning
            expected_range = (0, warning)
        else:
            # Lower is worse: measure how far the value is below each level.
            beyond_critical = critical - value
            beyond_warning = warning - value
            expected_range = (warning, float("inf"))

        if beyond_critical >= 0:
            return "critical", min(0.95 + beyond_critical * 2, 0.99), expected_range
        if beyond_warning >= 0:
            return "high", min(0.85 + beyond_warning * 0.5, 0.99), expected_range
        return None

    async def analyze_telemetry(self, component: str, telemetry: List[TelemetryPoint]) -> List[Anomaly]:
        """
        Analyze telemetry data for anomalies

        Only the last point of each metric (in the order given) is evaluated,
        and metrics with fewer than three points are ignored.

        Args:
            component: Target component name
            telemetry: List of telemetry data points

        Returns:
            List of detected anomalies (per-metric ones first, then the
            correlated anomaly if any). The same anomalies are appended to
            ``self.detection_history``.
        """
        # Group points by metric, preserving arrival order.
        metrics: Dict[str, List[TelemetryPoint]] = {}
        for point in telemetry:
            metrics.setdefault(point.metric, []).append(point)

        anomalies: List[Anomaly] = []
        for metric, points in metrics.items():
            if len(points) < 3:
                continue

            recent_value = points[-1].value
            verdict = self._classify_value(metric, recent_value)
            if verdict is None:
                continue
            severity, confidence, expected_range = verdict

            anomalies.append(Anomaly(
                id=str(uuid.uuid4()),
                component=component,
                metric=metric,
                value=recent_value,
                expected_range=expected_range,
                confidence=confidence,
                severity=severity,
            ))

            # Keep the last few points of anomalous metrics for trend analysis.
            self._store_in_buffer(component, metric, points[-5:])

            logger.info(f"Detection Agent: Found {severity} anomaly in {component}.{metric}: {recent_value}")

        anomalies.extend(await self._detect_correlated_anomalies(component, metrics))
        self.detection_history.extend(anomalies)

        return anomalies

    async def _detect_correlated_anomalies(self, component: str, metrics: Dict[str, List[TelemetryPoint]]) -> List[Anomaly]:
        """Detect anomalies that correlate across multiple metrics.

        Args:
            component: Target component name.
            metrics: Telemetry points grouped by metric name.

        Returns:
            A list with one "correlated" anomaly when at least two metrics
            breach their thresholds, otherwise an empty list.
        """
        breached_severities = []
        for metric, points in metrics.items():
            if len(points) < 3:
                continue
            verdict = self._classify_value(metric, points[-1].value)
            if verdict is not None:
                breached_severities.append(verdict[0])

        breached_count = len(breached_severities)
        if breached_count < 2:
            return []

        # 0.7 for two metrics, +0.1 per additional metric, capped at 0.97.
        confidence = min(0.7 + (breached_count - 2) * 0.1, 0.97)
        severity = "critical" if "critical" in breached_severities else "high"

        anomaly = Anomaly(
            id=str(uuid.uuid4()),
            component=component,
            metric="correlated",
            value=breached_count,
            expected_range=(0, 1),
            confidence=confidence,
            severity=severity,
        )
        logger.info(f"Detection Agent: Found correlated anomaly across {breached_count} metrics")

        return [anomaly]

    def _store_in_buffer(self, component: str, metric: str, points: List[TelemetryPoint]):
        """Store telemetry in buffer for trend analysis.

        Args:
            component: Component name (first half of the buffer key).
            metric: Metric name (second half of the buffer key).
            points: Points to append to the buffer entry.
        """
        key = f"{component}:{metric}"
        buffered = self.telemetry_buffer.setdefault(key, [])
        buffered.extend(points)

        # Keep only the newest 100 points per key.
        if len(buffered) > 100:
            self.telemetry_buffer[key] = buffered[-100:]

    def get_detection_stats(self) -> Dict[str, Any]:
        """Get detection statistics.

        Returns:
            Dict with the total detection count, counts per severity, the
            number of buffered points and the number of buffer keys.
        """
        by_severity = {"critical": 0, "high": 0, "medium": 0, "low": 0}
        for anomaly in self.detection_history:
            if anomaly.severity in by_severity:
                by_severity[anomaly.severity] += 1

        return {
            "total_detections": len(self.detection_history),
            "by_severity": by_severity,
            "buffer_size": sum(len(points) for points in self.telemetry_buffer.values()),
            "unique_metrics": len(self.telemetry_buffer),
        }
|
|
| |
| |
| |
|
|
class RecallAgent:
    """
    Recall Agent - RAG-based memory for similar incidents

    Incidents are kept in memory together with their feature vectors.
    Lookups rank incidents of the same component by cosine similarity and
    attach the outcome statistics recorded through ``add_outcome``.

    Attributes:
        config: Configuration dict given at construction (stored only).
        incidents: Stored incidents, in insertion order.
        incident_vectors: ``to_vector()`` output for each stored incident.
        outcomes: Outcome statistics keyed by incident ID.
        similarity_cache: Ranked lookup results keyed by
            "component:metric:vector"; cleared whenever memory changes.
    """

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        """Create an empty memory.

        Args:
            config: Optional configuration dict; stored on ``self.config``.
        """
        self.config = config or {}
        self.incidents: List[Incident] = []
        self.incident_vectors: List[List[float]] = []

        self.outcomes: Dict[str, Dict[str, Any]] = {}

        self.similarity_cache: Dict[str, List[Dict[str, Any]]] = {}

        logger.info("Recall Agent initialized")

    async def add_incident(self, incident: Incident) -> str:
        """
        Add incident to memory

        Args:
            incident: Incident to add

        Returns:
            Incident ID
        """
        self.incidents.append(incident)
        self.incident_vectors.append(incident.to_vector())

        # A new incident can change any ranking, so cached results are stale.
        self.similarity_cache.clear()

        logger.info(f"Recall Agent: Added incident {incident.id} for {incident.component}")
        return incident.id

    async def find_similar(self, current_incident: Incident, k: int = 5) -> List[Dict[str, Any]]:
        """
        Find similar incidents using vector similarity

        Only incidents of the same component are considered. Results are
        ordered by descending cosine similarity.

        Args:
            current_incident: Current incident to compare against
            k: Number of similar incidents to return

        Returns:
            List of similar incidents with similarity scores. Each entry also
            carries the stored incident's ``resolution`` (or ``None``).
        """
        if not self.incidents:
            return []

        query_values = current_incident.to_vector()

        # The key includes the query vector: similarity scores depend on it,
        # so component and metric alone would return another query's scores.
        cache_key = (
            f"{current_incident.component}:{current_incident.anomaly.metric}:{query_values!r}"
        )
        cached = self.similarity_cache.get(cache_key)
        if cached is not None:
            return cached[:k]

        query_vector = np.array(query_values)
        query_norm = np.linalg.norm(query_vector)  # loop-invariant

        ranked: List[Dict[str, Any]] = []
        for incident, vector in zip(self.incidents, self.incident_vectors):
            if incident.component != current_incident.component:
                continue

            incident_vector = np.array(vector)
            incident_norm = np.linalg.norm(incident_vector)
            if query_norm == 0 or incident_norm == 0:
                # Cosine similarity is undefined for a zero vector.
                similarity = 0.0
            else:
                similarity = float(
                    np.dot(query_vector, incident_vector) / (query_norm * incident_norm)
                )

            outcome = self.outcomes.get(incident.id, {})
            ranked.append({
                "incident_id": incident.id,
                "component": incident.component,
                "severity": incident.anomaly.severity,
                "similarity_score": similarity,
                "success_rate": outcome.get("success_rate", 0.0),
                "resolution_time_minutes": outcome.get("resolution_time_minutes", 0.0),
                "timestamp": incident.timestamp,
                "anomaly_metric": incident.anomaly.metric,
                "anomaly_value": incident.anomaly.value,
                # Read by DecisionAgent._select_action; was missing before.
                "resolution": incident.resolution,
            })

        # list.sort is stable, so ties keep insertion order.
        ranked.sort(key=lambda entry: entry["similarity_score"], reverse=True)

        # Cache the full ranking so a later call with a larger k is not
        # limited to an earlier, smaller k.
        self.similarity_cache[cache_key] = ranked
        results = ranked[:k]

        logger.info(f"Recall Agent: Found {len(results)} similar incidents for {current_incident.component}")
        return results

    async def add_outcome(self, incident_id: str, success: bool,
                          resolution_action: str, resolution_time_minutes: float):
        """
        Add resolution outcome to incident

        Logs a warning and returns without changes when the incident ID is
        unknown.

        Args:
            incident_id: ID of the incident
            success: Whether the resolution was successful
            resolution_action: Action taken to resolve
            resolution_time_minutes: Time taken to resolve
        """
        incident = next(
            (candidate for candidate in self.incidents if candidate.id == incident_id),
            None,
        )
        if incident is None:
            logger.warning(f"Recall Agent: Incident {incident_id} not found for outcome")
            return

        # NOTE(review): the incident is flagged resolved even when
        # success is False - confirm this is the intended meaning.
        incident.resolved = True
        incident.resolution = resolution_action

        stats = self.outcomes.setdefault(incident_id, {
            "successes": 0,
            "attempts": 0,
            "actions": [],
            "resolution_times": [],
        })

        stats["attempts"] += 1
        if success:
            stats["successes"] += 1
        stats["actions"].append(resolution_action)
        stats["resolution_times"].append(resolution_time_minutes)

        # attempts is at least 1 here, so the division is safe.
        stats["success_rate"] = stats["successes"] / stats["attempts"]

        times = stats["resolution_times"]
        stats["resolution_time_minutes"] = sum(times) / len(times)

        # Cached results embed success_rate and resolution; drop them.
        self.similarity_cache.clear()

        logger.info(f"Recall Agent: Added outcome for incident {incident_id} (success: {success})")

    def get_memory_stats(self) -> Dict[str, Any]:
        """Get memory statistics.

        Returns:
            Dict with incident counts, tracked outcomes, cache size and the
            length of the stored vectors (0 when memory is empty).
        """
        return {
            "total_incidents": len(self.incidents),
            "resolved_incidents": sum(1 for incident in self.incidents if incident.resolved),
            "outcomes_tracked": len(self.outcomes),
            "cache_size": len(self.similarity_cache),
            "vector_dimension": len(self.incident_vectors[0]) if self.incident_vectors else 0,
        }
|
|
| |
| |
| |
|
|
class DecisionAgent:
    """
    Decision Agent - Generates healing intents based on analysis

    Picks an action from the history of similar incidents (when they carry a
    resolution) or from a metric-to-action mapping, scores it, and builds the
    parameters and a justification text.

    Attributes:
        config: Configuration dict given at construction (stored only).
        action_success_rates: Success rate per action; starts from built-in
            values and is adjusted by ``update_success_rate``.
        anomaly_to_action: Candidate actions per anomaly metric.
    """

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        """Create the agent with the built-in action tables.

        Args:
            config: Optional configuration dict; stored on ``self.config``.
        """
        self.config = config or {}

        self.action_success_rates = {
            "restart_container": 0.95,
            "scale_out": 0.87,
            "circuit_breaker": 0.92,
            "traffic_shift": 0.85,
            "rollback": 0.78,
            "alert_team": 0.99,
        }

        self.anomaly_to_action = {
            "cpu_util": ["scale_out", "traffic_shift"],
            "memory_util": ["scale_out", "restart_container"],
            "error_rate": ["circuit_breaker", "rollback", "alert_team"],
            "latency_p99": ["scale_out", "traffic_shift", "circuit_breaker"],
            "throughput": ["scale_out", "traffic_shift"],
            "correlated": ["alert_team", "scale_out", "restart_container"],
        }

        logger.info("Decision Agent initialized")

    async def generate_healing_intent(
        self,
        anomaly: Anomaly,
        similar_incidents: List[Dict[str, Any]],
        context: Dict[str, Any]
    ) -> Dict[str, Any]:
        """
        Generate healing intent based on anomaly and similar incidents

        Args:
            anomaly: Detected anomaly
            similar_incidents: Similar historical incidents
            context: Additional context

        Returns:
            Healing intent dictionary
        """
        action = await self._select_action(anomaly, similar_incidents)
        confidence = await self._calculate_confidence(anomaly, similar_incidents, action)
        parameters = await self._determine_parameters(anomaly, action, context)
        justification = await self._generate_justification(anomaly, similar_incidents, action, confidence)

        healing_intent = {
            "action": action,
            "component": anomaly.component,
            "parameters": parameters,
            "confidence": confidence,
            "justification": justification,
            "anomaly_id": anomaly.id,
            "anomaly_severity": anomaly.severity,
            "similar_incidents_count": len(similar_incidents),
            "similar_incidents_success_rate": self._calculate_average_success_rate(similar_incidents),
            "requires_enterprise": True,
            "oss_advisory": True,
            "timestamp": time.time(),
            "arf_version": "3.3.7",
        }

        logger.info(f"Decision Agent: Generated {action} intent for {anomaly.component} (confidence: {confidence:.2f})")
        return healing_intent

    async def _select_action(self, anomaly: Anomaly,
                             similar_incidents: List[Dict[str, Any]]) -> str:
        """Select the most appropriate healing action.

        History is preferred: the resolution with the highest share of
        successful similar incidents wins, provided it succeeded at least
        once. Otherwise the choice falls back to ``anomaly_to_action``.

        Args:
            anomaly: Detected anomaly (``metric`` and ``severity`` are read).
            similar_incidents: Similar historical incidents; the optional
                "resolution" and "success_rate" keys are read.

        Returns:
            Name of the selected action; "alert_team" when nothing else fits.
        """
        history: Dict[str, Dict[str, int]] = {}
        for incident in similar_incidents:
            resolution = incident.get("resolution")
            if not resolution:
                continue
            stats = history.setdefault(resolution, {"successes": 0, "total": 0})
            stats["total"] += 1
            if incident.get("success_rate", 0.5) > 0.5:
                stats["successes"] += 1

        if history:
            best_action, best_stats = max(
                history.items(),
                key=lambda item: item[1]["successes"] / item[1]["total"],
            )
            # Never recommend an action that failed every time it was tried;
            # fall through to the metric-based mapping instead.
            if best_stats["successes"] > 0:
                return best_action

        candidate_actions = self.anomaly_to_action.get(anomaly.metric, ["alert_team"])

        if anomaly.severity in ["critical", "high"]:
            # For severe anomalies only direct remediation actions are kept.
            preferred_actions = ["scale_out", "circuit_breaker", "restart_container"]
            candidate_actions = [a for a in candidate_actions if a in preferred_actions]

        if candidate_actions:
            return max(
                candidate_actions,
                key=lambda name: self.action_success_rates.get(name, 0.5),
            )

        return "alert_team"

    async def _calculate_confidence(self, anomaly: Anomaly,
                                    similar_incidents: List[Dict[str, Any]],
                                    selected_action: str) -> float:
        """Calculate confidence score for the selected action.

        Args:
            anomaly: Detected anomaly (``confidence`` is read).
            similar_incidents: Similar historical incidents.
            selected_action: Action whose success rate scales the score.

        Returns:
            Confidence as a built-in float clamped to the range [0.0, 0.99].
        """
        score = anomaly.confidence * 0.8

        if similar_incidents:
            avg_similarity = float(np.mean(
                [incident.get("similarity_score", 0.0) for incident in similar_incidents]
            ))
            score += avg_similarity * 0.3
            score += self._calculate_average_success_rate(similar_incidents) * 0.2

        action_rate = self.action_success_rates.get(selected_action, 0.5)
        score *= 0.5 + action_rate * 0.5

        # Cosine similarity can be negative, so clamp the lower bound as well;
        # float() keeps NumPy scalar types out of the result.
        return float(min(max(score, 0.0), 0.99))

    async def _determine_parameters(self, anomaly: Anomaly,
                                    action: str, context: Dict[str, Any]) -> Dict[str, Any]:
        """Determine parameters for the healing action.

        Args:
            anomaly: Detected anomaly (``severity`` is read).
            action: Selected action name.
            context: Additional context; "environment" is copied if present.

        Returns:
            Parameter dict for the action; unknown actions get only the
            optional "environment" entry.
        """
        parameters: Dict[str, Any] = {}
        mild = anomaly.severity in ["low", "medium"]

        if action == "scale_out":
            severity_factor = {"low": 1, "medium": 2, "high": 3, "critical": 4}
            parameters = {
                "scale_factor": severity_factor.get(anomaly.severity, 2),
                "resource_profile": "standard",
                "strategy": "gradual" if mild else "immediate",
            }
        elif action == "restart_container":
            parameters = {
                "grace_period": 30,
                "force": anomaly.severity == "critical",
            }
        elif action == "circuit_breaker":
            parameters = {
                "threshold": 0.5,
                "timeout": 60,
                "half_open_after": 300,
            }
        elif action == "rollback":
            parameters = {
                "revision": "previous",
                "verify": True,
            }
        elif action == "traffic_shift":
            parameters = {
                "percentage": 50,
                "target": "canary" if mild else "stable",
            }
        elif action == "alert_team":
            parameters = {
                "severity": anomaly.severity,
                "channels": ["slack", "email"],
                "escalate_after_minutes": 5 if anomaly.severity == "critical" else 15,
            }

        if "environment" in context:
            parameters["environment"] = context["environment"]

        return parameters

    async def _generate_justification(self, anomaly: Anomaly,
                                      similar_incidents: List[Dict[str, Any]],
                                      action: str, confidence: float) -> str:
        """Generate human-readable justification.

        Args:
            anomaly: Detected anomaly.
            similar_incidents: Similar historical incidents.
            action: Selected action name.
            confidence: Confidence score of the action.

        Returns:
            A three-sentence explanation; the wording depends on whether
            similar incidents were found.
        """
        headline = (
            f"Detected {anomaly.severity} anomaly in {anomaly.component} ({anomaly.metric}: {anomaly.value:.2f}). "
        )

        if similar_incidents:
            similar_count = len(similar_incidents)
            avg_success = self._calculate_average_success_rate(similar_incidents)
            return (
                headline
                + f"Found {similar_count} similar historical incidents with {avg_success:.0%} average success rate. "
                + f"Recommended action '{action}' with {confidence:.0%} confidence based on pattern matching."
            )

        return (
            headline
            + "No similar historical incidents found. "
            + f"Recommended action '{action}' with {confidence:.0%} confidence based on anomaly characteristics."
        )

    def _calculate_average_success_rate(self, similar_incidents: List[Dict[str, Any]]) -> float:
        """Calculate average success rate from similar incidents.

        Args:
            similar_incidents: Similar historical incidents; a missing
                "success_rate" counts as 0.0.

        Returns:
            Mean success rate, or 0.0 for an empty list.
        """
        if not similar_incidents:
            return 0.0

        total = sum(incident.get("success_rate", 0.0) for incident in similar_incidents)
        return total / len(similar_incidents)

    def update_success_rate(self, action: str, success: bool):
        """Update action success rate based on outcome.

        Uses an exponential moving average with weight 0.1 for the new
        outcome; unknown actions start from 0.5.

        Args:
            action: Action name.
            success: Whether the action succeeded.
        """
        current_rate = self.action_success_rates.setdefault(action, 0.5)
        new_rate = current_rate * 0.9 + (0.1 if success else 0.0)

        self.action_success_rates[action] = new_rate
        logger.info(f"Decision Agent: Updated {action} success rate to {new_rate:.2f}")
|
|
| |
| |
| |
|
|
class TrueARFOSS:
    """
    True ARF OSS v3.3.7 - Complete integration of all agents

    This is the class that TrueARF337Orchestrator expects to import.
    Provides real ARF OSS functionality for the demo.

    Attributes:
        config: Configuration dict, also passed to every agent.
        detection_agent: DetectionAgent instance.
        recall_agent: RecallAgent instance.
        decision_agent: DecisionAgent instance.
        oss_available: Always True for this implementation.
    """

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        """Create the three agents.

        Args:
            config: Optional configuration dict shared with all agents.
        """
        self.config = config or {}
        self.detection_agent = DetectionAgent(config)
        self.recall_agent = RecallAgent(config)
        self.decision_agent = DecisionAgent(config)
        self.oss_available = True

        logger.info("True ARF OSS v3.3.7 initialized")

    async def analyze_scenario(self, scenario_name: str,
                               scenario_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Complete ARF analysis for a scenario

        Runs detection, recall and decision in sequence. Any exception is
        logged and reported as a result with ``"status": "error"`` instead of
        being raised.

        Args:
            scenario_name: Name of the scenario
            scenario_data: Scenario data including telemetry and context
                ("component", "telemetry" and "context" keys are read)

        Returns:
            Complete analysis result
        """
        # perf_counter is monotonic, so durations cannot go negative when the
        # wall clock is adjusted.
        started = time.perf_counter()

        def elapsed_ms() -> float:
            return (time.perf_counter() - started) * 1000

        try:
            component = scenario_data.get("component", "unknown")
            # "or" also covers keys that are present but set to None.
            telemetry_data = scenario_data.get("telemetry") or []
            context = scenario_data.get("context") or {}

            telemetry = [
                TelemetryPoint(
                    timestamp=point.get("timestamp", time.time()),
                    metric=point.get("metric", "unknown"),
                    value=point.get("value", 0.0),
                    component=component,
                )
                for point in telemetry_data
            ]

            logger.info(f"True ARF OSS: Running detection for {scenario_name}")
            detection_started = time.perf_counter()
            anomalies = await self.detection_agent.analyze_telemetry(component, telemetry)
            # Measured, not estimated as a fixed share of the total time.
            detection_time_ms = (time.perf_counter() - detection_started) * 1000

            if not anomalies:
                return {
                    "status": "success",
                    "scenario": scenario_name,
                    "result": "no_anomalies_detected",
                    "analysis_time_ms": elapsed_ms(),
                    "arf_version": "3.3.7",
                    "oss_edition": True,
                    # Present on every other return path as well.
                    "demo_display": {
                        "real_arf_version": "3.3.7",
                        "true_oss_used": True,
                        "enterprise_simulated": False,
                    },
                }

            # The most confident anomaly drives the rest of the analysis.
            anomaly = max(anomalies, key=lambda a: a.confidence)

            incident = Incident(
                id=str(uuid.uuid4()),
                component=component,
                anomaly=anomaly,
                telemetry=telemetry[-10:],
                context=context,
            )

            logger.info(f"True ARF OSS: Searching for similar incidents for {scenario_name}")
            similar_incidents = await self.recall_agent.find_similar(incident, k=5)

            # Stored after the lookup so the incident does not match itself.
            await self.recall_agent.add_incident(incident)

            logger.info(f"True ARF OSS: Generating healing intent for {scenario_name}")
            healing_intent = await self.decision_agent.generate_healing_intent(
                anomaly, similar_incidents, context
            )

            analysis_time_ms = elapsed_ms()

            result = {
                "status": "success",
                "scenario": scenario_name,
                "analysis": {
                    "detection": {
                        "anomaly_found": True,
                        "anomaly_id": anomaly.id,
                        "metric": anomaly.metric,
                        "value": anomaly.value,
                        "confidence": anomaly.confidence,
                        "severity": anomaly.severity,
                        "detection_time_ms": detection_time_ms,
                    },
                    "recall": similar_incidents,
                    "decision": healing_intent,
                },
                "capabilities": {
                    "execution_allowed": False,
                    "mcp_modes": ["advisory"],
                    "oss_boundary": "advisory_only",
                    "requires_enterprise": True,
                },
                "agents_used": ["Detection", "Recall", "Decision"],
                "analysis_time_ms": analysis_time_ms,
                "arf_version": "3.3.7",
                "oss_edition": True,
                "demo_display": {
                    "real_arf_version": "3.3.7",
                    "true_oss_used": True,
                    "enterprise_simulated": False,
                    "agent_details": {
                        "detection_confidence": anomaly.confidence,
                        "similar_incidents_count": len(similar_incidents),
                        "decision_confidence": healing_intent["confidence"],
                        "healing_action": healing_intent["action"],
                    },
                },
            }

            logger.info(f"True ARF OSS: Analysis complete for {scenario_name} "
                        f"({analysis_time_ms:.1f}ms)")
            return result

        except Exception as e:
            # Top-level boundary: report the failure instead of raising.
            logger.error(f"True ARF OSS analysis failed: {e}", exc_info=True)
            return {
                "status": "error",
                "error": str(e),
                "scenario": scenario_name,
                "analysis_time_ms": elapsed_ms(),
                "arf_version": "3.3.7",
                "oss_edition": True,
                "demo_display": {
                    "real_arf_version": "3.3.7",
                    "true_oss_used": True,
                    "error": str(e)[:100],
                },
            }

    def get_agent_stats(self) -> Dict[str, Any]:
        """Get statistics from all agents.

        Returns:
            Dict with detection stats, memory stats, a snapshot of the
            decision agent's action success rates, and version info.
        """
        return {
            "detection": self.detection_agent.get_detection_stats(),
            "recall": self.recall_agent.get_memory_stats(),
            "decision": {
                # Copy so callers cannot mutate the agent's live table.
                "action_success_rates": dict(self.decision_agent.action_success_rates),
            },
            "oss_available": self.oss_available,
            "arf_version": "3.3.7",
        }
|
|
| |
| |
| |
|
|
async def get_true_arf_oss(config: Optional[Dict[str, Any]] = None) -> TrueARFOSS:
    """
    Build a TrueARFOSS instance.

    This is the function that TrueARF337Orchestrator expects to call.

    Args:
        config: Optional configuration forwarded to the TrueARFOSS constructor

    Returns:
        A newly constructed TrueARFOSS
    """
    arf_instance = TrueARFOSS(config)
    return arf_instance
|
|
| |
| |
| |
|
|
| async def get_mock_true_arf_oss(config: Optional[Dict[str, Any]] = None) -> TrueARFOSS: |
| """ |
| Mock version for when dependencies are missing |
| """ |
| logger.warning("Using mock TrueARFOSS - real implementation not available") |
| |
| class MockTrueARFOSS: |
| def __init__(self, config): |
| self.config = config or {} |
| self.oss_available = False |
| |
| async def analyze_scenario(self, scenario_name, scenario_data): |
| return { |
| "status": "mock", |
| "scenario": scenario_name, |
| "message": "Mock analysis - install true ARF OSS v3.3.7 for real analysis", |
| "demo_display": { |
| "real_arf_version": "mock", |
| "true_oss_used": False, |
| "enterprise_simulated": False, |
| } |
| } |
| |
| return MockTrue |