import os
import json
import numpy as np
import gradio as gr
import requests
import pandas as pd
import datetime
from typing import List, Dict, Any
import hashlib
import asyncio
# Import enhanced modules
from models import ReliabilityEvent, EventSeverity, AnomalyResult, HealingAction
from healing_policies import PolicyEngine
from agent_orchestrator import OrchestrationManager, AgentSpecialization
# === Configuration ===
# Access token read from the HF_TOKEN environment variable; surrounding
# whitespace is removed and an unset variable yields an empty string.
HF_TOKEN = os.environ.get("HF_TOKEN", "").strip()

# Completions endpoint on the Hugging Face inference router.
HF_API_URL = "https://router.huggingface.co/hf-inference/v1/completions"

# The Authorization header is only present when a token is configured.
HEADERS = {}
if HF_TOKEN:
    HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"}
# === Initialize Enhanced Components ===
# Shared module-level instances used by EnhancedReliabilityEngine.
# policy_engine.evaluate_policies(event) supplies the healing actions for an event.
policy_engine = PolicyEngine()
# orchestration_manager.orchestrate_analysis(event) is awaited once per event.
orchestration_manager = OrchestrationManager()
# Every processed event is appended to this list; nothing in this snippet trims it.
events_history: List[ReliabilityEvent] = []
# [Keep existing FAISS setup and BusinessImpactCalculator...]
class EnhancedReliabilityEngine:
    """Process reliability events through analysis, policy and impact steps.

    Each call to :meth:`process_event_enhanced` builds a ``ReliabilityEvent``,
    awaits the module-level ``orchestration_manager``, asks the module-level
    ``policy_engine`` for healing actions, and records timing statistics in
    ``performance_metrics``.
    """

    def __init__(self):
        # Running statistics for this engine instance.
        self.performance_metrics = {
            'total_incidents_processed': 0,
            'multi_agent_analyses': 0,
            'average_processing_time': 0.0
        }

    async def process_event_enhanced(self, component: str, latency: float, error_rate: float,
                                     throughput: float = 1000, cpu_util: float = None,
                                     memory_util: float = None) -> Dict[str, Any]:
        """Analyse one event and return a summary dictionary.

        Args:
            component: Name of the component the metrics belong to.
            latency: Value stored as the event's ``latency_p99``.
            error_rate: Value stored as the event's ``error_rate``.
            throughput: Value stored as the event's ``throughput``.
            cpu_util: Optional CPU utilisation; ``None`` when not supplied.
            memory_util: Optional memory utilisation; ``None`` when not supplied.

        Returns:
            A dict with the submitted metrics, a ``"status"`` of ``"ANOMALY"``
            when the reported anomaly confidence exceeds 0.5 (otherwise
            ``"NORMAL"``), the raw multi-agent analysis, the healing action
            values, the business impact and processing metadata.
        """
        # Inside a coroutine the running loop is always available; its
        # monotonic clock is used to time the whole pipeline.
        loop = asyncio.get_running_loop()
        start_time = loop.time()

        # Create event
        event = ReliabilityEvent(
            component=component,
            latency_p99=latency,
            error_rate=error_rate,
            throughput=throughput,
            cpu_util=cpu_util,
            memory_util=memory_util,
            upstream_deps=["auth-service", "database"] if component == "api-service" else []
        )

        # Multi-agent analysis
        agent_analysis = await orchestration_manager.orchestrate_analysis(event)
        self.performance_metrics['multi_agent_analyses'] += 1

        # Policy evaluation
        healing_actions = policy_engine.evaluate_policies(event)

        # Business impact
        # NOTE(review): business_calculator is not defined in this snippet;
        # presumably it is a module-level instance created elsewhere - confirm.
        business_impact = business_calculator.calculate_impact(event)

        # Update metrics
        processing_time = loop.time() - start_time
        self._update_performance_metrics(processing_time)

        # A missing summary or confidence is treated as 0, i.e. "NORMAL".
        anomaly_confidence = agent_analysis.get('incident_summary', {}).get('anomaly_confidence', 0)

        # Prepare comprehensive result
        result = {
            "timestamp": event.timestamp,
            "component": component,
            "latency_p99": latency,
            "error_rate": error_rate,
            "throughput": throughput,
            "status": "ANOMALY" if anomaly_confidence > 0.5 else "NORMAL",
            "multi_agent_analysis": agent_analysis,
            "healing_actions": [action.value for action in healing_actions],
            "business_impact": business_impact,
            "processing_metadata": {
                "processing_time_seconds": round(processing_time, 3),
                "agents_used": agent_analysis.get('agent_metadata', {}).get('participating_agents', []),
                "analysis_confidence": anomaly_confidence
            }
        }

        events_history.append(event)
        return result

    def _update_performance_metrics(self, processing_time: float) -> None:
        """Record one processed incident and fold its duration into the mean.

        Args:
            processing_time: Seconds spent processing the incident.
        """
        metrics = self.performance_metrics
        metrics['total_incidents_processed'] += 1
        processed = metrics['total_incidents_processed']
        # Incremental mean: avoids keeping every individual duration.
        previous_average = metrics['average_processing_time']
        metrics['average_processing_time'] = (
            previous_average + (processing_time - previous_average) / processed
        )
# Module-level engine instance; submit_event_enhanced awaits its
# process_event_enhanced method for each submitted event.
enhanced_engine = EnhancedReliabilityEngine()
# [Keep existing UI setup, but enhance the submission function...]
async def submit_event_enhanced(component, latency, error_rate, throughput, cpu_util, memory_util):
    """Convert raw form inputs, process the event and format the UI outputs.

    Args:
        component: Component name passed through to the engine.
        latency: Latency value; must be convertible with ``float``.
        error_rate: Error rate value; must be convertible with ``float``.
        throughput: Throughput value; a blank or zero value falls back to 1000.
        cpu_util: Optional CPU utilisation; blank means "not provided".
        memory_util: Optional memory utilisation; blank means "not provided".

    Returns:
        A ``(message, table)`` tuple. On success the message summarises the
        status and multi-agent insights; on any exception the message carries
        the error text and the table is an empty ``gr.Dataframe``.
    """

    def _optional_float(raw):
        # Only None or a blank string count as "not provided"; a literal 0 is a
        # real measurement and must not be discarded.
        if raw is None or (isinstance(raw, str) and not raw.strip()):
            return None
        return float(raw)

    try:
        # Convert inputs
        latency = float(latency)
        error_rate = float(error_rate)
        # Blank or zero throughput keeps the original default of 1000.
        throughput = float(throughput) if throughput else 1000
        cpu_util = _optional_float(cpu_util)
        memory_util = _optional_float(memory_util)

        # Process with enhanced engine
        result = await enhanced_engine.process_event_enhanced(
            component, latency, error_rate, throughput, cpu_util, memory_util
        )

        # NOTE(review): stand-in for the app's existing table formatting, which
        # is not part of this snippet. It shows the event just processed so the
        # returned table is always defined; swap in the full history table when
        # merging.
        table_output = gr.Dataframe(
            value=[[
                str(result["timestamp"]),
                result["component"],
                result["latency_p99"],
                result["error_rate"],
                result["throughput"],
                result["status"],
            ]],
            headers=["Timestamp", "Component", "Latency", "Error Rate", "Throughput", "Status"],
        )

        # Enhanced output formatting
        status_emoji = "🚨" if result["status"] == "ANOMALY" else "✅"
        output_msg = f"{status_emoji} {result['status']}"

        # Add multi-agent insights
        if "multi_agent_analysis" in result:
            analysis = result["multi_agent_analysis"]
            output_msg += f"\n🎯 Confidence: {analysis.get('incident_summary', {}).get('anomaly_confidence', 0)*100:.1f}%"
            if analysis.get('recommended_actions'):
                # Only the first two recommendations are shown.
                output_msg += f"\n💡 Insights: {', '.join(analysis['recommended_actions'][:2])}"

        # [Keep existing business impact and healing actions display...]
        return (output_msg, table_output)
    except Exception as e:
        # UI boundary: report the failure to the user instead of raising.
        return f"❌ Error processing event: {str(e)}", gr.Dataframe(value=[])