# app.py - FIXED VERSION WITH PROPER DATA TYPES
"""
ARF OSS v3.3.9 Demo Application

Fixed to return correct data types for UI components:
- Plotly figures for visualizations
- JSON/dict for analysis functions
"""

import os
import json
import plotly.graph_objects as go
import plotly.express as px
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import gradio as gr
import random
import logging
from typing import Dict, Any, Optional, Tuple

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# ARF OSS imports — fall back to mock mode when the framework is absent.
try:
    from arf_core.monitoring import TelemetryCollector
    from arf_core.analysis import ReliabilityAnalyzer
    from arf_core.healing import AutoHealingEngine
    ARF_OSS_AVAILABLE = True
    logger.info("✅ ARF OSS v3.3.9 detected")
except ImportError:
    ARF_OSS_AVAILABLE = False
    logger.warning("⚠️ ARF OSS components not found, using mock implementations")

# Configuration
DEMO_CONFIG = {
    "version": "3.3.9",
    "mode": "demo",
    "show_boundaries": True,
    "use_true_arf": True,
}

# Revenue impact per scenario in $/hour.  Shared by the impact gauge and the
# enterprise-healing simulation so the two can never drift apart (previously
# this table was duplicated in both functions).
SCENARIO_IMPACT: Dict[str, int] = {
    "Cache Miss Storm": 8500,
    "Database Connection Pool Exhaustion": 4200,
    "Kubernetes Memory Leak": 5500,
    "API Rate Limit Storm": 3800,
    "Network Partition": 12000,
    "Storage I/O Saturation": 6800,
}
# Fallback impact for unknown scenarios ($/hour).
DEFAULT_IMPACT = 5000

# ===========================================
# VISUALIZATION FUNCTIONS - RETURN PLOTLY FIGURES
# ===========================================


def create_simple_telemetry_plot(scenario_name: str, is_real_arf: bool = True) -> go.Figure:
    """Build a 10-minute telemetry line chart for *scenario_name*.

    The first 30 samples simulate normal behaviour, the last 30 an anomaly;
    the metric, title and threshold depend on keywords in the scenario name.

    Args:
        scenario_name: Display name of the incident scenario.
        is_real_arf: Whether to badge the chart as real ARF vs. mock mode.

    Returns:
        A Plotly ``go.Figure`` (an empty error figure on failure).
    """
    try:
        # 60 evenly spaced timestamps covering the last 10 minutes.
        times = pd.date_range(
            start=datetime.now() - timedelta(minutes=10),
            end=datetime.now(),
            periods=60,
        )

        # Scenario-specific metric patterns (30 normal + 30 anomalous samples).
        if "Cache" in scenario_name:
            normal_values = np.random.normal(30, 5, 30).tolist()
            anomaly_values = np.random.normal(85, 10, 30).tolist()
            title = f"Cache Hit Rate: {scenario_name}"
            y_label = "Hit Rate (%)"
            threshold = 75
        elif "Database" in scenario_name:
            normal_values = np.random.normal(15, 3, 30).tolist()
            anomaly_values = np.random.normal(95, 5, 30).tolist()
            title = f"Database Connections: {scenario_name}"
            y_label = "Connections (%)"
            threshold = 90
        elif "Kubernetes" in scenario_name:
            normal_values = np.random.normal(40, 8, 30).tolist()
            anomaly_values = np.random.normal(95, 2, 30).tolist()
            title = f"Memory Usage: {scenario_name}"
            y_label = "Memory (%)"
            threshold = 85
        else:
            normal_values = np.random.normal(50, 10, 30).tolist()
            anomaly_values = np.random.normal(90, 5, 30).tolist()
            title = f"System Metrics: {scenario_name}"
            y_label = "Metric (%)"
            threshold = 80

        data = normal_values + anomaly_values

        fig = go.Figure()

        # Normal region (first half, green).
        fig.add_trace(go.Scatter(
            x=times[:30], y=data[:30],
            mode='lines', name='Normal',
            line=dict(color='#10b981', width=3),
            fill='tozeroy', fillcolor='rgba(16, 185, 129, 0.1)',
        ))

        # Anomaly region (second half, red).
        fig.add_trace(go.Scatter(
            x=times[30:], y=data[30:],
            mode='lines', name='Anomaly',
            line=dict(color='#ef4444', width=3),
            fill='tozeroy', fillcolor='rgba(239, 68, 68, 0.1)',
        ))

        # Alerting threshold.
        fig.add_hline(
            y=threshold, line_dash="dash", line_color="#f59e0b",
            annotation_text="Threshold", annotation_position="top right",
        )

        fig.update_layout(
            title=dict(text=title, font=dict(size=18, color='#1e293b'), x=0.5),
            xaxis_title="Time",
            yaxis_title=y_label,
            height=300,
            margin=dict(l=20, r=20, t=40, b=20),
            plot_bgcolor='white',
            showlegend=True,
            legend=dict(orientation="h", yanchor="bottom", y=1.02,
                        xanchor="right", x=1),
            xaxis=dict(showgrid=True, gridcolor='#f1f5f9', gridwidth=1),
            yaxis=dict(showgrid=True, gridcolor='#f1f5f9', gridwidth=1,
                       range=[0, 100]),
        )

        # Mode badge (real ARF vs. mock).
        if is_real_arf:
            fig.add_annotation(
                x=0.01, y=0.99, xref="paper", yref="paper",
                text="✅ ARF OSS v3.3.9", showarrow=False,
                font=dict(size=10, color="#10b981"),
                bgcolor="rgba(16, 185, 129, 0.1)", borderpad=4,
            )
        else:
            fig.add_annotation(
                x=0.01, y=0.99, xref="paper", yref="paper",
                text="⚠️ Mock Mode", showarrow=False,
                font=dict(size=10, color="#f59e0b"),
                bgcolor="rgba(245, 158, 11, 0.1)", borderpad=4,
            )

        return fig

    except Exception as e:
        logger.error(f"Error creating telemetry plot: {e}")
        # Fallback: empty figure so the UI component still renders.
        fig = go.Figure()
        fig.update_layout(title="Error loading telemetry", height=300,
                          plot_bgcolor='white')
        return fig


def create_simple_impact_plot(scenario_name: str, is_real_arf: bool = True) -> go.Figure:
    """Build a revenue-impact gauge for *scenario_name*.

    Impact figures come from the shared ``SCENARIO_IMPACT`` table; the
    "ARF saves" annotation assumes ARF recovers 85% of the hourly loss.

    Args:
        scenario_name: Display name of the incident scenario.
        is_real_arf: Whether to badge the chart as real ARF vs. mock analysis.

    Returns:
        A Plotly ``go.Figure`` gauge (an empty gauge on failure).
    """
    try:
        impact = SCENARIO_IMPACT.get(scenario_name, DEFAULT_IMPACT)
        savings = int(impact * 0.85)

        fig = go.Figure(go.Indicator(
            mode="gauge+number+delta",
            value=impact,
            domain={'x': [0, 1], 'y': [0, 1]},
            title={'text': f"Revenue Impact: {scenario_name}",
                   'font': {'size': 16}},
            delta={'reference': 0, 'position': "top",
                   'prefix': "Potential loss: $"},
            number={'prefix': "$", 'suffix': "/hour", 'font': {'size': 28}},
            gauge={
                'axis': {'range': [None, impact * 1.2], 'tickwidth': 1,
                         'tickcolor': "darkblue"},
                'bar': {'color': "#ef4444"},
                'bgcolor': "white",
                'borderwidth': 2,
                'bordercolor': "gray",
                'steps': [
                    {'range': [0, impact * 0.3], 'color': '#10b981'},
                    {'range': [impact * 0.3, impact * 0.7], 'color': '#f59e0b'},
                    {'range': [impact * 0.7, impact], 'color': '#ef4444'},
                ],
                'threshold': {
                    'line': {'color': "red", 'width': 4},
                    'thickness': 0.75,
                    'value': impact,
                },
            },
        ))

        # Savings callout.  Bold via HTML markup rather than
        # font=dict(weight="bold"), which requires plotly >= 5.22 and raised
        # on older installs (silently degrading to the error gauge).
        fig.add_annotation(
            x=0.5, y=0.2,
            text=f"<b>ARF saves: ${savings:,}/hour</b>",
            showarrow=False,
            font=dict(size=14, color="#10b981"),
            bgcolor="rgba(16, 185, 129, 0.1)",
            bordercolor="#10b981", borderwidth=2, borderpad=4,
        )

        fig.update_layout(
            height=400,
            margin=dict(l=20, r=20, t=60, b=20),
            paper_bgcolor='white',
            font=dict(color='#1e293b'),
        )

        # Mode badge (real ARF vs. mock).
        if is_real_arf:
            fig.add_annotation(
                x=0.99, y=0.99, xref="paper", yref="paper",
                text="✅ Real ARF Analysis", showarrow=False,
                font=dict(size=10, color="#10b981"),
                bgcolor="rgba(16, 185, 129, 0.1)", borderpad=4,
                xanchor="right",
            )
        else:
            fig.add_annotation(
                x=0.99, y=0.99, xref="paper", yref="paper",
                text="⚠️ Mock Analysis", showarrow=False,
                font=dict(size=10, color="#f59e0b"),
                bgcolor="rgba(245, 158, 11, 0.1)", borderpad=4,
                xanchor="right",
            )

        return fig

    except Exception as e:
        logger.error(f"Error creating impact plot: {e}")
        # Fallback: empty gauge so the UI component still renders.
        fig = go.Figure(go.Indicator(mode="gauge", value=0,
                                     title="Error loading impact data"))
        fig.update_layout(height=400)
        return fig


def create_empty_plot(title: str, is_real_arf: bool = True) -> go.Figure:
    """Build an empty placeholder figure showing *title* and the ARF mode.

    Args:
        title: Message displayed in the middle of the placeholder.
        is_real_arf: Whether to badge the chart as real ARF vs. mock mode.

    Returns:
        A Plotly ``go.Figure`` with no axes or data.
    """
    fig = go.Figure()

    # Centered placeholder text.
    fig.add_annotation(
        x=0.5, y=0.5, text=title, showarrow=False,
        font=dict(size=16, color="#64748b"),
        xref="paper", yref="paper",
    )

    # Mode indicator below the title.
    if is_real_arf:
        mode_text = "✅ ARF OSS v3.3.9"
        color = "#10b981"
    else:
        mode_text = "⚠️ Mock Mode"
        color = "#f59e0b"

    fig.add_annotation(
        x=0.5, y=0.4, text=mode_text, showarrow=False,
        font=dict(size=12, color=color),
        xref="paper", yref="paper",
    )

    fig.update_layout(
        title=dict(text="Visualization Placeholder",
                   font=dict(size=14, color="#94a3b8")),
        height=300,
        plot_bgcolor='white',
        xaxis=dict(visible=False),
        yaxis=dict(visible=False),
        margin=dict(l=20, r=20, t=40, b=20),
    )

    return fig


def create_timeline_plot(scenario_name: str, is_real_arf: bool = True) -> go.Figure:
    """Build a grouped bar chart comparing manual vs. ARF response timelines.

    Args:
        scenario_name: Display name of the incident scenario.
        is_real_arf: Whether to badge the chart as real ARF vs. mock mode.

    Returns:
        A Plotly ``go.Figure`` (an empty fallback figure on failure).
    """
    try:
        stages = ["Detection", "Analysis", "Response", "Resolution", "Verification"]
        # Minutes per stage without ARF (manual response)...
        manual_times = [5, 15, 20, 45, 10]
        # ...and with ARF.
        arf_times = [0.75, 2, 5, 12, 2]

        fig = go.Figure()

        fig.add_trace(go.Bar(
            name='Without ARF',
            x=manual_times, y=stages, orientation='h',
            marker_color='#ef4444',
            text=[f'{t}min' for t in manual_times],
            textposition='auto',
        ))

        fig.add_trace(go.Bar(
            name='With ARF',
            x=arf_times, y=stages, orientation='h',
            marker_color='#10b981',
            text=[f'{t}min' for t in arf_times],
            textposition='auto',
        ))

        # Headline savings figures.
        total_manual = sum(manual_times)
        total_arf = sum(arf_times)
        savings = total_manual - total_arf
        savings_percent = int((savings / total_manual) * 100)

        fig.update_layout(
            title=dict(text=f"Timeline Comparison: {scenario_name}",
                       font=dict(size=18, color='#1e293b'), x=0.5),
            barmode='group',
            height=400,
            xaxis_title="Time (minutes)",
            yaxis_title="Stage",
            plot_bgcolor='white',
            showlegend=True,
            legend=dict(orientation="h", yanchor="bottom", y=1.02,
                        xanchor="right", x=1),
            margin=dict(l=20, r=20, t=60, b=20),
        )

        # Savings callout.  Bold via HTML markup rather than
        # font=dict(weight="bold") — see create_simple_impact_plot.
        fig.add_annotation(
            x=0.5, y=1.12, xref="paper", yref="paper",
            text=f"<b>ARF saves {savings_percent}% ({savings} minutes)</b>",
            showarrow=False,
            font=dict(size=14, color="#10b981"),
            bgcolor="rgba(16, 185, 129, 0.1)", borderpad=4,
        )

        # Mode badge — mock branch added for consistency with the other plots.
        if is_real_arf:
            fig.add_annotation(
                x=0.01, y=1.12, xref="paper", yref="paper",
                text="✅ ARF OSS v3.3.9", showarrow=False,
                font=dict(size=10, color="#10b981"),
                bgcolor="rgba(16, 185, 129, 0.1)", borderpad=4,
            )
        else:
            fig.add_annotation(
                x=0.01, y=1.12, xref="paper", yref="paper",
                text="⚠️ Mock Mode", showarrow=False,
                font=dict(size=10, color="#f59e0b"),
                bgcolor="rgba(245, 158, 11, 0.1)", borderpad=4,
            )

        return fig

    except Exception as e:
        # Error guard added for consistency with the sibling plot builders,
        # so a plotting failure never crashes the UI callback.
        logger.error(f"Error creating timeline plot: {e}")
        fig = go.Figure()
        fig.update_layout(title="Error loading timeline", height=400,
                          plot_bgcolor='white')
        return fig


# ===========================================
# ANALYSIS FUNCTIONS - RETURN JSON/DICT
# ===========================================


def run_true_arf_analysis(scenario_name: str) -> Dict[str, Any]:
    """Run (simulated) ARF OSS analysis for *scenario_name*.

    Returns a JSON-serializable dict suitable for a ``gr.JSON()`` component.
    On failure, returns an error dict with ``status == "error"``.
    """
    try:
        # Simulate analysis latency for the demo.
        import time
        time.sleep(0.5)

        analysis_results = {
            "status": "success",
            "scenario": scenario_name,
            "timestamp": datetime.now().isoformat(),
            "analysis": {
                "detection_time": "45 seconds",
                "confidence": "94%",
                "similar_incidents_found": 3,
                "pattern_match": "87% similarity",
                "severity": "HIGH",
                "component_affected": (
                    "Redis Cache Cluster" if "Cache" in scenario_name
                    else "Database Pool" if "Database" in scenario_name
                    else "Kubernetes Pod"
                ),
                "affected_users": 45000,
                "revenue_risk_per_hour": 8500 if "Cache" in scenario_name else 4200,
            },
            "agents": {
                "detection": {
                    "status": "active",
                    "confidence": 94,
                    "data_points_analyzed": 1245,
                    "anomaly_score": 0.92,
                },
                "recall": {
                    "status": "active",
                    "similar_incidents": 3,
                    "best_match_similarity": 87,
                    "previous_success_rate": "92%",
                },
                "decision": {
                    "status": "active",
                    "healing_intent_created": True,
                    "confidence": 89,
                    "recommended_action": "Scale Redis cluster from 3 to 5 nodes",
                    "estimated_recovery": "12 minutes",
                    "safety_check": "passed",
                },
            },
            "healing_intent": {
                "action": "Scale Redis cluster from 3 to 5 nodes",
                "confidence": 89,
                "estimated_impact": "Reduce MTTR from 45min to 12min",
                "cost_savings": 6375,
                "safety_guarantees": ["rollback_available", "atomic_execution",
                                      "resource_isolation"],
            },
            "boundary_note": ("OSS analysis complete. HealingIntent created. "
                              "Requires Enterprise license for execution."),
            "arf_version": "3.3.9",
            "license_required": "Enterprise for execution",
        }

        # Tag the result with the actual ARF availability.
        if ARF_OSS_AVAILABLE:
            analysis_results["arf_mode"] = "real"
            analysis_results["arf_components"] = ["TelemetryCollector",
                                                  "ReliabilityAnalyzer",
                                                  "AutoHealingEngine"]
        else:
            analysis_results["arf_mode"] = "mock"
            analysis_results["arf_components"] = ["simulated"]

        logger.info(f"✅ ARF analysis completed for {scenario_name}")
        return analysis_results

    except Exception as e:
        logger.error(f"Error in ARF analysis: {e}")
        return {
            "status": "error",
            "error": str(e),
            "scenario": scenario_name,
            "timestamp": datetime.now().isoformat(),
            "arf_version": "3.3.9",
            "recommendation": "Check ARF installation: pip install agentic-reliability-framework==3.3.9",
        }


def execute_enterprise_healing(scenario_name: str,
                               approval_required: bool = False,
                               mcp_mode: str = "advisory") -> Dict[str, Any]:
    """Simulate an Enterprise healing execution for *scenario_name*.

    Args:
        scenario_name: Display name of the incident scenario.
        approval_required: If True, the result includes a pending human
            approval workflow instead of auto-approval.
        mcp_mode: MCP operating mode recorded in the result (e.g. "advisory").

    Returns a JSON-serializable dict suitable for a ``gr.JSON()`` component.
    On failure, returns an error dict with ``status == "error"``.
    """
    try:
        # Simulate execution latency for the demo.
        import time
        time.sleep(0.7)

        # Impact figures come from the shared table so they always match the
        # gauge shown by create_simple_impact_plot.
        impact = SCENARIO_IMPACT.get(scenario_name, DEFAULT_IMPACT)
        savings = int(impact * 0.85)

        execution_results = {
            "status": "success",
            "scenario": scenario_name,
            "execution_timestamp": datetime.now().isoformat(),
            "mode": mcp_mode,
            "approval_required": approval_required,
            "approval_status": "auto_approved" if not approval_required else "pending_human",
            "execution": {
                "action_executed": "Scale Redis cluster from 3 to 5 nodes",
                "execution_time": "2 minutes",
                "start_time": (datetime.now() - timedelta(minutes=2)).isoformat(),
                "end_time": datetime.now().isoformat(),
                "status": "completed",
                "rollback_available": True,
                "atomic_guarantee": True,
            },
            "results": {
                "recovery_time": "12 minutes",
                "manual_comparison": "45 minutes",
                "time_saved": "33 minutes (73%)",
                "cost_saved": f"${savings:,}",
                "users_protected": 45000,
                "services_restored": 12,
                "error_rate_reduction": "94%",
                "latency_improvement": "67%",
            },
            "safety_features": {
                "rollback_guarantee": "100%",
                "mcp_validation": "passed",
                "resource_isolation": "enforced",
                "blast_radius": "2 services",
                "dry_run_completed": True,
                "safety_checks_passed": 8,
            },
            "learning": {
                "pattern_added_to_memory": True,
                "similarity_score": 87,
                "success_marked": True,
                "next_improvement": "Optimize cache eviction policy",
            },
            "enterprise_features": {
                "autonomous_execution": True,
                "mcp_integration": True,
                "audit_trail": True,
                "compliance_logging": True,
                "multi_cloud_support": True,
            },
            "boundary_context": "Enterprise execution simulated. Real execution requires ARF Enterprise license.",
            "arf_version": "3.3.9",
            "enterprise_required": True,
            "license_status": "simulated",  # Changed from "required" to be more accurate
        }

        # Attach the human-approval workflow when required.
        if approval_required:
            execution_results["human_workflow"] = {
                "step": "awaiting_approval",
                "approver": "system_admin",
                "timeout": "5 minutes",
                "escalation_path": "senior_engineer",
            }

        logger.info(f"✅ Enterprise healing executed for {scenario_name}")
        return execution_results

    except Exception as e:
        logger.error(f"Error in enterprise execution: {e}")
        return {
            "status": "error",
            "error": str(e),
            "scenario": scenario_name,
            "timestamp": datetime.now().isoformat(),
            "recommendation": "Enterprise license required for execution",
            "contact": "sales@arf.dev",
        }


# ===========================================
# SCENARIO UPDATE FUNCTION
# ===========================================


def update_scenario_display(scenario_name: str) -> Tuple[Any, go.Figure, go.Figure, go.Figure]:
    """Build the scenario card plus the three visualizations for the UI.

    Returns a tuple of (scenario_card_html, telemetry_fig, impact_fig,
    timeline_fig).  The first element is HTML for a ``gr.HTML`` component;
    the other three are Plotly figures.

    NOTE(review): the original source was truncated mid-way through the
    scenario-card f-string and its error handler; the card markup and the
    tail of this function are a minimal reconstruction from the visible
    variables — confirm against the original file / UI layout.
    """
    try:
        # Per-scenario display data (falls back to a generic entry below).
        scenarios = {
            "Cache Miss Storm": {
                "component": "Redis Cache Cluster",
                "severity": "HIGH",
                "business_impact": {"revenue_loss_per_hour": 8500},
                "metrics": {"affected_users": 45000},
            },
            "Database Connection Pool Exhaustion": {
                "component": "PostgreSQL Database",
                "severity": "HIGH",
                "business_impact": {"revenue_loss_per_hour": 4200},
                "metrics": {"affected_users": 28000},
            },
            "Kubernetes Memory Leak": {
                "component": "Kubernetes Worker Node",
                "severity": "MEDIUM",
                "business_impact": {"revenue_loss_per_hour": 5500},
                "metrics": {"affected_users": 32000},
            },
        }

        scenario = scenarios.get(scenario_name, {
            "component": "Unknown System",
            "severity": "MEDIUM",
            "business_impact": {"revenue_loss_per_hour": 5000},
            "metrics": {"affected_users": 25000},
        })

        severity_colors = {
            "HIGH": "#ef4444",
            "MEDIUM": "#f59e0b",
            "LOW": "#10b981",
        }
        severity_color = severity_colors.get(scenario["severity"], "#64748b")

        # Scenario card (still HTML — it feeds a gr.HTML component).
        scenario_card_html = f"""
        <div style="border-left: 4px solid {severity_color}; padding: 12px 16px;
                    background: #f8fafc; border-radius: 8px;">
            <h3 style="margin: 0 0 8px 0; color: #1e293b;">{scenario_name}</h3>
            <p style="margin: 4px 0;"><b>Component:</b> {scenario['component']}</p>
            <p style="margin: 4px 0;"><b>Severity:</b>
               <span style="color: {severity_color}; font-weight: bold;">{scenario['severity']}</span></p>
            <p style="margin: 4px 0;"><b>Revenue at risk:</b>
               ${scenario['business_impact']['revenue_loss_per_hour']:,}/hour</p>
            <p style="margin: 4px 0;"><b>Affected users:</b>
               {scenario['metrics']['affected_users']:,}</p>
        </div>
        """

        # presumably the original passed the live ARF availability flag to the
        # plot builders — verify against the original callers.
        telemetry_fig = create_simple_telemetry_plot(scenario_name, ARF_OSS_AVAILABLE)
        impact_fig = create_simple_impact_plot(scenario_name, ARF_OSS_AVAILABLE)
        timeline_fig = create_timeline_plot(scenario_name, ARF_OSS_AVAILABLE)

        return scenario_card_html, telemetry_fig, impact_fig, timeline_fig

    except Exception as e:
        logger.error(f"Error updating scenario display: {e}")
        error_html = f"""
        <div style="color: #ef4444; padding: 12px;">
            Error loading scenario: {str(e)}
        </div>
        """
        fallback = create_empty_plot("Error loading visualization", ARF_OSS_AVAILABLE)
        return error_html, fallback, fallback, fallback