"""
Gradio-only UI components for ARF.

Ensures full compatibility with app.py.
Updated with proper imports and error handling.
NOW WITH REAL ARF INSTALLATION DETECTION.
UPDATED: Added realism panel integration for Tab 1.
"""

import logging
from typing import Any, Dict, List

import gradio as gr

logger = logging.getLogger(__name__)

# Try to import scenarios from registry first
try:
    from config.scenario_registry import ScenarioRegistry
    INCIDENT_SCENARIOS = ScenarioRegistry.load_scenarios()
    logger.info("Loaded %d scenarios from registry", len(INCIDENT_SCENARIOS))
except ImportError:
    logger.warning("Scenario registry not available, falling back to demo scenarios")
    from demo.scenarios import INCIDENT_SCENARIOS


# -----------------------------
# Internal helpers
# -----------------------------
def _resolve_default_scenario(scenarios: Dict[str, Any], preferred: str) -> str:
    """Return *preferred* if it is a known scenario, else the first known one.

    Passing a value that is not among a dropdown's choices makes Gradio
    complain, so both scenario dropdowns resolve their initial value here.
    When *scenarios* is empty there is nothing to fall back to and
    *preferred* is returned unchanged.
    """
    if preferred in scenarios or not scenarios:
        return preferred
    fallback = next(iter(scenarios))
    logger.warning(
        "Default scenario %r not found; falling back to %r", preferred, fallback
    )
    return fallback


def _agent_card(icon: str, name: str) -> str:
    """Return the initial (inactive) status card markup for one agent."""
    return f"""

{icon}

{name}

Click "Run OSS Analysis" to activate

Status: Inactive

WAITING

"""


def _metric_card(icon: str, title: str, value: str, note: str) -> str:
    """Return the markup for one card of the performance-metrics grid."""
    return f"""

{icon}

{title}

{value}

{note}

"""


# -----------------------------
# Header & Status - UPDATED WITH INSTALLATION CHECK
# -----------------------------
def create_header(version="3.3.9") -> gr.HTML:
    """Build the page header.

    Args:
        version: Version string interpolated into the header text.

    Returns:
        A ``gr.HTML`` component holding the header markup.
    """
    return gr.HTML(f"""

v{version} (OSS + Enterprise Edition)

Production-grade multi-agent AI for autonomous system reliability intelligence

🏗️ Architecture: OSS advises → Enterprise executes

""")


def create_status_bar(version="3.3.9") -> gr.HTML:
    """Build the status bar shown under the header.

    Args:
        version: ARF OSS version to display. Defaults to the same value as
            ``create_header`` so the two stay in agreement.

    Returns:
        A ``gr.HTML`` component holding the status-bar markup.
    """
    return gr.HTML(f"""

✅ System Online ✅ ARF OSS v{version} 🏢 Enterprise Edition 📈 ROI: 5.2×

""")


# -----------------------------
# Tab 1: Live Incident Demo - UPDATED WITH REALISM PANEL
# -----------------------------
def create_tab1_incident_demo(scenarios=INCIDENT_SCENARIOS, default_scenario="Cache Miss Storm") -> tuple:
    """
    Create an expressive, comprehensive incident demo tab for ARF.

    Shows the complete OSS analysis → Enterprise execution workflow.
    UPDATED: Now includes realism panel for enterprise-seasoned SRE experience.

    Args:
        scenarios: Mapping of scenario name to scenario data. The keys
            ``severity``, ``component``, ``metrics`` and ``business_impact``
            are read from the selected scenario when present.
        default_scenario: Scenario preselected in the dropdown. If it is not
            a key of ``scenarios``, the first available scenario is used.

    Returns:
        A 24-tuple of components, in this order: scenario_dropdown,
        scenario_card, telemetry_viz, impact_viz, workflow_header,
        detection_agent, recall_agent, decision_agent, oss_section,
        enterprise_section, oss_btn, enterprise_btn, approval_toggle,
        mcp_mode, timeline_viz, realism_panel, detection_time, mttr,
        auto_heal, savings, oss_results_display, enterprise_results_display,
        approval_display, demo_btn.
    """
    default_scenario = _resolve_default_scenario(scenarios, default_scenario)

    # Get the default scenario data
    default_scenario_data = scenarios.get(default_scenario, {})
    business_impact = default_scenario_data.get("business_impact", {})
    metrics = default_scenario_data.get("metrics", {})

    # 85% of the hourly revenue at risk, in thousands of dollars. Kept as a
    # float: truncating to int first made the ".1f" format always print ".0".
    saved_thousands = business_impact.get('revenue_loss_per_hour', 8500) * 0.85 / 1000

    with gr.Row():
        # Left Column: Scenario Selection & Live Visualization
        with gr.Column(scale=1, variant="panel"):
            # Scenario Selection with rich preview
            scenario_dropdown = gr.Dropdown(
                choices=list(scenarios.keys()),
                value=default_scenario,
                label="🎯 Select Incident Scenario",
                info="Choose a production incident to analyze",
                interactive=True,
                container=False,
            )

            # Scenario Card with rich information - USING INLINE STYLES
            scenario_card = gr.HTML(f"""

🚨 {default_scenario}

{default_scenario_data.get('severity', 'HIGH')}

Component: {default_scenario_data.get('component', 'Unknown').replace('_', ' ').title()}

Affected Users: {metrics.get('affected_users', 'Unknown')}

Revenue Risk: ${business_impact.get('revenue_loss_per_hour', 0):,}/hour

Detection Time: 45 seconds (ARF AI)

{default_scenario_data.get('component', 'unknown').split('_')[0]} {default_scenario_data.get('severity', 'high').lower()} production incident

""")

            # Visualization section - USING gr.Plot() FOR PLOTLY FIGURES
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### 📈 Live Telemetry")
                    # This expects a Plotly figure from app.py
                    telemetry_viz = gr.Plot(
                        label="",
                        show_label=False,
                        elem_id="telemetry_plot",
                    )
                with gr.Column(scale=1):
                    gr.Markdown("### 💰 Business Impact")
                    impact_viz = gr.Plot(
                        label="",
                        show_label=False,
                        elem_id="impact_plot",
                    )

        # Middle Column: Agent Workflow
        with gr.Column(scale=2, variant="panel"):
            # Agent Workflow Header
            workflow_header = gr.Markdown("## 🔄 ARF Agent Workflow")
            gr.Markdown("### How ARF transforms incidents into autonomous healing")

            # Agent Status Cards - USING INLINE STYLES
            with gr.Row():
                detection_agent = gr.HTML(_agent_card("🕵️‍♂️", "Detection Agent"))
                recall_agent = gr.HTML(_agent_card("🧠", "Recall Agent"))
                decision_agent = gr.HTML(_agent_card("🎯", "Decision Agent"))

            # OSS vs Enterprise Boundary Visualization
            gr.Markdown("### 🎭 OSS vs Enterprise: The Safety Boundary")
            with gr.Row():
                oss_section = gr.HTML("""

🆓

OSS Edition

Apache 2.0

Analysis & Advisory Only - No execution, permanently safe

📝 Healing Intent Created

94% confidence

Action: Scale Redis cluster from 3 to 5 nodes

Pattern Match: Similar incident resolved with scaling (87% success rate)

Safety Check: ✅ Passed (blast radius: 2 services)

Estimated Impact: Reduce MTTR from 45min to 12min

🚫 OSS STOPS HERE - No execution

""")
                enterprise_section = gr.HTML("""

💰

Enterprise Edition

Commercial

Full Execution & Learning - Autonomous healing with safety guarantees

⚡ Ready to Execute

AUTONOMOUS

Mode: Autonomous (Requires Enterprise license)

Expected Recovery: 12 minutes (vs 45 min manual)

Cost Saved: $6,375

Users Protected: 45,000 → 0 impacted

✅ Enterprise executes with MCP safety

""")

            # Execution Controls
            with gr.Row():
                with gr.Column(scale=1):
                    oss_btn = gr.Button(
                        "🆓 Run OSS Analysis",
                        variant="secondary",
                        size="lg",
                    )
                    gr.Markdown("*Free, open-source analysis*")
                with gr.Column(scale=1):
                    enterprise_btn = gr.Button(
                        "💰 Execute Enterprise Healing",
                        variant="primary",
                        size="lg",
                    )
                    gr.Markdown("*Requires Enterprise license*")

            # Mode Selection & Safety Controls
            with gr.Row():
                with gr.Column(scale=1):
                    approval_toggle = gr.CheckboxGroup(
                        choices=["👤 Require Human Approval"],
                        label="Safety Controls",
                        value=[],
                        info="Toggle human oversight",
                    )
                with gr.Column(scale=2):
                    mcp_mode = gr.Radio(
                        choices=["🛡️ Advisory (OSS Only)", "👥 Approval", "⚡ Autonomous"],
                        value="🛡️ Advisory (OSS Only)",
                        label="MCP Safety Mode",
                        info="Control execution safety level",
                        interactive=True,
                    )

            # Timeline visualization
            gr.Markdown("### ⏰ Incident Timeline")
            timeline_viz = gr.Plot(
                label="",
                show_label=False,
                elem_id="timeline_plot",
            )

            # ============ NEW: Realism Panel Section ============
            gr.Markdown("### 🎭 Realism: Trade-offs & Uncertainty")
            realism_panel = gr.HTML(
                value="""

🔧

Realism Panel

Select a scenario to see ranked actions, risks, and trade-offs

""",
                elem_id="realism_panel",
            )

        # Right Column: Results & Metrics
        with gr.Column(scale=1, variant="panel"):
            # Real-time Metrics Dashboard
            gr.Markdown("## 📊 Performance Metrics")

            # Metric Cards Grid - USING INLINE STYLES
            with gr.Row():
                detection_time = gr.HTML(
                    _metric_card("⏱️", "Detection Time", "45s", "↓ 89% faster than average")
                )
                mttr = gr.HTML(
                    _metric_card("⚡", "Mean Time to Resolve", "12m", "↓ 73% faster than manual")
                )
            with gr.Row():
                auto_heal = gr.HTML(
                    _metric_card("🤖", "Auto-Heal Rate", "81.7%", "↑ 5.4× industry average")
                )
                savings = gr.HTML(
                    _metric_card("💰", "Cost Saved", f"${saved_thousands:.1f}K", "Per incident avoided")
                )

            # Results Display Areas
            gr.Markdown("### 🆓 OSS Analysis Results")
            oss_results_display = gr.JSON(
                label="",
                value={
                    "status": "Analysis Pending",
                    "agents": ["Detection", "Recall", "Decision"],
                    "mode": "Advisory Only",
                    "action": "Generate HealingIntent",
                },
                height=200,
            )

            gr.Markdown("### 💰 Enterprise Results")
            enterprise_results_display = gr.JSON(
                label="",
                value={
                    "status": "Execution Pending",
                    "requires_license": True,
                    "available_modes": ["Approval", "Autonomous"],
                    "expected_outcome": "12m MTTR, $6.3K saved",
                },
                height=200,
            )

            # Approval Status - USING INLINE STYLES
            approval_display = gr.HTML("""

👤 Human Approval Status

Not Required

Current Mode: Advisory (OSS Only)

Switch to "Approval" mode to enable human-in-the-loop workflows

1. ARF generates intent

2. Human reviews & approves

3. ARF executes safely

""")

            # Demo Actions
            demo_btn = gr.Button(
                "▶️ Run Complete Demo Walkthrough",
                variant="secondary",
                size="lg",
            )
            gr.Markdown("*Experience the full ARF workflow from detection to resolution*")

    return (
        # Left column returns
        scenario_dropdown, scenario_card, telemetry_viz, impact_viz,
        # Middle column returns (WITH REALISM PANEL)
        workflow_header, detection_agent, recall_agent, decision_agent,
        oss_section, enterprise_section, oss_btn, enterprise_btn,
        approval_toggle, mcp_mode, timeline_viz, realism_panel,
        # Right column returns
        detection_time, mttr, auto_heal, savings,
        oss_results_display, enterprise_results_display, approval_display, demo_btn,
    )


# -----------------------------
# Tab 2: Business ROI - Updated
# -----------------------------
def create_tab2_business_roi(scenarios=INCIDENT_SCENARIOS, default_scenario="Cache Miss Storm") -> tuple:
    """Build the components of the Business ROI tab.

    Args:
        scenarios: Mapping of scenario name to scenario data; only the keys
            are used, as dropdown choices.
        default_scenario: Scenario preselected in the dropdown. If it is not
            a key of ``scenarios``, the first available scenario is used.

    Returns:
        A 7-tuple: dashboard_output, roi_scenario_dropdown, monthly_slider,
        team_slider, calculate_btn, roi_output, roi_chart.
    """
    dashboard_output = gr.Plot(label="Executive Dashboard", show_label=True)

    roi_scenario_dropdown = gr.Dropdown(
        choices=list(scenarios.keys()),
        value=_resolve_default_scenario(scenarios, default_scenario),
        label="Scenario for ROI Analysis",
        info="Select the primary incident type for ROI calculation",
    )
    monthly_slider = gr.Slider(
        minimum=1, maximum=50, value=15, step=1,
        label="Monthly Incidents",
        info="Average number of incidents per month",
    )
    team_slider = gr.Slider(
        minimum=1, maximum=50, value=5, step=1,
        label="Team Size",
        info="Number of engineers on reliability team",
    )

    calculate_btn = gr.Button("📊 Calculate Comprehensive ROI", variant="primary", size="lg")
    roi_output = gr.JSON(label="ROI Analysis Results", value={})
    roi_chart = gr.Plot(label="ROI Visualization")

    return (dashboard_output, roi_scenario_dropdown, monthly_slider, team_slider,
            calculate_btn, roi_output, roi_chart)


# -----------------------------
# Tab 3: Enterprise Features - UPDATED WITH INSTALLATION STATUS
# -----------------------------
def create_tab3_enterprise_features() -> tuple:
    """Build the components of the Enterprise Features tab.

    The installation status is read from ``app.get_installation_status()``.
    If that check cannot be imported, raises, or returns a dict missing an
    expected key, a static fallback is displayed instead so the tab still
    renders.

    Returns:
        An 8-tuple: license_display, validate_btn, trial_btn, upgrade_btn,
        mcp_mode, mcp_mode_info, features_table, integrations_table.
    """
    # Get installation status
    try:
        # Imported lazily: app.py imports this module, so a top-level import
        # would be circular.
        from app import get_installation_status
        installation = get_installation_status()

        license_data = {
            "status": "✅ OSS Installed" if installation["oss_installed"] else "⚠️ OSS Not Installed",
            "oss_version": installation["oss_version"] or "Not installed",
            "enterprise_installed": installation["enterprise_installed"],
            "enterprise_version": installation["enterprise_version"] or "Not installed",
            "execution_allowed": installation["execution_allowed"],
            "recommendations": installation["recommendations"],
            "badges": installation["badges"],
        }

        # Update features table based on installation
        enterprise_status = "✅ Available" if installation["enterprise_installed"] else "🔒 Enterprise"
        features_data = [
            ["ARF OSS Package", "✅ Installed" if installation["oss_installed"] else "❌ Not Installed", "OSS"],
            ["Self-Healing Core", "✅ Active", "Enterprise"],
            ["RAG Graph Memory", "✅ Active", "Both"],
            ["Predictive Analytics", enterprise_status, "Enterprise"],
            ["Audit Trail", enterprise_status, "Enterprise"],
            ["Compliance (SOC2)", enterprise_status, "Enterprise"],
        ]
    except Exception:
        # Best-effort boundary: any failure of the installation check (not
        # just a failed import) must not prevent the tab from being built.
        logger.warning("Installation check failed; showing fallback status", exc_info=True)
        license_data = {
            "status": "⚠️ Installation Check Failed",
            "oss_version": "Unknown",
            "enterprise_installed": False,
            "recommendations": ["Run installation check"],
        }
        features_data = [
            ["Self-Healing Core", "✅ Active", "Enterprise"],
            ["RAG Graph Memory", "✅ Active", "Both"],
            ["Predictive Analytics", "🔒 Enterprise", "Enterprise"],
            ["Audit Trail", "🔒 Enterprise", "Enterprise"],
            ["Compliance (SOC2)", "🔒 Enterprise", "Enterprise"],
            ["Multi-Cloud", "🔒 Enterprise", "Enterprise"],
        ]

    license_display = gr.JSON(
        value=license_data,
        label="📦 Package Installation Status",
    )

    validate_btn = gr.Button("🔍 Validate Installation", variant="secondary")
    trial_btn = gr.Button("🆓 Start 30-Day Trial", variant="secondary")
    upgrade_btn = gr.Button("🚀 Upgrade to Enterprise", variant="primary")

    mcp_mode = gr.Dropdown(
        choices=["advisory", "approval", "autonomous"],
        value="advisory",
        label="MCP Safety Mode",
    )

    # Initial mode info
    # NOTE(review): the pinned package version below (3.3.7) differs from the
    # 3.3.9 shown in the header/status bar — confirm which one is current.
    mcp_mode_info = gr.JSON(
        value={
            "current_mode": "advisory",
            "description": "OSS Edition - Analysis only, no execution",
            "features": ["Incident analysis", "RAG similarity", "HealingIntent creation"],
            "package": "agentic-reliability-framework==3.3.7",
            "license": "Apache 2.0",
        },
        label="Mode Details",
    )

    integrations_data = [
        ["Prometheus", "✅ Connected", "Monitoring"],
        ["Grafana", "✅ Connected", "Visualization"],
        ["Slack", "🔒 Enterprise", "Notifications"],
        ["PagerDuty", "🔒 Enterprise", "Alerting"],
        ["Jira", "🔒 Enterprise", "Ticketing"],
        ["Datadog", "🔒 Enterprise", "Monitoring"],
    ]

    features_table = gr.Dataframe(
        headers=["Feature", "Status", "Edition"],
        value=features_data,
        label="Feature Comparison",
    )
    integrations_table = gr.Dataframe(
        headers=["Integration", "Status", "Type"],
        value=integrations_data,
        label="Integration Status",
    )

    return (license_display, validate_btn, trial_btn, upgrade_btn,
            mcp_mode, mcp_mode_info, features_table, integrations_table)


# -----------------------------
# Tab 4: Audit Trail
# -----------------------------
def create_tab4_audit_trail() -> tuple:
    """Build the components of the Audit Trail tab.

    Returns:
        A 6-tuple: refresh_btn, clear_btn, export_btn, execution_table,
        incident_table, export_text. Both tables start empty.
    """
    refresh_btn = gr.Button("🔄 Refresh Audit Trail", variant="secondary")
    clear_btn = gr.Button("🗑️ Clear History", variant="secondary")
    export_btn = gr.Button("📥 Export as JSON", variant="primary")

    execution_headers = ["Time", "Scenario", "Mode", "Status", "Savings", "Details"]
    incident_headers = ["Time", "Component", "Scenario", "Severity", "Status"]

    execution_table = gr.Dataframe(
        headers=execution_headers,
        value=[],
        label="Execution History",
    )
    incident_table = gr.Dataframe(
        headers=incident_headers,
        value=[],
        label="Incident History",
    )

    export_text = gr.JSON(
        value={"status": "Export ready"},
        label="Export Data",
    )

    return (refresh_btn, clear_btn, export_btn, execution_table, incident_table, export_text)


# -----------------------------
# Tab 5: Learning Engine
# -----------------------------
def create_tab5_learning_engine() -> tuple:
    """Build the components of the Learning Engine tab.

    Returns:
        A 10-tuple: learning_graph, graph_type, show_labels, search_query,
        search_btn, clear_btn_search, search_results, stats_display,
        patterns_display, performance_display. The JSON panels are seeded
        with static placeholder values.
    """
    learning_graph = gr.Plot(label="RAG Memory Graph")
    graph_type = gr.Dropdown(
        choices=["Incident Patterns", "Action-Outcome Chains", "System Dependencies"],
        value="Incident Patterns",
        label="Graph Type",
    )
    show_labels = gr.Checkbox(label="Show Labels", value=True)

    search_query = gr.Textbox(label="Search Patterns", placeholder="Enter pattern to search...")
    search_btn = gr.Button("🔍 Search Patterns", variant="secondary")
    clear_btn_search = gr.Button("🗑️ Clear Search", variant="secondary")

    search_results = gr.JSON(
        value={"status": "Ready for search"},
        label="Search Results",
    )
    stats_display = gr.JSON(
        value={"patterns": 42, "incidents": 156, "success_rate": "87.3%"},
        label="Learning Statistics",
    )
    patterns_display = gr.JSON(
        value={"common_patterns": ["cache_storm", "db_pool", "memory_leak"]},
        label="Pattern Library",
    )
    performance_display = gr.JSON(
        value={"accuracy": "94.2%", "recall": "89.7%", "precision": "92.1%"},
        label="Agent Performance",
    )

    return (learning_graph, graph_type, show_labels, search_query, search_btn,
            clear_btn_search, search_results, stats_display, patterns_display,
            performance_display)


# -----------------------------
# Realism Panel Component
# -----------------------------
# Status label per rank; any rank other than 1 or 2 is shown as rejected.
_RANK_STATUS = {1: "✅ RECOMMENDED", 2: "🟡 SECONDARY"}
_REJECTED_STATUS = "🔴 REJECTED"

# Optional per-action fields, as (display label, key in the action dict).
_ACTION_DETAIL_FIELDS = (
    ("⚠️ Risk", "risk"),
    ("🔄 Trade-off", "tradeoff"),
    ("⏱️ Execution", "execution_time"),
    ("🚫 Rejected", "rejection_reason"),
    ("🛡️ Safety", "safety_override"),
)

# "What ARF Will NOT Do" panel (global, scenario-independent)
_SAFETY_BOUNDARIES_HTML = """

🚫 What ARF Will NOT Do (Safety Boundaries)

⛔

Restart stateful leaders

During peak traffic or elections

⛔

Apply schema changes

To production databases autonomously

⛔

Exceed API limits

Contractual or rate limits

⛔

Modify ACLs/RBAC

Security permissions autonomously

These boundaries ensure ARF operates within safe, reversible limits. Enterprise edition adds approval workflows for edge cases.

"""


def _optional_detail(label: str, value: Any) -> str:
    """Return one optional detail row, or an empty string when *value* is falsy.

    Formatting (rather than ``+`` concatenation) keeps non-string values such
    as numbers from raising ``TypeError``.
    """
    return f"""

{label}: {value}

""" if value else ""


def _render_ranked_actions(ranked_actions: List[Dict[str, Any]]) -> str:
    """Render every ranked action as a card and concatenate the cards."""
    cards = []
    for position, action in enumerate(ranked_actions, start=1):
        # Fall back to list order when an action carries no explicit rank.
        rank = action.get("rank", position)
        status = _RANK_STATUS.get(rank, _REJECTED_STATUS)
        details = " ".join(
            _optional_detail(label, action.get(key))
            for label, key in _ACTION_DETAIL_FIELDS
        )
        cards.append(f"""

{rank}

{status} • {action['confidence']}% confidence

{action['action']}

{action['confidence']}%

Rationale: {action.get('rationale', 'No rationale provided')}
{details}

""")
    return "".join(cards)


def _render_hypotheses(hypotheses: List[Dict[str, Any]]) -> str:
    """Render the competing-hypotheses section, or '' when there are none."""
    if not hypotheses:
        return ""
    entries = "".join(
        f"""

{hypo['cause']} ({hypo['confidence']}%)

{hypo['evidence']}

Investigation: {hypo['investigation_path']}

{hypo['confidence']}%

"""
        for hypo in hypotheses
    )
    return """

🧠 Competing Hypotheses

""" + entries + """

"""


def _render_risk_assessment(risk_assessment: Dict[str, Any]) -> str:
    """Render the risk-assessment section, or '' when there is none."""
    if not risk_assessment:
        return ""
    entries = "".join(
        f"""

{key.replace('_', ' ').title()}

{value}

"""
        for key, value in risk_assessment.items()
    )
    return """

⚠️ Risk Assessment

""" + entries + """

"""


def _render_confidence_degradation(conf: Dict[str, Any]) -> str:
    """Render the confidence-degradation section, or '' when there is none."""
    if not conf:
        return ""
    return f"""

⏱️ Confidence Degradation Over Time

{conf['initial']}%

Initial Confidence

→

{conf['after_8_min']}%

After 8 minutes

→

{conf['after_15_min']}%

After 15 minutes

Escalation at {conf['escalation_threshold']}%

⚠️ ARF escalates to human operators when confidence drops below {conf['escalation_threshold']}%

This prevents autonomous execution in high-uncertainty scenarios

"""


def create_realism_panel(scenario_data: Dict, scenario_name: str) -> gr.HTML:
    """
    Create a realism panel showing ranked actions, risks, and uncertainty.

    This makes ARF look cautious, opinionated, and enterprise-seasoned.

    Args:
        scenario_data: Scenario dict. Its optional ``realism`` entry may hold
            ``ranked_actions``, ``competing_hypotheses``, ``risk_assessment``
            and ``confidence_degradation``; absent or empty sections are
            simply omitted from the panel.
        scenario_name: Name of the scenario. Currently not used in the
            rendered output; kept for interface compatibility.

    Returns:
        A ``gr.HTML`` component holding the combined panel markup.

    Raises:
        KeyError: If a ranked action lacks ``confidence``/``action``, a
            hypothesis lacks one of its fields, or the confidence
            degradation data lacks one of its fields.
    """
    # "or {}" also covers an explicit `"realism": None` entry.
    realism = scenario_data.get("realism") or {}

    actions_html = _render_ranked_actions(realism.get("ranked_actions") or [])
    # Competing hypotheses (for Network Partition scenario)
    hypotheses_html = _render_hypotheses(realism.get("competing_hypotheses"))
    risk_html = _render_risk_assessment(realism.get("risk_assessment"))
    confidence_html = _render_confidence_degradation(realism.get("confidence_degradation"))
    wont_do_html = _SAFETY_BOUNDARIES_HTML

    # Combine all panels
    full_html = f"""

🎯 Ranked Healing Intents

ARF evaluates multiple options with confidence scores and risk assessments

REALISM UPGRADE v3.3.9+

{actions_html} {hypotheses_html} {risk_html} {confidence_html} {wont_do_html}

📈 Realistic ROI Estimates (Ranges)

$5.8K – $7.2K

Cost Avoided

Estimated range

4.8× – 5.6×

ROI Multiplier

Confidence interval

68% – 87%

Success Rate

Based on similar incidents

📊 Real systems have ranges, not single-point estimates. ARF shows uncertainty honestly.

🎭

What Senior SREs Expect at 3 a.m.

"Real systems hesitate. Real systems explain risk. Real systems earn trust. ARF shows multiple options with confidence scores because in production, there's never a single perfect answer—just trade-offs managed carefully."

"""
    return gr.HTML(full_html)


# -----------------------------
# Footer
# -----------------------------
def create_footer() -> gr.HTML:
    """Build the page footer.

    Returns:
        A ``gr.HTML`` component holding the footer markup.
    """
    return gr.HTML("""

Production-grade multi-agent AI for autonomous system reliability intelligence

GitHub • Demo • PyPI • Enterprise Inquiries

""")