""" Gradio-only UI components for ARF Ensures full compatibility with app.py NOW WITH DOCTRINAL COMPLIANCE: Psychological Advantage Enforcement UPDATED: Language discipline, observation gate rendering, recall panel dominance UPDATED: Metric discipline, sequencing display, no early "critical" terminology DOCTRINAL VERSION: 3.3.9+restraint """ import gradio as gr from typing import Dict, List, Any import logging import datetime import time logger = logging.getLogger(__name__) # Try to import scenarios from registry first try: from config.scenario_registry import ScenarioRegistry INCIDENT_SCENARIOS = ScenarioRegistry.load_scenarios() logger.info(f"Loaded {len(INCIDENT_SCENARIOS)} scenarios from registry") except ImportError: logger.warning("Scenario registry not available, falling back to demo scenarios") from demo.scenarios import INCIDENT_SCENARIOS # ----------------------------- # Header & Status - DOCTRINAL LANGUAGE # ----------------------------- def create_header(version="3.3.9") -> gr.HTML: return gr.HTML(f"""

v{version} (Policy + Enterprise Edition)

Production-grade policy execution for system reliability intelligence

🏗️ Architecture: OSS advises → Enterprise executes

""") def create_status_bar() -> gr.HTML: return gr.HTML("""

✅ Policy System Online ✅ ARF OSS v3.3.9 🏢 Enterprise Execution

""") # ----------------------------- # NEW: Observation Gate Renderer - CRITICAL PSYCHOLOGICAL FIX # ----------------------------- def render_observation_gate(healing_intent: Dict[str, Any]) -> gr.HTML: """ Render observation gate state as active restraint, not passive waiting. Doctrinal: Make inaction an explicit, powerful decision. """ deferral_reason = healing_intent.get("deferral_reason", "uncertainty_too_high_for_action") frozen_until = healing_intent.get("decision_frozen_until", "") confidence = healing_intent.get("confidence", 0.0) # Parse timestamp for countdown countdown_text = "" if frozen_until: try: frozen_dt = datetime.datetime.fromisoformat(frozen_until.replace("Z", "+00:00")) now = datetime.datetime.now(datetime.timezone.utc) if frozen_dt.tzinfo is None: frozen_dt = frozen_dt.replace(tzinfo=datetime.timezone.utc) time_left = frozen_dt - now minutes_left = max(0, int(time_left.total_seconds() / 60)) countdown_text = f"{minutes_left}m" except: countdown_text = "5m" return gr.HTML(f"""

⏳

Decision Intentionally Deferred

System state: observe_only • Confidence: {confidence:.1%}

ACTIVE RESTRAINT

REASON FOR DEFERRAL

{deferral_reason.replace('_', ' ').title()}

NEXT EVALUATION

System re-evaluates in: {countdown_text}

🎯

This is a System Choice, Not a Limitation

The system is choosing not to act because uncertainty exceeds policy thresholds. This restraint demonstrates operational maturity—eagerness is a liability in production.

"What you are seeing is not waiting. It is judgment under uncertainty."

PREVENTED ACTIONS (CONTRANDICATED)

scale_during_retry_storm add_capacity_during_amplification any_action_during_high_uncertainty

""") # ----------------------------- # NEW: Historical Evidence Panel - RECALL DOMINANCE # ----------------------------- def create_historical_evidence_panel(scenario_data: Dict[str, Any]) -> gr.HTML: """ Create doctrinally compliant historical evidence panel. Must be visually dominant with dates/environments. """ # Extract from scenario or use defaults historical_panel = scenario_data.get("historical_evidence_panel", {}) scaling_failures = historical_panel.get("scaling_first_failures", []) dampening_successes = historical_panel.get("dampening_first_successes", []) # Build failures HTML failures_html = "" for i, failure in enumerate(scaling_failures[:3]): # Show top 3 failures_html += f"""

{failure.get('date', 'Unknown')} • {failure.get('environment', 'Unknown')}

FAILED

Action: {failure.get('action', 'Unknown')}

Outcome: {failure.get('outcome', 'Unknown')}

{failure.get('lesson', 'No lesson captured')}

""" # Build successes HTML successes_html = "" for i, success in enumerate(dampening_successes[:3]): # Show top 3 successes_html += f"""

{success.get('date', 'Unknown')} • {success.get('environment', 'Unknown')}

SUCCESS

Action: {success.get('action', 'Unknown')}

Outcome: {success.get('outcome', 'Unknown')}

{success.get('lesson', 'No lesson captured')}

""" return gr.HTML(f"""

🧠 Historical Evidence (Why Sequencing Matters)

Real outcomes from similar incidents—this evidence dominates decision logic

RECALL DOMINANCE: POLICY OVER PREDICTION

⛔ Scaling-First Failures

{failures_html if failures_html else """

📊

No scaling failure evidence in memory

"""}

✅ Dampening-First Successes

{successes_html if successes_html else """

📊

No dampening success evidence in memory

"""}

🎯

Doctrinal Principle: Memory Dominates Models

The system prioritizes historical evidence over predictive confidence. If scaling-first failed in similar conditions, scaling is contraindicated regardless of model confidence.

"What happened is more important than what might happen."

""") # ----------------------------- # Performance Metrics Function - DOCTRINAL METRICS # ----------------------------- def update_performance_metrics(scenario_name: str, scenarios=INCIDENT_SCENARIOS) -> tuple: """ Update performance metrics based on scenario Returns: (detection_time_html, mttr_html, auto_heal_html, savings_html) Doctrinal: No red/green colors, use gradient colors instead of binary thresholds """ # Scenario-specific metrics mapping WITH GRADIENT COLORS metrics_config = { "Cache": { "detection_time": ("45s", "89% faster", "linear-gradient(135deg, #3b82f6 0%, #1d4ed8 100%)"), "mttr": ("12m", "73% reduction", "linear-gradient(135deg, #10b981 0%, #047857 100%)"), "auto_heal": ("82%", "5.4× improvement", "linear-gradient(135deg, #8b5cf6 0%, #7c3aed 100%)"), "savings_multiplier": 0.85 }, "Database": { "detection_time": ("38s", "91% faster", "linear-gradient(135deg, #3b82f6 0%, #1d4ed8 100%)"), "mttr": ("18m", "68% reduction", "linear-gradient(135deg, #10b981 0%, #047857 100%)"), "auto_heal": ("74%", "4.8× improvement", "linear-gradient(135deg, #8b5cf6 0%, #7c3aed 100%)"), "savings_multiplier": 0.82 }, "Kubernetes": { "detection_time": ("52s", "87% faster", "linear-gradient(135deg, #3b82f6 0%, #1d4ed8 100%)"), "mttr": ("15m", "71% reduction", "linear-gradient(135deg, #10b981 0%, #047857 100%)"), "auto_heal": ("79%", "5.1× improvement", "linear-gradient(135deg, #8b5cf6 0%, #7c3aed 100%)"), "savings_multiplier": 0.83 }, "Network": { "detection_time": ("28s", "93% faster", "linear-gradient(135deg, #3b82f6 0%, #1d4ed8 100%)"), "mttr": ("8m", "82% reduction", "linear-gradient(135deg, #10b981 0%, #047857 100%)"), "auto_heal": ("89%", "6.2× improvement", "linear-gradient(135deg, #8b5cf6 0%, #7c3aed 100%)"), "savings_multiplier": 0.88 }, "Storage": { "detection_time": ("35s", "92% faster", "linear-gradient(135deg, #3b82f6 0%, #1d4ed8 100%)"), "mttr": ("22m", "65% reduction", "linear-gradient(135deg, #10b981 0%, #047857 100%)"), "auto_heal": ("72%", "4.6× 
improvement", "linear-gradient(135deg, #8b5cf6 0%, #7c3aed 100%)"), "savings_multiplier": 0.80 }, "Default": { "detection_time": ("42s", "90% faster", "linear-gradient(135deg, #3b82f6 0%, #1d4ed8 100%)"), "mttr": ("14m", "70% reduction", "linear-gradient(135deg, #10b981 0%, #047857 100%)"), "auto_heal": ("79%", "5.0× improvement", "linear-gradient(135deg, #8b5cf6 0%, #7c3aed 100%)"), "savings_multiplier": 0.85 } } # Determine scenario type scenario_type = "Default" for key in ["Cache", "Database", "Kubernetes", "Network", "Storage"]: if key.lower() in scenario_name.lower(): scenario_type = key break # Get metrics for scenario type metrics = metrics_config.get(scenario_type, metrics_config["Default"]) # Get scenario data for savings calculation scenario_data = scenarios.get(scenario_name, {}) business_impact = scenario_data.get("business_impact", {}) revenue_loss = business_impact.get('revenue_risk_per_hour', 8500) # Changed from 'revenue_loss_per_hour' savings_amount = int(revenue_loss * metrics["savings_multiplier"] / 1000) # Create HTML for each metric card WITH GRADIENT BORDERS detection_time_html = f"""

⏱️

Detection Time

{metrics['detection_time'][0]}

{metrics['detection_time'][1]} than baseline

""" mttr_html = f"""

⚡

Mean Time to Resolve

{metrics['mttr'][0]}

{metrics['mttr'][1]} than manual

""" auto_heal_html = f"""

🛡️

Auto-Heal Rate

{metrics['auto_heal'][0]}

{metrics['auto_heal'][1]}

""" savings_html = f"""

💰

Cost Avoided

${savings_amount:.1f}K

Per incident avoided

""" logger.info(f"✅ Updated performance metrics for {scenario_name} ({scenario_type} type)") return detection_time_html, mttr_html, auto_heal_html, savings_html # ----------------------------- # Tab 1: Live Incident Demo - DOCTRINAL COMPLIANCE # ----------------------------- def create_tab1_incident_demo(scenarios=INCIDENT_SCENARIOS, default_scenario="Cache Miss Storm") -> tuple: """ Create doctrinally compliant incident demo tab. Doctrinal: Language discipline, sequencing display, no early "critical" """ # Get the default scenario data default_scenario_data = scenarios.get(default_scenario, {}) business_impact = default_scenario_data.get("business_impact", {}) metrics = default_scenario_data.get("metrics", {}) with gr.Row(): # Left Column: Scenario Selection & Live Visualization with gr.Column(scale=1, variant="panel") as left_col: # Scenario Selection with rich preview scenario_dropdown = gr.Dropdown( choices=list(scenarios.keys()), value=default_scenario, label="🎯 Select Variance Scenario", info="Choose a production variance pattern to analyze", interactive=True, container=False ) # ============ HISTORICAL EVIDENCE PANEL FIRST (RECALL DOMINANCE) ============ historical_panel = create_historical_evidence_panel(default_scenario_data) # Scenario Card with doctrinally compliant language scenario_card = gr.HTML(f"""

📊 {default_scenario}

{default_scenario_data.get('severity', 'HIGH_VARIANCE')}

Component: {default_scenario_data.get('component', 'Unknown').replace('_', ' ').title()}

Users Affected: {metrics.get('affected_users', 'Unknown') if 'affected_users' in metrics else 'Unknown'}

Revenue Risk: ${business_impact.get('revenue_risk_per_hour', 0):,}/hour

Detection Time: 45 seconds (Policy System)

{default_scenario_data.get('component', 'unknown').split('_')[0]} variance production pattern

""") # Visualization section with gr.Row(): with gr.Column(scale=1): telemetry_header = gr.Markdown("### 📈 Live Telemetry") telemetry_viz = gr.Plot( label="", show_label=False, elem_id="telemetry_plot" ) with gr.Column(scale=1): impact_header = gr.Markdown("### 💰 Business Impact") impact_viz = gr.Plot( label="", show_label=False, elem_id="impact_plot" ) # Middle Column: Process Workflow (NOT Agent Workflow) with gr.Column(scale=2, variant="panel") as middle_col: # ============ OBSERVATION GATE PLACEHOLDER ============ observation_gate_placeholder = gr.HTML("""

🎯

System State

Run analysis to see system's judgment under uncertainty

""") # ============ SEQUENCING VISUALIZATION ============ sequencing_header = gr.Markdown("### 🔄 Sequencing Logic: Dampening → Concurrency → Observe → Scale") sequencing_panel = gr.HTML("""

Doctrinal Sequencing

POLICY ENFORCED

Dampening

Prevent amplification

→

Concurrency Control

Manage load

→

Observe

Validate trends

→

Scale

Only if necessary

Doctrinal Constraint: Scaling NEVER appears in same intent bundle as dampening. System must observe stabilization before considering capacity increases.

""") # Process Workflow Header (NOT Agent Workflow) workflow_header = gr.Markdown("## 🔄 Policy Process Workflow") workflow_subheader = gr.Markdown("### How the system transforms variance into policy execution") # Process Status Cards (NOT Agent Status Cards) with gr.Row(): detection_process = gr.HTML("""

🕵️‍♂️

Detection Process

Click "Run Policy Analysis" to activate

Status: Inactive

WAITING

""") recall_process = gr.HTML("""

🧠

Recall Process

Click "Run Policy Analysis" to activate

Status: Inactive

WAITING

""") decision_process = gr.HTML("""

🎯

Decision Process

Click "Run Policy Analysis" to activate

Status: Inactive

WAITING

""") # Mode Selection & Safety Controls with gr.Row(): with gr.Column(scale=1): approval_toggle = gr.CheckboxGroup( choices=["👤 Require Human Approval"], label="Safety Controls", value=[], info="Toggle human oversight" ) with gr.Column(scale=2): mcp_mode = gr.Radio( choices=["🛡️ Advisory (OSS Only)", "👥 Approval", "⚡ Autonomous"], value="🛡️ Advisory (OSS Only)", label="Policy Safety Mode", info="Control execution safety level", interactive=True ) # OSS vs Enterprise Boundary Visualization boundary_header = gr.Markdown("### 🎭 Policy vs Execution: The Safety Boundary") with gr.Row(): oss_section = gr.HTML("""

🆓

Policy Edition

Apache 2.0

Analysis & Advisory Only - No execution, permanently safe

📝 HealingIntent Created

94% confidence

Action: Implement request coalescing with exponential backoff

Pattern Match: Similar incident resolved with dampening (87% success rate)

Contraindications: ✅ Checked (retry amplification detected)

Sequencing Rule: dampening_first_then_observe_then_optional_scale

🚫 OSS STOPS HERE - No execution

""") enterprise_section = gr.HTML("""

💰

Execution Edition

Commercial

Full Execution & Learning - Autonomous healing with safety guarantees

⚡ Ready to Execute

AUTONOMOUS

Mode: Autonomous (Requires Enterprise license)

Expected Recovery: 12 minutes (vs 45 min manual)

Cost Avoided: $6,375

Users Protected: 45,000 → 0 impacted

✅ Enterprise executes with MCP safety

""") # Execution Controls with gr.Row(): with gr.Column(scale=1): oss_btn = gr.Button( "🆓 Run Policy Analysis", variant="secondary", size="lg" ) oss_info = gr.Markdown("*Free, policy-only analysis*") with gr.Column(scale=1): enterprise_btn = gr.Button( "💰 Execute Enterprise Healing", variant="primary", size="lg" ) enterprise_info = gr.Markdown("*Requires Enterprise license*") # Timeline visualization timeline_header = gr.Markdown("### ⏰ Incident Timeline") timeline_viz = gr.Plot( label="", show_label=False, elem_id="timeline_plot" ) # Right Column: Results & Metrics with gr.Column(scale=1, variant="panel") as right_col: # Real-time Metrics Dashboard metrics_header = gr.Markdown("## 📊 Performance Metrics") # Metric Cards Grid - CALL update_performance_metrics function detection_time = gr.HTML() mttr = gr.HTML() auto_heal = gr.HTML() savings = gr.HTML() # Results Display Areas oss_results_header = gr.Markdown("### 🆓 Policy Analysis Results") oss_results_display = gr.JSON( label="", value={ "status": "Analysis Pending", "processes": ["Detection", "Recall", "Decision"], "mode": "Advisory Only", "action": "Generate Formal HealingIntent" }, height=200 ) enterprise_results_header = gr.Markdown("### 💰 Execution Results") enterprise_results_display = gr.JSON( label="", value={ "status": "Execution Pending", "requires_license": True, "available_modes": ["Approval", "Autonomous"], "expected_outcome": "12m MTTR, $6.3K saved" }, height=200 ) # Approval Status approval_display = gr.HTML("""

👤 Human Approval Status

Not Required

Current Mode: Advisory (Policy Only)

Switch to "Approval" mode to enable human-in-the-loop workflows

1. System generates formal HealingIntent

2. Human reviews & approves contraindications

3. System executes with sequencing constraints

""") # Demo Actions demo_btn = gr.Button( "▶️ Run Complete Walkthrough", variant="secondary", size="lg" ) demo_info = gr.Markdown("*Experience the full workflow from detection to resolution*") return ( # Left column returns scenario_dropdown, historical_panel, scenario_card, telemetry_viz, impact_viz, # Middle column returns observation_gate_placeholder, sequencing_panel, workflow_header, detection_process, recall_process, decision_process, oss_section, enterprise_section, oss_btn, enterprise_btn, approval_toggle, mcp_mode, timeline_viz, # Right column returns detection_time, mttr, auto_heal, savings, oss_results_display, enterprise_results_display, approval_display, demo_btn ) # ----------------------------- # NEW: Create Realism Panel (Updated for doctrinal compliance) # ----------------------------- def create_realism_panel(scenario_data: Dict, scenario_name: str) -> gr.HTML: """ Create doctrinally compliant realism panel. Updated to show formal HealingIntent fields and sequencing logic. """ ranked_actions = scenario_data.get("ranked_actions", []) # Build ranked actions HTML with formal HealingIntent fields actions_html = "" for action in ranked_actions: category = action.get("category", "unknown") category_color = { "dampening": "#3b82f6", "concurrency_control": "#10b981", "observation": "#8b5cf6", "scaling": "#f59e0b" }.get(category, "#64748b") rank_color = "#3b82f6" if action["rank"] == 1 else "#f59e0b" if action["rank"] == 2 else "#64748b" status = "✅ RECOMMENDED" if action["rank"] == 1 else "🟡 SECONDARY" if action["rank"] == 2 else "🔴 CONTRAINDICATED" # Formal HealingIntent fields preconditions_html = "" if action.get("preconditions"): preconditions_html = f"""

Preconditions:

{"".join([f'

• {pre}

' for pre in action["preconditions"]])}

""" contraindications_html = "" if action.get("contraindicated_actions"): contraindications_html = f"""

Contraindicated Actions:

{"".join([f'

⛔ {contra}

' for contra in action["contraindicated_actions"]])}

""" reversibility_html = "" if action.get("reversibility_statement"): reversibility_html = f"""

Reversibility Statement:

{action["reversibility_statement"]}

""" historical_evidence_html = "" if action.get("historical_evidence"): historical_evidence_html = f"""

Historical Evidence:

{"".join([f'

📊 {evidence}

' for evidence in action["historical_evidence"]])}

""" actions_html += f"""

{action['rank']}

{status} • {action.get('confidence', 0)}% confidence {category.upper().replace('_', ' ')}

{action.get('action', 'No action specified')}

{action.get('confidence', 0)}%

{preconditions_html} {contraindications_html} {reversibility_html} {historical_evidence_html}

Sequencing: {action.get('category', 'unknown').replace('_', ' ')} • {action.get('constraints', ['No constraints'])[0]}

""" # Combine all panels full_html = f"""

🎯 Formal HealingIntent Sequence

Policy-generated intents with preconditions, contraindications, and reversibility statements

DOCTRINAL COMPLIANCE v3.3.9+

{actions_html if actions_html else '

No ranked actions available

🔄 Doctrinal Sequencing Enforcement

Dampening

First in sequence

Concurrency

Then control

Observe

Then validate

Scale

Only if necessary

Doctrinal Constraint: Scaling actions have lower confidence than dampening actions and appear last. If retry amplification is detected, scaling is contraindicated entirely.

""" return gr.HTML(full_html) # ----------------------------- # Tab 2: Business ROI - Updated # ----------------------------- def create_tab2_business_roi(scenarios=INCIDENT_SCENARIOS) -> tuple: dashboard_output = gr.Plot(label="Executive Dashboard", show_label=True) roi_scenario_dropdown = gr.Dropdown( choices=list(scenarios.keys()), value="Cache Miss Storm", label="Scenario for ROI Analysis", info="Select the primary incident type for ROI calculation" ) monthly_slider = gr.Slider( minimum=1, maximum=50, value=15, step=1, label="Monthly Incidents", info="Average number of incidents per month" ) team_slider = gr.Slider( minimum=1, maximum=50, value=5, step=1, label="Team Size", info="Number of engineers on reliability team" ) calculate_btn = gr.Button("📊 Calculate Comprehensive ROI", variant="primary", size="lg") roi_output = gr.JSON(label="ROI Analysis Results", value={}) roi_chart = gr.Plot(label="ROI Visualization") return (dashboard_output, roi_scenario_dropdown, monthly_slider, team_slider, calculate_btn, roi_output, roi_chart) # ----------------------------- # Tab 3: Enterprise Features - UPDATED WITH INSTALLATION STATUS # ----------------------------- def create_tab3_enterprise_features() -> tuple: # Get installation status try: from app import get_installation_status installation = get_installation_status() license_data = { "status": "✅ OSS Installed" if installation["oss_installed"] else "⚠️ OSS Not Installed", "oss_version": installation["oss_version"] or "Not installed", "enterprise_installed": installation["enterprise_installed"], "enterprise_version": installation["enterprise_version"] or "Not installed", "execution_allowed": installation["execution_allowed"], "recommendations": installation["recommendations"], "badges": installation["badges"] } # Update features table based on installation features_data = [ ["ARF OSS Package", "✅ Installed" if installation["oss_installed"] else "❌ Not Installed", "OSS"], ["Self-Healing Core", "✅ Active", 
"Enterprise"], ["RAG Graph Memory", "✅ Active", "Both"], ["Predictive Analytics", "🔒 Enterprise" if not installation["enterprise_installed"] else "✅ Available", "Enterprise"], ["Audit Trail", "🔒 Enterprise" if not installation["enterprise_installed"] else "✅ Available", "Enterprise"], ["Compliance (SOC2)", "🔒 Enterprise" if not installation["enterprise_installed"] else "✅ Available", "Enterprise"] ] except ImportError: # Fallback if installation check fails license_data = { "status": "⚠️ Installation Check Failed", "oss_version": "Unknown", "enterprise_installed": False, "recommendations": ["Run installation check"] } features_data = [ ["Self-Healing Core", "✅ Active", "Enterprise"], ["RAG Graph Memory", "✅ Active", "Both"], ["Predictive Analytics", "🔒 Enterprise", "Enterprise"], ["Audit Trail", "🔒 Enterprise", "Enterprise"], ["Compliance (SOC2)", "🔒 Enterprise", "Enterprise"], ["Multi-Cloud", "🔒 Enterprise", "Enterprise"] ] license_display = gr.JSON( value=license_data, label="📦 Package Installation Status" ) validate_btn = gr.Button("🔍 Validate Installation", variant="secondary") trial_btn = gr.Button("🆓 Start 30-Day Trial", variant="secondary") upgrade_btn = gr.Button("🚀 Upgrade to Enterprise", variant="primary") mcp_mode = gr.Dropdown( choices=["advisory", "approval", "autonomous"], value="advisory", label="MCP Safety Mode" ) # Initial mode info mcp_mode_info = gr.JSON( value={ "current_mode": "advisory", "description": "OSS Edition - Analysis only, no execution", "features": ["Incident analysis", "RAG similarity", "HealingIntent creation"], "package": "agentic-reliability-framework==3.3.7", "license": "Apache 2.0" }, label="Mode Details" ) integrations_data = [ ["Prometheus", "✅ Connected", "Monitoring"], ["Grafana", "✅ Connected", "Visualization"], ["Slack", "🔒 Enterprise", "Notifications"], ["PagerDuty", "🔒 Enterprise", "Alerting"], ["Jira", "🔒 Enterprise", "Ticketing"], ["Datadog", "🔒 Enterprise", "Monitoring"] ] features_table = gr.Dataframe( 
# -----------------------------
# Tab 4: Audit Trail
# -----------------------------
def create_tab4_audit_trail() -> tuple:
    """Create the audit trail tab components.

    Returns:
        ``(refresh_btn, clear_btn, export_btn, execution_table,
        incident_table, export_text)``, created in that order.
    """
    # Tuple elements are evaluated left to right, so the components are
    # created in the same order as they are returned.
    return (
        gr.Button("🔄 Refresh Audit Trail", variant="secondary"),
        gr.Button("🗑️ Clear History", variant="secondary"),
        gr.Button("📥 Export as JSON", variant="primary"),
        gr.Dataframe(
            headers=["Time", "Scenario", "Mode", "Status", "Savings", "Details"],
            value=[],
            label="Execution History",
        ),
        gr.Dataframe(
            headers=["Time", "Component", "Scenario", "Severity", "Status"],
            value=[],
            label="Incident History",
        ),
        gr.JSON(
            value={"status": "Export ready"},
            label="Export Data",
        ),
    )


# -----------------------------
# Tab 5: Learning Engine
# -----------------------------
def create_tab5_learning_engine() -> tuple:
    """Create the learning engine tab components.

    Returns:
        ``(learning_graph, graph_type, show_labels, search_query, search_btn,
        clear_btn_search, search_results, stats_display, patterns_display,
        performance_display)``, created in that order.
    """
    graph_choices = ["Incident Patterns", "Action-Outcome Chains", "System Dependencies"]
    initial_stats = {"patterns": 42, "incidents": 156, "success_rate": "87.3%"}
    initial_patterns = {"common_patterns": ["cache_storm", "db_pool", "memory_leak"]}
    initial_performance = {"accuracy": "94.2%", "recall": "89.7%", "precision": "92.1%"}

    return (
        gr.Plot(label="RAG Memory Graph"),
        gr.Dropdown(
            choices=graph_choices,
            value="Incident Patterns",
            label="Graph Type",
        ),
        gr.Checkbox(label="Show Labels", value=True),
        gr.Textbox(label="Search Patterns", placeholder="Enter pattern to search..."),
        gr.Button("🔍 Search Patterns", variant="secondary"),
        gr.Button("🗑️ Clear Search", variant="secondary"),
        gr.JSON(value={"status": "Ready for search"}, label="Search Results"),
        gr.JSON(value=initial_stats, label="Learning Statistics"),
        gr.JSON(value=initial_patterns, label="Pattern Library"),
        gr.JSON(value=initial_performance, label="Agent Performance"),
    )
# -----------------------------
# Realism Panel Component
# -----------------------------
def _render_optional_note(label: str, value: Any) -> str:
    """Render ``label value`` as its own paragraph, or ``""`` when *value* is falsy."""
    # Interpolation instead of "+" so non-string values cannot raise TypeError.
    return f"\n\n{label} {value}\n\n" if value else ""


def create_realism_panel(scenario_data: Dict, scenario_name: str) -> gr.HTML:
    """Create a realism panel showing ranked actions, risks, and uncertainty.

    This makes ARF look cautious, opinionated, and enterprise-seasoned.

    NOTE(review): an earlier function with the same name exists in this
    module; being defined later, this one is the definition callers get.

    Args:
        scenario_data: Scenario mapping; its ``realism`` entry is read for
            ``ranked_actions``, ``competing_hypotheses``, ``risk_assessment``
            and ``confidence_degradation``.
        scenario_name: Accepted for interface compatibility; not used here.

    Returns:
        A ``gr.HTML`` component combining the ranked actions, optional
        hypothesis / risk / confidence sections, the fixed safety boundaries
        and the ROI range summary.

    Raises:
        KeyError: If a competing hypothesis or the confidence degradation
            mapping lacks one of the fields it is indexed with.
    """
    realism = scenario_data.get("realism") or {}
    ranked_actions = realism.get("ranked_actions") or []

    # Build ranked actions HTML
    rendered_actions = []
    for position, action in enumerate(ranked_actions, start=1):
        # A missing rank falls back to list position rather than raising.
        rank = action.get("rank", position)
        confidence = action.get("confidence", 0)

        if rank == 1:
            status = "✅ RECOMMENDED"
        elif rank == 2:
            status = "🟡 SECONDARY"
        else:
            status = "🔴 REJECTED"

        # Empty notes are still joined so the spacing between the slots is
        # the same whether or not a note is present.
        notes = " ".join([
            _render_optional_note("⚠️ Risk:", action.get('risk')),
            _render_optional_note("🔄 Trade-off:", action.get('tradeoff')),
            _render_optional_note("⏱️ Execution:", action.get('execution_time')),
            _render_optional_note("🚫 Rejected:", action.get('rejection_reason')),
            _render_optional_note("🛡️ Safety:", action.get('safety_override')),
        ])

        rendered_actions.append(f"""

{rank}

{status} • {confidence}% confidence

{action.get('action', 'No action specified')}

{confidence}%

Rationale: {action.get('rationale', 'No rationale provided')}

{notes}

""")
    actions_html = "".join(rendered_actions)

    # Build competing hypotheses (for Network Partition scenario)
    hypotheses_html = ""
    if realism.get("competing_hypotheses"):
        hypothesis_rows = "".join(
            f"""

{hypo['cause']} ({hypo['confidence']}%)

{hypo['evidence']}

Investigation: {hypo['investigation_path']}

{hypo['confidence']}%

"""
            for hypo in realism["competing_hypotheses"]
        )
        hypotheses_html = f"\n\n🧠 Competing Hypotheses\n\n{hypothesis_rows}\n\n"

    # Build risk assessment panel
    risk_html = ""
    if realism.get("risk_assessment"):
        risk_rows = "".join(
            f"""

{key.replace('_', ' ').title()}

{value}

"""
            for key, value in realism["risk_assessment"].items()
        )
        risk_html = f"\n\n⚠️ Risk Assessment\n\n{risk_rows}\n\n"

    # Build confidence degradation panel
    confidence_html = ""
    if realism.get("confidence_degradation"):
        conf = realism["confidence_degradation"]
        confidence_html = f"""

⏱️ Confidence Degradation Over Time

{conf['initial']}%

Initial Confidence

→

{conf['after_8_min']}%

After 8 minutes

→

{conf['after_15_min']}%

After 15 minutes

Escalation at {conf['escalation_threshold']}%

⚠️ ARF escalates to human operators when confidence drops below {conf['escalation_threshold']}%

This prevents autonomous execution in high-uncertainty scenarios

"""

    # Build "What ARF Will NOT Do" panel (global)
    wont_do_html = """

🚫 What ARF Will NOT Do (Safety Boundaries)

⛔

Restart stateful leaders

During peak traffic or elections

⛔

Apply schema changes

To production databases autonomously

⛔

Exceed API limits

Contractual or rate limits

⛔

Modify ACLs/RBAC

Security permissions autonomously

These boundaries ensure ARF operates within safe, reversible limits. Enterprise edition adds approval workflows for edge cases.

"""

    # Combine all panels
    full_html = f"""

🎯 Ranked Healing Intents

ARF evaluates multiple options with confidence scores and risk assessments

REALISM UPGRADE v3.3.9+

{actions_html} {hypotheses_html} {risk_html} {confidence_html} {wont_do_html}

📈 Realistic ROI Estimates (Ranges)

$5.8K – $7.2K

Cost Avoided

Estimated range

4.8× – 5.6×

ROI Multiplier

Confidence interval

68% – 87%

Success Rate

Based on similar incidents

📊 Real systems have ranges, not single-point estimates. ARF shows uncertainty honestly.

🎭

What Senior SREs Expect at 3 a.m.

"Real systems hesitate. Real systems explain risk. Real systems earn trust. ARF shows multiple options with confidence scores because in production, there's never a single perfect answer—just trade-offs managed carefully."

"""
    return gr.HTML(full_html)
""" return gr.HTML(full_html) # ----------------------------- # Footer # ----------------------------- def create_footer() -> gr.HTML: return gr.HTML("""

Production-grade multi-agent AI for autonomous system reliability intelligence

GitHub • Demo • PyPI • Enterprise Inquiries

""")