"""
Gradio-only UI components for ARF
Ensures full compatibility with app.py
Updated with proper imports and error handling
NOW WITH REAL ARF INSTALLATION DETECTION
UPDATED: Added realism panel integration for Tab 1
"""
import gradio as gr
from typing import Dict, List, Any
import logging
logger = logging.getLogger(__name__)
# Try to import scenarios from registry first
# Populate INCIDENT_SCENARIOS once at import time. The registry is the
# preferred source; the bundled demo scenarios are used only when the registry
# module (or something it imports) cannot be imported.
try:
    from config.scenario_registry import ScenarioRegistry
    INCIDENT_SCENARIOS = ScenarioRegistry.load_scenarios()
    logger.info(f"Loaded {len(INCIDENT_SCENARIOS)} scenarios from registry")
except ImportError:
    # Only ImportError triggers the fallback; any other failure raised while
    # loading scenarios propagates to whoever imports this module.
    logger.warning("Scenario registry not available, falling back to demo scenarios")
    from demo.scenarios import INCIDENT_SCENARIOS
# -----------------------------
# Header & Status - UPDATED WITH INSTALLATION CHECK
# -----------------------------
def create_header(version="3.3.9") -> gr.HTML:
    """Build the banner component shown at the top of the UI.

    Args:
        version: Version label interpolated after the leading ``v``.

    Returns:
        A ``gr.HTML`` component wrapping the banner text.
    """
    # Template lines are kept flush-left so the rendered text is unchanged.
    banner_markup = f"""
v{version} (OSS + Enterprise Edition)
Production-grade multi-agent AI for autonomous system reliability intelligence
ποΈ Architecture: OSS advises β Enterprise executes
"""
    return gr.HTML(banner_markup)
def create_status_bar(version="3.3.9") -> gr.HTML:
    """Build the status strip shown beneath the header.

    Args:
        version: OSS version label shown in the strip. Defaults to the value
            that was previously hard-coded, so existing zero-argument callers
            get identical output; mirrors ``create_header(version=...)``.

    Returns:
        A ``gr.HTML`` component wrapping the status text.
    """
    # NOTE(review): the status glyphs below look mis-decoded (UTF-8 read with a
    # Greek single-byte codec?) - confirm against the canonical file.
    status_markup = f"""
β
System Online
β
ARF OSS v{version}
π’ Enterprise Edition
π ROI: 5.2Γ
"""
    return gr.HTML(status_markup)
# -----------------------------
# Tab 1: Live Incident Demo - UPDATED WITH REALISM PANEL
# -----------------------------
def create_tab1_incident_demo(scenarios=INCIDENT_SCENARIOS, default_scenario="Cache Miss Storm") -> tuple:
    """Create the components for Tab 1 (live incident demo).

    The tab is one row with three columns: scenario selection and plots on the
    left, the agent workflow / OSS-vs-Enterprise boundary / controls / realism
    panel in the middle, and metric cards plus result displays on the right.

    Args:
        scenarios: Mapping of scenario name to scenario data. Only the
            ``business_impact``, ``metrics``, ``severity`` and ``component``
            keys of the default scenario are read here.
        default_scenario: Name pre-selected in the dropdown and used to fill
            the initial scenario card. If it is not a key of ``scenarios`` the
            card falls back to placeholder values.

    Returns:
        A 24-tuple of components, in this order: ``scenario_dropdown``,
        ``scenario_card``, ``telemetry_viz``, ``impact_viz``,
        ``workflow_header``, ``detection_agent``, ``recall_agent``,
        ``decision_agent``, ``oss_section``, ``enterprise_section``,
        ``oss_btn``, ``enterprise_btn``, ``approval_toggle``, ``mcp_mode``,
        ``timeline_viz``, ``realism_panel``, ``detection_time``, ``mttr``,
        ``auto_heal``, ``savings``, ``oss_results_display``,
        ``enterprise_results_display``, ``approval_display``, ``demo_btn``.
    """
    # Data for the initially selected scenario; missing pieces become empty
    # dicts so the lookups below never raise.
    default_scenario_data = scenarios.get(default_scenario, {})
    business_impact = default_scenario_data.get("business_impact", {})
    metrics = default_scenario_data.get("metrics", {})

    # Cost-saved figure in thousands. Kept as a float so the one-decimal
    # format below is meaningful (truncating to int first always gave "X.0K").
    cost_saved_k = business_impact.get('revenue_loss_per_hour', 8500) * 0.85 / 1000

    # NOTE(review): the nesting of the layout contexts below was reconstructed
    # from the section comments and the grouping of the returned tuple -
    # confirm against the intended layout.
    # Components that are never referenced again are created without binding
    # them to a name; creation order is unchanged.
    with gr.Row():
        # Left Column: Scenario Selection & Live Visualization
        with gr.Column(scale=1, variant="panel"):
            # Scenario Selection with rich preview
            scenario_dropdown = gr.Dropdown(
                choices=list(scenarios.keys()),
                value=default_scenario,
                label="π― Select Incident Scenario",
                info="Choose a production incident to analyze",
                interactive=True,
                container=False
            )

            # Scenario Card with rich information
            scenario_card = gr.HTML(f"""
π¨ {default_scenario}
{default_scenario_data.get('severity', 'HIGH')}
Component:
{default_scenario_data.get('component', 'Unknown').replace('_', ' ').title()}
Affected Users:
{metrics.get('affected_users', 'Unknown')}
Revenue Risk:
${business_impact.get('revenue_loss_per_hour', 0):,}/hour
Detection Time:
45 seconds (ARF AI)
{default_scenario_data.get('component', 'unknown').split('_')[0]}
{default_scenario_data.get('severity', 'high').lower()}
production
incident
""")

            # Visualization section - gr.Plot() placeholders for Plotly figures
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### π Live Telemetry")
                    # This expects a Plotly figure from app.py
                    telemetry_viz = gr.Plot(
                        label="",
                        show_label=False,
                        elem_id="telemetry_plot"
                    )
                with gr.Column(scale=1):
                    gr.Markdown("### π° Business Impact")
                    impact_viz = gr.Plot(
                        label="",
                        show_label=False,
                        elem_id="impact_plot"
                    )

        # Middle Column: Agent Workflow
        with gr.Column(scale=2, variant="panel"):
            # Agent Workflow Header
            workflow_header = gr.Markdown("## π ARF Agent Workflow")
            gr.Markdown("### How ARF transforms incidents into autonomous healing")

            # Agent Status Cards
            with gr.Row():
                detection_agent = gr.HTML("""
π΅οΈββοΈ
Detection Agent
Click "Run OSS Analysis" to activate
Status: Inactive
WAITING
""")
                recall_agent = gr.HTML("""
π§ 
Recall Agent
Click "Run OSS Analysis" to activate
Status: Inactive
WAITING
""")
                decision_agent = gr.HTML("""
π―
Decision Agent
Click "Run OSS Analysis" to activate
Status: Inactive
WAITING
""")

            # OSS vs Enterprise Boundary Visualization
            gr.Markdown("### π OSS vs Enterprise: The Safety Boundary")
            with gr.Row():
                oss_section = gr.HTML("""
π
OSS Edition
Apache 2.0
Analysis & Advisory Only - No execution, permanently safe
π Healing Intent Created
94% confidence
Action: Scale Redis cluster from 3 to 5 nodes
Pattern Match: Similar incident resolved with scaling (87% success rate)
Safety Check: β
Passed (blast radius: 2 services)
Estimated Impact: Reduce MTTR from 45min to 12min
π« OSS STOPS HERE - No execution
""")
                enterprise_section = gr.HTML("""
π°
Enterprise Edition
Commercial
Full Execution & Learning - Autonomous healing with safety guarantees
β‘ Ready to Execute
AUTONOMOUS
Mode: Autonomous (Requires Enterprise license)
Expected Recovery: 12 minutes (vs 45 min manual)
Cost Saved: $6,375
Users Protected: 45,000 β 0 impacted
β
Enterprise executes with MCP safety
""")

            # Execution Controls
            with gr.Row():
                with gr.Column(scale=1):
                    oss_btn = gr.Button(
                        "π Run OSS Analysis",
                        variant="secondary",
                        size="lg"
                    )
                    gr.Markdown("*Free, open-source analysis*")
                with gr.Column(scale=1):
                    enterprise_btn = gr.Button(
                        "π° Execute Enterprise Healing",
                        variant="primary",
                        size="lg"
                    )
                    gr.Markdown("*Requires Enterprise license*")

            # Mode Selection & Safety Controls
            with gr.Row():
                with gr.Column(scale=1):
                    approval_toggle = gr.CheckboxGroup(
                        choices=["π€ Require Human Approval"],
                        label="Safety Controls",
                        value=[],
                        info="Toggle human oversight"
                    )
                with gr.Column(scale=2):
                    mcp_mode = gr.Radio(
                        choices=["π‘οΈ Advisory (OSS Only)", "π₯ Approval", "β‘ Autonomous"],
                        value="π‘οΈ Advisory (OSS Only)",
                        label="MCP Safety Mode",
                        info="Control execution safety level",
                        interactive=True
                    )

            # Timeline visualization
            gr.Markdown("### β° Incident Timeline")
            timeline_viz = gr.Plot(
                label="",
                show_label=False,
                elem_id="timeline_plot"
            )

            # Realism panel placeholder; replaced once a scenario is selected.
            gr.Markdown("### π Realism: Trade-offs & Uncertainty")
            realism_panel = gr.HTML(
                value="""
π§ 
Realism Panel
Select a scenario to see ranked actions, risks, and trade-offs
""",
                elem_id="realism_panel"
            )

        # Right Column: Results & Metrics
        with gr.Column(scale=1, variant="panel"):
            # Real-time Metrics Dashboard
            gr.Markdown("## π Performance Metrics")

            # Metric Cards Grid
            with gr.Row():
                detection_time = gr.HTML("""
β±οΈ
Detection Time
45s
β 89% faster than average
""")
                mttr = gr.HTML("""
β‘
Mean Time to Resolve
12m
β 73% faster than manual
""")
            with gr.Row():
                auto_heal = gr.HTML("""
π€
Auto-Heal Rate
81.7%
β 5.4Γ industry average
""")
                savings = gr.HTML(f"""
π°
Cost Saved
${cost_saved_k:.1f}K
Per incident avoided
""")

            # Results Display Areas
            gr.Markdown("### π OSS Analysis Results")
            oss_results_display = gr.JSON(
                label="",
                value={
                    "status": "Analysis Pending",
                    "agents": ["Detection", "Recall", "Decision"],
                    "mode": "Advisory Only",
                    "action": "Generate HealingIntent"
                },
                height=200
            )
            gr.Markdown("### π° Enterprise Results")
            enterprise_results_display = gr.JSON(
                label="",
                value={
                    "status": "Execution Pending",
                    "requires_license": True,
                    "available_modes": ["Approval", "Autonomous"],
                    "expected_outcome": "12m MTTR, $6.3K saved"
                },
                height=200
            )

            # Approval Status
            approval_display = gr.HTML("""
π€ Human Approval Status
Not Required
Current Mode: Advisory (OSS Only)
Switch to "Approval" mode to enable human-in-the-loop workflows
1. ARF generates intent
2. Human reviews & approves
3. ARF executes safely
""")

            # Demo Actions
            demo_btn = gr.Button(
                "βΆοΈ Run Complete Demo Walkthrough",
                variant="secondary",
                size="lg"
            )
            gr.Markdown("*Experience the full ARF workflow from detection to resolution*")

    return (
        # Left column returns
        scenario_dropdown, scenario_card, telemetry_viz, impact_viz,
        # Middle column returns (WITH REALISM PANEL)
        workflow_header, detection_agent, recall_agent, decision_agent,
        oss_section, enterprise_section, oss_btn, enterprise_btn,
        approval_toggle, mcp_mode, timeline_viz, realism_panel,
        # Right column returns
        detection_time, mttr, auto_heal, savings,
        oss_results_display, enterprise_results_display, approval_display, demo_btn
    )
# -----------------------------
# Tab 2: Business ROI - Updated
# -----------------------------
def create_tab2_business_roi(scenarios=INCIDENT_SCENARIOS) -> tuple:
    """Create the components for Tab 2 (business ROI calculator).

    Args:
        scenarios: Mapping of scenario name to scenario data; only its keys
            are used, as the dropdown choices.

    Returns:
        A 7-tuple: ``(dashboard_output, roi_scenario_dropdown, monthly_slider,
        team_slider, calculate_btn, roi_output, roi_chart)``.
    """
    scenario_names = list(scenarios.keys())

    # Pre-select "Cache Miss Storm" when it exists. Otherwise fall back to the
    # first available scenario (or nothing) so the dropdown never starts with
    # a value that is missing from its own choices.
    preferred_scenario = "Cache Miss Storm"
    if preferred_scenario in scenarios:
        initial_scenario = preferred_scenario
    else:
        initial_scenario = scenario_names[0] if scenario_names else None

    dashboard_output = gr.Plot(label="Executive Dashboard", show_label=True)
    roi_scenario_dropdown = gr.Dropdown(
        choices=scenario_names,
        value=initial_scenario,
        label="Scenario for ROI Analysis",
        info="Select the primary incident type for ROI calculation"
    )
    monthly_slider = gr.Slider(
        minimum=1,
        maximum=50,
        value=15,
        step=1,
        label="Monthly Incidents",
        info="Average number of incidents per month"
    )
    team_slider = gr.Slider(
        minimum=1,
        maximum=50,
        value=5,
        step=1,
        label="Team Size",
        info="Number of engineers on reliability team"
    )
    calculate_btn = gr.Button("π Calculate Comprehensive ROI", variant="primary", size="lg")
    roi_output = gr.JSON(label="ROI Analysis Results", value={})
    roi_chart = gr.Plot(label="ROI Visualization")

    return (dashboard_output, roi_scenario_dropdown, monthly_slider, team_slider,
            calculate_btn, roi_output, roi_chart)
# -----------------------------
# Tab 3: Enterprise Features - UPDATED WITH INSTALLATION STATUS
# -----------------------------
def create_tab3_enterprise_features() -> tuple:
    """Create the components for Tab 3 (enterprise features / installation status).

    Installation details come from ``app.get_installation_status()``. When that
    import fails, static fallback data is shown instead.

    Returns:
        An 8-tuple: ``(license_display, validate_btn, trial_btn, upgrade_btn,
        mcp_mode, mcp_mode_info, features_table, integrations_table)``.

    Raises:
        KeyError: If the installation status dict lacks one of the keys read
            below (only ``ImportError`` triggers the fallback).
    """
    # NOTE(review): status glyphs in the labels look mis-decoded; they are
    # reproduced as found, with embedded line breaks written as "\n" so each
    # literal is a valid single-line string.
    try:
        # Imported inside the function rather than at module level - presumably
        # to avoid a circular import with app.py; confirm.
        from app import get_installation_status
        installation = get_installation_status()

        # Every enterprise-gated feature row shares the same status label.
        if installation["enterprise_installed"]:
            enterprise_gated = "β\nAvailable"
        else:
            enterprise_gated = "π Enterprise"

        license_data = {
            "status": "β\nOSS Installed" if installation["oss_installed"] else "β οΈ OSS Not Installed",
            "oss_version": installation["oss_version"] or "Not installed",
            "enterprise_installed": installation["enterprise_installed"],
            "enterprise_version": installation["enterprise_version"] or "Not installed",
            "execution_allowed": installation["execution_allowed"],
            "recommendations": installation["recommendations"],
            "badges": installation["badges"]
        }

        # Features table reflects the detected installation.
        features_data = [
            ["ARF OSS Package", "β\nInstalled" if installation["oss_installed"] else "β Not Installed", "OSS"],
            ["Self-Healing Core", "β\nActive", "Enterprise"],
            ["RAG Graph Memory", "β\nActive", "Both"],
            ["Predictive Analytics", enterprise_gated, "Enterprise"],
            ["Audit Trail", enterprise_gated, "Enterprise"],
            ["Compliance (SOC2)", enterprise_gated, "Enterprise"]
        ]
    except ImportError as exc:
        # Fallback if the installation check cannot be imported; logged so the
        # degraded display is not silent.
        logger.warning("Installation status check unavailable (%s); showing fallback data", exc)
        license_data = {
            "status": "β οΈ Installation Check Failed",
            "oss_version": "Unknown",
            "enterprise_installed": False,
            "recommendations": ["Run installation check"]
        }
        features_data = [
            ["Self-Healing Core", "β\nActive", "Enterprise"],
            ["RAG Graph Memory", "β\nActive", "Both"],
            ["Predictive Analytics", "π Enterprise", "Enterprise"],
            ["Audit Trail", "π Enterprise", "Enterprise"],
            ["Compliance (SOC2)", "π Enterprise", "Enterprise"],
            ["Multi-Cloud", "π Enterprise", "Enterprise"]
        ]

    license_display = gr.JSON(
        value=license_data,
        label="π¦ Package Installation Status"
    )
    validate_btn = gr.Button("π Validate Installation", variant="secondary")
    trial_btn = gr.Button("π Start 30-Day Trial", variant="secondary")
    upgrade_btn = gr.Button("π Upgrade to Enterprise", variant="primary")
    mcp_mode = gr.Dropdown(
        choices=["advisory", "approval", "autonomous"],
        value="advisory",
        label="MCP Safety Mode"
    )

    # Initial mode info.
    # NOTE(review): the package pin below says 3.3.7 while other components in
    # this file default to 3.3.9 - confirm which is intended.
    mcp_mode_info = gr.JSON(
        value={
            "current_mode": "advisory",
            "description": "OSS Edition - Analysis only, no execution",
            "features": ["Incident analysis", "RAG similarity", "HealingIntent creation"],
            "package": "agentic-reliability-framework==3.3.7",
            "license": "Apache 2.0"
        },
        label="Mode Details"
    )

    integrations_data = [
        ["Prometheus", "β\nConnected", "Monitoring"],
        ["Grafana", "β\nConnected", "Visualization"],
        ["Slack", "π Enterprise", "Notifications"],
        ["PagerDuty", "π Enterprise", "Alerting"],
        ["Jira", "π Enterprise", "Ticketing"],
        ["Datadog", "π Enterprise", "Monitoring"]
    ]
    features_table = gr.Dataframe(
        headers=["Feature", "Status", "Edition"],
        value=features_data,
        label="Feature Comparison"
    )
    integrations_table = gr.Dataframe(
        headers=["Integration", "Status", "Type"],
        value=integrations_data,
        label="Integration Status"
    )

    return (license_display, validate_btn, trial_btn, upgrade_btn,
            mcp_mode, mcp_mode_info, features_table, integrations_table)
# -----------------------------
# Tab 4: Audit Trail
# -----------------------------
def create_tab4_audit_trail() -> tuple:
    """Create the components for Tab 4 (audit trail).

    Returns:
        A 6-tuple: refresh button, clear button, export button, execution
        history table, incident history table, and the export JSON viewer.
    """
    # Components are instantiated in the same order as before; only the local
    # names and the placement of the header lists differ.
    refresh_button = gr.Button("π Refresh Audit Trail", variant="secondary")
    clear_button = gr.Button("ποΈ Clear History", variant="secondary")
    export_button = gr.Button("π₯ Export as JSON", variant="primary")

    # Both history tables start empty.
    execution_history = gr.Dataframe(
        headers=["Time", "Scenario", "Mode", "Status", "Savings", "Details"],
        value=[],
        label="Execution History",
    )
    incident_history = gr.Dataframe(
        headers=["Time", "Component", "Scenario", "Severity", "Status"],
        value=[],
        label="Incident History",
    )

    export_viewer = gr.JSON(
        value={"status": "Export ready"},
        label="Export Data",
    )

    return (
        refresh_button,
        clear_button,
        export_button,
        execution_history,
        incident_history,
        export_viewer,
    )
# -----------------------------
# Tab 5: Learning Engine
# -----------------------------
def create_tab5_learning_engine() -> tuple:
    """Create the components for Tab 5 (learning engine).

    Returns:
        A 10-tuple: memory graph plot, graph-type dropdown, show-labels
        checkbox, search textbox, search button, clear-search button, search
        results viewer, statistics viewer, pattern library viewer, and agent
        performance viewer.
    """
    # Graph and its display controls.
    memory_graph = gr.Plot(label="RAG Memory Graph")
    graph_kinds = ["Incident Patterns", "Action-Outcome Chains", "System Dependencies"]
    graph_selector = gr.Dropdown(
        choices=graph_kinds,
        value="Incident Patterns",
        label="Graph Type",
    )
    labels_checkbox = gr.Checkbox(label="Show Labels", value=True)

    # Pattern search controls.
    query_box = gr.Textbox(label="Search Patterns", placeholder="Enter pattern to search...")
    search_button = gr.Button("π Search Patterns", variant="secondary")
    clear_search_button = gr.Button("ποΈ Clear Search", variant="secondary")

    # Read-only JSON viewers seeded with static placeholder values.
    results_viewer = gr.JSON(
        value={"status": "Ready for search"},
        label="Search Results",
    )
    stats_viewer = gr.JSON(
        value={"patterns": 42, "incidents": 156, "success_rate": "87.3%"},
        label="Learning Statistics",
    )
    patterns_viewer = gr.JSON(
        value={"common_patterns": ["cache_storm", "db_pool", "memory_leak"]},
        label="Pattern Library",
    )
    performance_viewer = gr.JSON(
        value={"accuracy": "94.2%", "recall": "89.7%", "precision": "92.1%"},
        label="Agent Performance",
    )

    return (
        memory_graph,
        graph_selector,
        labels_checkbox,
        query_box,
        search_button,
        clear_search_button,
        results_viewer,
        stats_viewer,
        patterns_viewer,
        performance_viewer,
    )
# -----------------------------
# Realism Panel Component
# -----------------------------
# Status badge per rank as (colour, label); any other rank is treated as rejected.
_RANK_BADGES = {
    1: ("#10b981", "β\nRECOMMENDED"),
    2: ("#f59e0b", "π‘ SECONDARY"),
}
_REJECTED_BADGE = ("#ef4444", "π΄ REJECTED")

# (key, label) pairs for the optional per-action detail lines, in display order.
_ACTION_DETAIL_LABELS = (
    ("risk", "β οΈ Risk: "),
    ("tradeoff", "π Trade-off: "),
    ("execution_time", "β±οΈ Execution: "),
    ("rejection_reason", "π« Rejected: "),
    ("safety_override", "π‘οΈ Safety: "),
)


def _render_ranked_action(action: Dict) -> str:
    """Render one ranked action (requires 'rank', 'confidence', 'action' keys)."""
    # NOTE(review): the colour is looked up but not referenced by the template
    # visible here - confirm whether styling markup was lost from this file.
    _rank_color, status = _RANK_BADGES.get(action["rank"], _REJECTED_BADGE)

    # Optional lines are built outside the f-string: string literals cannot
    # span physical lines, and backslash escapes are not allowed inside
    # f-string expressions on older Python versions. Absent or falsy values
    # render as empty strings, exactly as before.
    detail_block = "\n".join(
        f"\n{label}{action[key]}\n" if action.get(key) else ""
        for key, label in _ACTION_DETAIL_LABELS
    )

    return f"""
{action['rank']}
{status} β’ {action['confidence']}% confidence
{action['action']}
{action['confidence']}%
Rationale: {action.get('rationale', 'No rationale provided')}
{detail_block}
"""


def create_realism_panel(scenario_data: Dict, scenario_name: str) -> gr.HTML:
    """Create the realism panel: ranked actions, risks, and uncertainty.

    Sections are rendered only when the matching key is present and truthy
    under ``scenario_data["realism"]``: ``ranked_actions``,
    ``competing_hypotheses``, ``risk_assessment`` and
    ``confidence_degradation``. The "What ARF Will NOT Do" and ROI-range
    sections are static and always included.

    Args:
        scenario_data: Scenario dict; ``None`` or a missing/empty ``realism``
            entry yields a panel with only the static sections.
        scenario_name: Not used by this function; kept so existing callers
            remain compatible.

    Returns:
        A ``gr.HTML`` component containing the assembled panel text.

    Raises:
        KeyError: If a ranked action, hypothesis, or the confidence
            degradation dict lacks one of its required keys.
    """
    realism = (scenario_data or {}).get("realism") or {}
    ranked_actions = realism.get("ranked_actions") or []

    # Ranked actions, one rendered card per entry.
    actions_html = "".join(_render_ranked_action(action) for action in ranked_actions)

    # Competing hypotheses (only some scenarios define them).
    hypotheses_html = ""
    hypotheses = realism.get("competing_hypotheses")
    if hypotheses:
        hypothesis_parts = ["\nπ§ Competing Hypotheses\n"]
        for hypo in hypotheses:
            hypothesis_parts.append(f"""
?
{hypo['cause']} ({hypo['confidence']}%)
{hypo['evidence']}
Investigation: {hypo['investigation_path']}
""")
        hypothesis_parts.append("\n")
        hypotheses_html = "".join(hypothesis_parts)

    # Risk assessment: one "Title Cased Key / value" pair per entry.
    risk_html = ""
    risk_assessment = realism.get("risk_assessment")
    if risk_assessment:
        risk_parts = ["\nβ οΈ Risk Assessment\n"]
        for key, value in risk_assessment.items():
            risk_parts.append(f"""
{key.replace('_', ' ').title()}
{value}
""")
        risk_parts.append("\n")
        risk_html = "".join(risk_parts)

    # Confidence degradation over time.
    confidence_html = ""
    conf = realism.get("confidence_degradation")
    if conf:
        confidence_html = f"""
β±οΈ Confidence Degradation Over Time
{conf['initial']}%
Initial Confidence
β
{conf['after_8_min']}%
After 8 minutes
β
{conf['after_15_min']}%
After 15 minutes
Escalation at {conf['escalation_threshold']}%
β οΈ ARF escalates to human operators when confidence drops below {conf['escalation_threshold']}%
This prevents autonomous execution in high-uncertainty scenarios
"""

    # "What ARF Will NOT Do" panel (same for every scenario).
    wont_do_html = """
π« What ARF Will NOT Do (Safety Boundaries)
β
Restart stateful leaders
During peak traffic or elections
β
Apply schema changes
To production databases autonomously
β
Exceed API limits
Contractual or rate limits
β
Modify ACLs/RBAC
Security permissions autonomously
These boundaries ensure ARF operates within safe, reversible limits.
Enterprise edition adds approval workflows for edge cases.
"""

    # Combine all panels.
    full_html = f"""
π― Ranked Healing Intents
ARF evaluates multiple options with confidence scores and risk assessments
REALISM UPGRADE v3.3.9+
{actions_html}
{hypotheses_html}
{risk_html}
{confidence_html}
{wont_do_html}
π Realistic ROI Estimates (Ranges)
$5.8K β $7.2K
Cost Avoided
Estimated range
4.8Γ β 5.6Γ
ROI Multiplier
Confidence interval
68% β 87%
Success Rate
Based on similar incidents
π Real systems have ranges, not single-point estimates. ARF shows uncertainty honestly.
π
What Senior SREs Expect at 3 a.m.
"Real systems hesitate. Real systems explain risk. Real systems earn trust.
ARF shows multiple options with confidence scores because in production,
there's never a single perfect answerβjust trade-offs managed carefully."
"""
    return gr.HTML(full_html)
# -----------------------------
# Footer
# -----------------------------
def create_footer() -> gr.HTML:
    """Build the footer component (copyright line and tagline).

    Returns:
        A ``gr.HTML`` component wrapping the static footer text.
    """
    # Template lines are kept flush-left so the rendered text is unchanged.
    footer_markup = """
Agentic Reliability Framework Β© 2026
Production-grade multi-agent AI for autonomous system reliability intelligence
"""
    return gr.HTML(footer_markup)