"""
Gradio-only UI components for ARF
Ensures full compatibility with app.py
Updated with proper imports and error handling
NOW WITH REAL ARF INSTALLATION DETECTION
"""
import gradio as gr
from typing import Dict, List, Any
import logging
logger = logging.getLogger(__name__)
# Populate INCIDENT_SCENARIOS at import time.
# The registry is the preferred source; the bundled demo scenarios are the
# fallback when the registry module (or something it imports) is missing.
try:
    from config.scenario_registry import ScenarioRegistry
    INCIDENT_SCENARIOS = ScenarioRegistry.load_scenarios()
    logger.info(f"Loaded {len(INCIDENT_SCENARIOS)} scenarios from registry")
except ImportError:
    # Only ImportError triggers the fallback; any other exception raised by
    # load_scenarios() propagates and aborts the import of this module.
    logger.warning("Scenario registry not available, falling back to demo scenarios")
    from demo.scenarios import INCIDENT_SCENARIOS
# -----------------------------
# Header & Status - UPDATED WITH INSTALLATION CHECK
# -----------------------------
def create_header(version="3.3.7", mock_mode=False) -> gr.HTML:
    """
    Build the page header component.

    Args:
        version: Version string interpolated after the leading "v".
        mock_mode: When true, a " MOCK MODE" suffix is appended to the
            version line.

    Returns:
        A gr.HTML component holding the title, version line, tagline and
        the installation badges.
    """
    # Suffix shown only when the app runs against the mock implementation.
    if mock_mode:
        mode_suffix = " Β· MOCK MODE"
    else:
        mode_suffix = ""

    # The badges come from app.py when it is importable; otherwise a static
    # placeholder is used. The import is local, as in the original.
    try:
        from app import get_installation_badges
        badges_markup = get_installation_badges()
    except ImportError:
        badges_markup = """
β οΈ Mock ARF
π Enterprise Required
"""

    header_markup = f"""
π Agentic Reliability Framework
v{version} (OSS Edition){mode_suffix}
Production-grade multi-agent AI for autonomous system reliability intelligence
{badges_markup}
"""
    return gr.HTML(header_markup)
def create_status_bar() -> gr.HTML:
    """
    Build the status bar component.

    The OSS and Enterprise entries are rendered from the "badges" mapping
    returned by app.get_installation_status(); each badge is read through
    its 'icon' and 'text' keys. If the import fails, static placeholder
    entries are shown instead.

    Returns:
        A gr.HTML component holding the status bar content.
    """
    def _badge_markup(badge):
        # Render one badge as "<icon> <text>" on its own line.
        return f"""
{badge['icon']} {badge['text']}
"""

    try:
        from app import get_installation_status
        status = get_installation_status()
        # Look both badges up before rendering either one.
        oss_badge, enterprise_badge = status["badges"]["oss"], status["badges"]["enterprise"]
        oss_markup = _badge_markup(oss_badge)
        enterprise_markup = _badge_markup(enterprise_badge)
    except ImportError:
        oss_markup = """
β οΈ Mock ARF
"""
        enterprise_markup = """
π Enterprise Required
"""

    bar_markup = f"""
β
System Online
π§ Agentic Core Active
{oss_markup}
{enterprise_markup}
π° Enterprise ROI: 5.2Γ
"""
    return gr.HTML(bar_markup)
# -----------------------------
# Tab 1: Live Incident Demo - UPDATED TO USE INLINE STYLES AND PROPER PLOTLY HANDLING
# -----------------------------
def create_tab1_incident_demo(scenarios=INCIDENT_SCENARIOS, default_scenario="Cache Miss Storm") -> tuple:
    """
    Build the three-column "Live Incident Demo" tab.

    Layout (all created inside a single gr.Row):
      * left column   - scenario dropdown, scenario card, telemetry and
                        business-impact plots;
      * middle column - agent status cards, OSS/Enterprise boundary panels,
                        run buttons, safety controls and the timeline plot;
      * right column  - metric cards, OSS/Enterprise JSON result panes,
                        approval status and the walkthrough button.

    Args:
        scenarios: Mapping of scenario name to scenario data. Only the keys
            'severity', 'component', 'business_impact' and 'metrics' of the
            selected scenario are read here.
        default_scenario: Name of the scenario that is pre-selected. When it
            is missing from ``scenarios`` the card falls back to placeholder
            values.

    Returns:
        A 23-tuple, in this order:
        (scenario_dropdown, scenario_card, telemetry_viz, impact_viz,
         workflow_header, detection_agent, recall_agent, decision_agent,
         oss_section, enterprise_section, oss_btn, enterprise_btn,
         approval_toggle, mcp_mode, timeline_viz,
         detection_time, mttr, auto_heal, savings,
         oss_results_display, enterprise_results_display, approval_display,
         demo_btn)
    """
    # Get the default scenario data
    default_scenario_data = scenarios.get(default_scenario, {})
    business_impact = default_scenario_data.get("business_impact", {})
    metrics = default_scenario_data.get("metrics", {})

    # Savings shown on the "Cost Saved" card, in thousands: 85% of the hourly
    # revenue loss. Kept as a float so the one-decimal format below is
    # meaningful (truncating to int first always produced "X.0K").
    estimated_savings_k = business_impact.get('revenue_loss_per_hour', 8500) * 0.85 / 1000

    with gr.Row():
        # Left Column: Scenario Selection & Live Visualization
        with gr.Column(scale=1, variant="panel"):
            # Scenario Selection with rich preview
            scenario_dropdown = gr.Dropdown(
                choices=list(scenarios.keys()),
                value=default_scenario,
                label="π― Select Incident Scenario",
                info="Choose a production incident to analyze",
                interactive=True,
                container=False
            )
            # Scenario Card with rich information - USING INLINE STYLES
            scenario_card = gr.HTML(f"""
π¨ {default_scenario}
{default_scenario_data.get('severity', 'HIGH')}
Component:
{default_scenario_data.get('component', 'Unknown').replace('_', ' ').title()}
Affected Users:
{metrics.get('affected_users', 'Unknown')}
Revenue Risk:
${business_impact.get('revenue_loss_per_hour', 0):,}/hour
Detection Time:
45 seconds (ARF AI)
{default_scenario_data.get('component', 'unknown').split('_')[0]}
{default_scenario_data.get('severity', 'high').lower()}
production
incident
""")
            # Visualization section - USING gr.Plot() FOR PLOTLY FIGURES
            with gr.Row():
                with gr.Column(scale=1):
                    telemetry_header = gr.Markdown("### π Live Telemetry")
                    # This expects a Plotly figure from app.py
                    telemetry_viz = gr.Plot(
                        label="",
                        show_label=False,
                        elem_id="telemetry_plot"
                    )
                with gr.Column(scale=1):
                    impact_header = gr.Markdown("### π° Business Impact")
                    impact_viz = gr.Plot(
                        label="",
                        show_label=False,
                        elem_id="impact_plot"
                    )
        # Middle Column: Agent Workflow
        with gr.Column(scale=2, variant="panel"):
            # Agent Workflow Header
            workflow_header = gr.Markdown("## π ARF Agent Workflow")
            workflow_subheader = gr.Markdown("### How ARF transforms incidents into autonomous healing")
            # Agent Status Cards - USING INLINE STYLES
            with gr.Row():
                detection_agent = gr.HTML("""
π΅οΈββοΈ
Detection Agent
Click "Run OSS Analysis" to activate
Status: Inactive
WAITING
""")
                recall_agent = gr.HTML("""
π§
Recall Agent
Click "Run OSS Analysis" to activate
Status: Inactive
WAITING
""")
                decision_agent = gr.HTML("""
π―
Decision Agent
Click "Run OSS Analysis" to activate
Status: Inactive
WAITING
""")
            # OSS vs Enterprise Boundary Visualization
            boundary_header = gr.Markdown("### π OSS vs Enterprise: The Safety Boundary")
            with gr.Row():
                oss_section = gr.HTML("""
π
OSS Edition
Apache 2.0
Analysis & Advisory Only - No execution, permanently safe
π Healing Intent Created
94% confidence
Action: Scale Redis cluster from 3 to 5 nodes
Pattern Match: Similar incident resolved with scaling (87% success rate)
Safety Check: β
Passed (blast radius: 2 services)
Estimated Impact: Reduce MTTR from 45min to 12min
π« OSS STOPS HERE - No execution
""")
                enterprise_section = gr.HTML("""
π°
Enterprise Edition
Commercial
Full Execution & Learning - Autonomous healing with safety guarantees
β‘ Ready to Execute
AUTONOMOUS
Mode: Autonomous (Requires Enterprise license)
Expected Recovery: 12 minutes (vs 45 min manual)
Cost Saved: $6,375
Users Protected: 45,000 β 0 impacted
β
Enterprise executes with MCP safety
""")
            # Execution Controls
            with gr.Row():
                with gr.Column(scale=1):
                    oss_btn = gr.Button(
                        "π Run OSS Analysis",
                        variant="secondary",
                        size="lg"
                    )
                    oss_info = gr.Markdown("*Free, open-source analysis*")
                with gr.Column(scale=1):
                    enterprise_btn = gr.Button(
                        "π° Execute Enterprise Healing",
                        variant="primary",
                        size="lg"
                    )
                    enterprise_info = gr.Markdown("*Requires Enterprise license*")
            # Mode Selection & Safety Controls
            with gr.Row():
                with gr.Column(scale=1):
                    approval_toggle = gr.CheckboxGroup(
                        choices=["π€ Require Human Approval"],
                        label="Safety Controls",
                        value=[],
                        info="Toggle human oversight"
                    )
                with gr.Column(scale=2):
                    mcp_mode = gr.Radio(
                        choices=["π‘οΈ Advisory (OSS Only)", "π₯ Approval", "β‘ Autonomous"],
                        value="π‘οΈ Advisory (OSS Only)",
                        label="MCP Safety Mode",
                        info="Control execution safety level",
                        interactive=True
                    )
            # Timeline visualization
            timeline_header = gr.Markdown("### β° Incident Timeline")
            timeline_viz = gr.Plot(
                label="",
                show_label=False,
                elem_id="timeline_plot"
            )
        # Right Column: Results & Metrics
        with gr.Column(scale=1, variant="panel"):
            # Real-time Metrics Dashboard
            metrics_header = gr.Markdown("## π Performance Metrics")
            # Metric Cards Grid - USING INLINE STYLES
            with gr.Row():
                detection_time = gr.HTML("""
β±οΈ
Detection Time
45s
β 89% faster than average
""")
                mttr = gr.HTML("""
β‘
Mean Time to Resolve
12m
β 73% faster than manual
""")
            with gr.Row():
                auto_heal = gr.HTML("""
π€
Auto-Heal Rate
81.7%
β 5.4Γ industry average
""")
                savings = gr.HTML(f"""
π°
Cost Saved
${estimated_savings_k:.1f}K
Per incident avoided
""")
            # Results Display Areas
            oss_results_header = gr.Markdown("### π OSS Analysis Results")
            oss_results_display = gr.JSON(
                label="",
                value={
                    "status": "Analysis Pending",
                    "agents": ["Detection", "Recall", "Decision"],
                    "mode": "Advisory Only",
                    "action": "Generate HealingIntent"
                },
                height=200
            )
            enterprise_results_header = gr.Markdown("### π° Enterprise Results")
            enterprise_results_display = gr.JSON(
                label="",
                value={
                    "status": "Execution Pending",
                    "requires_license": True,
                    "available_modes": ["Approval", "Autonomous"],
                    "expected_outcome": "12m MTTR, $6.3K saved"
                },
                height=200
            )
            # Approval Status - USING INLINE STYLES
            approval_display = gr.HTML("""
π€ Human Approval Status
Not Required
Current Mode: Advisory (OSS Only)
Switch to "Approval" mode to enable human-in-the-loop workflows
1. ARF generates intent
2. Human reviews & approves
3. ARF executes safely
""")
            # Demo Actions
            demo_btn = gr.Button(
                "βΆοΈ Run Complete Demo Walkthrough",
                variant="secondary",
                size="lg"
            )
            demo_info = gr.Markdown("*Experience the full ARF workflow from detection to resolution*")
    return (
        # Left column returns
        scenario_dropdown, scenario_card, telemetry_viz, impact_viz,
        # Middle column returns
        workflow_header, detection_agent, recall_agent, decision_agent,
        oss_section, enterprise_section, oss_btn, enterprise_btn,
        approval_toggle, mcp_mode, timeline_viz,
        # Right column returns
        detection_time, mttr, auto_heal, savings,
        oss_results_display, enterprise_results_display, approval_display, demo_btn
    )
# -----------------------------
# Tab 2: Business ROI - Updated
# -----------------------------
def create_tab2_business_roi(scenarios=INCIDENT_SCENARIOS, default_scenario="Cache Miss Storm") -> tuple:
    """
    Build the components of the "Business ROI" tab.

    Args:
        scenarios: Mapping of scenario name to scenario data; only its keys
            are used, as the dropdown choices.
        default_scenario: Scenario pre-selected in the dropdown. If it is not
            one of the keys of ``scenarios``, the first available scenario is
            selected instead (or nothing when ``scenarios`` is empty), so the
            dropdown never starts with a value outside its own choices.

    Returns:
        (dashboard_output, roi_scenario_dropdown, monthly_slider, team_slider,
         calculate_btn, roi_output, roi_chart)
    """
    scenario_names = list(scenarios.keys())
    if default_scenario in scenarios:
        initial_scenario = default_scenario
    else:
        # The previously hard-coded default may not exist in a custom
        # scenario set; fall back to something that is a valid choice.
        initial_scenario = scenario_names[0] if scenario_names else None

    dashboard_output = gr.Plot(label="Executive Dashboard", show_label=True)
    roi_scenario_dropdown = gr.Dropdown(
        choices=scenario_names,
        value=initial_scenario,
        label="Scenario for ROI Analysis",
        info="Select the primary incident type for ROI calculation"
    )
    monthly_slider = gr.Slider(
        minimum=1,
        maximum=50,
        value=15,
        step=1,
        label="Monthly Incidents",
        info="Average number of incidents per month"
    )
    team_slider = gr.Slider(
        minimum=1,
        maximum=50,
        value=5,
        step=1,
        label="Team Size",
        info="Number of engineers on reliability team"
    )
    calculate_btn = gr.Button("π Calculate Comprehensive ROI", variant="primary", size="lg")
    roi_output = gr.JSON(label="ROI Analysis Results", value={})
    roi_chart = gr.Plot(label="ROI Visualization")
    return (dashboard_output, roi_scenario_dropdown, monthly_slider, team_slider,
            calculate_btn, roi_output, roi_chart)
# -----------------------------
# Tab 3: Enterprise Features - UPDATED WITH INSTALLATION STATUS
# -----------------------------
def create_tab3_enterprise_features() -> tuple:
    """
    Build the components of the "Enterprise Features" tab.

    The installation summary and the feature table are derived from
    app.get_installation_status() when ``app`` can be imported; on
    ImportError a static fallback is used. The two branches do not produce
    the same rows or the same ``license_data`` keys.

    NOTE(review): in this copy of the file several single-quoted status
    literals appear split across two lines; confirm against the original
    file encoding before editing them.

    Returns:
        (license_display, validate_btn, trial_btn, upgrade_btn,
         mcp_mode, mcp_mode_info, features_table, integrations_table)
    """
    # Get installation status
    try:
        from app import get_installation_status
        installation = get_installation_status()
        # Keys are read with direct indexing: a status dict missing any of
        # them raises KeyError, which is NOT covered by the fallback below.
        license_data = {
            "status": "β
OSS Installed" if installation["oss_installed"] else "β οΈ OSS Not Installed",
            "oss_version": installation["oss_version"] or "Not installed",
            "enterprise_installed": installation["enterprise_installed"],
            "enterprise_version": installation["enterprise_version"] or "Not installed",
            "execution_allowed": installation["execution_allowed"],
            "recommendations": installation["recommendations"],
            "badges": installation["badges"]
        }
        # Update features table based on installation
        # Rows are [feature, status, edition]; the last three rows depend on
        # whether the enterprise package is reported as installed.
        features_data = [
            ["ARF OSS Package", "β
Installed" if installation["oss_installed"] else "β Not Installed", "OSS"],
            ["Self-Healing Core", "β
Active", "Enterprise"],
            ["RAG Graph Memory", "β
Active", "Both"],
            ["Predictive Analytics", "π Enterprise" if not installation["enterprise_installed"] else "β
Available", "Enterprise"],
            ["Audit Trail", "π Enterprise" if not installation["enterprise_installed"] else "β
Available", "Enterprise"],
            ["Compliance (SOC2)", "π Enterprise" if not installation["enterprise_installed"] else "β
Available", "Enterprise"]
        ]
    except ImportError:
        # Fallback if installation check fails
        license_data = {
            "status": "β οΈ Installation Check Failed",
            "oss_version": "Unknown",
            "enterprise_installed": False,
            "recommendations": ["Run installation check"]
        }
        features_data = [
            ["Self-Healing Core", "β
Active", "Enterprise"],
            ["RAG Graph Memory", "β
Active", "Both"],
            ["Predictive Analytics", "π Enterprise", "Enterprise"],
            ["Audit Trail", "π Enterprise", "Enterprise"],
            ["Compliance (SOC2)", "π Enterprise", "Enterprise"],
            ["Multi-Cloud", "π Enterprise", "Enterprise"]
        ]
    license_display = gr.JSON(
        value=license_data,
        label="π¦ Package Installation Status"
    )
    validate_btn = gr.Button("π Validate Installation", variant="secondary")
    trial_btn = gr.Button("π Start 30-Day Trial", variant="secondary")
    upgrade_btn = gr.Button("π Upgrade to Enterprise", variant="primary")
    mcp_mode = gr.Dropdown(
        choices=["advisory", "approval", "autonomous"],
        value="advisory",
        label="MCP Safety Mode"
    )
    # Initial mode info
    mcp_mode_info = gr.JSON(
        value={
            "current_mode": "advisory",
            "description": "OSS Edition - Analysis only, no execution",
            "features": ["Incident analysis", "RAG similarity", "HealingIntent creation"],
            "package": "agentic-reliability-framework==3.3.7",
            "license": "Apache 2.0"
        },
        label="Mode Details"
    )
    # Static integration rows: [integration, status, type].
    integrations_data = [
        ["Prometheus", "β
Connected", "Monitoring"],
        ["Grafana", "β
Connected", "Visualization"],
        ["Slack", "π Enterprise", "Notifications"],
        ["PagerDuty", "π Enterprise", "Alerting"],
        ["Jira", "π Enterprise", "Ticketing"],
        ["Datadog", "π Enterprise", "Monitoring"]
    ]
    features_table = gr.Dataframe(
        headers=["Feature", "Status", "Edition"],
        value=features_data,
        label="Feature Comparison"
    )
    integrations_table = gr.Dataframe(
        headers=["Integration", "Status", "Type"],
        value=integrations_data,
        label="Integration Status"
    )
    return (license_display, validate_btn, trial_btn, upgrade_btn,
            mcp_mode, mcp_mode_info, features_table, integrations_table)
# -----------------------------
# Tab 4: Audit Trail
# -----------------------------
def create_tab4_audit_trail() -> tuple:
    """
    Build the components of the "Audit Trail" tab.

    Components are instantiated in the same order as they are returned:
    three action buttons, two initially empty history tables, and a JSON
    pane for exported data.

    Returns:
        (refresh_btn, clear_btn, export_btn,
         execution_table, incident_table, export_text)
    """
    # Action buttons.
    btn_refresh = gr.Button("π Refresh Audit Trail", variant="secondary")
    btn_clear = gr.Button("ποΈ Clear History", variant="secondary")
    btn_export = gr.Button("π₯ Export as JSON", variant="primary")

    # History tables start without rows.
    tbl_executions = gr.Dataframe(
        headers=["Time", "Scenario", "Mode", "Status", "Savings", "Details"],
        value=[],
        label="Execution History",
    )
    tbl_incidents = gr.Dataframe(
        headers=["Time", "Component", "Scenario", "Severity", "Status"],
        value=[],
        label="Incident History",
    )

    # Pane for the exported payload.
    export_pane = gr.JSON(
        label="Export Data",
        value={"status": "Export ready"},
    )

    return (
        btn_refresh,
        btn_clear,
        btn_export,
        tbl_executions,
        tbl_incidents,
        export_pane,
    )
# -----------------------------
# Tab 5: Learning Engine
# -----------------------------
def create_tab5_learning_engine() -> tuple:
    """
    Build the components of the "Learning Engine" tab.

    Creates the memory-graph plot with its controls, the pattern search
    widgets, and three JSON panes pre-filled with static values.

    Returns:
        (learning_graph, graph_type, show_labels, search_query, search_btn,
         clear_btn_search, search_results, stats_display, patterns_display,
         performance_display)
    """
    # Graph and its display controls.
    graph_plot = gr.Plot(label="RAG Memory Graph")
    graph_kind = gr.Dropdown(
        label="Graph Type",
        choices=["Incident Patterns", "Action-Outcome Chains", "System Dependencies"],
        value="Incident Patterns",
    )
    labels_toggle = gr.Checkbox(value=True, label="Show Labels")

    # Pattern search widgets.
    query_box = gr.Textbox(placeholder="Enter pattern to search...", label="Search Patterns")
    run_search = gr.Button("π Search Patterns", variant="secondary")
    reset_search = gr.Button("ποΈ Clear Search", variant="secondary")
    results_pane = gr.JSON(label="Search Results", value={"status": "Ready for search"})

    # Static seed values for the three summary panes.
    seed_stats = {"patterns": 42, "incidents": 156, "success_rate": "87.3%"}
    stats_pane = gr.JSON(label="Learning Statistics", value=seed_stats)

    seed_patterns = {"common_patterns": ["cache_storm", "db_pool", "memory_leak"]}
    patterns_pane = gr.JSON(label="Pattern Library", value=seed_patterns)

    seed_performance = {"accuracy": "94.2%", "recall": "89.7%", "precision": "92.1%"}
    performance_pane = gr.JSON(label="Agent Performance", value=seed_performance)

    return (
        graph_plot,
        graph_kind,
        labels_toggle,
        query_box,
        run_search,
        reset_search,
        results_pane,
        stats_pane,
        patterns_pane,
        performance_pane,
    )
# -----------------------------
# Realism Panel: ranked actions, risks and uncertainty
# -----------------------------
def create_realism_panel(scenario_data: Dict, scenario_name: str) -> gr.HTML:
    """
    Create a realism panel showing ranked actions, risks, and uncertainty.

    The panel is assembled from the optional ``"realism"`` entry of
    ``scenario_data``. Sections for competing hypotheses, risk assessment
    and confidence degradation are emitted only when the corresponding key
    is present and truthy; the safety-boundaries and ROI-range sections are
    static text.

    NOTE(review): in this copy of the file several string literals inside
    the f-strings appear split across lines; confirm against the original
    file before editing them.

    Args:
        scenario_data: Scenario mapping; only ``scenario_data["realism"]`` is
            read. Each ranked action must provide 'rank', 'confidence' and
            'action' (indexed directly, so a missing key raises KeyError);
            'rationale', 'risk', 'tradeoff', 'execution_time',
            'rejection_reason' and 'safety_override' are optional.
        scenario_name: Not referenced in the function body.

    Returns:
        A gr.HTML component containing the combined panel.
    """
    realism = scenario_data.get("realism", {})
    ranked_actions = realism.get("ranked_actions", [])
    # Build ranked actions HTML
    actions_html = ""
    for action in ranked_actions:
        # Rank 1 and rank 2 get their own colour/label; every other rank is
        # treated as rejected.
        rank_color = "#10b981" if action["rank"] == 1 else "#f59e0b" if action["rank"] == 2 else "#ef4444"
        status = "β
RECOMMENDED" if action["rank"] == 1 else "π‘ SECONDARY" if action["rank"] == 2 else "π΄ REJECTED"
        actions_html += f"""
{action['rank']}
{status} β’ {action['confidence']}% confidence
{action['action']}
{action['confidence']}%
Rationale: {action.get('rationale', 'No rationale provided')}
{"
β οΈ Risk: " + action['risk'] + "
" if action.get('risk') else ""}
{"
π Trade-off: " + action['tradeoff'] + "
" if action.get('tradeoff') else ""}
{"
β±οΈ Execution: " + action['execution_time'] + "
" if action.get('execution_time') else ""}
{"
π« Rejected: " + action['rejection_reason'] + "
" if action.get('rejection_reason') else ""}
{"
π‘οΈ Safety: " + action['safety_override'] + "
" if action.get('safety_override') else ""}
"""
    # Build competing hypotheses (for Network Partition scenario)
    hypotheses_html = ""
    if realism.get("competing_hypotheses"):
        hypotheses_html = """
π§ Competing Hypotheses
"""
        # Each hypothesis is indexed directly for 'cause', 'confidence',
        # 'evidence' and 'investigation_path'.
        for hypo in realism["competing_hypotheses"]:
            hypotheses_html += f"""
?
{hypo['cause']} ({hypo['confidence']}%)
{hypo['evidence']}
Investigation: {hypo['investigation_path']}
"""
        hypotheses_html += "
"
    # Build risk assessment panel
    risk_html = ""
    if realism.get("risk_assessment"):
        risk_html = """
β οΈ Risk Assessment
"""
        # Keys are shown in title case with underscores replaced by spaces.
        for key, value in realism["risk_assessment"].items():
            risk_html += f"""
{key.replace('_', ' ').title()}
{value}
"""
        risk_html += "
"
    # Build confidence degradation panel
    confidence_html = ""
    if realism.get("confidence_degradation"):
        # Requires 'initial', 'after_8_min', 'after_15_min' and
        # 'escalation_threshold' (all indexed directly).
        conf = realism["confidence_degradation"]
        confidence_html = f"""
β±οΈ Confidence Degradation Over Time
{conf['initial']}%
Initial Confidence
β
{conf['after_8_min']}%
After 8 minutes
β
{conf['after_15_min']}%
After 15 minutes
Escalation at {conf['escalation_threshold']}%
β οΈ ARF escalates to human operators when confidence drops below {conf['escalation_threshold']}%
This prevents autonomous execution in high-uncertainty scenarios
"""
    # Build "What ARF Will NOT Do" panel (global)
    wont_do_html = """
π« What ARF Will NOT Do (Safety Boundaries)
β
Restart stateful leaders
During peak traffic or elections
β
Apply schema changes
To production databases autonomously
β
Exceed API limits
Contractual or rate limits
β
Modify ACLs/RBAC
Security permissions autonomously
These boundaries ensure ARF operates within safe, reversible limits.
Enterprise edition adds approval workflows for edge cases.
"""
    # Combine all panels
    full_html = f"""
π― Ranked Healing Intents
ARF evaluates multiple options with confidence scores and risk assessments
REALISM UPGRADE v3.3.9+
{actions_html}
{hypotheses_html}
{risk_html}
{confidence_html}
{wont_do_html}
π Realistic ROI Estimates (Ranges)
$5.8K β $7.2K
Cost Avoided
Estimated range
4.8Γ β 5.6Γ
ROI Multiplier
Confidence interval
68% β 87%
Success Rate
Based on similar incidents
π Real systems have ranges, not single-point estimates. ARF shows uncertainty honestly.
π
What Senior SREs Expect at 3 a.m.
"Real systems hesitate. Real systems explain risk. Real systems earn trust.
ARF shows multiple options with confidence scores because in production,
there's never a single perfect answerβjust trade-offs managed carefully."
"""
    return gr.HTML(full_html)
# -----------------------------
# Footer
# -----------------------------
def create_footer() -> gr.HTML:
    """
    Build the page footer component.

    Returns:
        A gr.HTML component with the copyright line and the tagline.
    """
    footer_markup = """
Agentic Reliability Framework Β© 2025
Production-grade multi-agent AI for autonomous system reliability intelligence
"""
    return gr.HTML(footer_markup)