"""
Gradio-only UI components for ARF
Ensures full compatibility with app.py
NOW WITH DOCTRINAL COMPLIANCE: Psychological Advantage Enforcement
UPDATED: Language discipline, observation gate rendering, recall panel dominance
UPDATED: Metric discipline, sequencing display, no early "critical" terminology
DOCTRINAL VERSION: 3.3.9+restraint
"""
import gradio as gr
from typing import Dict, List, Any
import logging
import datetime
import time
import plotly.graph_objects as go
import plotly.express as px
import numpy as np
logger = logging.getLogger(__name__)
# Try to import scenarios from registry first.
# Prefer the central ScenarioRegistry; if that package is absent (e.g. a
# stripped-down demo install), fall back to the bundled demo scenarios so the
# UI can still render. Both paths bind the module-level INCIDENT_SCENARIOS.
try:
    from config.scenario_registry import ScenarioRegistry
    INCIDENT_SCENARIOS = ScenarioRegistry.load_scenarios()
    logger.info(f"Loaded {len(INCIDENT_SCENARIOS)} scenarios from registry")
except ImportError:
    logger.warning("Scenario registry not available, falling back to demo scenarios")
    from demo.scenarios import INCIDENT_SCENARIOS
# -----------------------------
# Header & Status - DOCTRINAL LANGUAGE
# -----------------------------
def create_header(version="3.3.9") -> gr.HTML:
    """Render the static top-of-page ARF banner.

    Args:
        version: Version string interpolated into the banner text.

    Returns:
        gr.HTML component with the banner markup.
    """
    return gr.HTML(f"""
v{version} (Policy + Enterprise Edition)
Production-grade policy execution for system reliability intelligence
ποΈ Architecture: OSS advises β Enterprise executes
""")
def create_status_bar() -> gr.HTML:
    """Render the static status bar shown under the header.

    All status values are hard-coded display text; nothing here is live.
    """
    return gr.HTML("""
β
Policy System Online
β
ARF OSS v3.3.9
π’ Enterprise Execution
""")
# -----------------------------
# CRITICAL RESTORED FUNCTIONS - Missing from current version
# -----------------------------
def create_observation_gate_placeholder(confidence: float = 65.0, threshold: float = 70.0) -> gr.HTML:
    """Observation gate that demonstrates psychological restraint.

    Shows 'Decision Intentionally Deferred' when confidence is below threshold.
    This is a critical doctrinal component showing restraint as a system choice.

    Args:
        confidence: Current confidence percentage (0-100 scale).
        threshold: Gate threshold percentage; below it the gate defers action.
            Defaults to the doctrinal 70.0.

    Returns:
        gr.HTML panel describing the gate state (active restraint or cleared).
    """
    if confidence < threshold:
        html_content = f"""
β οΈ
OBSERVATION GATE ACTIVE
System restraint engaged
Decision Intentionally Deferred
The system has detected uncertainty ({confidence:.1f}% confidence) and has chosen to observe rather than act.
This is not a limitationβit is a psychological advantage demonstrating restraint.
Historical evidence suggests premature action increases risk by 47%.
Confidence threshold: {threshold:.1f}%
{confidence:.1f}% confidence
"""
    else:
        # Bug fix: this branch previously hard-coded "85.0% confidence";
        # report the actual confidence that cleared the gate instead.
        html_content = f"""
β
OBSERVATION GATE CLEARED
Confidence threshold met
Proceed with Policy Action
Confidence exceeds threshold. System may proceed with sequenced actions.
Historical evidence will be consulted before any execution.
Confidence threshold: {threshold:.1f}%
{confidence:.1f}% confidence
"""
    return gr.HTML(html_content)
def create_sequencing_visualization() -> gr.HTML:
    """Creates the sequencing panel showing dampening-first progression.
    Doctrinal: Shows sequencing as policy, not reaction.

    Static panel: renders the fixed four-step sequence
    (Dampening -> Concurrency -> Observe -> Scale) in its initial
    "0/4" state; it is not wired to live data here.
    """
    # Static markup; the step ordering below mirrors the doctrinal
    # sequencing rule and must not be reordered.
    return gr.HTML("""
π Doctrinal Sequencing: Policy Over Reaction
System enforces sequencing regardless of prediction confidence
POLICY ENFORCED
1
Dampening
Prevent amplification first
REQUIRED
β
2
Concurrency
Manage load, then observe
REQUIRED
β
3
Observe
Validate trends for 5+ minutes
REQUIRED
β
4
Scale
Only if all previous succeed
OPTIONAL
π―
Doctrinal Constraint: Scaling Cannot Appear First
If retry amplification is detected, scaling is contraindicated entirely.
The system must observe stabilization before considering capacity increases.
Historical evidence shows scaling-first fails 76% of the time during amplification.
Current Sequence State
Waiting for detection process
π Sequence: 0/4
β±οΈ Time: 0s
""")
def create_detection_display() -> gr.HTML:
    """Creates detection process HTML display.

    All figures (0.8s detection, 12 metrics, 3 patterns) are hard-coded
    demo values, not live telemetry.
    """
    return gr.HTML("""
π΅οΈββοΈ
Detection Process
Telemetry analysis & pattern recognition
STATUS: ACTIVE
Pattern Match
Retry Amplification
Detection Time
0.8 seconds
β
Detected: Retry amplification pattern with exponential growth (r=1.8)
Telemetry shows request rate doubling every 45 seconds. System flagged for sequencing enforcement.
Next Step:
Activate recall process
π 12 metrics
β±οΈ 0.8s latency
π 3 patterns
""")
def create_recall_display() -> gr.HTML:
    """Creates recall process HTML display with historical evidence dominance.

    The incident records (dates, environments, outcomes) are hard-coded demo
    content illustrating the recall-dominance doctrine; they are not fetched
    from a real incident store.
    """
    return gr.HTML("""
π§
Recall Process
Historical evidence & pattern matching
STATUS: ACTIVE
π RECALL DOMINANCE: Historical Evidence > Predictive Confidence
β Scaling-First Failures
2024-11-15 β’ prod-east
FAILED
Action: Scale during retry storm
Outcome: Amplification increased 300%
"Scaling during amplification worsens the problem"
2024-09-22 β’ staging
FAILED
Action: Add capacity without dampening
Outcome: 45 min outage, $8.2K loss
"New capacity consumed by amplification loop"
β
Dampening-First Successes
2024-12-03 β’ prod-west
SUCCESS
Action: Request coalescing + backoff
Outcome: Resolved in 8 min, $5.1K saved
"Dampening broke amplification cycle"
2024-10-17 β’ prod-eu
SUCCESS
Action: Circuit breaker + observability
Outcome: 12 min recovery, 0 user impact
"Sequencing prevented escalation"
π― RECALL DECISION: Scaling contraindicated due to historical evidence
Historical evidence (76% failure rate) dominates predictive confidence (92%).
System will enforce dampening-first sequencing.
Evidence Weight:
Historical: 85% β’ Predictive: 15%
π 8 incidents
π― 76% failure rate
π recall dominance
""")
def create_decision_display() -> gr.HTML:
    """Creates decision process HTML display.

    Static demo card showing a formal HealingIntent (preconditions,
    contraindications, reversibility) with hard-coded confidence values.
    """
    return gr.HTML("""
π―
Decision Process
HealingIntent creation & sequencing
STATUS: ACTIVE
Formal HealingIntent Created
Preconditions checked, contraindications listed
CONFIDENCE: 87.3%
Primary Action
Implement request coalescing with exponential backoff (jitter: 25%)
Sequencing Rule
dampening_first_then_observe_then_optional_scale
Preconditions
β’ Retry amplification detected
β’ Confidence > 70%
β’ No scaling contraindicated
Contraindications
β Scale during retry storm
β Add capacity immediately
β Restart during amplification
Reversibility
β
Backoff can be adjusted
β
Coalescing can be disabled
β
No stateful changes
β³
Observation Gate: ACTIVE (65.0% confidence)
System will observe for 5 minutes before proceeding to execution
0%
Threshold: 70%
100%
Next Step:
Wait for observation gate clearance
π formal intent
π sequenced
π― 87% confidence
""")
def create_oss_advisory_section() -> gr.HTML:
    """Creates OSS advisory section showing policy vs execution boundary.

    Static panel; emphasizes that the OSS edition stops at advice and
    never executes actions (the architectural safety boundary).
    """
    return gr.HTML("""
π
Policy Edition
Analysis & Advisory Only - Apache 2.0
PERMANENTLY SAFE
π HealingIntent Created
94% confidence
β
Action Recommended
Implement request coalescing with exponential backoff
π§
Pattern Match
Similar incident resolved with dampening (87% success rate)
β οΈ
Contraindications
β
Checked (retry amplification detected)
π
Sequencing Rule
dampening_first_then_observe_then_optional_scale
π« OSS STOPS HERE - No Execution
OSS provides policy advice only. Enterprise edition required for execution.
This architectural boundary ensures safety by design.
""")
def create_timeline_comparison_plot() -> go.Figure:
    """Creates timeline comparison plot for incident resolution.

    Plots hard-coded demo series (users affected over time) for manual vs
    ARF-driven resolution, with event markers for each pipeline phase.

    Returns:
        A configured plotly Figure (no I/O; safe to call eagerly).
    """
    fig = go.Figure()
    # Demo timeline data: minutes elapsed vs users still affected.
    manual_times = [0, 5, 15, 30, 45, 60]
    manual_users = [45000, 45000, 42000, 35000, 20000, 5000]
    arf_times = [0, 0.8, 1.5, 3, 8, 12]
    arf_users = [45000, 45000, 45000, 42000, 15000, 0]
    # Add traces. Fix: the hovertemplate strings were broken across lines;
    # plotly hovertemplates use "<br>" for line breaks, restored here.
    fig.add_trace(go.Scatter(
        x=manual_times,
        y=manual_users,
        mode='lines+markers',
        name='Manual Resolution',
        line=dict(color='#ef4444', width=3, dash='dash'),
        marker=dict(size=8, color='#ef4444'),
        hovertemplate='Time: %{x}min<br>Users Affected: %{y:,}'
    ))
    fig.add_trace(go.Scatter(
        x=arf_times,
        y=arf_users,
        mode='lines+markers',
        name='ARF Policy Execution',
        line=dict(color='#10b981', width=4),
        marker=dict(size=10, color='#10b981'),
        hovertemplate='Time: %{x}min<br>Users Affected: %{y:,}'
    ))
    # Vertical markers for the four pipeline phases.
    fig.add_vline(x=0.8, line_width=2, line_dash="dot", line_color="#3b82f6",
                  annotation_text="Detection", annotation_position="top right")
    fig.add_vline(x=1.5, line_width=2, line_dash="dot", line_color="#8b5cf6",
                  annotation_text="Recall", annotation_position="top right")
    fig.add_vline(x=3, line_width=2, line_dash="dot", line_color="#f59e0b",
                  annotation_text="Decision", annotation_position="top right")
    fig.add_vline(x=8, line_width=2, line_dash="dot", line_color="#10b981",
                  annotation_text="Resolution", annotation_position="top right")
    # Layout: horizontal legend above the plot, light theme.
    fig.update_layout(
        title=dict(
            text="β° Incident Timeline: Manual vs Policy Execution",
            font=dict(size=18, color='#1e293b'),
            x=0.5
        ),
        xaxis=dict(
            title="Time (minutes)",
            gridcolor='#e2e8f0',
            tickformat=',d'
        ),
        yaxis=dict(
            title="Users Affected",
            gridcolor='#e2e8f0',
            tickformat=','
        ),
        plot_bgcolor='white',
        paper_bgcolor='white',
        font=dict(family="Inter, sans-serif", color="#475569"),
        hovermode='x unified',
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="center",
            x=0.5,
            bgcolor='rgba(255, 255, 255, 0.8)',
            bordercolor='#e2e8f0',
            borderwidth=1
        ),
        margin=dict(l=50, r=50, t=80, b=50),
        height=400
    )
    # Savings call-out at the ARF resolution point. Fix: annotation text was
    # split across lines; rejoined with "<br>" (plotly annotations take HTML).
    fig.add_annotation(
        x=12,
        y=0,
        text="π 73% faster resolution<br>π° $6.3K saved",
        showarrow=True,
        arrowhead=2,
        arrowsize=1,
        arrowwidth=2,
        arrowcolor="#10b981",
        ax=50,
        ay=-50,
        bgcolor="#f0fdf4",
        bordercolor="#10b981",
        borderwidth=2,
        font=dict(size=12, color="#065f46")
    )
    return fig
# -----------------------------
# NEW: Observation Gate Renderer - CRITICAL PSYCHOLOGICAL FIX
# -----------------------------
def render_observation_gate(healing_intent: Dict[str, Any]) -> gr.HTML:
    """
    Render observation gate state as active restraint, not passive waiting.
    Doctrinal: Make inaction an explicit, powerful decision.

    Args:
        healing_intent: Intent dict; reads "deferral_reason",
            "decision_frozen_until" (ISO-8601 timestamp, optional trailing Z)
            and "confidence" (fraction, rendered via :.1%).

    Returns:
        gr.HTML panel describing the deferral and its countdown.
    """
    deferral_reason = healing_intent.get("deferral_reason", "uncertainty_too_high_for_action")
    frozen_until = healing_intent.get("decision_frozen_until", "")
    confidence = healing_intent.get("confidence", 0.0)
    # Compute a human-readable countdown until re-evaluation. Left empty when
    # no freeze timestamp is provided (matches previous behavior).
    countdown_text = ""
    if frozen_until:
        try:
            frozen_dt = datetime.datetime.fromisoformat(frozen_until.replace("Z", "+00:00"))
            now = datetime.datetime.now(datetime.timezone.utc)
            if frozen_dt.tzinfo is None:
                # Naive timestamps are treated as UTC for the comparison.
                frozen_dt = frozen_dt.replace(tzinfo=datetime.timezone.utc)
            time_left = frozen_dt - now
            minutes_left = max(0, int(time_left.total_seconds() / 60))
            countdown_text = f"{minutes_left}m"
        except (ValueError, TypeError, AttributeError):
            # Fix: was a bare `except:`. Malformed/non-string timestamps fall
            # back to the default 5-minute observation window.
            countdown_text = "5m"
    # Fix: corrected "CONTRANDICATED" typo in the rendered text.
    return gr.HTML(f"""
β³
Decision Intentionally Deferred
System state: observe_only β’ Confidence: {confidence:.1%}
ACTIVE RESTRAINT
REASON FOR DEFERRAL
{deferral_reason.replace('_', ' ').title()}
NEXT EVALUATION
System re-evaluates in: {countdown_text}
π―
This is a System Choice, Not a Limitation
The system is choosing not to act because uncertainty exceeds policy thresholds.
This restraint demonstrates operational maturityβeagerness is a liability in production.
"What you are seeing is not waiting. It is judgment under uncertainty."
PREVENTED ACTIONS (CONTRAINDICATED)
scale_during_retry_storm
add_capacity_during_amplification
any_action_during_high_uncertainty
""")
# -----------------------------
# NEW: Historical Evidence Panel - RECALL DOMINANCE
# -----------------------------
def create_historical_evidence_panel(scenario_data: Dict[str, Any]) -> gr.HTML:
    """
    Create doctrinally compliant historical evidence panel.
    Must be visually dominant with dates/environments.

    Args:
        scenario_data: Scenario dict; reads "historical_evidence_panel" with
            optional "scaling_first_failures" / "dampening_first_successes"
            lists of incident records (date/environment/action/outcome/lesson).

    Returns:
        gr.HTML panel; shows placeholder text when either list is empty.
    """
    # Extract from scenario or use defaults
    historical_panel = scenario_data.get("historical_evidence_panel", {})
    scaling_failures = historical_panel.get("scaling_first_failures", [])
    dampening_successes = historical_panel.get("dampening_first_successes", [])
    # Build failures HTML (top 3 only, to keep the panel scannable).
    failures_html = ""
    for failure in scaling_failures[:3]:
        failures_html += f"""
{failure.get('date', 'Unknown')} β’ {failure.get('environment', 'Unknown')}
FAILED
Action: {failure.get('action', 'Unknown')}
Outcome: {failure.get('outcome', 'Unknown')}
{failure.get('lesson', 'No lesson captured')}
"""
    # Build successes HTML (top 3 only).
    successes_html = ""
    for success in dampening_successes[:3]:
        successes_html += f"""
{success.get('date', 'Unknown')} β’ {success.get('environment', 'Unknown')}
SUCCESS
Action: {success.get('action', 'Unknown')}
Outcome: {success.get('outcome', 'Unknown')}
{success.get('lesson', 'No lesson captured')}
"""
    # Fix: the empty-state fallbacks were triple-quoted literals nested inside
    # f-string replacement fields, which re-use the outer quotes and are a
    # syntax error before Python 3.12 (PEP 701). Precompute them instead.
    failures_block = failures_html if failures_html else """
π
No scaling failure evidence in memory
"""
    successes_block = successes_html if successes_html else """
π
No dampening success evidence in memory
"""
    return gr.HTML(f"""
π§ Historical Evidence (Why Sequencing Matters)
Real outcomes from similar incidentsβthis evidence dominates decision logic
RECALL DOMINANCE: POLICY OVER PREDICTION
β Scaling-First Failures
{failures_block}
β
Dampening-First Successes
{successes_block}
π―
Doctrinal Principle: Memory Dominates Models
The system prioritizes historical evidence over predictive confidence.
If scaling-first failed in similar conditions, scaling is contraindicated regardless of model confidence.
"What happened is more important than what might happen."
""")
# -----------------------------
# Performance Metrics Function - DOCTRINAL METRICS
# -----------------------------
# 1. First, update the update_performance_metrics function to return what app.py expects:
def update_performance_metrics(scenario_name: str, scenarios=None) -> tuple:
    """
    Update performance metrics based on scenario.

    Args:
        scenario_name: Display name of the selected scenario; matched
            case-insensitively against known scenario families.
        scenarios: Unused; retained for backward compatibility with callers
            that pass a scenario mapping. Fix: previously defaulted to the
            module-level INCIDENT_SCENARIOS (a mutable dict evaluated at
            import time) even though the value was never read.

    Returns:
        (detection_time_html, recall_quality_html, confidence_score_html,
        sequencing_stage_html) — MUST MATCH what app.py demo_btn.click() expects!
    """
    # Scenario-specific metrics mapping WITH GRADIENT COLORS.
    # Each tuple is (headline value, sub-caption, CSS gradient).
    metrics_config = {
        "Cache": {
            "detection_time": ("45s", "89% faster", "linear-gradient(135deg, #3b82f6 0%, #1d4ed8 100%)"),
            "recall_quality": ("92%", "85% accuracy", "linear-gradient(135deg, #10b981 0%, #047857 100%)"),
            "confidence_score": ("87%", "High certainty", "linear-gradient(135deg, #8b5cf6 0%, #7c3aed 100%)"),
            "sequencing_stage": ("Dampening", "Step 1/4", "linear-gradient(135deg, #f59e0b 0%, #d97706 100%)"),
            "savings_multiplier": 0.85
        },
        "Database": {
            "detection_time": ("38s", "91% faster", "linear-gradient(135deg, #3b82f6 0%, #1d4ed8 100%)"),
            "recall_quality": ("89%", "82% accuracy", "linear-gradient(135deg, #10b981 0%, #047857 100%)"),
            "confidence_score": ("84%", "High certainty", "linear-gradient(135deg, #8b5cf6 0%, #7c3aed 100%)"),
            "sequencing_stage": ("Concurrency", "Step 2/4", "linear-gradient(135deg, #f59e0b 0%, #d97706 100%)"),
            "savings_multiplier": 0.82
        },
        "Kubernetes": {
            "detection_time": ("52s", "87% faster", "linear-gradient(135deg, #3b82f6 0%, #1d4ed8 100%)"),
            "recall_quality": ("91%", "84% accuracy", "linear-gradient(135deg, #10b981 0%, #047857 100%)"),
            "confidence_score": ("86%", "High certainty", "linear-gradient(135deg, #8b5cf6 0%, #7c3aed 100%)"),
            "sequencing_stage": ("Observe", "Step 3/4", "linear-gradient(135deg, #f59e0b 0%, #d97706 100%)"),
            "savings_multiplier": 0.83
        },
        "Network": {
            "detection_time": ("28s", "93% faster", "linear-gradient(135deg, #3b82f6 0%, #1d4ed8 100%)"),
            "recall_quality": ("94%", "88% accuracy", "linear-gradient(135deg, #10b981 0%, #047857 100%)"),
            "confidence_score": ("89%", "High certainty", "linear-gradient(135deg, #8b5cf6 0%, #7c3aed 100%)"),
            "sequencing_stage": ("Scale", "Step 4/4", "linear-gradient(135deg, #f59e0b 0%, #d97706 100%)"),
            "savings_multiplier": 0.88
        },
        "Default": {
            "detection_time": ("42s", "90% faster", "linear-gradient(135deg, #3b82f6 0%, #1d4ed8 100%)"),
            "recall_quality": ("90%", "85% accuracy", "linear-gradient(135deg, #10b981 0%, #047857 100%)"),
            "confidence_score": ("85%", "High certainty", "linear-gradient(135deg, #8b5cf6 0%, #7c3aed 100%)"),
            "sequencing_stage": ("Dampening", "Step 1/4", "linear-gradient(135deg, #f59e0b 0%, #d97706 100%)"),
            "savings_multiplier": 0.85
        }
    }
    # Determine scenario family from the free-form scenario name.
    scenario_type = "Default"
    for key in ("Cache", "Database", "Kubernetes", "Network", "Storage"):
        if key.lower() in scenario_name.lower():
            scenario_type = key
            break
    # "Storage" has no dedicated entry, so .get() falls back to Default.
    metrics = metrics_config.get(scenario_type, metrics_config["Default"])
    # Create HTML for each metric card WITH GRADIENT BORDERS
    detection_time_html = f"""
β±οΈ
Detection Time
{metrics['detection_time'][0]}
{metrics['detection_time'][1]} than baseline
"""
    recall_quality_html = f"""
π§
Recall Quality
{metrics['recall_quality'][0]}
{metrics['recall_quality'][1]}
"""
    confidence_score_html = f"""
π―
Confidence Score
{metrics['confidence_score'][0]}
{metrics['confidence_score'][1]}
"""
    sequencing_stage_html = f"""
π
Sequencing Stage
{metrics['sequencing_stage'][0]}
{metrics['sequencing_stage'][1]}
"""
    # Fix: the log message was a string literal broken across lines. Uses
    # lazy %-formatting; getLogger(__name__) is the same module logger.
    logging.getLogger(__name__).info(
        "β Updated performance metrics for %s (%s type)", scenario_name, scenario_type
    )
    return detection_time_html, recall_quality_html, confidence_score_html, sequencing_stage_html
# 2. Update create_tab1_incident_demo to use the SAME variable names:
def create_tab1_incident_demo(scenarios=INCIDENT_SCENARIOS, default_scenario="Cache Miss Storm") -> tuple:
    """
    Create doctrinally compliant incident demo tab.
    Doctrinal: Language discipline, sequencing display, no early "critical"

    NOTE(review): DEAD CODE. This file defines create_tab1_incident_demo a
    second time further below; in Python the later definition wins at import
    time, so this version is never callable. The two variants differ only in
    the right-column metric components (recall_quality/confidence_score/
    sequencing_stage here vs mttr/auto_heal/savings below). Confirm which
    variant app.py's demo_btn.click() wiring expects and delete the other.
    """
    # Get the default scenario data
    default_scenario_data = scenarios.get(default_scenario, {})
    business_impact = default_scenario_data.get("business_impact", {})
    metrics = default_scenario_data.get("metrics", {})
    with gr.Row():
        # Left Column: Scenario Selection & Live Visualization
        with gr.Column(scale=1, variant="panel") as left_col:
            # Scenario Selection with rich preview
            scenario_dropdown = gr.Dropdown(
                choices=list(scenarios.keys()),
                value=default_scenario,
                label="π― Select Variance Scenario",
                info="Choose a production variance pattern to analyze",
                interactive=True,
                container=False
            )
            # ============ HISTORICAL EVIDENCE PANEL FIRST (RECALL DOMINANCE) ============
            historical_panel = create_historical_evidence_panel(default_scenario_data)
            # Scenario Card with doctrinally compliant language
            scenario_card = gr.HTML(f"""
π {default_scenario}
{default_scenario_data.get('severity', 'HIGH_VARIANCE')}
Component:
{default_scenario_data.get('component', 'Unknown').replace('_', ' ').title()}
Users Affected:
{metrics.get('affected_users', 'Unknown') if 'affected_users' in metrics else 'Unknown'}
Revenue Risk:
${business_impact.get('revenue_risk_per_hour', 0):,}/hour
Detection Time:
45 seconds (Policy System)
{default_scenario_data.get('component', 'unknown').split('_')[0]}
variance
production
pattern
""")
            # Visualization section
            with gr.Row():
                with gr.Column(scale=1):
                    telemetry_header = gr.Markdown("### π Live Telemetry")
                    telemetry_viz = gr.Plot(
                        label="",
                        show_label=False,
                        elem_id="telemetry_plot"
                    )
                with gr.Column(scale=1):
                    impact_header = gr.Markdown("### π° Business Impact")
                    impact_viz = gr.Plot(
                        label="",
                        show_label=False,
                        elem_id="impact_plot"
                    )
        # Middle Column: Process Workflow (NOT Agent Workflow)
        with gr.Column(scale=2, variant="panel") as middle_col:
            # ============ OBSERVATION GATE PLACEHOLDER ============
            observation_gate_placeholder = create_observation_gate_placeholder()
            # ============ SEQUENCING VISUALIZATION ============
            sequencing_header = gr.Markdown("### π Sequencing Logic: Dampening β Concurrency β Observe β Scale")
            sequencing_panel = create_sequencing_visualization()
            # Process Workflow Header (NOT Agent Workflow)
            workflow_header = gr.Markdown("## π Policy Process Workflow")
            workflow_subheader = gr.Markdown("### How the system transforms variance into policy execution")
            # Process Status Cards (NOT Agent Status Cards)
            with gr.Row():
                detection_process = create_detection_display()
                recall_process = create_recall_display()
                decision_process = create_decision_display()
            # Mode Selection & Safety Controls
            with gr.Row():
                with gr.Column(scale=1):
                    approval_toggle = gr.CheckboxGroup(
                        choices=["π€ Require Human Approval"],
                        label="Safety Controls",
                        value=[],
                        info="Toggle human oversight"
                    )
                with gr.Column(scale=2):
                    mcp_mode = gr.Radio(
                        choices=["π‘οΈ Advisory (OSS Only)", "π₯ Approval", "β‘ Autonomous"],
                        value="π‘οΈ Advisory (OSS Only)",
                        label="Policy Safety Mode",
                        info="Control execution safety level",
                        interactive=True
                    )
            # OSS vs Enterprise Boundary Visualization
            boundary_header = gr.Markdown("### π Policy vs Execution: The Safety Boundary")
            with gr.Row():
                oss_section = create_oss_advisory_section()
                enterprise_section = gr.HTML("""
π°
Execution Edition
Full Execution & Learning - Commercial
REQUIRES LICENSE
β‘ Ready to Execute
AUTONOMOUS
βοΈ
Mode
Autonomous (Requires Enterprise license)
β‘
Expected Recovery
12 minutes (vs 45 min manual)
π₯
Users Protected
45,000 β 0 impacted
β
Enterprise executes with MCP safety
Enterprise edition adds execution, learning, and safety guarantees.
MCP safety modes: Advisory β Approval β Autonomous
""")
            # Execution Controls
            with gr.Row():
                with gr.Column(scale=1):
                    oss_btn = gr.Button(
                        "π Run Policy Analysis",
                        variant="secondary",
                        size="lg"
                    )
                    oss_info = gr.Markdown("*Free, policy-only analysis*")
                with gr.Column(scale=1):
                    enterprise_btn = gr.Button(
                        "π° Execute Enterprise Healing",
                        variant="primary",
                        size="lg"
                    )
                    enterprise_info = gr.Markdown("*Requires Enterprise license*")
            # Timeline visualization
            timeline_header = gr.Markdown("### β° Incident Timeline")
            timeline_viz = gr.Plot(
                create_timeline_comparison_plot(),
                label="",
                show_label=False,
                elem_id="timeline_plot"
            )
        # Right Column: Results & Metrics
        with gr.Column(scale=1, variant="panel") as right_col:
            # Real-time Metrics Dashboard
            metrics_header = gr.Markdown("## π Performance Metrics")
            # Metric Cards Grid - MUST MATCH app.py demo_btn.click() expectations!
            detection_time = gr.HTML()
            recall_quality = gr.HTML()  # Changed from mttr to match demo_btn.click()
            confidence_score = gr.HTML()  # Changed from auto_heal to match demo_btn.click()
            sequencing_stage = gr.HTML()  # Changed from savings to match demo_btn.click()
            # Results Display Areas
            oss_results_header = gr.Markdown("### π Policy Analysis Results")
            oss_results_display = gr.JSON(
                label="",
                value={
                    "status": "Analysis Pending",
                    "processes": ["Detection", "Recall", "Decision"],
                    "mode": "Advisory Only",
                    "action": "Generate Formal HealingIntent"
                },
                height=200
            )
            enterprise_results_header = gr.Markdown("### π° Execution Results")
            enterprise_results_display = gr.JSON(
                label="",
                value={
                    "status": "Execution Pending",
                    "requires_license": True,
                    "available_modes": ["Approval", "Autonomous"],
                    "expected_outcome": "12m MTTR, $6.3K saved"
                },
                height=200
            )
            # Approval Status
            approval_display = gr.HTML("""
π€ Human Approval Status
Not Required
Current Mode: Advisory (Policy Only)
Switch to "Approval" mode to enable human-in-the-loop workflows
1. System generates formal HealingIntent
2. Human reviews & approves contraindications
3. System executes with sequencing constraints
""")
            # Demo Actions
            demo_btn = gr.Button(
                "βΆοΈ Run Complete Walkthrough",
                variant="secondary",
                size="lg"
            )
            demo_info = gr.Markdown("*Experience the full workflow from detection to resolution*")
    # CRITICAL: Return EXACTLY 26 values that match app.py expectations
    return (
        # Left column returns (5 values)
        scenario_dropdown, historical_panel, scenario_card, telemetry_viz, impact_viz,
        # Middle column returns (13 values)
        observation_gate_placeholder, sequencing_panel, workflow_header, detection_process,
        recall_process, decision_process, oss_section, enterprise_section, oss_btn, enterprise_btn,
        approval_toggle, mcp_mode, timeline_viz,
        # Right column returns (8 values) - MUST MATCH demo_btn.click() expectations!
        detection_time, recall_quality, confidence_score, sequencing_stage,  # Changed to match app.py
        oss_results_display, enterprise_results_display, approval_display, demo_btn
    )
# -----------------------------
# Tab 1: Live Incident Demo - UPDATED WITH RESTORED FUNCTIONS
# -----------------------------
def create_tab1_incident_demo(scenarios=INCIDENT_SCENARIOS, default_scenario="Cache Miss Storm") -> tuple:
    """
    Create doctrinally compliant incident demo tab.
    Doctrinal: Language discipline, sequencing display, no early "critical"

    NOTE(review): this is a DUPLICATE definition — an earlier copy of this
    function exists above in this file and is silently shadowed by this one.
    This variant returns mttr/auto_heal/savings metric slots; the shadowed
    variant returns recall_quality/confidence_score/sequencing_stage instead.
    Confirm against app.py's demo_btn.click() wiring and remove the dead copy.

    Args:
        scenarios: Mapping of scenario name -> scenario dict.
        default_scenario: Scenario pre-selected in the dropdown.

    Returns:
        Tuple of exactly 26 Gradio components, in the order app.py unpacks.
    """
    # Get the default scenario data
    default_scenario_data = scenarios.get(default_scenario, {})
    business_impact = default_scenario_data.get("business_impact", {})
    metrics = default_scenario_data.get("metrics", {})
    with gr.Row():
        # Left Column: Scenario Selection & Live Visualization
        with gr.Column(scale=1, variant="panel") as left_col:
            # Scenario Selection with rich preview
            scenario_dropdown = gr.Dropdown(
                choices=list(scenarios.keys()),
                value=default_scenario,
                label="π― Select Variance Scenario",
                info="Choose a production variance pattern to analyze",
                interactive=True,
                container=False
            )
            # ============ HISTORICAL EVIDENCE PANEL FIRST (RECALL DOMINANCE) ============
            historical_panel = create_historical_evidence_panel(default_scenario_data)
            # Scenario Card with doctrinally compliant language
            scenario_card = gr.HTML(f"""
π {default_scenario}
{default_scenario_data.get('severity', 'HIGH_VARIANCE')}
Component:
{default_scenario_data.get('component', 'Unknown').replace('_', ' ').title()}
Users Affected:
{metrics.get('affected_users', 'Unknown') if 'affected_users' in metrics else 'Unknown'}
Revenue Risk:
${business_impact.get('revenue_risk_per_hour', 0):,}/hour
Detection Time:
45 seconds (Policy System)
{default_scenario_data.get('component', 'unknown').split('_')[0]}
variance
production
pattern
""")
            # Visualization section
            with gr.Row():
                with gr.Column(scale=1):
                    telemetry_header = gr.Markdown("### π Live Telemetry")
                    telemetry_viz = gr.Plot(
                        label="",
                        show_label=False,
                        elem_id="telemetry_plot"
                    )
                with gr.Column(scale=1):
                    impact_header = gr.Markdown("### π° Business Impact")
                    impact_viz = gr.Plot(
                        label="",
                        show_label=False,
                        elem_id="impact_plot"
                    )
        # Middle Column: Process Workflow (NOT Agent Workflow)
        with gr.Column(scale=2, variant="panel") as middle_col:
            # ============ OBSERVATION GATE PLACEHOLDER ============
            observation_gate_placeholder = create_observation_gate_placeholder()
            # ============ SEQUENCING VISUALIZATION ============
            sequencing_header = gr.Markdown("### π Sequencing Logic: Dampening β Concurrency β Observe β Scale")
            sequencing_panel = create_sequencing_visualization()
            # Process Workflow Header (NOT Agent Workflow)
            workflow_header = gr.Markdown("## π Policy Process Workflow")
            workflow_subheader = gr.Markdown("### How the system transforms variance into policy execution")
            # Process Status Cards (NOT Agent Status Cards)
            with gr.Row():
                detection_process = create_detection_display()
                recall_process = create_recall_display()
                decision_process = create_decision_display()
            # Mode Selection & Safety Controls
            with gr.Row():
                with gr.Column(scale=1):
                    approval_toggle = gr.CheckboxGroup(
                        choices=["π€ Require Human Approval"],
                        label="Safety Controls",
                        value=[],
                        info="Toggle human oversight"
                    )
                with gr.Column(scale=2):
                    mcp_mode = gr.Radio(
                        choices=["π‘οΈ Advisory (OSS Only)", "π₯ Approval", "β‘ Autonomous"],
                        value="π‘οΈ Advisory (OSS Only)",
                        label="Policy Safety Mode",
                        info="Control execution safety level",
                        interactive=True
                    )
            # OSS vs Enterprise Boundary Visualization
            boundary_header = gr.Markdown("### π Policy vs Execution: The Safety Boundary")
            with gr.Row():
                oss_section = create_oss_advisory_section()
                enterprise_section = gr.HTML("""
π°
Execution Edition
Full Execution & Learning - Commercial
REQUIRES LICENSE
β‘ Ready to Execute
AUTONOMOUS
βοΈ
Mode
Autonomous (Requires Enterprise license)
β‘
Expected Recovery
12 minutes (vs 45 min manual)
π₯
Users Protected
45,000 β 0 impacted
β
Enterprise executes with MCP safety
Enterprise edition adds execution, learning, and safety guarantees.
MCP safety modes: Advisory β Approval β Autonomous
""")
            # Execution Controls
            with gr.Row():
                with gr.Column(scale=1):
                    oss_btn = gr.Button(
                        "π Run Policy Analysis",
                        variant="secondary",
                        size="lg"
                    )
                    oss_info = gr.Markdown("*Free, policy-only analysis*")
                with gr.Column(scale=1):
                    enterprise_btn = gr.Button(
                        "π° Execute Enterprise Healing",
                        variant="primary",
                        size="lg"
                    )
                    enterprise_info = gr.Markdown("*Requires Enterprise license*")
            # Timeline visualization
            timeline_header = gr.Markdown("### β° Incident Timeline")
            timeline_viz = gr.Plot(
                create_timeline_comparison_plot(),
                label="",
                show_label=False,
                elem_id="timeline_plot"
            )
        # Right Column: Results & Metrics
        with gr.Column(scale=1, variant="panel") as right_col:
            # Real-time Metrics Dashboard
            metrics_header = gr.Markdown("## π Performance Metrics")
            # Metric Cards Grid - MUST MATCH app.py expectations: detection_time, mttr, auto_heal, savings
            detection_time = gr.HTML()
            mttr = gr.HTML()  # Mean Time to Resolve
            auto_heal = gr.HTML()  # Auto-heal rate
            savings = gr.HTML()  # Cost savings
            # Results Display Areas
            oss_results_header = gr.Markdown("### π Policy Analysis Results")
            oss_results_display = gr.JSON(
                label="",
                value={
                    "status": "Analysis Pending",
                    "processes": ["Detection", "Recall", "Decision"],
                    "mode": "Advisory Only",
                    "action": "Generate Formal HealingIntent"
                },
                height=200
            )
            enterprise_results_header = gr.Markdown("### π° Execution Results")
            enterprise_results_display = gr.JSON(
                label="",
                value={
                    "status": "Execution Pending",
                    "requires_license": True,
                    "available_modes": ["Approval", "Autonomous"],
                    "expected_outcome": "12m MTTR, $6.3K saved"
                },
                height=200
            )
            # Approval Status
            approval_display = gr.HTML("""
π€ Human Approval Status
Not Required
Current Mode: Advisory (Policy Only)
Switch to "Approval" mode to enable human-in-the-loop workflows
1. System generates formal HealingIntent
2. Human reviews & approves contraindications
3. System executes with sequencing constraints
""")
            # Demo Actions
            demo_btn = gr.Button(
                "βΆοΈ Run Complete Walkthrough",
                variant="secondary",
                size="lg"
            )
            demo_info = gr.Markdown("*Experience the full workflow from detection to resolution*")
    # CRITICAL: Return EXACTLY 26 values that app.py expects
    return (
        # Left column returns (5 values)
        scenario_dropdown, historical_panel, scenario_card, telemetry_viz, impact_viz,
        # Middle column returns (13 values)
        observation_gate_placeholder, sequencing_panel, workflow_header, detection_process,
        recall_process, decision_process, oss_section, enterprise_section, oss_btn, enterprise_btn,
        approval_toggle, mcp_mode, timeline_viz,
        # Right column returns (8 values - MUST BE: detection_time, mttr, auto_heal, savings, oss_results_display, enterprise_results_display, approval_display, demo_btn)
        detection_time, mttr, auto_heal, savings,
        oss_results_display, enterprise_results_display, approval_display, demo_btn
        # DO NOT include: oss_info, enterprise_info, demo_info - app.py doesn't expect them
    )
# -----------------------------
# NEW: Create Realism Panel (Updated for doctrinal compliance)
# -----------------------------
def create_realism_panel(scenario_data: Dict, scenario_name: str) -> gr.HTML:
    """
    Create doctrinally compliant realism panel.
    Updated to show formal HealingIntent fields and sequencing logic.

    Args:
        scenario_data: Scenario dict; reads its "ranked_actions" list, where
            each action has rank/category/confidence plus optional
            preconditions, contraindicated_actions, reversibility_statement,
            historical_evidence, and constraints.
        scenario_name: Not referenced in this body; retained for interface
            compatibility with callers.

    Returns:
        gr.HTML panel listing ranked HealingIntents and the sequencing footer.
    """
    ranked_actions = scenario_data.get("ranked_actions", [])
    # Build ranked actions HTML with formal HealingIntent fields
    actions_html = ""
    for action in ranked_actions:
        category = action.get("category", "unknown")
        # NOTE(review): these colors are not interpolated into the template
        # below (the inline-style markup appears to have been stripped from
        # this file); kept for when the styled template is restored.
        category_color = {
            "dampening": "#3b82f6",
            "concurrency_control": "#10b981",
            "observation": "#8b5cf6",
            "scaling": "#f59e0b"
        }.get(category, "#64748b")
        rank_color = "#3b82f6" if action["rank"] == 1 else "#f59e0b" if action["rank"] == 2 else "#64748b"
        # Fix: the first status literal was broken across two lines.
        status = "β RECOMMENDED" if action["rank"] == 1 else "π‘ SECONDARY" if action["rank"] == 2 else "π΄ CONTRAINDICATED"
        # Formal HealingIntent fields. Fix: the bullet-list joins used
        # single-quoted literals containing raw newlines (a syntax error);
        # items are now joined outside the f-string replacement fields.
        preconditions_html = ""
        if action.get("preconditions"):
            pre_items = "\n".join(f"β’ {pre}" for pre in action["preconditions"])
            preconditions_html = f"""
Preconditions:
{pre_items}
"""
        contraindications_html = ""
        if action.get("contraindicated_actions"):
            contra_items = "\n".join(f"β {contra}" for contra in action["contraindicated_actions"])
            contraindications_html = f"""
Contraindicated Actions:
{contra_items}
"""
        reversibility_html = ""
        if action.get("reversibility_statement"):
            reversibility_html = f"""
Reversibility Statement:
{action["reversibility_statement"]}
"""
        historical_evidence_html = ""
        if action.get("historical_evidence"):
            evidence_items = "\n".join(f"π {evidence}" for evidence in action["historical_evidence"])
            historical_evidence_html = f"""
Historical Evidence:
{evidence_items}
"""
        actions_html += f"""
{action['rank']}
{status} β’ {action.get('confidence', 0)}% confidence
{category.upper().replace('_', ' ')}
{action.get('action', 'No action specified')}
{action.get('confidence', 0)}%
{preconditions_html}
{contraindications_html}
{reversibility_html}
{historical_evidence_html}
Sequencing: {action.get('category', 'unknown').replace('_', ' ')} β’ {action.get('constraints', ['No constraints'])[0]}
"""
    # Fix: the empty-state fallback was a multi-line single-quoted literal
    # inside the f-string (syntax error); precompute it instead.
    actions_block = actions_html if actions_html else "\nNo ranked actions available\n"
    # Combine all panels
    full_html = f"""
π― Formal HealingIntent Sequence
Policy-generated intents with preconditions, contraindications, and reversibility statements
DOCTRINAL COMPLIANCE v3.3.9+
{actions_block}
π Doctrinal Sequencing Enforcement
1
Dampening
First in sequence
2
Concurrency
Then control
4
Scale
Only if necessary
Doctrinal Constraint: Scaling actions have lower confidence than dampening actions and appear last.
If retry amplification is detected, scaling is contraindicated entirely.
"""
    return gr.HTML(full_html)
# -----------------------------
# Tab 2: Business ROI - Updated
# -----------------------------
def create_tab2_business_roi(scenarios=INCIDENT_SCENARIOS) -> tuple:
    """Build the Business ROI tab components.

    Args:
        scenarios: Mapping of scenario name -> scenario definition used to
            populate the ROI dropdown. Defaults to the registry/demo
            scenarios loaded at module import time.

    Returns:
        Tuple of Gradio components in layout order:
        (dashboard_output, roi_scenario_dropdown, monthly_slider,
         team_slider, calculate_btn, roi_output, roi_chart)
    """
    dashboard_output = gr.Plot(label="Executive Dashboard", show_label=True)
    scenario_names = list(scenarios.keys())
    # Fix: the default was hard-coded to "Cache Miss Storm", which breaks the
    # dropdown when the scenario registry does not contain that key. Prefer it
    # when present, otherwise fall back to the first available scenario.
    if "Cache Miss Storm" in scenarios:
        default_scenario = "Cache Miss Storm"
    else:
        default_scenario = scenario_names[0] if scenario_names else None
    roi_scenario_dropdown = gr.Dropdown(
        choices=scenario_names,
        value=default_scenario,
        label="Scenario for ROI Analysis",
        info="Select the primary incident type for ROI calculation"
    )
    monthly_slider = gr.Slider(
        minimum=1,
        maximum=50,
        value=15,
        step=1,
        label="Monthly Incidents",
        info="Average number of incidents per month"
    )
    team_slider = gr.Slider(
        minimum=1,
        maximum=50,
        value=5,
        step=1,
        label="Team Size",
        info="Number of engineers on reliability team"
    )
    calculate_btn = gr.Button("π Calculate Comprehensive ROI", variant="primary", size="lg")
    roi_output = gr.JSON(label="ROI Analysis Results", value={})
    roi_chart = gr.Plot(label="ROI Visualization")
    return (dashboard_output, roi_scenario_dropdown, monthly_slider, team_slider,
            calculate_btn, roi_output, roi_chart)
# -----------------------------
# Tab 3: Enterprise Features - UPDATED WITH INSTALLATION STATUS
# -----------------------------
def create_tab3_enterprise_features() -> tuple:
    """Build the Enterprise Features tab, reflecting live installation status.

    Queries ``app.get_installation_status()`` (imported lazily to avoid a
    circular import at module load time) and renders package/feature/
    integration status; degrades to static fallback data when the check is
    unavailable.

    Returns:
        Tuple of Gradio components in layout order:
        (license_display, validate_btn, trial_btn, upgrade_btn,
         mcp_mode, mcp_mode_info, features_table, integrations_table)
    """
    try:
        from app import get_installation_status
        installation = get_installation_status()
        license_data = {
            "status": "✅ OSS Installed" if installation["oss_installed"] else "β οΈ OSS Not Installed",
            "oss_version": installation["oss_version"] or "Not installed",
            "enterprise_installed": installation["enterprise_installed"],
            "enterprise_version": installation["enterprise_version"] or "Not installed",
            "execution_allowed": installation["execution_allowed"],
            "recommendations": installation["recommendations"],
            "badges": installation["badges"]
        }
        # Feature availability flips to "Available" once Enterprise is installed
        features_data = [
            ["ARF OSS Package", "✅ Installed" if installation["oss_installed"] else "β Not Installed", "OSS"],
            ["Self-Healing Core", "✅ Active", "Enterprise"],
            ["RAG Graph Memory", "✅ Active", "Both"],
            ["Predictive Analytics", "π Enterprise" if not installation["enterprise_installed"] else "✅ Available", "Enterprise"],
            ["Audit Trail", "π Enterprise" if not installation["enterprise_installed"] else "✅ Available", "Enterprise"],
            ["Compliance (SOC2)", "π Enterprise" if not installation["enterprise_installed"] else "✅ Available", "Enterprise"]
        ]
    except (ImportError, KeyError) as exc:
        # Fix: the guard previously caught only ImportError, so a missing key
        # in the status payload would crash tab construction. Also log the
        # failure instead of swallowing it silently, and keep the fallback
        # dict schema-consistent with the success path above.
        logger.warning("Installation status check failed: %s", exc)
        license_data = {
            "status": "β οΈ Installation Check Failed",
            "oss_version": "Unknown",
            "enterprise_installed": False,
            "enterprise_version": "Not installed",
            "execution_allowed": False,
            "recommendations": ["Run installation check"],
            "badges": []
        }
        features_data = [
            ["Self-Healing Core", "✅ Active", "Enterprise"],
            ["RAG Graph Memory", "✅ Active", "Both"],
            ["Predictive Analytics", "π Enterprise", "Enterprise"],
            ["Audit Trail", "π Enterprise", "Enterprise"],
            ["Compliance (SOC2)", "π Enterprise", "Enterprise"],
            ["Multi-Cloud", "π Enterprise", "Enterprise"]
        ]
    license_display = gr.JSON(
        value=license_data,
        label="π¦ Package Installation Status"
    )
    validate_btn = gr.Button("π Validate Installation", variant="secondary")
    trial_btn = gr.Button("π Start 30-Day Trial", variant="secondary")
    upgrade_btn = gr.Button("π Upgrade to Enterprise", variant="primary")
    mcp_mode = gr.Dropdown(
        choices=["advisory", "approval", "autonomous"],
        value="advisory",
        label="MCP Safety Mode"
    )
    # Initial mode info shown before the user changes the MCP mode dropdown.
    # NOTE(review): package pin says 3.3.7 while the module advertises 3.3.9 —
    # looks intentional (installed OSS package vs UI version) but worth confirming.
    mcp_mode_info = gr.JSON(
        value={
            "current_mode": "advisory",
            "description": "OSS Edition - Analysis only, no execution",
            "features": ["Incident analysis", "RAG similarity", "HealingIntent creation"],
            "package": "agentic-reliability-framework==3.3.7",
            "license": "Apache 2.0"
        },
        label="Mode Details"
    )
    integrations_data = [
        ["Prometheus", "✅ Connected", "Monitoring"],
        ["Grafana", "✅ Connected", "Visualization"],
        ["Slack", "π Enterprise", "Notifications"],
        ["PagerDuty", "π Enterprise", "Alerting"],
        ["Jira", "π Enterprise", "Ticketing"],
        ["Datadog", "π Enterprise", "Monitoring"]
    ]
    features_table = gr.Dataframe(
        headers=["Feature", "Status", "Edition"],
        value=features_data,
        label="Feature Comparison"
    )
    integrations_table = gr.Dataframe(
        headers=["Integration", "Status", "Type"],
        value=integrations_data,
        label="Integration Status"
    )
    return (license_display, validate_btn, trial_btn, upgrade_btn,
            mcp_mode, mcp_mode_info, features_table, integrations_table)
# -----------------------------
# Tab 4: Audit Trail
# -----------------------------
def create_tab4_audit_trail() -> tuple:
    """Build the Audit Trail tab widgets.

    Returns:
        Tuple of Gradio components in layout order:
        (refresh_btn, clear_btn, export_btn, execution_table,
         incident_table, export_text)
    """
    refresh_btn = gr.Button("π Refresh Audit Trail", variant="secondary")
    clear_btn = gr.Button("ποΈ Clear History", variant="secondary")
    export_btn = gr.Button("π₯ Export as JSON", variant="primary")
    # Both history tables start empty; app callbacks populate them on refresh.
    execution_table = gr.Dataframe(
        headers=["Time", "Scenario", "Mode", "Status", "Savings", "Details"],
        value=[],
        label="Execution History",
    )
    incident_table = gr.Dataframe(
        headers=["Time", "Component", "Scenario", "Severity", "Status"],
        value=[],
        label="Incident History",
    )
    export_text = gr.JSON(value={"status": "Export ready"}, label="Export Data")
    return (
        refresh_btn,
        clear_btn,
        export_btn,
        execution_table,
        incident_table,
        export_text,
    )
# -----------------------------
# Tab 5: Learning Engine
# -----------------------------
def create_tab5_learning_engine() -> tuple:
    """Build the Learning Engine tab (RAG memory graph + pattern search).

    Returns:
        Tuple of Gradio components in layout order:
        (learning_graph, graph_type, show_labels, search_query, search_btn,
         clear_btn_search, search_results, stats_display, patterns_display,
         performance_display)
    """
    learning_graph = gr.Plot(label="RAG Memory Graph")
    graph_type = gr.Dropdown(
        choices=["Incident Patterns", "Action-Outcome Chains", "System Dependencies"],
        value="Incident Patterns",
        label="Graph Type",
    )
    show_labels = gr.Checkbox(label="Show Labels", value=True)
    search_query = gr.Textbox(
        label="Search Patterns",
        placeholder="Enter pattern to search...",
    )
    search_btn = gr.Button("π Search Patterns", variant="secondary")
    clear_btn_search = gr.Button("ποΈ Clear Search", variant="secondary")
    search_results = gr.JSON(value={"status": "Ready for search"}, label="Search Results")
    # Placeholder metrics shown until live learning-engine data is wired in.
    stats_display = gr.JSON(
        value={"patterns": 42, "incidents": 156, "success_rate": "87.3%"},
        label="Learning Statistics",
    )
    patterns_display = gr.JSON(
        value={"common_patterns": ["cache_storm", "db_pool", "memory_leak"]},
        label="Pattern Library",
    )
    performance_display = gr.JSON(
        value={"accuracy": "94.2%", "recall": "89.7%", "precision": "92.1%"},
        label="Agent Performance",
    )
    return (
        learning_graph,
        graph_type,
        show_labels,
        search_query,
        search_btn,
        clear_btn_search,
        search_results,
        stats_display,
        patterns_display,
        performance_display,
    )
# -----------------------------
# Footer
# -----------------------------
def create_footer() -> gr.HTML:
    """Render the static page footer shown beneath all tabs."""
    footer_markup = """
Agentic Reliability Framework Β© 2026
Production-grade multi-agent AI for autonomous system reliability intelligence
"""
    return gr.HTML(footer_markup)