"""
Gradio-only UI components for ARF
Ensures full compatibility with app.py
NOW WITH DOCTRINAL COMPLIANCE: Psychological Advantage Enforcement
UPDATED: Language discipline, observation gate rendering, recall panel dominance
UPDATED: Metric discipline, sequencing display, no early "critical" terminology
DOCTRINAL VERSION: 3.3.9+restraint
"""
import gradio as gr
from typing import Dict, List, Any
import logging
import datetime
import time
import plotly.graph_objects as go
import plotly.express as px
import numpy as np
logger = logging.getLogger(__name__)
# Try to import scenarios from registry first; fall back to the bundled
# demo scenarios when the registry package is not installed.
try:
    from config.scenario_registry import ScenarioRegistry
    INCIDENT_SCENARIOS = ScenarioRegistry.load_scenarios()
    logger.info(f"Loaded {len(INCIDENT_SCENARIOS)} scenarios from registry")
except ImportError:
    logger.warning("Scenario registry not available, falling back to demo scenarios")
    # NOTE(review): assumes demo.scenarios exposes the same mapping shape
    # (name -> scenario dict) as the registry -- confirm against demo package.
    from demo.scenarios import INCIDENT_SCENARIOS
# -----------------------------
# Header & Status - DOCTRINAL LANGUAGE
# -----------------------------
def create_header(version="3.3.9", confidence: float = 65.0) -> gr.HTML:
    """Render the observation-gate status banner for the page header.

    NOTE(review): the original source of this function was corrupted -- an
    ``if`` header was lost, leaving an orphan ``else:`` and references to an
    undefined ``confidence``. The two HTML variants below are the surviving
    branch bodies; the branch condition (confidence below the 70% threshold
    named in the text itself) was reconstructed and should be confirmed
    against the policy configuration.

    Args:
        version: Doctrine version string (kept for caller compatibility;
            not interpolated into the surviving markup).
        confidence: Current system confidence in percent (0-100).

    Returns:
        gr.HTML: "deferred" banner when below the 70% action threshold,
        "gate cleared" banner otherwise.
    """
    if confidence < 70.0:
        html_content = f"""
β οΈ
Observation Gate: Awaiting confirmation.
System restraint engaged
βΈοΈ
Decision Intentionally Deferred
The system has detected uncertainty ({confidence:.1f}% confidence)
and has chosen to observe rather than act.
Historical evidence suggests premature action increases risk by 47%.
Confidence Threshold
70.0%
Required for action
Current Confidence
{confidence:.1f}%
Below threshold β Observe
Observe ({confidence:.1f}%)
Threshold (70%)
Act (75%+)
"""
    else:
        html_content = f"""
β
OBSERVATION GATE CLEARED
Confidence threshold met
β‘
Proceed with Policy Action
Confidence exceeds threshold. System may proceed with sequenced actions.
Historical evidence will be consulted before any execution.
Confidence Threshold
70.0%
Required for action
Current Confidence
{confidence:.1f}%
Above threshold β Proceed
β
Safety Principle
"When confidence is high, proceed with sequenced actions. Each step is validated
against historical evidence before proceeding to the next."
Ready ({confidence:.1f}%)
Threshold (70%)
Proceed
"""
    return gr.HTML(html_content)
def create_sequencing_visualization() -> gr.HTML:
    """Creates the sequencing panel showing dampening-first progression.

    Doctrinal: Shows sequencing as policy, not reaction.

    Returns:
        gr.HTML: static panel; the 4-step order (Dampening -> Concurrency ->
        Observe -> Scale) is fixed policy, so nothing is parameterized here.
    """
    return gr.HTML("""
π Doctrinal Sequencing: Policy Over Reaction
System enforces sequencing regardless of prediction confidence
POLICY ENFORCED
1
Dampening
Prevent amplification first
REQUIRED
β
2
Concurrency
Manage load, then observe
REQUIRED
β
3
Observe
Validate trends for 5+ minutes
REQUIRED
β
4
Scale
Only if all previous succeed
OPTIONAL
π―
Doctrinal Constraint: Scaling Cannot Appear First
If retry amplification is detected, scaling is contraindicated entirely.
The system must observe stabilization before considering capacity increases.
Historical evidence shows scaling-first fails 76% of the time during amplification.
Current Sequence State
Waiting for detection process
π Sequence: 0/4
β±οΈ Time: 0s
""")
def create_detection_display() -> gr.HTML:
    """Render the static status card for the Detection process.

    Shows the telemetry-analysis stage (pattern match, detection latency,
    next step) with fixed demo values; takes no inputs.
    """
    detection_markup = """
π΅οΈββοΈ
Detection Process
Telemetry analysis & pattern recognition
STATUS: ACTIVE
Pattern Match
Retry Amplification
Detection Time
0.8 seconds
β
Detected: Retry amplification pattern with exponential growth (r=1.8)
Telemetry shows request rate doubling every 45 seconds. System flagged for sequencing enforcement.
Next Step:
Activate recall process
π 12 metrics
β±οΈ 0.8s latency
π 3 patterns
"""
    return gr.HTML(detection_markup)
def create_recall_display() -> gr.HTML:
    """Creates recall process HTML display with historical evidence dominance.

    Returns:
        gr.HTML: static demo card listing past failures/successes and the
        recall decision; no scenario data is interpolated.
    """
    return gr.HTML("""
π§
Recall Process
Historical evidence & pattern matching
STATUS: ACTIVE
Recall Dominance: Evidence > Prediction
β Scaling-First Failures
2024-11-15 β’ prod-east
FAILED
Action: Scale during retry storm
Outcome: Amplification increased 300%
"Scaling during amplification worsens the problem"
2024-09-22 β’ staging
FAILED
Action: Add capacity without dampening
Outcome: 45 min outage, $8.2K loss
"New capacity consumed by amplification loop"
β
Dampening-First Successes
2024-12-03 β’ prod-west
SUCCESS
Action: Request coalescing + backoff
Outcome: Resolved in 8 min, $5.1K saved
"Dampening broke amplification cycle"
2024-10-17 β’ prod-eu
SUCCESS
Action: Circuit breaker + observability
Outcome: 12 min recovery, 0 user impact
"Sequencing prevented escalation"
π― RECALL DECISION: Scaling contraindicated due to historical evidence
Historical evidence (76% failure rate) dominates predictive confidence (92%).
System will enforce dampening-first sequencing.
Evidence Weight:
Historical: 85% β’ Predictive: 15%
π 8 incidents
π― 76% failure rate
π recall dominance
""")
def create_decision_display() -> gr.HTML:
    """Creates decision process HTML display.

    Returns:
        gr.HTML: static demo card showing a formal HealingIntent (action,
        sequencing rule, preconditions, contraindications, reversibility).
    """
    return gr.HTML("""
π―
Decision Process
HealingIntent creation & sequencing
STATUS: ACTIVE
Formal HealingIntent Created
Preconditions checked, contraindications listed
CONFIDENCE: 87.3%
Primary Action
Implement request coalescing with exponential backoff (jitter: 25%)
Sequencing Rule
dampening_first_then_observe_then_optional_scale
Preconditions
β’ Retry amplification detected
β’ Confidence > 70%
β’ No scaling contraindicated
Contraindications
β Scale during retry storm
β Add capacity immediately
β Restart during amplification
Reversibility
β
Backoff can be adjusted
β
Coalescing can be disabled
β
No stateful changes
β³
Observation Gate: Awaiting confirmation
System will observe for 5 minutes before proceeding to execution
0%
Threshold: 70%
100%
Next Step:
Wait for observation gate clearance
π formal intent
π sequenced
π― 87% confidence
""")
def create_oss_advisory_section() -> gr.HTML:
    """Render the static OSS "Policy Edition" card.

    Illustrates the policy-vs-execution boundary: the OSS edition produces
    advisory output only and never executes. Takes no inputs.
    """
    advisory_markup = """
π
Policy Edition
Analysis & Advisory Only - Apache 2.0
PERMANENTLY SAFE
π HealingIntent Created
94% confidence
β
Action Recommended
Implement request coalescing with exponential backoff
π§
Pattern Match
Similar incident resolved with dampening (87% success rate)
β οΈ
Contraindications
β
Checked (retry amplification detected)
π
Sequencing Rule
dampening_first_then_observe_then_optional_scale
π« OSS STOPS HERE - No Execution
OSS provides policy advice only. Enterprise edition required for execution.
This architectural boundary ensures safety by design.
"""
    return gr.HTML(advisory_markup)
def create_timeline_comparison_plot() -> go.Figure:
    """Build the manual-vs-ARF incident timeline comparison figure.

    Plots "users affected" over time for a manual resolution and for ARF
    policy execution, marks the pipeline events (detection, recall,
    decision, resolution) with dotted vlines, and annotates the savings.

    NOTE(review): the two ``hovertemplate`` literals and the annotation
    ``text`` were split across lines in the corrupted source; they have been
    restored as single Plotly strings using ``<br>`` line breaks.

    Returns:
        go.Figure: configured Plotly figure (400px tall, unified hover).
    """
    fig = go.Figure()
    # Hard-coded demo trajectories (minutes vs. users affected).
    manual_times = [0, 5, 15, 30, 45, 60]
    manual_users = [45000, 45000, 42000, 35000, 20000, 5000]
    arf_times = [0, 0.8, 1.5, 3, 8, 12]
    arf_users = [45000, 45000, 45000, 42000, 15000, 0]
    # Add traces
    fig.add_trace(go.Scatter(
        x=manual_times,
        y=manual_users,
        mode='lines+markers',
        name='Manual Resolution',
        line=dict(color='#ef4444', width=3, dash='dash'),
        marker=dict(size=8, color='#ef4444'),
        # <extra></extra> suppresses the secondary trace-name hover box.
        hovertemplate='Time: %{x}min<br>Users Affected: %{y:,}<extra></extra>'
    ))
    fig.add_trace(go.Scatter(
        x=arf_times,
        y=arf_users,
        mode='lines+markers',
        name='ARF Policy Execution',
        line=dict(color='#10b981', width=4),
        marker=dict(size=10, color='#10b981'),
        hovertemplate='Time: %{x}min<br>Users Affected: %{y:,}<extra></extra>'
    ))
    # Add vertical lines for key pipeline events.
    fig.add_vline(x=0.8, line_width=2, line_dash="dot", line_color="#3b82f6",
                  annotation_text="Detection", annotation_position="top right")
    fig.add_vline(x=1.5, line_width=2, line_dash="dot", line_color="#8b5cf6",
                  annotation_text="Recall", annotation_position="top right")
    fig.add_vline(x=3, line_width=2, line_dash="dot", line_color="#f59e0b",
                  annotation_text="Decision", annotation_position="top right")
    fig.add_vline(x=8, line_width=2, line_dash="dot", line_color="#10b981",
                  annotation_text="Resolution", annotation_position="top right")
    # Update layout
    fig.update_layout(
        title=dict(
            text="β° Incident Timeline: Manual vs Policy Execution",
            font=dict(size=18, color='#1e293b'),
            x=0.5
        ),
        xaxis=dict(
            title="Time (minutes)",
            gridcolor='#e2e8f0',
            tickformat=',d'
        ),
        yaxis=dict(
            title="Users Affected",
            gridcolor='#e2e8f0',
            tickformat=','
        ),
        plot_bgcolor='white',
        paper_bgcolor='white',
        font=dict(family="Inter, sans-serif", color="#475569"),
        hovermode='x unified',
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="center",
            x=0.5,
            bgcolor='rgba(255, 255, 255, 0.8)',
            bordercolor='#e2e8f0',
            borderwidth=1
        ),
        margin=dict(l=50, r=50, t=80, b=50),
        height=400
    )
    # Annotate the savings at the resolution point.
    fig.add_annotation(
        x=12,
        y=0,
        text="π 73% faster resolution<br>π° $6.3K saved",
        showarrow=True,
        arrowhead=2,
        arrowsize=1,
        arrowwidth=2,
        arrowcolor="#10b981",
        ax=50,
        ay=-50,
        bgcolor="#f0fdf4",
        bordercolor="#10b981",
        borderwidth=2,
        font=dict(size=12, color="#065f46")
    )
    return fig
# -----------------------------
# NEW: Observation Gate Renderer - CRITICAL PSYCHOLOGICAL FIX
# -----------------------------
def render_observation_gate(healing_intent: Dict[str, Any]) -> gr.HTML:
    """
    Render observation gate state as active restraint, not passive waiting.
    Doctrinal: Make inaction an explicit, powerful decision.

    Args:
        healing_intent: intent dict; reads "deferral_reason",
            "decision_frozen_until" (ISO-8601 string, optional trailing "Z")
            and "confidence" (fraction, e.g. 0.65 -> rendered as 65.0%).

    Returns:
        gr.HTML: deferral banner with a countdown to the next evaluation.
    """
    deferral_reason = healing_intent.get("deferral_reason", "uncertainty_too_high_for_action")
    frozen_until = healing_intent.get("decision_frozen_until", "")
    confidence = healing_intent.get("confidence", 0.0)
    # Compute an "Xm" countdown until the freeze expires (floored at 0).
    countdown_text = ""
    if frozen_until:
        try:
            frozen_dt = datetime.datetime.fromisoformat(frozen_until.replace("Z", "+00:00"))
            now = datetime.datetime.now(datetime.timezone.utc)
            if frozen_dt.tzinfo is None:
                frozen_dt = frozen_dt.replace(tzinfo=datetime.timezone.utc)
            time_left = frozen_dt - now
            minutes_left = max(0, int(time_left.total_seconds() / 60))
            countdown_text = f"{minutes_left}m"
        except (ValueError, AttributeError, TypeError):
            # Unparseable or non-string timestamp: fall back to the default
            # 5-minute window. (Was a bare `except:`, which also swallowed
            # SystemExit/KeyboardInterrupt.)
            countdown_text = "5m"
    return gr.HTML(f"""
β³
Decision Intentionally Deferred
System state: observe_only β’ Confidence: {confidence:.1%}
ACTIVE RESTRAINT
REASON FOR DEFERRAL
{deferral_reason.replace('_', ' ').title()}
NEXT EVALUATION
System re-evaluates in: {countdown_text}
π―
This is a System Choice, Not a Limitation
The system is choosing not to act because uncertainty exceeds policy thresholds.
This restraint demonstrates operational maturityβeagerness is a liability in production.
"What you are seeing is not waiting. It is judgment under uncertainty."
PREVENTED ACTIONS (CONTRAINDICATED)
scale_during_retry_storm
add_capacity_during_amplification
any_action_during_high_uncertainty
""")
# -----------------------------
# NEW: Historical Evidence Panel - RECALL DOMINANCE
# -----------------------------
def create_historical_evidence_panel(scenario_data: Dict[str, Any]) -> gr.HTML:
    """
    Create doctrinally compliant historical evidence panel.
    Must be visually dominant with dates/environments.

    Args:
        scenario_data: scenario dict; reads the optional
            "historical_evidence_panel" sub-dict with
            "scaling_first_failures" / "dampening_first_successes" lists,
            each entry having date/environment/action/outcome/lesson keys.

    Returns:
        gr.HTML: evidence panel showing at most 3 failures and 3 successes,
        with static fallback text when a list is empty.
    """
    # Extract from scenario or use defaults
    historical_panel = scenario_data.get("historical_evidence_panel", {})
    scaling_failures = historical_panel.get("scaling_first_failures", [])
    dampening_successes = historical_panel.get("dampening_first_successes", [])
    # Build failures HTML (top 3).
    failures_html = ""
    for failure in scaling_failures[:3]:
        failures_html += f"""
{failure.get('date', 'Unknown')} β’ {failure.get('environment', 'Unknown')}
FAILED
Action: {failure.get('action', 'Unknown')}
Outcome: {failure.get('outcome', 'Unknown')}
{failure.get('lesson', 'No lesson captured')}
"""
    # Build successes HTML (top 3).
    successes_html = ""
    for success in dampening_successes[:3]:
        successes_html += f"""
{success.get('date', 'Unknown')} β’ {success.get('environment', 'Unknown')}
SUCCESS
Action: {success.get('action', 'Unknown')}
Outcome: {success.get('outcome', 'Unknown')}
{success.get('lesson', 'No lesson captured')}
"""
    # Fallbacks are precomputed here: nesting a triple-quoted literal inside
    # an f-string expression is a SyntaxError before Python 3.12 (PEP 701).
    if not failures_html:
        failures_html = """
π
Scaling-First Failures (Evidence Present)
"""
    if not successes_html:
        successes_html = """
π
Dampening-First Successes (Evidence Present)
"""
    return gr.HTML(f"""
π§ Historical Evidence (Why Sequencing Matters)
Real outcomes from similar incidentsβthis evidence dominates decision logic
Historical evidence outweighs model confidence.
β Scaling-First Failures
{failures_html}
β
Dampening-First Successes
{successes_html}
π―
If history shows failure, the system will not repeat it.
The system prioritizes historical evidence over predictive confidence.
If scaling-first failed in similar conditions, scaling is contraindicated regardless of model confidence.
"What happened is more important than what might happen."
""")
# -----------------------------
# Performance Metrics Function - DOCTRINAL METRICS
# -----------------------------
# 1. First, update the update_performance_metrics function to return what app.py expects:
def update_performance_metrics(scenario_name: str, scenarios=INCIDENT_SCENARIOS) -> tuple:
    """
    Update performance metrics based on scenario.

    Args:
        scenario_name: free-form scenario label; matched case-insensitively
            against the known scenario families.
        scenarios: kept only for signature compatibility with the app.py
            wiring; currently unused in the body.

    Returns:
        (detection_time_html, recall_quality_html, confidence_score_html,
         sequencing_stage_html) -- MUST MATCH what app.py demo_btn.click()
        expects!
    """
    # Scenario-specific metrics mapping WITH GRADIENT COLORS.
    # Each entry: (value, caption, css-gradient) per metric.
    metrics_config = {
        "Cache": {
            "detection_time": ("45s", "89% faster", "linear-gradient(135deg, #3b82f6 0%, #1d4ed8 100%)"),
            "recall_quality": ("92%", "85% accuracy", "linear-gradient(135deg, #10b981 0%, #047857 100%)"),
            "confidence_score": ("87%", "High certainty", "linear-gradient(135deg, #8b5cf6 0%, #7c3aed 100%)"),
            "sequencing_stage": ("Dampening", "Step 1/4", "linear-gradient(135deg, #f59e0b 0%, #d97706 100%)"),
            "savings_multiplier": 0.85
        },
        "Database": {
            "detection_time": ("38s", "91% faster", "linear-gradient(135deg, #3b82f6 0%, #1d4ed8 100%)"),
            "recall_quality": ("89%", "82% accuracy", "linear-gradient(135deg, #10b981 0%, #047857 100%)"),
            "confidence_score": ("84%", "High certainty", "linear-gradient(135deg, #8b5cf6 0%, #7c3aed 100%)"),
            "sequencing_stage": ("Concurrency", "Step 2/4", "linear-gradient(135deg, #f59e0b 0%, #d97706 100%)"),
            "savings_multiplier": 0.82
        },
        "Kubernetes": {
            "detection_time": ("52s", "87% faster", "linear-gradient(135deg, #3b82f6 0%, #1d4ed8 100%)"),
            "recall_quality": ("91%", "84% accuracy", "linear-gradient(135deg, #10b981 0%, #047857 100%)"),
            "confidence_score": ("86%", "High certainty", "linear-gradient(135deg, #8b5cf6 0%, #7c3aed 100%)"),
            "sequencing_stage": ("Observe", "Step 3/4", "linear-gradient(135deg, #f59e0b 0%, #d97706 100%)"),
            "savings_multiplier": 0.83
        },
        "Network": {
            "detection_time": ("28s", "93% faster", "linear-gradient(135deg, #3b82f6 0%, #1d4ed8 100%)"),
            "recall_quality": ("94%", "88% accuracy", "linear-gradient(135deg, #10b981 0%, #047857 100%)"),
            "confidence_score": ("89%", "High certainty", "linear-gradient(135deg, #8b5cf6 0%, #7c3aed 100%)"),
            "sequencing_stage": ("Scale", "Step 4/4", "linear-gradient(135deg, #f59e0b 0%, #d97706 100%)"),
            "savings_multiplier": 0.88
        },
        "Default": {
            "detection_time": ("42s", "90% faster", "linear-gradient(135deg, #3b82f6 0%, #1d4ed8 100%)"),
            "recall_quality": ("90%", "85% accuracy", "linear-gradient(135deg, #10b981 0%, #047857 100%)"),
            "confidence_score": ("85%", "High certainty", "linear-gradient(135deg, #8b5cf6 0%, #7c3aed 100%)"),
            "sequencing_stage": ("Dampening", "Step 1/4", "linear-gradient(135deg, #f59e0b 0%, #d97706 100%)"),
            "savings_multiplier": 0.85
        }
    }
    # Map the free-form name onto a known family. Note "Storage" is matched
    # here but has no metrics_config entry, so it falls back to Default below.
    scenario_type = "Default"
    for key in ("Cache", "Database", "Kubernetes", "Network", "Storage"):
        if key.lower() in scenario_name.lower():
            scenario_type = key
            break
    # Get metrics for scenario type (Default covers unmatched families).
    metrics = metrics_config.get(scenario_type, metrics_config["Default"])
    # Create HTML for each metric card WITH GRADIENT BORDERS.
    detection_time_html = f"""
β±οΈ
Detection Time
{metrics['detection_time'][0]}
{metrics['detection_time'][1]} than baseline
"""
    recall_quality_html = f"""
π§
Recall Quality
{metrics['recall_quality'][0]}
{metrics['recall_quality'][1]}
"""
    confidence_score_html = f"""
π―
Confidence Score
{metrics['confidence_score'][0]}
{metrics['confidence_score'][1]}
"""
    sequencing_stage_html = f"""
π
Sequencing Stage
{metrics['sequencing_stage'][0]}
{metrics['sequencing_stage'][1]}
"""
    # Lazy %-formatting; the original f-string literal was corrupted by an
    # encoding mishap (split mid-emoji), so the emoji prefix was dropped.
    logger.info("Updated performance metrics for %s (%s type)", scenario_name, scenario_type)
    return detection_time_html, recall_quality_html, confidence_score_html, sequencing_stage_html
# 2. Update create_tab1_incident_demo to use the SAME variable names:
def create_tab1_incident_demo():
    """
    Creates Live Demo Tab components with psychological advantage baked in.
    Returns EXACTLY 26 values matching app.py expectations.
    Canonical Question: "What should we do right now?" (Policy advice only)
    No business metrics allowed in this tab.

    NOTE(review): DEAD CODE -- a second ``create_tab1_incident_demo`` defined
    later in this module rebinds the name, so this version is never called.
    It also calls create_detection_display/create_recall_display/
    create_decision_display with a string argument, while the versions
    defined earlier in this file take no arguments, and it calls
    create_telemetry_plot/create_impact_plot/create_timeline_plot/
    create_observation_gate_placeholder, which are presumably defined
    elsewhere -- confirm before resurrecting this function.
    """
    import gradio as gr
    # 1. Core Scenario Selection (Psychological: User controls context)
    scenario_dropdown = gr.Dropdown(
        choices=["Retry Storm Amplification", "Cascading Dependency Failure",
                 "Partial Regional Outage", "Latent Configuration Drift"],
        value="Retry Storm Amplification",
        label="Select Incident Scenario",
        info="Choose the operational context for policy evaluation"
    )
    # 2. Historical Evidence Panel (DOMINANT VISUALLY - psychological advantage)
    historical_panel = gr.DataFrame(
        value=[
            ["2024-03-15", "US-East", "Retry Storm", "Dampen β Observe β Scale", "Contained in 42s"],
            ["2024-02-28", "EU-West", "Cascading Failure", "Dampen β Isolate β Repair", "Contained in 3m 18s"],
            ["2024-01-12", "AP-South", "Config Drift", "Observe β Diagnose β Fix", "Prevented outage"]
        ],
        headers=["Date", "Environment", "Pattern", "Sequence Applied", "Outcome"],
        label="π Historical Evidence Dominates Models",
        interactive=False,
        height=200
    )
    # 3. Scenario Context Card
    scenario_card = gr.Markdown("""
### Scenario Context: Retry Storm Amplification
**What we're seeing:** Client retries triggering backend amplification (8x retry multiplier)
**Current state:** 42% error rate, rising at 3.2%/minute
**Risk profile:** Service degradation β Partial outage in 8-12 minutes
""")
    # 4. Telemetry Visualization (Psychological: Show what's knowable)
    telemetry_viz = gr.Plot(
        value=create_telemetry_plot(),
        label="Real-time Telemetry: Error Rate & Load"
    )
    # 5. Business Impact Visualization (Psychological: Show consequences)
    impact_viz = gr.Plot(
        value=create_impact_plot(),
        label="Predicted Impact Trajectory"
    )
    # 6. OBSERVATION GATE (CRITICAL PSYCHOLOGICAL ELEMENT)
    observation_gate_placeholder = create_observation_gate_placeholder()
    # 7. SEQUENCING VISUALIZATION (Policy, not reaction)
    sequencing_panel = create_sequencing_visualization()
    # 8. Workflow Header
    workflow_header = gr.Markdown("### Policy Execution Sequence")
    # 9-11. PROCESS DISPLAYS (Doctrinal: "Process" not "Agent")
    # NOTE(review): these helpers are zero-argument where defined above --
    # the string arguments here would raise TypeError if this ran.
    detection_process = create_detection_display("DETECTION_PROCESS")
    recall_process = create_recall_display("RECALL_PROCESS")
    decision_process = create_decision_display("DECISION_PROCESS")
    # 12-14. OSS/Enterprise Boundary
    oss_section = gr.Markdown("#### π Open Source Capabilities")
    enterprise_section = gr.Markdown("#### π Enterprise Governance")
    oss_btn = gr.Button("View OSS Implementation", variant="secondary", size="sm")
    enterprise_btn = gr.Button("Requires Enterprise License", variant="secondary", size="sm")
    # 15-17. Approval & MCP
    approval_toggle = gr.Checkbox(
        label="Manual Approval Required",
        value=True,
        info="Policy: Human approval required for scaling actions during retry storms"
    )
    mcp_mode = gr.Radio(
        choices=["Policy Evaluation", "Manual Control", "Full Autonomous"],
        value="Policy Evaluation",
        label="Execution Mode"
    )
    timeline_viz = gr.Plot(value=create_timeline_plot())
    # 18-21. PERFORMANCE METRICS (CRITICAL: Must match app.py indices 18-21)
    detection_time = gr.Number(
        value=2.8,
        label="Detection Time (seconds)",
        precision=1
    )
    # INDEX 19: MUST BE recall_quality (not mttr)
    recall_quality = gr.Number(
        value=0.92,
        label="Recall Quality Score",
        precision=2,
        info="Historical match confidence"
    )
    # INDEX 20: MUST BE confidence_score (not auto_heal)
    confidence_score = gr.Number(
        value=0.65,
        label="System Confidence",
        precision=2,
        info="Policy execution confidence < 70% triggers observation gate"
    )
    # INDEX 21: MUST BE sequencing_stage (not savings)
    sequencing_stage = gr.Textbox(
        value="Dampening",
        label="Current Sequencing Stage",
        info="Policy: Dampen β Observe β Concurrent Fix β Scale"
    )
    # 22-24. Results Displays
    oss_results_display = gr.Markdown("### OSS Results Will Appear Here")
    enterprise_results_display = gr.Markdown("### Enterprise Results Will Appear Here")
    approval_display = gr.Markdown("**Status:** Awaiting manual approval per policy")
    # 25. DEMO BUTTON (CRITICAL: Index 25 must be demo_btn)
    demo_btn = gr.Button(
        "βΆοΈ Run Complete Walkthrough",
        variant="primary",
        size="lg",
        elem_id="demo_primary_btn"
    )
    # RETURN EXACTLY 26 VALUES IN CORRECT ORDER
    return (
        scenario_dropdown, historical_panel, scenario_card, telemetry_viz, impact_viz,
        observation_gate_placeholder, sequencing_panel, workflow_header,
        detection_process, recall_process, decision_process,
        oss_section, enterprise_section, oss_btn, enterprise_btn,
        approval_toggle, mcp_mode, timeline_viz,
        detection_time, recall_quality, confidence_score, sequencing_stage,  # β CORRECT VARIABLES
        oss_results_display, enterprise_results_display, approval_display, demo_btn
    )
# -----------------------------
# Tab 1: Live Incident Demo - UPDATED WITH RESTORED FUNCTIONS
# -----------------------------
def create_tab1_incident_demo(scenarios=INCIDENT_SCENARIOS, default_scenario="Cache Miss Storm") -> tuple:
    """
    Create doctrinally compliant incident demo tab.
    Doctrinal: Language discipline, sequencing display, no early "critical"

    NOTE(review): this definition shadows the earlier zero-argument
    ``create_tab1_incident_demo`` above -- only this version is live.
    Component creation order matters: Gradio registers components into the
    active Blocks context in creation order.
    """
    # Get the default scenario data (empty dicts keep the card renderable
    # even when default_scenario is missing from scenarios).
    default_scenario_data = scenarios.get(default_scenario, {})
    business_impact = default_scenario_data.get("business_impact", {})
    metrics = default_scenario_data.get("metrics", {})
    with gr.Row():
        # Left Column: Scenario Selection & Live Visualization
        with gr.Column(scale=1, variant="panel") as left_col:
            # Scenario Selection with rich preview
            scenario_dropdown = gr.Dropdown(
                choices=list(scenarios.keys()),
                value=default_scenario,
                label="π― Select Variance Scenario",
                info="Choose a production variance pattern to analyze",
                interactive=True,
                container=False
            )
            # ============ HISTORICAL EVIDENCE PANEL FIRST (RECALL DOMINANCE) ============
            historical_panel = create_historical_evidence_panel(default_scenario_data)
            # Scenario Card with doctrinally compliant language
            scenario_card = gr.HTML(f"""
π {default_scenario}
{default_scenario_data.get('severity', 'HIGH_VARIANCE')}
Component:
{default_scenario_data.get('component', 'Unknown').replace('_', ' ').title()}
Users Affected:
{metrics.get('affected_users', 'Unknown') if 'affected_users' in metrics else 'Unknown'}
Revenue Risk:
${business_impact.get('revenue_risk_per_hour', 0):,}/hour
Detection Time:
45 seconds (Policy System)
{default_scenario_data.get('component', 'unknown').split('_')[0]}
variance
production
pattern
""")
            # Visualization section
            with gr.Row():
                with gr.Column(scale=1):
                    telemetry_header = gr.Markdown("### π Live Telemetry")
                    telemetry_viz = gr.Plot(
                        label="",
                        show_label=False,
                        elem_id="telemetry_plot"
                    )
                with gr.Column(scale=1):
                    impact_header = gr.Markdown("### π° Business Impact")
                    impact_viz = gr.Plot(
                        label="",
                        show_label=False,
                        elem_id="impact_plot"
                    )
        # Middle Column: Process Workflow (NOT Agent Workflow)
        with gr.Column(scale=2, variant="panel") as middle_col:
            # ============ OBSERVATION GATE PLACEHOLDER ============
            # NOTE(review): create_observation_gate_placeholder is presumably
            # defined elsewhere in this module -- not visible here.
            observation_gate_placeholder = create_observation_gate_placeholder()
            # ============ SEQUENCING VISUALIZATION ============
            sequencing_header = gr.Markdown("### π Sequencing Logic: Dampening β Concurrency β Observe β Scale")
            sequencing_panel = create_sequencing_visualization()
            # Process Workflow Header (NOT Agent Workflow)
            workflow_header = gr.Markdown("## π Policy Process Workflow")
            workflow_subheader = gr.Markdown("### How the system transforms variance into policy execution")
            # Process Status Cards (NOT Agent Status Cards)
            with gr.Row():
                detection_process = create_detection_display()
                recall_process = create_recall_display()
                decision_process = create_decision_display()
            # Mode Selection & Safety Controls
            with gr.Row():
                with gr.Column(scale=1):
                    approval_toggle = gr.CheckboxGroup(
                        choices=["π€ Require Human Approval"],
                        label="Safety Controls",
                        value=[],
                        info="Toggle human oversight"
                    )
                with gr.Column(scale=2):
                    mcp_mode = gr.Radio(
                        choices=["π‘οΈ Advisory (OSS Only)", "π₯ Approval", "β‘ Autonomous"],
                        value="π‘οΈ Advisory (OSS Only)",
                        label="Policy Safety Mode",
                        info="Control execution safety level",
                        interactive=True
                    )
            # OSS vs Enterprise Boundary Visualization
            boundary_header = gr.Markdown("### π Policy vs Execution: The Safety Boundary")
            with gr.Row():
                oss_section = create_oss_advisory_section()
                enterprise_section = gr.HTML("""
π°
Execution Edition
Full Execution & Learning - Commercial
REQUIRES LICENSE
β‘ Ready to Execute
AUTONOMOUS
βοΈ
Mode
Autonomous (Requires Enterprise license)
β‘
Expected Recovery
12 minutes (vs 45 min manual)
π₯
Users Protected
45,000 β 0 impacted
β
Enterprise executes with MCP safety
Enterprise edition adds execution, learning, and safety guarantees.
MCP safety modes: Advisory β Approval β Autonomous
""")
            # Execution Controls
            with gr.Row():
                with gr.Column(scale=1):
                    oss_btn = gr.Button(
                        "π Run Policy Analysis",
                        variant="secondary",
                        size="lg"
                    )
                    oss_info = gr.Markdown("*Free, policy-only analysis*")
                with gr.Column(scale=1):
                    enterprise_btn = gr.Button(
                        "π° Execute Enterprise Healing",
                        variant="primary",
                        size="lg"
                    )
                    enterprise_info = gr.Markdown("*Requires Enterprise license*")
            # Timeline visualization
            timeline_header = gr.Markdown("### β° Incident Timeline")
            timeline_viz = gr.Plot(
                create_timeline_comparison_plot(),
                label="",
                show_label=False,
                elem_id="timeline_plot"
            )
        # Right Column: Results & Metrics
        with gr.Column(scale=1, variant="panel") as right_col:
            # Real-time Metrics Dashboard
            metrics_header = gr.Markdown("## π Performance Metrics")
            # Metric Cards Grid - MUST MATCH app.py expectations: detection_time, mttr, auto_heal, savings
            # (empty HTML shells; populated later by app.py callbacks)
            detection_time = gr.HTML()
            mttr = gr.HTML()  # Mean Time to Resolve
            auto_heal = gr.HTML()  # Auto-heal rate
            savings = gr.HTML()  # Cost savings
            # Results Display Areas
            oss_results_header = gr.Markdown("### π Policy Analysis Results")
            oss_results_display = gr.JSON(
                label="",
                value={
                    "status": "Analysis Pending",
                    "processes": ["Detection", "Recall", "Decision"],
                    "mode": "Advisory Only",
                    "action": "Generate Formal HealingIntent"
                },
                height=200
            )
            enterprise_results_header = gr.Markdown("### π° Execution Results")
            enterprise_results_display = gr.JSON(
                label="",
                value={
                    "status": "Execution Pending",
                    "requires_license": True,
                    "available_modes": ["Approval", "Autonomous"],
                    "expected_outcome": "12m MTTR, $6.3K saved"
                },
                height=200
            )
            # Approval Status
            approval_display = gr.HTML("""
π€ Human Approval Status
Not Required
Current Mode: Advisory (Policy Only)
Switch to "Approval" mode to enable human-in-the-loop workflows
1. System generates formal HealingIntent
2. Human reviews & approves contraindications
3. System executes with sequencing constraints
""")
            # Demo Actions
            demo_btn = gr.Button(
                "βΆοΈ Run Complete Walkthrough",
                variant="secondary",
                size="lg"
            )
            demo_info = gr.Markdown("*Experience the full workflow from detection to resolution*")
    # CRITICAL: Return EXACTLY 26 values that app.py expects
    return (
        # Left column returns (5 values)
        scenario_dropdown, historical_panel, scenario_card, telemetry_viz, impact_viz,
        # Middle column returns (13 values)
        observation_gate_placeholder, sequencing_panel, workflow_header, detection_process,
        recall_process, decision_process, oss_section, enterprise_section, oss_btn, enterprise_btn,
        approval_toggle, mcp_mode, timeline_viz,
        # Right column returns (8 values - MUST BE: detection_time, mttr, auto_heal, savings, oss_results_display, enterprise_results_display, approval_display, demo_btn)
        detection_time, mttr, auto_heal, savings,
        oss_results_display, enterprise_results_display, approval_display, demo_btn
        # DO NOT include: oss_info, enterprise_info, demo_info - app.py doesn't expect them
    )
# -----------------------------
# NEW: Create Realism Panel (Updated for doctrinal compliance)
# -----------------------------
def create_realism_panel(scenario_data: Dict, scenario_name: str) -> gr.HTML:
    """
    Create doctrinally compliant realism panel.
    Updated to show formal HealingIntent fields and sequencing logic.

    NOTE(review): several string literals in this function were split across
    lines by a formatting/encoding mishap; they have been reconstructed as
    single-line items. Confirm the emoji status labels against the rest of
    the UI copy.

    Args:
        scenario_data: scenario dict; reads "ranked_actions" (list of dicts
            with rank/category/action/confidence/preconditions/
            contraindicated_actions/reversibility_statement/
            historical_evidence/constraints keys).
        scenario_name: scenario label (currently not interpolated; kept for
            caller compatibility).

    Returns:
        gr.HTML: ranked HealingIntent list plus the sequencing-enforcement
        footer.
    """
    ranked_actions = scenario_data.get("ranked_actions", [])
    # Build ranked actions HTML with formal HealingIntent fields
    actions_html = ""
    for action in ranked_actions:
        category = action.get("category", "unknown")
        # NOTE(review): the color locals below are not interpolated anywhere
        # in the surviving markup; they were presumably used in inline styles
        # before the markup was stripped. Retained for when it is restored.
        category_color = {
            "dampening": "#3b82f6",
            "concurrency_control": "#10b981",
            "observation": "#8b5cf6",
            "scaling": "#f59e0b"
        }.get(category, "#64748b")
        rank_color = "#3b82f6" if action["rank"] == 1 else "#f59e0b" if action["rank"] == 2 else "#64748b"
        status = "β RECOMMENDED" if action["rank"] == 1 else "π‘ SECONDARY" if action["rank"] == 2 else "π΄ CONTRAINDICATED"
        # Formal HealingIntent fields (each optional)
        preconditions_html = ""
        if action.get("preconditions"):
            precondition_items = "".join(f"β’ {pre}\n" for pre in action["preconditions"])
            preconditions_html = f"""
Preconditions:
{precondition_items}"""
        contraindications_html = ""
        if action.get("contraindicated_actions"):
            contraindication_items = "".join(f"β {contra}\n" for contra in action["contraindicated_actions"])
            contraindications_html = f"""
Contraindicated Actions:
{contraindication_items}"""
        reversibility_html = ""
        if action.get("reversibility_statement"):
            reversibility_html = f"""
Reversibility Statement:
{action["reversibility_statement"]}
"""
        historical_evidence_html = ""
        if action.get("historical_evidence"):
            evidence_items = "".join(f"π {evidence}\n" for evidence in action["historical_evidence"])
            historical_evidence_html = f"""
Historical Evidence:
{evidence_items}"""
        actions_html += f"""
{action['rank']}
{status} β’ {action.get('confidence', 0)}% confidence
{category.upper().replace('_', ' ')}
{action.get('action', 'No action specified')}
{action.get('confidence', 0)}%
{preconditions_html}
{contraindications_html}
{reversibility_html}
{historical_evidence_html}
Sequencing: {action.get('category', 'unknown').replace('_', ' ')} β’ {action.get('constraints', ['No constraints'])[0]}
"""
    # Fallback precomputed so the main f-string stays simple.
    if not actions_html:
        actions_html = """
No ranked actions available
"""
    # Combine all panels
    full_html = f"""
π― Formal HealingIntent Sequence
Policy-generated intents with preconditions, contraindications, and reversibility statements
DOCTRINAL COMPLIANCE v3.3.9+
{actions_html}
π Doctrinal Sequencing Enforcement
1
Dampening
First in sequence
2
Concurrency
Then control
4
Scale
Only if necessary
Doctrinal Constraint: Scaling actions have lower confidence than dampening actions and appear last.
If retry amplification is detected, scaling is contraindicated entirely.
"""
    return gr.HTML(full_html)
# -----------------------------
# Tab 2: Business ROI - Updated
# -----------------------------
def create_tab2_business_roi(scenarios=INCIDENT_SCENARIOS) -> tuple:
    """Build the Business ROI tab components.

    Args:
        scenarios: scenario mapping used to populate the ROI dropdown.
            # assumes "Cache Miss Storm" is a key in scenarios -- TODO
            # confirm for registry-loaded data.

    Returns:
        7-tuple (dashboard_output, roi_scenario_dropdown, monthly_slider,
        team_slider, calculate_btn, roi_output, roi_chart); order must match
        the wiring in app.py. Creation order also fixes on-page layout order.
    """
    dashboard_output = gr.Plot(label="Executive Dashboard", show_label=True)
    roi_scenario_dropdown = gr.Dropdown(
        choices=list(scenarios.keys()),
        value="Cache Miss Storm",
        label="Scenario for ROI Analysis",
        info="Select the primary incident type for ROI calculation"
    )
    monthly_slider = gr.Slider(
        minimum=1,
        maximum=50,
        value=15,
        step=1,
        label="Monthly Incidents",
        info="Average number of incidents per month"
    )
    team_slider = gr.Slider(
        minimum=1,
        maximum=50,
        value=5,
        step=1,
        label="Team Size",
        info="Number of engineers on reliability team"
    )
    calculate_btn = gr.Button("π Calculate Comprehensive ROI", variant="primary", size="lg")
    roi_output = gr.JSON(label="ROI Analysis Results", value={})
    roi_chart = gr.Plot(label="ROI Visualization")
    return (dashboard_output, roi_scenario_dropdown, monthly_slider, team_slider,
            calculate_btn, roi_output, roi_chart)
# -----------------------------
# Tab 3: Enterprise Features - UPDATED WITH INSTALLATION STATUS
# -----------------------------
def create_tab3_enterprise_features() -> tuple:
    """Build the Enterprise Features tab.

    Queries app.get_installation_status() to render live package/licensing
    state, and falls back to static placeholder data when the check is
    unavailable or returns an unexpected shape.

    Returns:
        Tuple of (license_display, validate_btn, trial_btn, upgrade_btn,
        mcp_mode, mcp_mode_info, features_table, integrations_table).
    """
    # Get installation status
    try:
        # Imported lazily: app.py imports this module, so a top-level import
        # would create a circular dependency.
        from app import get_installation_status
        installation = get_installation_status()
        license_data = {
            "status": "β OSS Installed" if installation["oss_installed"] else "β οΈ OSS Not Installed",
            "oss_version": installation["oss_version"] or "Not installed",
            "enterprise_installed": installation["enterprise_installed"],
            "enterprise_version": installation["enterprise_version"] or "Not installed",
            "execution_allowed": installation["execution_allowed"],
            "recommendations": installation["recommendations"],
            "badges": installation["badges"]
        }
        # Update features table based on installation
        features_data = [
            ["ARF OSS Package", "β Installed" if installation["oss_installed"] else "β Not Installed", "OSS"],
            ["Self-Healing Core", "β Active", "Enterprise"],
            ["RAG Graph Memory", "β Active", "Both"],
            ["Predictive Analytics", "π Enterprise" if not installation["enterprise_installed"] else "β Available", "Enterprise"],
            ["Audit Trail", "π Enterprise" if not installation["enterprise_installed"] else "β Available", "Enterprise"],
            ["Compliance (SOC2)", "π Enterprise" if not installation["enterprise_installed"] else "β Available", "Enterprise"]
        ]
    except (ImportError, KeyError) as exc:
        # BUGFIX: only ImportError was handled before, so a missing key in the
        # installation dict crashed the whole tab even though the comment said
        # "if installation check fails". Catch KeyError too and log why we
        # fell back to the static placeholders.
        logger.warning("Installation check failed, using fallback data: %s", exc)
        license_data = {
            "status": "β οΈ Installation Check Failed",
            "oss_version": "Unknown",
            "enterprise_installed": False,
            "recommendations": ["Run installation check"]
        }
        features_data = [
            ["Self-Healing Core", "β Active", "Enterprise"],
            ["RAG Graph Memory", "β Active", "Both"],
            ["Predictive Analytics", "π Enterprise", "Enterprise"],
            ["Audit Trail", "π Enterprise", "Enterprise"],
            ["Compliance (SOC2)", "π Enterprise", "Enterprise"],
            ["Multi-Cloud", "π Enterprise", "Enterprise"]
        ]
    license_display = gr.JSON(
        value=license_data,
        label="π¦ Package Installation Status"
    )
    validate_btn = gr.Button("π Validate Installation", variant="secondary")
    trial_btn = gr.Button("π Start 30-Day Trial", variant="secondary")
    upgrade_btn = gr.Button("π Upgrade to Enterprise", variant="primary")
    mcp_mode = gr.Dropdown(
        choices=["advisory", "approval", "autonomous"],
        value="advisory",
        label="MCP Safety Mode"
    )
    # Initial mode info shown for the default "advisory" selection.
    # NOTE(review): the package pin (3.3.7) lags the doctrinal version string
    # used elsewhere in this module (3.3.9) — confirm which is authoritative.
    mcp_mode_info = gr.JSON(
        value={
            "current_mode": "advisory",
            "description": "OSS Edition - Analysis only, no execution",
            "features": ["Incident analysis", "RAG similarity", "HealingIntent creation"],
            "package": "agentic-reliability-framework==3.3.7",
            "license": "Apache 2.0"
        },
        label="Mode Details"
    )
    # Integration rows are static display data (not derived from installation).
    integrations_data = [
        ["Prometheus", "β Connected", "Monitoring"],
        ["Grafana", "β Connected", "Visualization"],
        ["Slack", "π Enterprise", "Notifications"],
        ["PagerDuty", "π Enterprise", "Alerting"],
        ["Jira", "π Enterprise", "Ticketing"],
        ["Datadog", "π Enterprise", "Monitoring"]
    ]
    features_table = gr.Dataframe(
        headers=["Feature", "Status", "Edition"],
        value=features_data,
        label="Feature Comparison"
    )
    integrations_table = gr.Dataframe(
        headers=["Integration", "Status", "Type"],
        value=integrations_data,
        label="Integration Status"
    )
    return (license_display, validate_btn, trial_btn, upgrade_btn,
            mcp_mode, mcp_mode_info, features_table, integrations_table)
# -----------------------------
# Tab 4: Audit Trail
# -----------------------------
def create_tab4_audit_trail() -> tuple:
    """Build the Audit Trail tab: action buttons plus empty history tables.

    Returns:
        Tuple of (refresh_btn, clear_btn, export_btn, execution_table,
        incident_table, export_text) for app.py to wire up.
    """
    # Controls for managing the audit history.
    refresh_btn = gr.Button("π Refresh Audit Trail", variant="secondary")
    clear_btn = gr.Button("ποΈ Clear History", variant="secondary")
    export_btn = gr.Button("π₯ Export as JSON", variant="primary")

    # Tables start empty; rows are presumably populated by the refresh
    # handler in app.py — confirm against the caller.
    execution_table = gr.Dataframe(
        headers=["Time", "Scenario", "Mode", "Status", "Savings", "Details"],
        value=[],
        label="Execution History",
    )
    incident_table = gr.Dataframe(
        headers=["Time", "Component", "Scenario", "Severity", "Status"],
        value=[],
        label="Incident History",
    )
    export_text = gr.JSON(value={"status": "Export ready"}, label="Export Data")

    return (refresh_btn, clear_btn, export_btn, execution_table, incident_table, export_text)
# -----------------------------
# Tab 5: Learning Engine
# -----------------------------
def create_tab5_learning_engine() -> tuple:
    """Build the Learning Engine tab: RAG memory graph, pattern search, and stats panels.

    Returns:
        Tuple of (learning_graph, graph_type, show_labels, search_query,
        search_btn, clear_btn_search, search_results, stats_display,
        patterns_display, performance_display) in the order app.py expects.
    """
    # Graph visualization and its display options.
    learning_graph = gr.Plot(label="RAG Memory Graph")
    graph_type = gr.Dropdown(
        choices=["Incident Patterns", "Action-Outcome Chains", "System Dependencies"],
        value="Incident Patterns",
        label="Graph Type",
    )
    show_labels = gr.Checkbox(label="Show Labels", value=True)

    # Pattern search controls and result panel.
    search_query = gr.Textbox(label="Search Patterns", placeholder="Enter pattern to search...")
    search_btn = gr.Button("π Search Patterns", variant="secondary")
    clear_btn_search = gr.Button("ποΈ Clear Search", variant="secondary")
    search_results = gr.JSON(value={"status": "Ready for search"}, label="Search Results")

    # Panels initialized with placeholder figures.
    stats_display = gr.JSON(
        value={"patterns": 42, "incidents": 156, "success_rate": "87.3%"},
        label="Learning Statistics",
    )
    patterns_display = gr.JSON(
        value={"common_patterns": ["cache_storm", "db_pool", "memory_leak"]},
        label="Pattern Library",
    )
    performance_display = gr.JSON(
        value={"accuracy": "94.2%", "recall": "89.7%", "precision": "92.1%"},
        label="Agent Performance",
    )

    return (learning_graph, graph_type, show_labels, search_query, search_btn,
            clear_btn_search, search_results, stats_display, patterns_display, performance_display)
# -----------------------------
# Footer
# -----------------------------
def create_footer() -> gr.HTML:
    """Return the static footer markup shown at the bottom of the app."""
    footer_markup = """
Agentic Reliability Framework Β© 2026
Production-grade multi-agent AI for autonomous system reliability intelligence
"""
    return gr.HTML(footer_markup)