diff --git "a/ui/components.py" "b/ui/components.py"
deleted file mode 100644
--- "a/ui/components.py"
+++ /dev/null
@@ -1,2104 +0,0 @@
-"""
-Gradio-only UI components for ARF
-Ensures full compatibility with app.py
-NOW WITH DOCTRINAL COMPLIANCE: Psychological Advantage Enforcement
-UPDATED: Language discipline, observation gate rendering, recall panel dominance
-UPDATED: Metric discipline, sequencing display, no early "critical" terminology
-DOCTRINAL VERSION: 3.3.9+restraint
-"""
-
-import gradio as gr
-from typing import Dict, List, Any
-import logging
-import datetime
-import time
-import plotly.graph_objects as go
-import plotly.express as px
-import numpy as np
-
-logger = logging.getLogger(__name__)
-
-# Populate INCIDENT_SCENARIOS: prefer the scenario registry, and fall back to
-# the bundled demo scenarios when the registry cannot be imported.
-try:
- from config.scenario_registry import ScenarioRegistry
- INCIDENT_SCENARIOS = ScenarioRegistry.load_scenarios()
- logger.info(f"Loaded {len(INCIDENT_SCENARIOS)} scenarios from registry")
-except ImportError:
- # Only ImportError triggers the fallback; other exception types raised while
- # loading the registry are not caught here and propagate out of module import.
- logger.warning("Scenario registry not available, falling back to demo scenarios")
- from demo.scenarios import INCIDENT_SCENARIOS
-
-# -----------------------------
-# Header & Status - DOCTRINAL LANGUAGE
-# -----------------------------
-def create_header(version="3.3.9") -> gr.HTML:
- return gr.HTML(f"""
-
-
-
- β οΈ
-
-
-
- Observation Gate: Awaiting confirmation.
-
-
- System restraint engaged
-
-
-
-
-
-
-
βΈοΈ
-
-
- Decision Intentionally Deferred
-
-
- The system has detected uncertainty ({confidence:.1f}% confidence)
- and has chosen to observe rather than act.
- Historical evidence indicates premature action increases risk by 47%, so the system is enforcing an observation-first policy.
-
-
-
-
-
-
-
-
- Confidence Threshold
-
-
- 70.0%
-
-
- Required for action
-
-
-
-
-
- Current Confidence
-
-
- {confidence:.1f}%
-
-
- Below threshold β Observe
-
-
-
-
-
-
-
-
-
- Observe ({confidence:.1f}%)
- Threshold (70%)
- Act (75%+)
-
-
-
- """
- else:
- html_content = f"""
-
-
-
- β
-
-
-
- OBSERVATION GATE CLEARED
-
-
- Confidence threshold met
-
-
-
-
-
-
-
β‘
-
-
- Proceed with Policy Action
-
-
- Confidence exceeds threshold. System may proceed with sequenced actions.
- Historical evidence will be consulted before any execution.
-
-
-
-
-
-
-
-
- Confidence Threshold
-
-
- 70.0%
-
-
- Required for action
-
-
-
-
-
- Current Confidence
-
-
- {confidence:.1f}%
-
-
- Above threshold β Proceed
-
-
-
-
-
-
- β
Safety Principle
-
-
- "When confidence is high, proceed with sequenced actions. Each step is validated
- against historical evidence before proceeding to the next."
-
-
-
-
-
-
- Ready ({confidence:.1f}%)
- Threshold (70%)
- Proceed
-
-
-
- """
-
- return gr.HTML(html_content)
-
-def create_sequencing_visualization() -> gr.HTML:
- """Return the static sequencing panel showing dampening-first progression.
-
- Takes no arguments and wraps one fixed string in gr.HTML; nothing is
- computed at call time. The text lists four numbered steps (Dampening,
- Concurrency, Observe, Scale) and a "Current Sequence State" footer whose
- counters are literal placeholders ("Sequence: 0/4", "Time: 0s").
-
- Doctrinal: Shows sequencing as policy, not reaction.
-
- Returns:
- A gr.HTML component holding the panel text.
- """
- return gr.HTML("""
-
-
-
-
- π Doctrinal Sequencing: Policy Over Reaction
-
-
- System enforces sequencing regardless of prediction confidence
-
-
-
- POLICY ENFORCED
-
-
-
-
-
-
-
-
-
-
-
- 1
-
-
Dampening
-
Prevent amplification first
-
- REQUIRED
-
-
-
-
-
β
-
-
-
-
- 2
-
-
Concurrency
-
Manage load, then observe
-
- REQUIRED
-
-
-
-
-
β
-
-
-
-
- 3
-
-
Observe
-
Validate trends for 5+ minutes
-
- REQUIRED
-
-
-
-
-
β
-
-
-
-
- 4
-
-
Scale
-
Only if all previous succeed
-
- OPTIONAL
-
-
-
-
-
-
-
-
π―
-
-
- Doctrinal Constraint: Scaling Cannot Appear First
-
-
- If retry amplification is detected, scaling is contraindicated entirely.
- The system must observe stabilization before considering capacity increases.
- Historical evidence shows scaling-first fails 76% of the time during amplification.
-
-
-
-
-
-
-
-
-
Current Sequence State
-
Waiting for detection process
-
-
-
- π Sequence: 0/4
-
-
- β±οΈ Time: 0s
-
-
-
-
- """)
-
-def create_detection_display() -> gr.HTML:
- """Return the static detection-process panel.
-
- Takes no arguments. Every value shown (pattern name, "0.8 seconds",
- the severity label, the metric/latency/pattern counts) is literal text in
- the string below; nothing is read from telemetry at call time.
-
- Returns:
- A gr.HTML component holding the panel text.
- """
- return gr.HTML("""
-
-
-
-
- π΅οΈββοΈ
-
-
-
Detection Process
-
Telemetry analysis & pattern recognition
-
-
-
- STATUS: ACTIVE
-
-
-
-
-
-
Pattern Match
-
Retry Amplification
-
-
-
-
Detection Time
-
0.8 seconds
-
-
-
Severity
-
HIGH_VARIANCE
-
-
-
-
-
- β
Detected: Retry amplification pattern with exponential growth (r=1.8)
-
-
- Telemetry shows request rate doubling every 45 seconds. System flagged for sequencing enforcement.
-
-
-
-
-
-
Next Step:
-
Activate recall process
-
-
- π 12 metrics
- β±οΈ 0.8s latency
- π 3 patterns
-
-
-
- """)
-
-def create_recall_display() -> gr.HTML:
- """Return the static recall-process panel (historical evidence dominance).
-
- Takes no arguments. The two failure entries, the two success entries, the
- "76% failure rate" / "92%" figures and the evidence weights are literal
- text in the string below, not looked up from any incident store.
-
- Returns:
- A gr.HTML component holding the panel text.
- """
- return gr.HTML("""
-
-
-
-
- π§ 
-
-
-
Recall Process
-
Historical evidence & pattern matching
-
-
-
- STATUS: ACTIVE
-
-
-
-
-
- Recall Dominance: Evidence > Prediction
-
-
-
-
-
-
- β Scaling-First Failures
-
-
-
-
-
2024-11-15 β’ prod-east
-
FAILED
-
-
- Action: Scale during retry storm
-
-
- Outcome: Amplification increased 300%
-
-
- "Scaling during amplification worsens the problem"
-
-
-
-
-
-
2024-09-22 β’ staging
-
FAILED
-
-
- Action: Add capacity without dampening
-
-
- Outcome: 45 min outage, $8.2K loss
-
-
- "New capacity consumed by amplification loop"
-
-
-
-
-
-
-
- β
Dampening-First Successes
-
-
-
-
-
2024-12-03 β’ prod-west
-
SUCCESS
-
-
- Action: Request coalescing + backoff
-
-
- Outcome: Resolved in 8 min, $5.1K saved
-
-
- "Dampening broke amplification cycle"
-
-
-
-
-
-
2024-10-17 β’ prod-eu
-
SUCCESS
-
-
- Action: Circuit breaker + observability
-
-
- Outcome: 12 min recovery, 0 user impact
-
-
- "Sequencing prevented escalation"
-
-
-
-
-
-
-
-
- π― RECALL DECISION: Scaling contraindicated due to historical evidence
-
-
- Historical evidence (76% failure rate) dominates predictive confidence (92%).
- System will enforce dampening-first sequencing.
-
-
-
-
-
-
Evidence Weight:
-
Historical: 85% β’ Predictive: 15%
-
-
- π 8 incidents
- π― 76% failure rate
- π recall dominance
-
-
-
- """)
-
-def create_decision_display() -> gr.HTML:
- """Return the static decision-process panel.
-
- Takes no arguments. The confidence figure ("87.3%"), primary action,
- sequencing rule, preconditions, contraindications and reversibility notes
- are literal text in the string below; no HealingIntent object is read.
-
- Returns:
- A gr.HTML component holding the panel text.
- """
- return gr.HTML("""
-
-
-
-
- π―
-
-
-
Decision Process
-
HealingIntent creation & sequencing
-
-
-
- STATUS: ACTIVE
-
-
-
-
-
-
-
-
Formal HealingIntent Created
-
Preconditions checked, contraindications listed
-
-
- CONFIDENCE: 87.3%
-
-
-
-
-
-
Primary Action
-
-
- Implement request coalescing with exponential backoff (jitter: 25%)
-
-
-
-
-
-
Sequencing Rule
-
-
- dampening_first_then_observe_then_optional_scale
-
-
-
-
-
-
-
-
Preconditions
-
β’ Retry amplification detected
β’ Confidence > 70%
β’ No scaling contraindicated
-
-
-
-
Contraindications
-
β Scale during retry storm
β Add capacity immediately
β Restart during amplification
-
-
-
-
Reversibility
-
β
Backoff can be adjusted
β
Coalescing can be disabled
β
No stateful changes
-
-
-
-
-
-
-
-
β³
-
-
- Observation Gate: Awaiting confirmation
-
-
- System will observe for 5 minutes before proceeding to execution
-
-
-
-
-
-
-
- 0%
- Threshold: 70%
- 100%
-
-
-
-
-
-
Next Step:
-
Wait for observation gate clearance
-
-
- π formal intent
- π sequenced
- π― 87% confidence
-
-
-
- """)
-
-def create_oss_advisory_section() -> gr.HTML:
- """Return the static OSS advisory panel (policy vs execution boundary).
-
- Takes no arguments. The "94% confidence" figure, the recommended action,
- the "87% success rate" pattern match and the sequencing rule are literal
- text in the string below.
-
- Returns:
- A gr.HTML component holding the panel text.
- """
- return gr.HTML("""
-
-
-
π
-
-
Policy Edition
-
Analysis & Advisory Only - Apache 2.0
-
-
- PERMANENTLY SAFE
-
-
-
-
-
-
π HealingIntent Created
- 94% confidence
-
-
-
-
-
- β
-
-
-
Action Recommended
-
Implement request coalescing with exponential backoff
-
-
-
-
-
- π§ 
-
-
-
Pattern Match
-
Similar incident resolved with dampening (87% success rate)
-
-
-
-
-
- β οΈ
-
-
-
Contraindications
-
β
Checked (retry amplification detected)
-
-
-
-
-
- π
-
-
-
Sequencing Rule
-
dampening_first_then_observe_then_optional_scale
-
-
-
-
-
-
-
-
-
- π« OSS STOPS HERE - No Execution
-
-
-
-
-
- OSS provides policy advice only. Enterprise edition required for execution.
-
-
- This architectural boundary ensures safety by design.
-
-
-
-
- """)
-
-def create_timeline_comparison_plot() -> go.Figure:
- """Creates timeline comparison plot for incident resolution."""
- fig = go.Figure()
-
- # Timeline data
- manual_times = [0, 5, 15, 30, 45, 60]
- manual_users = [45000, 45000, 42000, 35000, 20000, 5000]
-
- arf_times = [0, 0.8, 1.5, 3, 8, 12]
- arf_users = [45000, 45000, 45000, 42000, 15000, 0]
-
- # Add traces
- fig.add_trace(go.Scatter(
- x=manual_times,
- y=manual_users,
- mode='lines+markers',
- name='Manual Resolution',
- line=dict(color='#ef4444', width=3, dash='dash'),
- marker=dict(size=8, color='#ef4444'),
- hovertemplate='Time: %{x}min
Users Affected: %{y:,}
'
- ))
-
- fig.add_trace(go.Scatter(
- x=arf_times,
- y=arf_users,
- mode='lines+markers',
- name='ARF Policy Execution',
- line=dict(color='#10b981', width=4),
- marker=dict(size=10, color='#10b981'),
- hovertemplate='Time: %{x}min
Users Affected: %{y:,}
'
- ))
-
- # Add vertical lines for key events
- fig.add_vline(x=0.8, line_width=2, line_dash="dot", line_color="#3b82f6",
- annotation_text="Detection", annotation_position="top right")
- fig.add_vline(x=1.5, line_width=2, line_dash="dot", line_color="#8b5cf6",
- annotation_text="Recall", annotation_position="top right")
- fig.add_vline(x=3, line_width=2, line_dash="dot", line_color="#f59e0b",
- annotation_text="Decision", annotation_position="top right")
- fig.add_vline(x=8, line_width=2, line_dash="dot", line_color="#10b981",
- annotation_text="Resolution", annotation_position="top right")
-
- # Update layout
- fig.update_layout(
- title=dict(
- text="β° Incident Timeline: Manual vs Policy Execution",
- font=dict(size=18, color='#1e293b'),
- x=0.5
- ),
- xaxis=dict(
- title="Time (minutes)",
- gridcolor='#e2e8f0',
- tickformat=',d'
- ),
- yaxis=dict(
- title="Users Affected",
- gridcolor='#e2e8f0',
- tickformat=','
- ),
- plot_bgcolor='white',
- paper_bgcolor='white',
- font=dict(family="Inter, sans-serif", color="#475569"),
- hovermode='x unified',
- legend=dict(
- orientation="h",
- yanchor="bottom",
- y=1.02,
- xanchor="center",
- x=0.5,
- bgcolor='rgba(255, 255, 255, 0.8)',
- bordercolor='#e2e8f0',
- borderwidth=1
- ),
- margin=dict(l=50, r=50, t=80, b=50),
- height=400
- )
-
- # Add annotation for savings
- fig.add_annotation(
- x=12,
- y=0,
- text="π 73% faster resolution
π° $6.3K saved",
- showarrow=True,
- arrowhead=2,
- arrowsize=1,
- arrowwidth=2,
- arrowcolor="#10b981",
- ax=50,
- ay=-50,
- bgcolor="#f0fdf4",
- bordercolor="#10b981",
- borderwidth=2,
- font=dict(size=12, color="#065f46")
- )
-
- return fig
-
-# -----------------------------
-# NEW: Observation Gate Renderer - CRITICAL PSYCHOLOGICAL FIX
-# -----------------------------
-def render_observation_gate(healing_intent: Dict[str, Any]) -> gr.HTML:
- """
- Render observation gate state as active restraint, not passive waiting.
- Doctrinal: Make inaction an explicit, powerful decision.
- """
- deferral_reason = healing_intent.get("deferral_reason", "uncertainty_too_high_for_action")
- frozen_until = healing_intent.get("decision_frozen_until", "")
- confidence = healing_intent.get("confidence", 0.0)
-
- # Parse timestamp for countdown
- countdown_text = ""
- if frozen_until:
- try:
- frozen_dt = datetime.datetime.fromisoformat(frozen_until.replace("Z", "+00:00"))
- now = datetime.datetime.now(datetime.timezone.utc)
- if frozen_dt.tzinfo is None:
- frozen_dt = frozen_dt.replace(tzinfo=datetime.timezone.utc)
- time_left = frozen_dt - now
- minutes_left = max(0, int(time_left.total_seconds() / 60))
- countdown_text = f"{minutes_left}m"
- except:
- countdown_text = "5m"
-
- return gr.HTML(f"""
-
-
-
β³
-
-
- Decision Intentionally Deferred
-
-
- System state: observe_only β’ Confidence: {confidence:.1%}
-
-
-
- ACTIVE RESTRAINT
-
-
-
-
-
-
REASON FOR DEFERRAL
-
- {deferral_reason.replace('_', ' ').title()}
-
-
-
-
-
NEXT EVALUATION
-
- System re-evaluates in: {countdown_text}
-
-
-
-
-
-
-
π―
-
-
- This is a System Choice, Not a Limitation
-
-
- The system is choosing not to act because uncertainty exceeds policy thresholds.
- This restraint demonstrates operational maturityβeagerness is a liability in production.
-
- "What you are seeing is not waiting. It is judgment under uncertainty."
-
-
-
-
-
-
-
PREVENTED ACTIONS (CONTRANDICATED)
-
- scale_during_retry_storm
- add_capacity_during_amplification
- any_action_during_high_uncertainty
-
-
-
- """)
-
-# -----------------------------
-# NEW: Historical Evidence Panel - RECALL DOMINANCE
-# -----------------------------
-def create_historical_evidence_panel(scenario_data: Dict[str, Any]) -> gr.HTML:
- """
- Create doctrinally compliant historical evidence panel.
- Must be visually dominant with dates/environments.
- """
- # Extract from scenario or use defaults
- historical_panel = scenario_data.get("historical_evidence_panel", {})
-
- scaling_failures = historical_panel.get("scaling_first_failures", [])
- dampening_successes = historical_panel.get("dampening_first_successes", [])
-
- # Build failures HTML
- failures_html = ""
- for i, failure in enumerate(scaling_failures[:3]): # Show top 3
- failures_html += f"""
-
-
-
{failure.get('date', 'Unknown')} β’ {failure.get('environment', 'Unknown')}
-
FAILED
-
-
- Action: {failure.get('action', 'Unknown')}
-
-
- Outcome: {failure.get('outcome', 'Unknown')}
-
-
- {failure.get('lesson', 'No lesson captured')}
-
-
- """
-
- # Build successes HTML
- successes_html = ""
- for i, success in enumerate(dampening_successes[:3]): # Show top 3
- successes_html += f"""
-
-
-
{success.get('date', 'Unknown')} β’ {success.get('environment', 'Unknown')}
-
SUCCESS
-
-
- Action: {success.get('action', 'Unknown')}
-
-
- Outcome: {success.get('outcome', 'Unknown')}
-
-
- {success.get('lesson', 'No lesson captured')}
-
-
- """
-
- return gr.HTML(f"""
-
-
-
-
- π§ Historical Evidence (Why Sequencing Matters)
-
-
- Real outcomes from similar incidentsβthis evidence dominates decision logic
-
-
-
- Historical evidence outweighs model confidence.
-
-
-
-
-
-
- β Scaling-First Failures
-
- {failures_html if failures_html else """
-
-
π
-
Scaling-First Failures (Evidence Present)
-
- """}
-
-
-
-
- β
Dampening-First Successes
-
- {successes_html if successes_html else """
-
-
π
-
Dampening-First Successes (Evidence Present)
-
- """}
-
-
-
-
-
-
π―
-
-
- If history shows failure, the system will not repeat it.
-
-
- The system prioritizes historical evidence over predictive confidence.
- If scaling-first failed in similar conditions, scaling is contraindicated regardless of model confidence.
-
- "What happened is more important than what might happen."
-
-
-
-
-
- """)
-
-# -----------------------------
-# Performance Metrics Function - DOCTRINAL METRICS
-# -----------------------------
-# NOTE: update_performance_metrics must return exactly what app.py expects.
-def update_performance_metrics(scenario_name: str, scenarios=INCIDENT_SCENARIOS) -> tuple:
- """
- Update performance metrics based on scenario
- Returns: (detection_time_html, recall_quality_html, confidence_score_html, sequencing_stage_html)
- MUST MATCH what app.py demo_btn.click() expects!
- """
-
- # Scenario-specific metrics mapping WITH GRADIENT COLORS
- metrics_config = {
- "Cache": {
- "detection_time": ("45s", "89% faster", "linear-gradient(135deg, #3b82f6 0%, #1d4ed8 100%)"),
- "recall_quality": ("92%", "85% accuracy", "linear-gradient(135deg, #10b981 0%, #047857 100%)"),
- "confidence_score": ("87%", "High certainty", "linear-gradient(135deg, #8b5cf6 0%, #7c3aed 100%)"),
- "sequencing_stage": ("Dampening", "Step 1/4", "linear-gradient(135deg, #f59e0b 0%, #d97706 100%)"),
- "savings_multiplier": 0.85
- },
- "Database": {
- "detection_time": ("38s", "91% faster", "linear-gradient(135deg, #3b82f6 0%, #1d4ed8 100%)"),
- "recall_quality": ("89%", "82% accuracy", "linear-gradient(135deg, #10b981 0%, #047857 100%)"),
- "confidence_score": ("84%", "High certainty", "linear-gradient(135deg, #8b5cf6 0%, #7c3aed 100%)"),
- "sequencing_stage": ("Concurrency", "Step 2/4", "linear-gradient(135deg, #f59e0b 0%, #d97706 100%)"),
- "savings_multiplier": 0.82
- },
- "Kubernetes": {
- "detection_time": ("52s", "87% faster", "linear-gradient(135deg, #3b82f6 0%, #1d4ed8 100%)"),
- "recall_quality": ("91%", "84% accuracy", "linear-gradient(135deg, #10b981 0%, #047857 100%)"),
- "confidence_score": ("86%", "High certainty", "linear-gradient(135deg, #8b5cf6 0%, #7c3aed 100%)"),
- "sequencing_stage": ("Observe", "Step 3/4", "linear-gradient(135deg, #f59e0b 0%, #d97706 100%)"),
- "savings_multiplier": 0.83
- },
- "Network": {
- "detection_time": ("28s", "93% faster", "linear-gradient(135deg, #3b82f6 0%, #1d4ed8 100%)"),
- "recall_quality": ("94%", "88% accuracy", "linear-gradient(135deg, #10b981 0%, #047857 100%)"),
- "confidence_score": ("89%", "High certainty", "linear-gradient(135deg, #8b5cf6 0%, #7c3aed 100%)"),
- "sequencing_stage": ("Scale", "Step 4/4", "linear-gradient(135deg, #f59e0b 0%, #d97706 100%)"),
- "savings_multiplier": 0.88
- },
- "Default": {
- "detection_time": ("42s", "90% faster", "linear-gradient(135deg, #3b82f6 0%, #1d4ed8 100%)"),
- "recall_quality": ("90%", "85% accuracy", "linear-gradient(135deg, #10b981 0%, #047857 100%)"),
- "confidence_score": ("85%", "High certainty", "linear-gradient(135deg, #8b5cf6 0%, #7c3aed 100%)"),
- "sequencing_stage": ("Dampening", "Step 1/4", "linear-gradient(135deg, #f59e0b 0%, #d97706 100%)"),
- "savings_multiplier": 0.85
- }
- }
-
- # Determine scenario type
- scenario_type = "Default"
- for key in ["Cache", "Database", "Kubernetes", "Network", "Storage"]:
- if key.lower() in scenario_name.lower():
- scenario_type = key
- break
-
- # Get metrics for scenario type
- metrics = metrics_config.get(scenario_type, metrics_config["Default"])
-
- # Create HTML for each metric card WITH GRADIENT BORDERS
- detection_time_html = f"""
-
-
β±οΈ
-
-
Detection Time
-
{metrics['detection_time'][0]}
-
{metrics['detection_time'][1]} than baseline
-
-
- """
-
- recall_quality_html = f"""
-
-
π§
-
-
Recall Quality
-
{metrics['recall_quality'][0]}
-
{metrics['recall_quality'][1]}
-
-
- """
-
- confidence_score_html = f"""
-
-
π―
-
-
Confidence Score
-
{metrics['confidence_score'][0]}
-
{metrics['confidence_score'][1]}
-
-
- """
-
- sequencing_stage_html = f"""
-
-
π
-
-
Sequencing Stage
-
{metrics['sequencing_stage'][0]}
-
{metrics['sequencing_stage'][1]}
-
-
- """
-
- logger.info(f"β
Updated performance metrics for {scenario_name} ({scenario_type} type)")
- return detection_time_html, recall_quality_html, confidence_score_html, sequencing_stage_html
-
-# NOTE: create_tab1_incident_demo must use the same metric variable names
-# (detection_time, recall_quality, confidence_score, sequencing_stage).
-
-def create_tab1_incident_demo():
- """
- Creates Live Demo Tab components with psychological advantage baked in.
- Returns EXACTLY 26 values matching app.py expectations.
-
- Canonical Question: "What should we do right now?" (Policy advice only)
- No business metrics allowed in this tab.
- """
- import gradio as gr
-
- # 1. Core Scenario Selection (Psychological: User controls context)
- scenario_dropdown = gr.Dropdown(
- choices=["Retry Storm Amplification", "Cascading Dependency Failure",
- "Partial Regional Outage", "Latent Configuration Drift"],
- value="Retry Storm Amplification",
- label="Select Incident Scenario",
- info="Choose the operational context for policy evaluation"
- )
-
- # 2. Historical Evidence Panel (DOMINANT VISUALLY - psychological advantage)
- historical_panel = gr.DataFrame(
- value=[
- ["2024-03-15", "US-East", "Retry Storm", "Dampen β Observe β Scale", "Contained in 42s"],
- ["2024-02-28", "EU-West", "Cascading Failure", "Dampen β Isolate β Repair", "Contained in 3m 18s"],
- ["2024-01-12", "AP-South", "Config Drift", "Observe β Diagnose β Fix", "Prevented outage"]
- ],
- headers=["Date", "Environment", "Pattern", "Sequence Applied", "Outcome"],
- label="π Historical Evidence Dominates Models",
- interactive=False,
- height=200
- )
-
- # 3. Scenario Context Card
- scenario_card = gr.Markdown("""
- ### Scenario Context: Retry Storm Amplification
- **What we're seeing:** Client retries triggering backend amplification (8x retry multiplier)
- **Current state:** 42% error rate, rising at 3.2%/minute
- **Risk profile:** Service degradation β Partial outage in 8-12 minutes
- """)
-
- # 4. Telemetry Visualization (Psychological: Show what's knowable)
- telemetry_viz = gr.Plot(
- value=create_telemetry_plot(),
- label="Real-time Telemetry: Error Rate & Load"
- )
-
- # 5. Business Impact Visualization (Psychological: Show consequences)
- impact_viz = gr.Plot(
- value=create_impact_plot(),
- label="Predicted Impact Trajectory"
- )
-
- # 6. OBSERVATION GATE (CRITICAL PSYCHOLOGICAL ELEMENT)
- observation_gate_placeholder = create_observation_gate_placeholder()
-
- # 7. SEQUENCING VISUALIZATION (Policy, not reaction)
- sequencing_panel = create_sequencing_visualization()
-
- # 8. Workflow Header
- workflow_header = gr.Markdown("### Policy Execution Sequence")
-
- # 9-11. PROCESS DISPLAYS (Doctrinal: "Process" not "Agent")
- detection_process = create_detection_display("DETECTION_PROCESS")
- recall_process = create_recall_display("RECALL_PROCESS")
- decision_process = create_decision_display("DECISION_PROCESS")
-
- # 12-14. OSS/Enterprise Boundary
- oss_section = gr.Markdown("#### π Open Source Capabilities")
- enterprise_section = gr.Markdown("#### π Enterprise Governance")
- oss_btn = gr.Button("View OSS Implementation", variant="secondary", size="sm")
- enterprise_btn = gr.Button("Requires Enterprise License", variant="secondary", size="sm")
-
- # 15-17. Approval & MCP
- approval_toggle = gr.Checkbox(
- label="Manual Approval Required",
- value=True,
- info="Policy: Human approval required for scaling actions during retry storms"
- )
- mcp_mode = gr.Radio(
- choices=["Policy Evaluation", "Manual Control", "Full Autonomous"],
- value="Policy Evaluation",
- label="Execution Mode"
- )
- timeline_viz = gr.Plot(value=create_timeline_plot())
-
- # 18-21. PERFORMANCE METRICS (CRITICAL: Must match app.py indices 18-21)
- detection_time = gr.Number(
- value=2.8,
- label="Detection Time (seconds)",
- precision=1
- )
-
- # INDEX 19: MUST BE recall_quality (not mttr)
- recall_quality = gr.Number(
- value=0.92,
- label="Recall Quality Score",
- precision=2,
- info="Historical match confidence"
- )
-
- # INDEX 20: MUST BE confidence_score (not auto_heal)
- confidence_score = gr.Number(
- value=0.65,
- label="System Confidence",
- precision=2,
- info="Policy execution confidence < 70% triggers observation gate"
- )
-
- # INDEX 21: MUST BE sequencing_stage (not savings)
- sequencing_stage = gr.Textbox(
- value="Dampening",
- label="Current Sequencing Stage",
- info="Policy: Dampen β Observe β Concurrent Fix β Scale"
- )
-
- # 22-24. Results Displays
- oss_results_display = gr.Markdown("### OSS Results Will Appear Here")
- enterprise_results_display = gr.Markdown("### Enterprise Results Will Appear Here")
- approval_display = gr.Markdown("**Status:** Awaiting manual approval per policy")
-
- # 25. DEMO BUTTON (CRITICAL: Index 25 must be demo_btn)
- demo_btn = gr.Button(
- "βΆοΈ Run Complete Walkthrough",
- variant="primary",
- size="lg",
- elem_id="demo_primary_btn"
- )
-
- # RETURN EXACTLY 26 VALUES IN CORRECT ORDER
- return (
- scenario_dropdown, historical_panel, scenario_card, telemetry_viz, impact_viz,
- observation_gate_placeholder, sequencing_panel, workflow_header,
- detection_process, recall_process, decision_process,
- oss_section, enterprise_section, oss_btn, enterprise_btn,
- approval_toggle, mcp_mode, timeline_viz,
- detection_time, recall_quality, confidence_score, sequencing_stage, # β CORRECT VARIABLES
- oss_results_display, enterprise_results_display, approval_display, demo_btn
- )
-# -----------------------------
-# Tab 1: Live Incident Demo - UPDATED WITH RESTORED FUNCTIONS
-# -----------------------------
-def create_tab1_incident_demo(scenarios=INCIDENT_SCENARIOS, default_scenario="Cache Miss Storm") -> tuple:
- """
- Create doctrinally compliant incident demo tab.
- Doctrinal: Language discipline, sequencing display, no early "critical"
-
- Builds a single gr.Row with three columns: scenario selection and plots
- on the left, the process workflow and execution controls in the middle,
- and metrics/results on the right.
-
- Args:
- scenarios: Mapping of scenario name to scenario data; its keys become
- the dropdown choices.
- default_scenario: Name of the scenario selected initially. If it is
- not a key of scenarios, an empty dict is used and the scenario card
- falls back to its default texts.
-
- Returns:
- A 26-tuple of Gradio components in the order listed in the return
- statement. Positions 18-21 are the empty gr.HTML placeholders
- detection_time, mttr, auto_heal and savings.
- """
-
- # Get the default scenario data
- default_scenario_data = scenarios.get(default_scenario, {})
- business_impact = default_scenario_data.get("business_impact", {})
- metrics = default_scenario_data.get("metrics", {})
-
- with gr.Row():
- # Left Column: Scenario Selection & Live Visualization
- with gr.Column(scale=1, variant="panel") as left_col:
- # Scenario Selection with rich preview
- scenario_dropdown = gr.Dropdown(
- choices=list(scenarios.keys()),
- value=default_scenario,
- label="π― Select Variance Scenario",
- info="Choose a production variance pattern to analyze",
- interactive=True,
- container=False
- )
-
- # ============ HISTORICAL EVIDENCE PANEL FIRST (RECALL DOMINANCE) ============
- historical_panel = create_historical_evidence_panel(default_scenario_data)
-
- # Scenario Card with doctrinally compliant language
- scenario_card = gr.HTML(f"""
-
-
-
π {default_scenario}
- {default_scenario_data.get('severity', 'HIGH_VARIANCE')}
-
-
-
- Component:
- {default_scenario_data.get('component', 'Unknown').replace('_', ' ').title()}
-
-
- Users Affected:
- {metrics.get('affected_users', 'Unknown') if 'affected_users' in metrics else 'Unknown'}
-
-
- Revenue Risk:
- ${business_impact.get('revenue_risk_per_hour', 0):,}/hour
-
-
- Detection Time:
- 45 seconds (Policy System)
-
-
- {default_scenario_data.get('component', 'unknown').split('_')[0]}
- variance
- production
- pattern
-
-
-
- """)
-
- # Visualization section
- # Both plots are created without a value; nothing in this function fills them.
- with gr.Row():
- with gr.Column(scale=1):
- telemetry_header = gr.Markdown("### π Live Telemetry")
- telemetry_viz = gr.Plot(
- label="",
- show_label=False,
- elem_id="telemetry_plot"
- )
-
- with gr.Column(scale=1):
- impact_header = gr.Markdown("### π° Business Impact")
- impact_viz = gr.Plot(
- label="",
- show_label=False,
- elem_id="impact_plot"
- )
-
- # Middle Column: Process Workflow (NOT Agent Workflow)
- with gr.Column(scale=2, variant="panel") as middle_col:
- # ============ OBSERVATION GATE PLACEHOLDER ============
- observation_gate_placeholder = create_observation_gate_placeholder()
-
- # ============ SEQUENCING VISUALIZATION ============
- sequencing_header = gr.Markdown("### π Sequencing Logic: Dampening β Concurrency β Observe β Scale")
- sequencing_panel = create_sequencing_visualization()
-
- # Process Workflow Header (NOT Agent Workflow)
- workflow_header = gr.Markdown("## π Policy Process Workflow")
- workflow_subheader = gr.Markdown("### How the system transforms variance into policy execution")
-
- # Process Status Cards (NOT Agent Status Cards)
- with gr.Row():
- detection_process = create_detection_display()
- recall_process = create_recall_display()
- decision_process = create_decision_display()
-
- # Mode Selection & Safety Controls
- with gr.Row():
- with gr.Column(scale=1):
- approval_toggle = gr.CheckboxGroup(
- choices=["π€ Require Human Approval"],
- label="Safety Controls",
- value=[],
- info="Toggle human oversight"
- )
-
- with gr.Column(scale=2):
- mcp_mode = gr.Radio(
- choices=["π‘οΈ Advisory (OSS Only)", "π₯ Approval", "β‘ Autonomous"],
- value="π‘οΈ Advisory (OSS Only)",
- label="Policy Safety Mode",
- info="Control execution safety level",
- interactive=True
- )
-
- # OSS vs Enterprise Boundary Visualization
- boundary_header = gr.Markdown("### π Policy vs Execution: The Safety Boundary")
-
- with gr.Row():
- oss_section = create_oss_advisory_section()
-
- enterprise_section = gr.HTML("""
-
-
-
π°
-
-
Execution Edition
-
Full Execution & Learning - Commercial
-
-
- REQUIRES LICENSE
-
-
-
-
-
-
β‘ Ready to Execute
- AUTONOMOUS
-
-
-
-
-
- βοΈ
-
-
-
Mode
-
Autonomous (Requires Enterprise license)
-
-
-
-
-
- β‘
-
-
-
Expected Recovery
-
12 minutes (vs 45 min manual)
-
-
-
-
-
- π°
-
-
-
Cost Avoided
-
$6,375
-
-
-
-
-
- π₯
-
-
-
Users Protected
-
45,000 β 0 impacted
-
-
-
-
-
-
-
-
-
- β
Enterprise executes with MCP safety
-
-
-
-
-
- Enterprise edition adds execution, learning, and safety guarantees.
-
-
- MCP safety modes: Advisory β Approval β Autonomous
-
-
-
-
- """)
-
- # Execution Controls
- with gr.Row():
- with gr.Column(scale=1):
- oss_btn = gr.Button(
- "π Run Policy Analysis",
- variant="secondary",
- size="lg"
- )
- oss_info = gr.Markdown("*Free, policy-only analysis*")
-
- with gr.Column(scale=1):
- enterprise_btn = gr.Button(
- "π° Execute Enterprise Healing",
- variant="primary",
- size="lg"
- )
- enterprise_info = gr.Markdown("*Requires Enterprise license*")
-
- # Timeline visualization
- timeline_header = gr.Markdown("### β° Incident Timeline")
- timeline_viz = gr.Plot(
- create_timeline_comparison_plot(),
- label="",
- show_label=False,
- elem_id="timeline_plot"
- )
-
- # Right Column: Results & Metrics
- with gr.Column(scale=1, variant="panel") as right_col:
- # Real-time Metrics Dashboard
- metrics_header = gr.Markdown("## π Performance Metrics")
-
- # Metric Cards Grid - MUST MATCH app.py expectations: detection_time, mttr, auto_heal, savings
- # All four start as empty gr.HTML placeholders.
- detection_time = gr.HTML()
- mttr = gr.HTML() # Mean Time to Resolve
- auto_heal = gr.HTML() # Auto-heal rate
- savings = gr.HTML() # Cost savings
-
- # Results Display Areas
- oss_results_header = gr.Markdown("### π Policy Analysis Results")
- oss_results_display = gr.JSON(
- label="",
- value={
- "status": "Analysis Pending",
- "processes": ["Detection", "Recall", "Decision"],
- "mode": "Advisory Only",
- "action": "Generate Formal HealingIntent"
- },
- height=200
- )
-
- enterprise_results_header = gr.Markdown("### π° Execution Results")
- enterprise_results_display = gr.JSON(
- label="",
- value={
- "status": "Execution Pending",
- "requires_license": True,
- "available_modes": ["Approval", "Autonomous"],
- "expected_outcome": "12m MTTR, $6.3K saved"
- },
- height=200
- )
-
- # Approval Status
- approval_display = gr.HTML("""
-
-
-
π€ Human Approval Status
- Not Required
-
-
-
Current Mode: Advisory (Policy Only)
-
Switch to "Approval" mode to enable human-in-the-loop workflows
-
-
1. System generates formal HealingIntent
-
2. Human reviews & approves contraindications
-
3. System executes with sequencing constraints
-
-
-
- """)
-
- # Demo Actions
- demo_btn = gr.Button(
- "βΆοΈ Run Complete Walkthrough",
- variant="secondary",
- size="lg"
- )
- demo_info = gr.Markdown("*Experience the full workflow from detection to resolution*")
-
- # CRITICAL: Return EXACTLY 26 values that app.py expects
- # Header/info components created above (e.g. telemetry_header, oss_info,
- # demo_info) are rendered in the layout but deliberately not returned.
- return (
- # Left column returns (5 values)
- scenario_dropdown, historical_panel, scenario_card, telemetry_viz, impact_viz,
- # Middle column returns (13 values)
- observation_gate_placeholder, sequencing_panel, workflow_header, detection_process,
- recall_process, decision_process, oss_section, enterprise_section, oss_btn, enterprise_btn,
- approval_toggle, mcp_mode, timeline_viz,
- # Right column returns (8 values - MUST BE: detection_time, mttr, auto_heal, savings, oss_results_display, enterprise_results_display, approval_display, demo_btn)
- detection_time, mttr, auto_heal, savings,
- oss_results_display, enterprise_results_display, approval_display, demo_btn
- # DO NOT include: oss_info, enterprise_info, demo_info - app.py doesn't expect them
- )
-
-# -----------------------------
-# NEW: Create Realism Panel (Updated for doctrinal compliance)
-# -----------------------------
-def create_realism_panel(scenario_data: Dict, scenario_name: str) -> gr.HTML:
- """
- Render the ranked HealingIntent actions of a scenario as an HTML panel.
-
- Args:
- scenario_data: Scenario dictionary. Only the "ranked_actions" key is
- read here (default: empty list). Each entry is a dict that must
- contain "rank"; the keys "category", "confidence", "action",
- "preconditions", "contraindicated_actions",
- "reversibility_statement", "historical_evidence" and "constraints"
- are optional and read with .get().
- scenario_name: Not referenced in the visible body of this function.
-
- Returns:
- A gr.HTML component holding one card per ranked action followed by
- the static sequencing overview.
- """
- ranked_actions = scenario_data.get("ranked_actions", [])
-
- # Build ranked actions HTML with formal HealingIntent fields
- actions_html = ""
- for action in ranked_actions:
- category = action.get("category", "unknown")
- # Per-category accent colour; unknown categories get the neutral grey.
- category_color = {
- "dampening": "#3b82f6",
- "concurrency_control": "#10b981",
- "observation": "#8b5cf6",
- "scaling": "#f59e0b"
- }.get(category, "#64748b")
-
- # NOTE(review): "rank" is indexed directly, so an action without that key
- # raises KeyError, unlike the other fields which are read with .get().
- # Rank 1 is labelled recommended, rank 2 secondary, anything else contraindicated.
- rank_color = "#3b82f6" if action["rank"] == 1 else "#f59e0b" if action["rank"] == 2 else "#64748b"
- status = "β
RECOMMENDED" if action["rank"] == 1 else "π‘ SECONDARY" if action["rank"] == 2 else "π΄ CONTRAINDICATED"
-
- # Formal HealingIntent fields
- # Each optional section stays an empty string when its key is missing or falsy.
- preconditions_html = ""
- if action.get("preconditions"):
- preconditions_html = f"""
-
-
Preconditions:
- {"".join([f'
β’ {pre}
' for pre in action["preconditions"]])}
-
- """
-
- contraindications_html = ""
- if action.get("contraindicated_actions"):
- contraindications_html = f"""
-
-
Contraindicated Actions:
- {"".join([f'
β {contra}
' for contra in action["contraindicated_actions"]])}
-
- """
-
- reversibility_html = ""
- if action.get("reversibility_statement"):
- reversibility_html = f"""
-
-
Reversibility Statement:
-
{action["reversibility_statement"]}
-
- """
-
- historical_evidence_html = ""
- if action.get("historical_evidence"):
- historical_evidence_html = f"""
-
-
Historical Evidence:
- {"".join([f'
π {evidence}
' for evidence in action["historical_evidence"]])}
-
- """
-
- # NOTE(review): the template below takes element [0] of "constraints"; an
- # explicitly empty list would raise IndexError (the default only applies
- # when the key is absent).
- actions_html += f"""
-
-
-
-
-
- {action['rank']}
-
-
- {status} β’ {action.get('confidence', 0)}% confidence
-
-
- {category.upper().replace('_', ' ')}
-
-
-
- {action.get('action', 'No action specified')}
-
-
-
- {action.get('confidence', 0)}%
-
-
-
- {preconditions_html}
- {contraindications_html}
- {reversibility_html}
- {historical_evidence_html}
-
-
- Sequencing: {action.get('category', 'unknown').replace('_', ' ')} β’ {action.get('constraints', ['No constraints'])[0]}
-
-
- """
-
- # Combine all panels
- full_html = f"""
-
-
-
-
-
- π― Formal HealingIntent Sequence
-
-
- Policy-generated intents with preconditions, contraindications, and reversibility statements
-
-
-
- DOCTRINAL COMPLIANCE v3.3.9+
-
-
-
- {actions_html if actions_html else '
No ranked actions available
'}
-
-
-
-
- π Doctrinal Sequencing Enforcement
-
-
-
-
-
1
-
Dampening
-
First in sequence
-
-
-
2
-
Concurrency
-
Then control
-
-
-
3
-
Observe
-
Then validate
-
-
-
4
-
Scale
-
Only if necessary
-
-
-
-
- Doctrinal Constraint: Scaling actions have lower confidence than dampening actions and appear last.
- If retry amplification is detected, scaling is contraindicated entirely.
-
-
-
-
-
- """
-
- return gr.HTML(full_html)
-
-# -----------------------------
-# Tab 2: Business ROI - Updated
-# -----------------------------
-def create_tab2_business_roi(scenarios=INCIDENT_SCENARIOS) -> tuple:
-    """Build the widgets for the Business ROI tab.
-
-    Args:
-        scenarios: Mapping whose keys are offered as choices in the scenario
-            dropdown. Defaults to the module-level ``INCIDENT_SCENARIOS``.
-
-    Returns:
-        A 7-tuple ``(dashboard_output, roi_scenario_dropdown, monthly_slider,
-        team_slider, calculate_btn, roi_output, roi_chart)``.
-    """
-    scenario_names = list(scenarios.keys())
-
-    # The preferred default may be missing from a custom scenario mapping.
-    # Pre-selecting a value that is not among the choices leaves the dropdown
-    # in an invalid state, so fall back to the first available scenario, or
-    # to no selection when the mapping is empty.
-    preferred_default = "Cache Miss Storm"
-    if preferred_default in scenario_names:
-        default_scenario = preferred_default
-    elif scenario_names:
-        default_scenario = scenario_names[0]
-    else:
-        default_scenario = None
-
-    # Components are created in display order.
-    dashboard_output = gr.Plot(label="Executive Dashboard", show_label=True)
-    roi_scenario_dropdown = gr.Dropdown(
-        choices=scenario_names,
-        value=default_scenario,
-        label="Scenario for ROI Analysis",
-        info="Select the primary incident type for ROI calculation"
-    )
-    monthly_slider = gr.Slider(
-        minimum=1,
-        maximum=50,
-        value=15,
-        step=1,
-        label="Monthly Incidents",
-        info="Average number of incidents per month"
-    )
-    team_slider = gr.Slider(
-        minimum=1,
-        maximum=50,
-        value=5,
-        step=1,
-        label="Team Size",
-        info="Number of engineers on reliability team"
-    )
-    calculate_btn = gr.Button("π Calculate Comprehensive ROI", variant="primary", size="lg")
-    roi_output = gr.JSON(label="ROI Analysis Results", value={})
-    roi_chart = gr.Plot(label="ROI Visualization")
-
-    return (dashboard_output, roi_scenario_dropdown, monthly_slider, team_slider,
-            calculate_btn, roi_output, roi_chart)
-
-# -----------------------------
-# Tab 3: Enterprise Features - UPDATED WITH INSTALLATION STATUS
-# -----------------------------
-def create_tab3_enterprise_features() -> tuple:
- """
- Build the widgets for the Enterprise Features tab.
-
- The installation status is obtained from app.get_installation_status()
- when that import succeeds; otherwise static fallback data is shown.
-
- Returns:
- An 8-tuple (license_display, validate_btn, trial_btn, upgrade_btn,
- mcp_mode, mcp_mode_info, features_table, integrations_table).
- """
- # Get installation status
- # The import happens inside the function rather than at module level.
- # NOTE(review): presumably this avoids a circular import with app.py - confirm.
- try:
- from app import get_installation_status
- installation = get_installation_status()
-
- license_data = {
- "status": "β
OSS Installed" if installation["oss_installed"] else "β οΈ OSS Not Installed",
- "oss_version": installation["oss_version"] or "Not installed",
- "enterprise_installed": installation["enterprise_installed"],
- "enterprise_version": installation["enterprise_version"] or "Not installed",
- "execution_allowed": installation["execution_allowed"],
- "recommendations": installation["recommendations"],
- "badges": installation["badges"]
- }
-
- # Update features table based on installation
- features_data = [
- ["ARF OSS Package", "β
Installed" if installation["oss_installed"] else "β Not Installed", "OSS"],
- ["Self-Healing Core", "β
Active", "Enterprise"],
- ["RAG Graph Memory", "β
Active", "Both"],
- ["Predictive Analytics", "π Enterprise" if not installation["enterprise_installed"] else "β
Available", "Enterprise"],
- ["Audit Trail", "π Enterprise" if not installation["enterprise_installed"] else "β
Available", "Enterprise"],
- ["Compliance (SOC2)", "π Enterprise" if not installation["enterprise_installed"] else "β
Available", "Enterprise"]
- ]
-
- # NOTE(review): only ImportError is handled; an exception raised by
- # get_installation_status() itself, or a missing key in its result,
- # propagates to the caller instead of reaching this fallback.
- except ImportError:
- # Fallback if installation check fails
- # This fallback dict omits "enterprise_version", "execution_allowed" and
- # "badges", which the success branch provides.
- license_data = {
- "status": "β οΈ Installation Check Failed",
- "oss_version": "Unknown",
- "enterprise_installed": False,
- "recommendations": ["Run installation check"]
- }
- # Unlike the success branch, this table has no "ARF OSS Package" row and
- # adds a "Multi-Cloud" row.
- features_data = [
- ["Self-Healing Core", "β
Active", "Enterprise"],
- ["RAG Graph Memory", "β
Active", "Both"],
- ["Predictive Analytics", "π Enterprise", "Enterprise"],
- ["Audit Trail", "π Enterprise", "Enterprise"],
- ["Compliance (SOC2)", "π Enterprise", "Enterprise"],
- ["Multi-Cloud", "π Enterprise", "Enterprise"]
- ]
-
- license_display = gr.JSON(
- value=license_data,
- label="π¦ Package Installation Status"
- )
-
- validate_btn = gr.Button("π Validate Installation", variant="secondary")
- trial_btn = gr.Button("π Start 30-Day Trial", variant="secondary")
- upgrade_btn = gr.Button("π Upgrade to Enterprise", variant="primary")
-
- mcp_mode = gr.Dropdown(
- choices=["advisory", "approval", "autonomous"],
- value="advisory",
- label="MCP Safety Mode"
- )
-
- # Initial mode info
- # NOTE(review): the package version below is a hard-coded literal and is not
- # derived from the installation status gathered above.
- mcp_mode_info = gr.JSON(
- value={
- "current_mode": "advisory",
- "description": "OSS Edition - Analysis only, no execution",
- "features": ["Incident analysis", "RAG similarity", "HealingIntent creation"],
- "package": "agentic-reliability-framework==3.3.7",
- "license": "Apache 2.0"
- },
- label="Mode Details"
- )
-
- # Static integration rows; nothing here is queried at runtime.
- integrations_data = [
- ["Prometheus", "β
Connected", "Monitoring"],
- ["Grafana", "β
Connected", "Visualization"],
- ["Slack", "π Enterprise", "Notifications"],
- ["PagerDuty", "π Enterprise", "Alerting"],
- ["Jira", "π Enterprise", "Ticketing"],
- ["Datadog", "π Enterprise", "Monitoring"]
- ]
-
- features_table = gr.Dataframe(
- headers=["Feature", "Status", "Edition"],
- value=features_data,
- label="Feature Comparison"
- )
- integrations_table = gr.Dataframe(
- headers=["Integration", "Status", "Type"],
- value=integrations_data,
- label="Integration Status"
- )
-
- return (license_display, validate_btn, trial_btn, upgrade_btn,
- mcp_mode, mcp_mode_info, features_table, integrations_table)
-
-# -----------------------------
-# Tab 4: Audit Trail
-# -----------------------------
-def create_tab4_audit_trail() -> tuple:
-    """Build the widgets for the Audit Trail tab.
-
-    Returns:
-        A 6-tuple ``(refresh_btn, clear_btn, export_btn, execution_table,
-        incident_table, export_text)``. Both tables start empty.
-    """
-    # Action buttons first, then the two history tables, then the export
-    # payload viewer; creation order is kept identical on purpose.
-    btn_refresh = gr.Button("π Refresh Audit Trail", variant="secondary")
-    btn_clear = gr.Button("ποΈ Clear History", variant="secondary")
-    btn_export = gr.Button("π₯ Export as JSON", variant="primary")
-
-    tbl_executions = gr.Dataframe(
-        headers=["Time", "Scenario", "Mode", "Status", "Savings", "Details"],
-        value=[],
-        label="Execution History"
-    )
-    tbl_incidents = gr.Dataframe(
-        headers=["Time", "Component", "Scenario", "Severity", "Status"],
-        value=[],
-        label="Incident History"
-    )
-    export_payload = gr.JSON(
-        value={"status": "Export ready"},
-        label="Export Data"
-    )
-
-    widgets = (
-        btn_refresh,
-        btn_clear,
-        btn_export,
-        tbl_executions,
-        tbl_incidents,
-        export_payload,
-    )
-    return widgets
-
-# -----------------------------
-# Tab 5: Learning Engine
-# -----------------------------
-def create_tab5_learning_engine() -> tuple:
-    """Build the widgets for the Learning Engine tab.
-
-    Returns:
-        A 10-tuple ``(learning_graph, graph_type, show_labels, search_query,
-        search_btn, clear_btn_search, search_results, stats_display,
-        patterns_display, performance_display)``. The three trailing JSON
-        viewers are seeded with fixed sample values.
-    """
-    # Graph area and its display controls.
-    graph_plot = gr.Plot(label="RAG Memory Graph")
-    graph_kind = gr.Dropdown(
-        choices=["Incident Patterns", "Action-Outcome Chains", "System Dependencies"],
-        value="Incident Patterns",
-        label="Graph Type"
-    )
-    labels_toggle = gr.Checkbox(label="Show Labels", value=True)
-
-    # Pattern search controls and result viewer.
-    query_box = gr.Textbox(label="Search Patterns", placeholder="Enter pattern to search...")
-    btn_search = gr.Button("π Search Patterns", variant="secondary")
-    btn_clear_search = gr.Button("ποΈ Clear Search", variant="secondary")
-    results_view = gr.JSON(
-        value={"status": "Ready for search"},
-        label="Search Results"
-    )
-
-    # Read-only summary viewers.
-    stats_view = gr.JSON(
-        value={"patterns": 42, "incidents": 156, "success_rate": "87.3%"},
-        label="Learning Statistics"
-    )
-    patterns_view = gr.JSON(
-        value={"common_patterns": ["cache_storm", "db_pool", "memory_leak"]},
-        label="Pattern Library"
-    )
-    performance_view = gr.JSON(
-        value={"accuracy": "94.2%", "recall": "89.7%", "precision": "92.1%"},
-        label="Agent Performance"
-    )
-
-    widgets = (
-        graph_plot,
-        graph_kind,
-        labels_toggle,
-        query_box,
-        btn_search,
-        btn_clear_search,
-        results_view,
-        stats_view,
-        patterns_view,
-        performance_view,
-    )
-    return widgets
-
-# -----------------------------
-# Footer
-# -----------------------------
-def create_footer() -> gr.HTML:
- """Return the static page footer (copyright line and tagline) as a gr.HTML component."""
- return gr.HTML("""
-
-
Agentic Reliability Framework Β© 2026
-
Production-grade multi-agent AI for autonomous system reliability intelligence
-
-
- """)
\ No newline at end of file