diff --git "a/ui/components.py" "b/ui/components.py" deleted file mode 100644--- "a/ui/components.py" +++ /dev/null @@ -1,2104 +0,0 @@ -""" -Gradio-only UI components for ARF -Ensures full compatibility with app.py -NOW WITH DOCTRINAL COMPLIANCE: Psychological Advantage Enforcement -UPDATED: Language discipline, observation gate rendering, recall panel dominance -UPDATED: Metric discipline, sequencing display, no early "critical" terminology -DOCTRINAL VERSION: 3.3.9+restraint -""" - -import gradio as gr -from typing import Dict, List, Any -import logging -import datetime -import time -import plotly.graph_objects as go -import plotly.express as px -import numpy as np - -logger = logging.getLogger(__name__) - -# Try to import scenarios from registry first -try: - from config.scenario_registry import ScenarioRegistry - INCIDENT_SCENARIOS = ScenarioRegistry.load_scenarios() - logger.info(f"Loaded {len(INCIDENT_SCENARIOS)} scenarios from registry") -except ImportError: - logger.warning("Scenario registry not available, falling back to demo scenarios") - from demo.scenarios import INCIDENT_SCENARIOS - -# ----------------------------- -# Header & Status - DOCTRINAL LANGUAGE -# ----------------------------- -def create_header(version="3.3.9") -> gr.HTML: - return gr.HTML(f""" -
-
- Reliability Framework -
-

- v{version} (Policy + Enterprise Edition) -

-

- Production-grade policy execution for system reliability intelligence -

- - -
- πŸ—οΈ Architecture: OSS advises β†’ Enterprise executes -
-
-
-
- """) - -def create_status_bar() -> gr.HTML: - return gr.HTML(""" -
- - βœ… Policy System Online - - - βœ… ARF OSS v3.3.9 - - - 🏒 Enterprise Execution - -
- """) - -# ----------------------------- -# CRITICAL RESTORED FUNCTIONS - Missing from current version -# ----------------------------- - -def create_observation_gate_placeholder(confidence: float = 65.0) -> gr.HTML: - """Observation gate that demonstrates psychological restraint. - - Shows 'Decision Intentionally Deferred' when confidence is below threshold. - This is a critical doctrinal component showing restraint as a system choice. - """ - if confidence < 70.0: - html_content = f""" -
-
-
- ⚠️ -
-
-

- Observation Gate: Awaiting confirmation. -

-

- System restraint engaged -

-
-
- -
-
-
⏸️
-
-

- Decision Intentionally Deferred -

-

- The system has detected uncertainty ({confidence:.1f}% confidence) - and has chosen to observe rather than act. - Historical evidence indicates premature action increases risk by 47%, so the system is enforcing an observation-first policy. -

-
-
-
- -
-
-
- Confidence Threshold -
-
- 70.0% -
-
- Required for action -
-
- -
-
- Current Confidence -
-
- {confidence:.1f}% -
-
- Below threshold β†’ Observe -
-
-
- -
- -
-
-
-
-
-
-
-
-
- Observe ({confidence:.1f}%) - Threshold (70%) - Act (75%+) -
-
-
- """ - else: - html_content = f""" -
-
-
- βœ… -
-
-

- OBSERVATION GATE CLEARED -

-

- Confidence threshold met -

-
-
- -
-
-
⚑
-
-

- Proceed with Policy Action -

-

- Confidence exceeds threshold. System may proceed with sequenced actions. - Historical evidence will be consulted before any execution. -

-
-
-
- -
-
-
- Confidence Threshold -
-
- 70.0% -
-
- Required for action -
-
- -
-
- Current Confidence -
-
- {confidence:.1f}% -
-
- Above threshold β†’ Proceed -
-
-
- -
-
- βœ… Safety Principle -
-

- "When confidence is high, proceed with sequenced actions. Each step is validated - against historical evidence before proceeding to the next." -

-
- -
-
-
-
-
-
-
-
-
- Ready ({confidence:.1f}%) - Threshold (70%) - Proceed -
-
-
- """ - - return gr.HTML(html_content) - -def create_sequencing_visualization() -> gr.HTML: - """Creates the sequencing panel showing dampening-first progression. - - Doctrinal: Shows sequencing as policy, not reaction. - """ - return gr.HTML(""" -
-
-
-

- πŸ”„ Doctrinal Sequencing: Policy Over Reaction -

-

- System enforces sequencing regardless of prediction confidence -

-
-
- POLICY ENFORCED -
-
- - -
- -
- - -
-
- 1 -
-
Dampening
-
Prevent amplification first
-
- REQUIRED -
-
- - -
β†’
- - -
-
- 2 -
-
Concurrency
-
Manage load, then observe
-
- REQUIRED -
-
- - -
β†’
- - -
-
- 3 -
-
Observe
-
Validate trends for 5+ minutes
-
- REQUIRED -
-
- - -
β†’
- - -
-
- 4 -
-
Scale
-
Only if all previous succeed
-
- OPTIONAL -
-
-
- - -
-
-
🎯
-
-
- Doctrinal Constraint: Scaling Cannot Appear First -
-
- If retry amplification is detected, scaling is contraindicated entirely. - The system must observe stabilization before considering capacity increases. - Historical evidence shows scaling-first fails 76% of the time during amplification. -
-
-
-
- - -
-
-
Current Sequence State
-
Waiting for detection process
-
-
-
- πŸ”„ Sequence: 0/4 -
-
- ⏱️ Time: 0s -
-
-
-
- """) - -def create_detection_display() -> gr.HTML: - """Creates detection process HTML display.""" - return gr.HTML(""" -
-
-
-
- πŸ•΅οΈβ€β™‚οΈ -
-
-

Detection Process

-

Telemetry analysis & pattern recognition

-
-
-
- STATUS: ACTIVE -
-
- -
-
-
Pattern Match
-
Retry Amplification
-
-
-
Confidence
-
92.7%
-
-
-
Detection Time
-
0.8 seconds
-
-
-
Severity
-
HIGH_VARIANCE
-
-
- -
-
- βœ… Detected: Retry amplification pattern with exponential growth (r=1.8) -
-
- Telemetry shows request rate doubling every 45 seconds. System flagged for sequencing enforcement. -
-
- -
-
-
Next Step:
-
Activate recall process
-
-
- πŸ“Š 12 metrics - ⏱️ 0.8s latency - πŸ” 3 patterns -
-
-
- """) - -def create_recall_display() -> gr.HTML: - """Creates recall process HTML display with historical evidence dominance.""" - return gr.HTML(""" -
-
-
-
- 🧠 -
-
-

Recall Process

-

Historical evidence & pattern matching

-
-
-
- STATUS: ACTIVE -
-
- -
-
- Recall Dominance: Evidence > Prediction -
- -
- -
-
- β›” Scaling-First Failures -
- -
-
-
2024-11-15 β€’ prod-east
-
FAILED
-
-
- Action: Scale during retry storm -
-
- Outcome: Amplification increased 300% -
-
- "Scaling during amplification worsens the problem" -
-
- -
-
-
2024-09-22 β€’ staging
-
FAILED
-
-
- Action: Add capacity without dampening -
-
- Outcome: 45 min outage, $8.2K loss -
-
- "New capacity consumed by amplification loop" -
-
-
- - -
-
- βœ… Dampening-First Successes -
- -
-
-
2024-12-03 β€’ prod-west
-
SUCCESS
-
-
- Action: Request coalescing + backoff -
-
- Outcome: Resolved in 8 min, $5.1K saved -
-
- "Dampening broke amplification cycle" -
-
- -
-
-
2024-10-17 β€’ prod-eu
-
SUCCESS
-
-
- Action: Circuit breaker + observability -
-
- Outcome: 12 min recovery, 0 user impact -
-
- "Sequencing prevented escalation" -
-
-
-
-
- -
-
- 🎯 RECALL DECISION: Scaling contraindicated due to historical evidence -
-
- Historical evidence (76% failure rate) dominates predictive confidence (92%). - System will enforce dampening-first sequencing. -
-
- -
-
-
Evidence Weight:
-
Historical: 85% β€’ Predictive: 15%
-
-
- πŸ“š 8 incidents - 🎯 76% failure rate - πŸ† recall dominance -
-
-
- """) - -def create_decision_display() -> gr.HTML: - """Creates decision process HTML display.""" - return gr.HTML(""" -
-
-
-
- 🎯 -
-
-

Decision Process

-

HealingIntent creation & sequencing

-
-
-
- STATUS: ACTIVE -
-
- - -
-
-
-
Formal HealingIntent Created
-

Preconditions checked, contraindications listed

-
-
- CONFIDENCE: 87.3% -
-
- -
-
-
Primary Action
-
-
- Implement request coalescing with exponential backoff (jitter: 25%) -
-
-
- -
-
Sequencing Rule
-
-
- dampening_first_then_observe_then_optional_scale -
-
-
-
- -
-
-
Preconditions
-
β€’ Retry amplification detected
β€’ Confidence > 70%
β€’ No scaling contraindicated
-
- -
-
Contraindications
-
β›” Scale during retry storm
β›” Add capacity immediately
β›” Restart during amplification
-
- -
-
Reversibility
-
βœ… Backoff can be adjusted
βœ… Coalescing can be disabled
βœ… No stateful changes
-
-
-
- - -
-
-
⏳
-
-
- Observation Gate: Awaiting confirmation -
-
- System will observe for 5 minutes before proceeding to execution -
-
-
- -
-
-
- -
- 0% - Threshold: 70% - 100% -
-
- -
-
-
Next Step:
-
Wait for observation gate clearance
-
-
- πŸ“ formal intent - πŸ”„ sequenced - 🎯 87% confidence -
-
-
- """) - -def create_oss_advisory_section() -> gr.HTML: - """Creates OSS advisory section showing policy vs execution boundary.""" - return gr.HTML(""" -
-
-
πŸ†“
-
-

Policy Edition

-

Analysis & Advisory Only - Apache 2.0

-
-
- PERMANENTLY SAFE -
-
- -
-
-

πŸ“ HealingIntent Created

- 94% confidence -
- -
-
-
- βœ… -
-
-
Action Recommended
-
Implement request coalescing with exponential backoff
-
-
- -
-
- 🧠 -
-
-
Pattern Match
-
Similar incident resolved with dampening (87% success rate)
-
-
- -
-
- ⚠️ -
-
-
Contraindications
-
βœ… Checked (retry amplification detected)
-
-
- -
-
- πŸ”„ -
-
-
Sequencing Rule
-
dampening_first_then_observe_then_optional_scale
-
-
-
-
- - -
-
-
- 🚫 OSS STOPS HERE - No Execution -
-
- -
-
- OSS provides policy advice only. Enterprise edition required for execution. -
-
- This architectural boundary ensures safety by design. -
-
-
-
- """) - -def create_timeline_comparison_plot() -> go.Figure: - """Creates timeline comparison plot for incident resolution.""" - fig = go.Figure() - - # Timeline data - manual_times = [0, 5, 15, 30, 45, 60] - manual_users = [45000, 45000, 42000, 35000, 20000, 5000] - - arf_times = [0, 0.8, 1.5, 3, 8, 12] - arf_users = [45000, 45000, 45000, 42000, 15000, 0] - - # Add traces - fig.add_trace(go.Scatter( - x=manual_times, - y=manual_users, - mode='lines+markers', - name='Manual Resolution', - line=dict(color='#ef4444', width=3, dash='dash'), - marker=dict(size=8, color='#ef4444'), - hovertemplate='Time: %{x}min
Users Affected: %{y:,}' - )) - - fig.add_trace(go.Scatter( - x=arf_times, - y=arf_users, - mode='lines+markers', - name='ARF Policy Execution', - line=dict(color='#10b981', width=4), - marker=dict(size=10, color='#10b981'), - hovertemplate='Time: %{x}min
Users Affected: %{y:,}' - )) - - # Add vertical lines for key events - fig.add_vline(x=0.8, line_width=2, line_dash="dot", line_color="#3b82f6", - annotation_text="Detection", annotation_position="top right") - fig.add_vline(x=1.5, line_width=2, line_dash="dot", line_color="#8b5cf6", - annotation_text="Recall", annotation_position="top right") - fig.add_vline(x=3, line_width=2, line_dash="dot", line_color="#f59e0b", - annotation_text="Decision", annotation_position="top right") - fig.add_vline(x=8, line_width=2, line_dash="dot", line_color="#10b981", - annotation_text="Resolution", annotation_position="top right") - - # Update layout - fig.update_layout( - title=dict( - text="⏰ Incident Timeline: Manual vs Policy Execution", - font=dict(size=18, color='#1e293b'), - x=0.5 - ), - xaxis=dict( - title="Time (minutes)", - gridcolor='#e2e8f0', - tickformat=',d' - ), - yaxis=dict( - title="Users Affected", - gridcolor='#e2e8f0', - tickformat=',' - ), - plot_bgcolor='white', - paper_bgcolor='white', - font=dict(family="Inter, sans-serif", color="#475569"), - hovermode='x unified', - legend=dict( - orientation="h", - yanchor="bottom", - y=1.02, - xanchor="center", - x=0.5, - bgcolor='rgba(255, 255, 255, 0.8)', - bordercolor='#e2e8f0', - borderwidth=1 - ), - margin=dict(l=50, r=50, t=80, b=50), - height=400 - ) - - # Add annotation for savings - fig.add_annotation( - x=12, - y=0, - text="πŸš€ 73% faster resolution
πŸ’° $6.3K saved", - showarrow=True, - arrowhead=2, - arrowsize=1, - arrowwidth=2, - arrowcolor="#10b981", - ax=50, - ay=-50, - bgcolor="#f0fdf4", - bordercolor="#10b981", - borderwidth=2, - font=dict(size=12, color="#065f46") - ) - - return fig - -# ----------------------------- -# NEW: Observation Gate Renderer - CRITICAL PSYCHOLOGICAL FIX -# ----------------------------- -def render_observation_gate(healing_intent: Dict[str, Any]) -> gr.HTML: - """ - Render observation gate state as active restraint, not passive waiting. - Doctrinal: Make inaction an explicit, powerful decision. - """ - deferral_reason = healing_intent.get("deferral_reason", "uncertainty_too_high_for_action") - frozen_until = healing_intent.get("decision_frozen_until", "") - confidence = healing_intent.get("confidence", 0.0) - - # Parse timestamp for countdown - countdown_text = "" - if frozen_until: - try: - frozen_dt = datetime.datetime.fromisoformat(frozen_until.replace("Z", "+00:00")) - now = datetime.datetime.now(datetime.timezone.utc) - if frozen_dt.tzinfo is None: - frozen_dt = frozen_dt.replace(tzinfo=datetime.timezone.utc) - time_left = frozen_dt - now - minutes_left = max(0, int(time_left.total_seconds() / 60)) - countdown_text = f"{minutes_left}m" - except: - countdown_text = "5m" - - return gr.HTML(f""" -
-
-
⏳
-
-

- Decision Intentionally Deferred -

-

- System state: observe_only β€’ Confidence: {confidence:.1%} -

-
-
- ACTIVE RESTRAINT -
-
- -
-
-
REASON FOR DEFERRAL
-
- {deferral_reason.replace('_', ' ').title()} -
-
- -
-
NEXT EVALUATION
-
- System re-evaluates in: {countdown_text} -
-
-
- -
-
-
🎯
-
-
- This is a System Choice, Not a Limitation -
-
- The system is choosing not to act because uncertainty exceeds policy thresholds. - This restraint demonstrates operational maturityβ€”eagerness is a liability in production. -

- "What you are seeing is not waiting. It is judgment under uncertainty." -
-
-
-
- -
-
PREVENTED ACTIONS (CONTRANDICATED)
-
- scale_during_retry_storm - add_capacity_during_amplification - any_action_during_high_uncertainty -
-
-
- """) - -# ----------------------------- -# NEW: Historical Evidence Panel - RECALL DOMINANCE -# ----------------------------- -def create_historical_evidence_panel(scenario_data: Dict[str, Any]) -> gr.HTML: - """ - Create doctrinally compliant historical evidence panel. - Must be visually dominant with dates/environments. - """ - # Extract from scenario or use defaults - historical_panel = scenario_data.get("historical_evidence_panel", {}) - - scaling_failures = historical_panel.get("scaling_first_failures", []) - dampening_successes = historical_panel.get("dampening_first_successes", []) - - # Build failures HTML - failures_html = "" - for i, failure in enumerate(scaling_failures[:3]): # Show top 3 - failures_html += f""" -
-
-
{failure.get('date', 'Unknown')} β€’ {failure.get('environment', 'Unknown')}
-
FAILED
-
-
- Action: {failure.get('action', 'Unknown')} -
-
- Outcome: {failure.get('outcome', 'Unknown')} -
-
- {failure.get('lesson', 'No lesson captured')} -
-
- """ - - # Build successes HTML - successes_html = "" - for i, success in enumerate(dampening_successes[:3]): # Show top 3 - successes_html += f""" -
-
-
{success.get('date', 'Unknown')} β€’ {success.get('environment', 'Unknown')}
-
SUCCESS
-
-
- Action: {success.get('action', 'Unknown')} -
-
- Outcome: {success.get('outcome', 'Unknown')} -
-
- {success.get('lesson', 'No lesson captured')} -
-
- """ - - return gr.HTML(f""" -
-
-
-

- 🧠 Historical Evidence (Why Sequencing Matters) -

-

- Real outcomes from similar incidentsβ€”this evidence dominates decision logic -

-
-
- Historical evidence outweighs model confidence. -
-
- -
-
-
- β›” Scaling-First Failures -
- {failures_html if failures_html else """ -
-
πŸ“Š
-
Scaling-First Failures (Evidence Present)
-
- """} -
- -
-
- βœ… Dampening-First Successes -
- {successes_html if successes_html else """ -
-
πŸ“Š
-
Dampening-First Successes (Evidence Present)
-
- """} -
-
- -
-
-
🎯
-
-
- If history shows failure, the system will not repeat it. -
-
- The system prioritizes historical evidence over predictive confidence. - If scaling-first failed in similar conditions, scaling is contraindicated regardless of model confidence. -

- "What happened is more important than what might happen." -
-
-
-
-
- """) - -# ----------------------------- -# Performance Metrics Function - DOCTRINAL METRICS -# ----------------------------- -# 1. First, update the update_performance_metrics function to return what app.py expects: -def update_performance_metrics(scenario_name: str, scenarios=INCIDENT_SCENARIOS) -> tuple: - """ - Update performance metrics based on scenario - Returns: (detection_time_html, recall_quality_html, confidence_score_html, sequencing_stage_html) - MUST MATCH what app.py demo_btn.click() expects! - """ - - # Scenario-specific metrics mapping WITH GRADIENT COLORS - metrics_config = { - "Cache": { - "detection_time": ("45s", "89% faster", "linear-gradient(135deg, #3b82f6 0%, #1d4ed8 100%)"), - "recall_quality": ("92%", "85% accuracy", "linear-gradient(135deg, #10b981 0%, #047857 100%)"), - "confidence_score": ("87%", "High certainty", "linear-gradient(135deg, #8b5cf6 0%, #7c3aed 100%)"), - "sequencing_stage": ("Dampening", "Step 1/4", "linear-gradient(135deg, #f59e0b 0%, #d97706 100%)"), - "savings_multiplier": 0.85 - }, - "Database": { - "detection_time": ("38s", "91% faster", "linear-gradient(135deg, #3b82f6 0%, #1d4ed8 100%)"), - "recall_quality": ("89%", "82% accuracy", "linear-gradient(135deg, #10b981 0%, #047857 100%)"), - "confidence_score": ("84%", "High certainty", "linear-gradient(135deg, #8b5cf6 0%, #7c3aed 100%)"), - "sequencing_stage": ("Concurrency", "Step 2/4", "linear-gradient(135deg, #f59e0b 0%, #d97706 100%)"), - "savings_multiplier": 0.82 - }, - "Kubernetes": { - "detection_time": ("52s", "87% faster", "linear-gradient(135deg, #3b82f6 0%, #1d4ed8 100%)"), - "recall_quality": ("91%", "84% accuracy", "linear-gradient(135deg, #10b981 0%, #047857 100%)"), - "confidence_score": ("86%", "High certainty", "linear-gradient(135deg, #8b5cf6 0%, #7c3aed 100%)"), - "sequencing_stage": ("Observe", "Step 3/4", "linear-gradient(135deg, #f59e0b 0%, #d97706 100%)"), - "savings_multiplier": 0.83 - }, - "Network": { - "detection_time": ("28s", "93% faster", "linear-gradient(135deg, #3b82f6 0%, #1d4ed8 100%)"), - "recall_quality": ("94%", "88% accuracy", "linear-gradient(135deg, #10b981 0%, #047857 100%)"), - "confidence_score": ("89%", "High certainty", "linear-gradient(135deg, #8b5cf6 0%, #7c3aed 100%)"), - "sequencing_stage": ("Scale", "Step 4/4", "linear-gradient(135deg, #f59e0b 0%, #d97706 100%)"), - "savings_multiplier": 0.88 - }, - "Default": { - "detection_time": ("42s", "90% faster", "linear-gradient(135deg, #3b82f6 0%, #1d4ed8 100%)"), - "recall_quality": ("90%", "85% accuracy", "linear-gradient(135deg, #10b981 0%, #047857 100%)"), - "confidence_score": ("85%", "High certainty", "linear-gradient(135deg, #8b5cf6 0%, #7c3aed 100%)"), - "sequencing_stage": ("Dampening", "Step 1/4", "linear-gradient(135deg, #f59e0b 0%, #d97706 100%)"), - "savings_multiplier": 0.85 - } - } - - # Determine scenario type - scenario_type = "Default" - for key in ["Cache", "Database", "Kubernetes", "Network", "Storage"]: - if key.lower() in scenario_name.lower(): - scenario_type = key - break - - # Get metrics for scenario type - metrics = metrics_config.get(scenario_type, metrics_config["Default"]) - - # Create HTML for each metric card WITH GRADIENT BORDERS - detection_time_html = f""" -
-
⏱️
-
-

Detection Time

-

{metrics['detection_time'][0]}

-

{metrics['detection_time'][1]} than baseline

-
-
- """ - - recall_quality_html = f""" -
-
🧠
-
-

Recall Quality

-

{metrics['recall_quality'][0]}

-

{metrics['recall_quality'][1]}

-
-
- """ - - confidence_score_html = f""" -
-
🎯
-
-

Confidence Score

-

{metrics['confidence_score'][0]}

-

{metrics['confidence_score'][1]}

-
-
- """ - - sequencing_stage_html = f""" -
-
πŸ”
-
-

Sequencing Stage

-

{metrics['sequencing_stage'][0]}

-

{metrics['sequencing_stage'][1]}

-
-
- """ - - logger.info(f"βœ… Updated performance metrics for {scenario_name} ({scenario_type} type)") - return detection_time_html, recall_quality_html, confidence_score_html, sequencing_stage_html - -# 2. Update create_tab1_incident_demo to use the SAME variable names: - -def create_tab1_incident_demo(): - """ - Creates Live Demo Tab components with psychological advantage baked in. - Returns EXACTLY 26 values matching app.py expectations. - - Canonical Question: "What should we do right now?" (Policy advice only) - No business metrics allowed in this tab. - """ - import gradio as gr - - # 1. Core Scenario Selection (Psychological: User controls context) - scenario_dropdown = gr.Dropdown( - choices=["Retry Storm Amplification", "Cascading Dependency Failure", - "Partial Regional Outage", "Latent Configuration Drift"], - value="Retry Storm Amplification", - label="Select Incident Scenario", - info="Choose the operational context for policy evaluation" - ) - - # 2. Historical Evidence Panel (DOMINANT VISUALLY - psychological advantage) - historical_panel = gr.DataFrame( - value=[ - ["2024-03-15", "US-East", "Retry Storm", "Dampen β†’ Observe β†’ Scale", "Contained in 42s"], - ["2024-02-28", "EU-West", "Cascading Failure", "Dampen β†’ Isolate β†’ Repair", "Contained in 3m 18s"], - ["2024-01-12", "AP-South", "Config Drift", "Observe β†’ Diagnose β†’ Fix", "Prevented outage"] - ], - headers=["Date", "Environment", "Pattern", "Sequence Applied", "Outcome"], - label="πŸ“Š Historical Evidence Dominates Models", - interactive=False, - height=200 - ) - - # 3. Scenario Context Card - scenario_card = gr.Markdown(""" - ### Scenario Context: Retry Storm Amplification - **What we're seeing:** Client retries triggering backend amplification (8x retry multiplier) - **Current state:** 42% error rate, rising at 3.2%/minute - **Risk profile:** Service degradation β†’ Partial outage in 8-12 minutes - """) - - # 4. Telemetry Visualization (Psychological: Show what's knowable) - telemetry_viz = gr.Plot( - value=create_telemetry_plot(), - label="Real-time Telemetry: Error Rate & Load" - ) - - # 5. Business Impact Visualization (Psychological: Show consequences) - impact_viz = gr.Plot( - value=create_impact_plot(), - label="Predicted Impact Trajectory" - ) - - # 6. OBSERVATION GATE (CRITICAL PSYCHOLOGICAL ELEMENT) - observation_gate_placeholder = create_observation_gate_placeholder() - - # 7. SEQUENCING VISUALIZATION (Policy, not reaction) - sequencing_panel = create_sequencing_visualization() - - # 8. Workflow Header - workflow_header = gr.Markdown("### Policy Execution Sequence") - - # 9-11. PROCESS DISPLAYS (Doctrinal: "Process" not "Agent") - detection_process = create_detection_display("DETECTION_PROCESS") - recall_process = create_recall_display("RECALL_PROCESS") - decision_process = create_decision_display("DECISION_PROCESS") - - # 12-14. OSS/Enterprise Boundary - oss_section = gr.Markdown("#### πŸ”“ Open Source Capabilities") - enterprise_section = gr.Markdown("#### πŸ”’ Enterprise Governance") - oss_btn = gr.Button("View OSS Implementation", variant="secondary", size="sm") - enterprise_btn = gr.Button("Requires Enterprise License", variant="secondary", size="sm") - - # 15-17. Approval & MCP - approval_toggle = gr.Checkbox( - label="Manual Approval Required", - value=True, - info="Policy: Human approval required for scaling actions during retry storms" - ) - mcp_mode = gr.Radio( - choices=["Policy Evaluation", "Manual Control", "Full Autonomous"], - value="Policy Evaluation", - label="Execution Mode" - ) - timeline_viz = gr.Plot(value=create_timeline_plot()) - - # 18-21. PERFORMANCE METRICS (CRITICAL: Must match app.py indices 18-21) - detection_time = gr.Number( - value=2.8, - label="Detection Time (seconds)", - precision=1 - ) - - # INDEX 19: MUST BE recall_quality (not mttr) - recall_quality = gr.Number( - value=0.92, - label="Recall Quality Score", - precision=2, - info="Historical match confidence" - ) - - # INDEX 20: MUST BE confidence_score (not auto_heal) - confidence_score = gr.Number( - value=0.65, - label="System Confidence", - precision=2, - info="Policy execution confidence < 70% triggers observation gate" - ) - - # INDEX 21: MUST BE sequencing_stage (not savings) - sequencing_stage = gr.Textbox( - value="Dampening", - label="Current Sequencing Stage", - info="Policy: Dampen β†’ Observe β†’ Concurrent Fix β†’ Scale" - ) - - # 22-24. Results Displays - oss_results_display = gr.Markdown("### OSS Results Will Appear Here") - enterprise_results_display = gr.Markdown("### Enterprise Results Will Appear Here") - approval_display = gr.Markdown("**Status:** Awaiting manual approval per policy") - - # 25. DEMO BUTTON (CRITICAL: Index 25 must be demo_btn) - demo_btn = gr.Button( - "▢️ Run Complete Walkthrough", - variant="primary", - size="lg", - elem_id="demo_primary_btn" - ) - - # RETURN EXACTLY 26 VALUES IN CORRECT ORDER - return ( - scenario_dropdown, historical_panel, scenario_card, telemetry_viz, impact_viz, - observation_gate_placeholder, sequencing_panel, workflow_header, - detection_process, recall_process, decision_process, - oss_section, enterprise_section, oss_btn, enterprise_btn, - approval_toggle, mcp_mode, timeline_viz, - detection_time, recall_quality, confidence_score, sequencing_stage, # ← CORRECT VARIABLES - oss_results_display, enterprise_results_display, approval_display, demo_btn - ) -# ----------------------------- -# Tab 1: Live Incident Demo - UPDATED WITH RESTORED FUNCTIONS -# ----------------------------- -def create_tab1_incident_demo(scenarios=INCIDENT_SCENARIOS, default_scenario="Cache Miss Storm") -> tuple: - """ - Create doctrinally compliant incident demo tab. - Doctrinal: Language discipline, sequencing display, no early "critical" - """ - - # Get the default scenario data - default_scenario_data = scenarios.get(default_scenario, {}) - business_impact = default_scenario_data.get("business_impact", {}) - metrics = default_scenario_data.get("metrics", {}) - - with gr.Row(): - # Left Column: Scenario Selection & Live Visualization - with gr.Column(scale=1, variant="panel") as left_col: - # Scenario Selection with rich preview - scenario_dropdown = gr.Dropdown( - choices=list(scenarios.keys()), - value=default_scenario, - label="🎯 Select Variance Scenario", - info="Choose a production variance pattern to analyze", - interactive=True, - container=False - ) - - # ============ HISTORICAL EVIDENCE PANEL FIRST (RECALL DOMINANCE) ============ - historical_panel = create_historical_evidence_panel(default_scenario_data) - - # Scenario Card with doctrinally compliant language - scenario_card = gr.HTML(f""" -
-
-

πŸ“Š {default_scenario}

- {default_scenario_data.get('severity', 'HIGH_VARIANCE')} -
-
-
- Component: - {default_scenario_data.get('component', 'Unknown').replace('_', ' ').title()} -
-
- Users Affected: - {metrics.get('affected_users', 'Unknown') if 'affected_users' in metrics else 'Unknown'} -
-
- Revenue Risk: - ${business_impact.get('revenue_risk_per_hour', 0):,}/hour -
-
- Detection Time: - 45 seconds (Policy System) -
-
- {default_scenario_data.get('component', 'unknown').split('_')[0]} - variance - production - pattern -
-
-
- """) - - # Visualization section - with gr.Row(): - with gr.Column(scale=1): - telemetry_header = gr.Markdown("### πŸ“ˆ Live Telemetry") - telemetry_viz = gr.Plot( - label="", - show_label=False, - elem_id="telemetry_plot" - ) - - with gr.Column(scale=1): - impact_header = gr.Markdown("### πŸ’° Business Impact") - impact_viz = gr.Plot( - label="", - show_label=False, - elem_id="impact_plot" - ) - - # Middle Column: Process Workflow (NOT Agent Workflow) - with gr.Column(scale=2, variant="panel") as middle_col: - # ============ OBSERVATION GATE PLACEHOLDER ============ - observation_gate_placeholder = create_observation_gate_placeholder() - - # ============ SEQUENCING VISUALIZATION ============ - sequencing_header = gr.Markdown("### πŸ”„ Sequencing Logic: Dampening β†’ Concurrency β†’ Observe β†’ Scale") - sequencing_panel = create_sequencing_visualization() - - # Process Workflow Header (NOT Agent Workflow) - workflow_header = gr.Markdown("## πŸ”„ Policy Process Workflow") - workflow_subheader = gr.Markdown("### How the system transforms variance into policy execution") - - # Process Status Cards (NOT Agent Status Cards) - with gr.Row(): - detection_process = create_detection_display() - recall_process = create_recall_display() - decision_process = create_decision_display() - - # Mode Selection & Safety Controls - with gr.Row(): - with gr.Column(scale=1): - approval_toggle = gr.CheckboxGroup( - choices=["πŸ‘€ Require Human Approval"], - label="Safety Controls", - value=[], - info="Toggle human oversight" - ) - - with gr.Column(scale=2): - mcp_mode = gr.Radio( - choices=["πŸ›‘οΈ Advisory (OSS Only)", "πŸ‘₯ Approval", "⚑ Autonomous"], - value="πŸ›‘οΈ Advisory (OSS Only)", - label="Policy Safety Mode", - info="Control execution safety level", - interactive=True - ) - - # OSS vs Enterprise Boundary Visualization - boundary_header = gr.Markdown("### 🎭 Policy vs Execution: The Safety Boundary") - - with gr.Row(): - oss_section = create_oss_advisory_section() - - enterprise_section = gr.HTML(""" -
-
-
πŸ’°
-
-

Execution Edition

-

Full Execution & Learning - Commercial

-
-
- REQUIRES LICENSE -
-
- -
-
-

⚑ Ready to Execute

- AUTONOMOUS -
- -
-
-
- βš™οΈ -
-
-
Mode
-
Autonomous (Requires Enterprise license)
-
-
- -
-
- ⚑ -
-
-
Expected Recovery
-
12 minutes (vs 45 min manual)
-
-
- -
-
- πŸ’° -
-
-
Cost Avoided
-
$6,375
-
-
- -
-
- πŸ‘₯ -
-
-
Users Protected
-
45,000 β†’ 0 impacted
-
-
-
-
- - -
-
-
- βœ… Enterprise executes with MCP safety -
-
- -
-
- Enterprise edition adds execution, learning, and safety guarantees. -
-
- MCP safety modes: Advisory β†’ Approval β†’ Autonomous -
-
-
-
- """) - - # Execution Controls - with gr.Row(): - with gr.Column(scale=1): - oss_btn = gr.Button( - "πŸ†“ Run Policy Analysis", - variant="secondary", - size="lg" - ) - oss_info = gr.Markdown("*Free, policy-only analysis*") - - with gr.Column(scale=1): - enterprise_btn = gr.Button( - "πŸ’° Execute Enterprise Healing", - variant="primary", - size="lg" - ) - enterprise_info = gr.Markdown("*Requires Enterprise license*") - - # Timeline visualization - timeline_header = gr.Markdown("### ⏰ Incident Timeline") - timeline_viz = gr.Plot( - create_timeline_comparison_plot(), - label="", - show_label=False, - elem_id="timeline_plot" - ) - - # Right Column: Results & Metrics - with gr.Column(scale=1, variant="panel") as right_col: - # Real-time Metrics Dashboard - metrics_header = gr.Markdown("## πŸ“Š Performance Metrics") - - # Metric Cards Grid - MUST MATCH app.py expectations: detection_time, mttr, auto_heal, savings - detection_time = gr.HTML() - mttr = gr.HTML() # Mean Time to Resolve - auto_heal = gr.HTML() # Auto-heal rate - savings = gr.HTML() # Cost savings - - # Results Display Areas - oss_results_header = gr.Markdown("### πŸ†“ Policy Analysis Results") - oss_results_display = gr.JSON( - label="", - value={ - "status": "Analysis Pending", - "processes": ["Detection", "Recall", "Decision"], - "mode": "Advisory Only", - "action": "Generate Formal HealingIntent" - }, - height=200 - ) - - enterprise_results_header = gr.Markdown("### πŸ’° Execution Results") - enterprise_results_display = gr.JSON( - label="", - value={ - "status": "Execution Pending", - "requires_license": True, - "available_modes": ["Approval", "Autonomous"], - "expected_outcome": "12m MTTR, $6.3K saved" - }, - height=200 - ) - - # Approval Status - approval_display = gr.HTML(""" -
-
-

πŸ‘€ Human Approval Status

- Not Required -
-
-

Current Mode: Advisory (Policy Only)

-

Switch to "Approval" mode to enable human-in-the-loop workflows

-
-
1. System generates formal HealingIntent
-
2. Human reviews & approves contraindications
-
3. System executes with sequencing constraints
-
-
-
- """) - - # Demo Actions - demo_btn = gr.Button( - "▢️ Run Complete Walkthrough", - variant="secondary", - size="lg" - ) - demo_info = gr.Markdown("*Experience the full workflow from detection to resolution*") - - # CRITICAL: Return EXACTLY 26 values that app.py expects - return ( - # Left column returns (5 values) - scenario_dropdown, historical_panel, scenario_card, telemetry_viz, impact_viz, - # Middle column returns (13 values) - observation_gate_placeholder, sequencing_panel, workflow_header, detection_process, - recall_process, decision_process, oss_section, enterprise_section, oss_btn, enterprise_btn, - approval_toggle, mcp_mode, timeline_viz, - # Right column returns (8 values - MUST BE: detection_time, mttr, auto_heal, savings, oss_results_display, enterprise_results_display, approval_display, demo_btn) - detection_time, mttr, auto_heal, savings, - oss_results_display, enterprise_results_display, approval_display, demo_btn - # DO NOT include: oss_info, enterprise_info, demo_info - app.py doesn't expect them - ) - -# ----------------------------- -# NEW: Create Realism Panel (Updated for doctrinal compliance) -# ----------------------------- -def create_realism_panel(scenario_data: Dict, scenario_name: str) -> gr.HTML: - """ - Create doctrinally compliant realism panel. - Updated to show formal HealingIntent fields and sequencing logic. - """ - ranked_actions = scenario_data.get("ranked_actions", []) - - # Build ranked actions HTML with formal HealingIntent fields - actions_html = "" - for action in ranked_actions: - category = action.get("category", "unknown") - category_color = { - "dampening": "#3b82f6", - "concurrency_control": "#10b981", - "observation": "#8b5cf6", - "scaling": "#f59e0b" - }.get(category, "#64748b") - - rank_color = "#3b82f6" if action["rank"] == 1 else "#f59e0b" if action["rank"] == 2 else "#64748b" - status = "βœ… RECOMMENDED" if action["rank"] == 1 else "🟑 SECONDARY" if action["rank"] == 2 else "πŸ”΄ CONTRAINDICATED" - - # Formal HealingIntent fields - preconditions_html = "" - if action.get("preconditions"): - preconditions_html = f""" -
-
Preconditions:
- {"".join([f'
β€’ {pre}
' for pre in action["preconditions"]])} -
- """ - - contraindications_html = "" - if action.get("contraindicated_actions"): - contraindications_html = f""" -
-
Contraindicated Actions:
- {"".join([f'
β›” {contra}
' for contra in action["contraindicated_actions"]])} -
- """ - - reversibility_html = "" - if action.get("reversibility_statement"): - reversibility_html = f""" -
-
Reversibility Statement:
-
{action["reversibility_statement"]}
-
- """ - - historical_evidence_html = "" - if action.get("historical_evidence"): - historical_evidence_html = f""" -
-
Historical Evidence:
- {"".join([f'
πŸ“Š {evidence}
' for evidence in action["historical_evidence"]])} -
- """ - - actions_html += f""" -
-
-
-
-
- {action['rank']} -
- - {status} β€’ {action.get('confidence', 0)}% confidence - - - {category.upper().replace('_', ' ')} - -
-

- {action.get('action', 'No action specified')} -

-
-
- {action.get('confidence', 0)}% -
-
- - {preconditions_html} - {contraindications_html} - {reversibility_html} - {historical_evidence_html} - -
- Sequencing: {action.get('category', 'unknown').replace('_', ' ')} β€’ {action.get('constraints', ['No constraints'])[0]} -
-
- """ - - # Combine all panels - full_html = f""" -
- -
-
-

- 🎯 Formal HealingIntent Sequence -

-

- Policy-generated intents with preconditions, contraindications, and reversibility statements -

-
-
- DOCTRINAL COMPLIANCE v3.3.9+ -
-
- - {actions_html if actions_html else '
No ranked actions available
'} - - -
-
- πŸ”„ Doctrinal Sequencing Enforcement -
-
-
-
-
1
-
Dampening
-
First in sequence
-
-
-
2
-
Concurrency
-
Then control
-
-
-
3
-
Observe
-
Then validate
-
-
-
4
-
Scale
-
Only if necessary
-
-
-
-
- Doctrinal Constraint: Scaling actions have lower confidence than dampening actions and appear last. - If retry amplification is detected, scaling is contraindicated entirely. -
-
-
-
-
- """ - - return gr.HTML(full_html) - -# ----------------------------- -# Tab 2: Business ROI - Updated -# ----------------------------- -def create_tab2_business_roi(scenarios=INCIDENT_SCENARIOS) -> tuple: - dashboard_output = gr.Plot(label="Executive Dashboard", show_label=True) - roi_scenario_dropdown = gr.Dropdown( - choices=list(scenarios.keys()), - value="Cache Miss Storm", - label="Scenario for ROI Analysis", - info="Select the primary incident type for ROI calculation" - ) - monthly_slider = gr.Slider( - minimum=1, - maximum=50, - value=15, - step=1, - label="Monthly Incidents", - info="Average number of incidents per month" - ) - team_slider = gr.Slider( - minimum=1, - maximum=50, - value=5, - step=1, - label="Team Size", - info="Number of engineers on reliability team" - ) - calculate_btn = gr.Button("πŸ“Š Calculate Comprehensive ROI", variant="primary", size="lg") - roi_output = gr.JSON(label="ROI Analysis Results", value={}) - roi_chart = gr.Plot(label="ROI Visualization") - - return (dashboard_output, roi_scenario_dropdown, monthly_slider, team_slider, - calculate_btn, roi_output, roi_chart) - -# ----------------------------- -# Tab 3: Enterprise Features - UPDATED WITH INSTALLATION STATUS -# ----------------------------- -def create_tab3_enterprise_features() -> tuple: - # Get installation status - try: - from app import get_installation_status - installation = get_installation_status() - - license_data = { - "status": "βœ… OSS Installed" if installation["oss_installed"] else "⚠️ OSS Not Installed", - "oss_version": installation["oss_version"] or "Not installed", - "enterprise_installed": installation["enterprise_installed"], - "enterprise_version": installation["enterprise_version"] or "Not installed", - "execution_allowed": installation["execution_allowed"], - "recommendations": installation["recommendations"], - "badges": installation["badges"] - } - - # Update features table based on installation - features_data = [ - ["ARF OSS Package", "βœ… Installed" if installation["oss_installed"] else "❌ Not Installed", "OSS"], - ["Self-Healing Core", "βœ… Active", "Enterprise"], - ["RAG Graph Memory", "βœ… Active", "Both"], - ["Predictive Analytics", "πŸ”’ Enterprise" if not installation["enterprise_installed"] else "βœ… Available", "Enterprise"], - ["Audit Trail", "πŸ”’ Enterprise" if not installation["enterprise_installed"] else "βœ… Available", "Enterprise"], - ["Compliance (SOC2)", "πŸ”’ Enterprise" if not installation["enterprise_installed"] else "βœ… Available", "Enterprise"] - ] - - except ImportError: - # Fallback if installation check fails - license_data = { - "status": "⚠️ Installation Check Failed", - "oss_version": "Unknown", - "enterprise_installed": False, - "recommendations": ["Run installation check"] - } - features_data = [ - ["Self-Healing Core", "βœ… Active", "Enterprise"], - ["RAG Graph Memory", "βœ… Active", "Both"], - ["Predictive Analytics", "πŸ”’ Enterprise", "Enterprise"], - ["Audit Trail", "πŸ”’ Enterprise", "Enterprise"], - ["Compliance (SOC2)", "πŸ”’ Enterprise", "Enterprise"], - ["Multi-Cloud", "πŸ”’ Enterprise", "Enterprise"] - ] - - license_display = gr.JSON( - value=license_data, - label="πŸ“¦ Package Installation Status" - ) - - validate_btn = gr.Button("πŸ” Validate Installation", variant="secondary") - trial_btn = gr.Button("πŸ†“ Start 30-Day Trial", variant="secondary") - upgrade_btn = gr.Button("πŸš€ Upgrade to Enterprise", variant="primary") - - mcp_mode = gr.Dropdown( - choices=["advisory", "approval", "autonomous"], - value="advisory", - label="MCP Safety Mode" - ) - - # Initial mode info - mcp_mode_info = gr.JSON( - value={ - "current_mode": "advisory", - "description": "OSS Edition - Analysis only, no execution", - "features": ["Incident analysis", "RAG similarity", "HealingIntent creation"], - "package": "agentic-reliability-framework==3.3.7", - "license": "Apache 2.0" - }, - label="Mode Details" - ) - - integrations_data = [ - ["Prometheus", "βœ… Connected", "Monitoring"], - ["Grafana", "βœ… Connected", "Visualization"], - ["Slack", "πŸ”’ Enterprise", "Notifications"], - ["PagerDuty", "πŸ”’ Enterprise", "Alerting"], - ["Jira", "πŸ”’ Enterprise", "Ticketing"], - ["Datadog", "πŸ”’ Enterprise", "Monitoring"] - ] - - features_table = gr.Dataframe( - headers=["Feature", "Status", "Edition"], - value=features_data, - label="Feature Comparison" - ) - integrations_table = gr.Dataframe( - headers=["Integration", "Status", "Type"], - value=integrations_data, - label="Integration Status" - ) - - return (license_display, validate_btn, trial_btn, upgrade_btn, - mcp_mode, mcp_mode_info, features_table, integrations_table) - -# ----------------------------- -# Tab 4: Audit Trail -# ----------------------------- -def create_tab4_audit_trail() -> tuple: - refresh_btn = gr.Button("πŸ”„ Refresh Audit Trail", variant="secondary") - clear_btn = gr.Button("πŸ—‘οΈ Clear History", variant="secondary") - export_btn = gr.Button("πŸ“₯ Export as JSON", variant="primary") - - execution_headers = ["Time", "Scenario", "Mode", "Status", "Savings", "Details"] - incident_headers = ["Time", "Component", "Scenario", "Severity", "Status"] - - execution_table = gr.Dataframe( - headers=execution_headers, - value=[], - label="Execution History" - ) - incident_table = gr.Dataframe( - headers=incident_headers, - value=[], - label="Incident History" - ) - export_text = gr.JSON( - value={"status": "Export ready"}, - label="Export Data" - ) - - return (refresh_btn, clear_btn, export_btn, execution_table, incident_table, export_text) - -# ----------------------------- -# Tab 5: Learning Engine -# ----------------------------- -def create_tab5_learning_engine() -> tuple: - learning_graph = gr.Plot(label="RAG Memory Graph") - graph_type = gr.Dropdown( - choices=["Incident Patterns", "Action-Outcome Chains", "System Dependencies"], - value="Incident Patterns", - label="Graph Type" - ) - show_labels = gr.Checkbox(label="Show Labels", value=True) - search_query = gr.Textbox(label="Search Patterns", placeholder="Enter pattern to search...") - search_btn = gr.Button("πŸ” Search Patterns", variant="secondary") - clear_btn_search = gr.Button("πŸ—‘οΈ Clear Search", variant="secondary") - search_results = gr.JSON( - value={"status": "Ready for search"}, - label="Search Results" - ) - stats_display = gr.JSON( - value={"patterns": 42, "incidents": 156, "success_rate": "87.3%"}, - label="Learning Statistics" - ) - patterns_display = gr.JSON( - value={"common_patterns": ["cache_storm", "db_pool", "memory_leak"]}, - label="Pattern Library" - ) - performance_display = gr.JSON( - value={"accuracy": "94.2%", "recall": "89.7%", "precision": "92.1%"}, - label="Agent Performance" - ) - - return (learning_graph, graph_type, show_labels, search_query, search_btn, - clear_btn_search, search_results, stats_display, patterns_display, performance_display) - -# ----------------------------- -# Footer -# ----------------------------- -def create_footer() -> gr.HTML: - return gr.HTML(""" -
-

Agentic Reliability Framework Β© 2026

-

Production-grade multi-agent AI for autonomous system reliability intelligence

-
- GitHub β€’ - Demo β€’ - PyPI β€’ - Enterprise Inquiries -
-
- """) \ No newline at end of file