Spaces:

A-R-F
/

Agentic-Reliability-Framework-API

Running

App Files Files Community

Agentic-Reliability-Framework-API / app.py

petter2025

Update app.py

445884d verified 3 months ago

raw

history blame

26.9 kB

	"""
	🚀 ARF Ultimate Investor Demo v3.8.0 - ENTERPRISE EDITION
	MODULAR VERSION - Properly integrated with all components
	COMPLETE FIXED VERSION with enhanced Tab 1
	"""

	# ... [Previous imports remain the same] ...

	try:
	# Import scenarios
	from demo.scenarios import INCIDENT_SCENARIOS

	# Import orchestrator
	from demo.orchestrator import DemoOrchestrator

	# Import ROI calculator
	from core.calculators import EnhancedROICalculator

	# Import visualizations
	from core.visualizations import EnhancedVisualizationEngine

	# Import UI components - IMPORTANT: These functions now return gr.HTML, not gr.Markdown
	from ui.components import (
	create_header, create_status_bar, create_tab1_incident_demo,
	create_tab2_business_roi, create_tab3_enterprise_features,
	create_tab4_audit_trail, create_tab5_learning_engine,
	create_footer
	)

	# Import styles
	from ui.styles import get_styles

	logger.info("✅ Successfully imported all modular components")

	except ImportError as e:
	logger.error(f"Failed to import components: {e}")
	logger.error(traceback.format_exc())
	raise

	# ... [AuditTrailManager, scenario_impact_mapping, roi_data_adapter remain the same] ...

	# ===========================================
	# VISUALIZATION HELPERS FOR TAB 1
	# ===========================================
	def create_telemetry_plot(scenario_name: str):
	"""Create a telemetry visualization for the selected scenario"""
	import plotly.graph_objects as go
	import numpy as np

	# Generate some sample data
	time_points = np.arange(0, 100, 1)

	# Different patterns for different scenarios
	if "Cache" in scenario_name:
	data = 100 + 50 * np.sin(time_points * 0.2) + np.random.normal(0, 10, 100)
	threshold = 180
	metric_name = "Cache Hit Rate (%)"
	elif "Database" in scenario_name:
	data = 70 + 30 * np.sin(time_points * 0.15) + np.random.normal(0, 8, 100)
	threshold = 120
	metric_name = "Connection Pool Usage"
	elif "Memory" in scenario_name:
	data = 50 + 40 * np.sin(time_points * 0.1) + np.random.normal(0, 12, 100)
	threshold = 95
	metric_name = "Memory Usage (%)"
	else:
	data = 80 + 20 * np.sin(time_points * 0.25) + np.random.normal(0, 5, 100)
	threshold = 110
	metric_name = "System Load"

	# Create the plot
	fig = go.Figure()

	# Add normal data
	fig.add_trace(go.Scatter(
	x=time_points[:70],
	y=data[:70],
	mode='lines',
	name='Normal',
	line=dict(color='#3b82f6', width=3),
	fill='tozeroy',
	fillcolor='rgba(59, 130, 246, 0.1)'
	))

	# Add anomaly data
	fig.add_trace(go.Scatter(
	x=time_points[70:],
	y=data[70:],
	mode='lines',
	name='Anomaly Detected',
	line=dict(color='#ef4444', width=3, dash='dash'),
	fill='tozeroy',
	fillcolor='rgba(239, 68, 68, 0.1)'
	))

	# Add threshold line
	fig.add_hline(
	y=threshold,
	line_dash="dot",
	line_color="#f59e0b",
	annotation_text="Threshold",
	annotation_position="bottom right"
	)

	# Add detection point
	fig.add_vline(
	x=70,
	line_dash="dash",
	line_color="#10b981",
	annotation_text="ARF Detection",
	annotation_position="top"
	)

	# Update layout
	fig.update_layout(
	title=f"📈 {metric_name} - Live Telemetry",
	xaxis_title="Time (minutes)",
	yaxis_title=metric_name,
	height=300,
	margin=dict(l=20, r=20, t=50, b=20),
	plot_bgcolor='rgba(0,0,0,0)',
	paper_bgcolor='rgba(0,0,0,0)',
	legend=dict(
	orientation="h",
	yanchor="bottom",
	y=1.02,
	xanchor="right",
	x=1
	)
	)

	return fig

	def create_impact_plot(scenario_name: str):
	"""Create a business impact visualization"""
	import plotly.graph_objects as go

	# Get impact data based on scenario
	impact_map = {
	"Cache Miss Storm": {"revenue": 8500, "users": 45000, "services": 12},
	"Database Connection Pool Exhaustion": {"revenue": 4200, "users": 22000, "services": 8},
	"Kubernetes Memory Leak": {"revenue": 5500, "users": 28000, "services": 15},
	"API Rate Limit Storm": {"revenue": 3800, "users": 19000, "services": 6},
	"Network Partition": {"revenue": 12000, "users": 65000, "services": 25},
	"Storage I/O Saturation": {"revenue": 6800, "users": 32000, "services": 10}
	}

	impact = impact_map.get(scenario_name, {"revenue": 5000, "users": 25000, "services": 10})

	# Create gauge for revenue impact
	fig = go.Figure(go.Indicator(
	mode="gauge+number",
	value=impact["revenue"],
	title={'text': "💰 Hourly Revenue Risk", 'font': {'size': 16}},
	number={'prefix': "$", 'font': {'size': 28}},
	gauge={
	'axis': {'range': [0, 15000], 'tickwidth': 1},
	'bar': {'color': "#ef4444"},
	'steps': [
	{'range': [0, 3000], 'color': '#10b981'},
	{'range': [3000, 7000], 'color': '#f59e0b'},
	{'range': [7000, 15000], 'color': '#ef4444'}
	],
	'threshold': {
	'line': {'color': "black", 'width': 4},
	'thickness': 0.75,
	'value': impact["revenue"]
	}
	}
	))

	fig.update_layout(
	height=300,
	margin=dict(l=20, r=20, t=50, b=20),
	paper_bgcolor='rgba(0,0,0,0)'
	)

	return fig

	def create_timeline_plot(scenario_name: str):
	"""Create an incident timeline visualization"""
	import plotly.graph_objects as go

	# Timeline data
	events = [
	{"time": 0, "event": "Incident Starts", "duration": 45},
	{"time": 45, "event": "ARF Detection", "duration": 30},
	{"time": 75, "event": "OSS Analysis Complete", "duration": 60},
	{"time": 135, "event": "Enterprise Execution", "duration": 720},
	{"time": 2700, "event": "Manual Resolution", "duration": 0}
	]

	# Create timeline
	fig = go.Figure()

	# Add event bars
	for i, event in enumerate(events):
	if event["duration"] > 0:
	fig.add_trace(go.Bar(
	x=[event["duration"]],
	y=[event["event"]],
	orientation='h',
	name=event["event"],
	marker_color=['#3b82f6', '#10b981', '#8b5cf6', '#f59e0b', '#ef4444'][i],
	text=[f"{event['duration']}s"],
	textposition='auto',
	hoverinfo='text',
	hovertemplate=f"{event['event']}: {event['duration']} seconds<extra></extra>"
	))

	fig.update_layout(
	title="⏰ Incident Timeline Comparison",
	xaxis_title="Time (seconds)",
	yaxis_title="",
	barmode='stack',
	height=300,
	margin=dict(l=20, r=20, t=50, b=20),
	plot_bgcolor='rgba(0,0,0,0)',
	paper_bgcolor='rgba(0,0,0,0)',
	showlegend=False
	)

	return fig

	# ===========================================
	# SCENARIO UPDATE HANDLER
	# ===========================================
	def update_scenario_display(scenario_name: str) -> dict:
	"""Update all scenario-related displays"""
	scenario = INCIDENT_SCENARIOS.get(scenario_name, {})
	impact = scenario.get("business_impact", {})

	# Create scenario card HTML
	scenario_html = f"""
	<div class="scenario-card">
	<div class="scenario-header">
	<h3>🚨 {scenario_name}</h3>
	<span class="severity-badge {scenario.get('severity', 'HIGH').lower()}">{scenario.get('severity', 'HIGH')}</span>
	</div>
	<div class="scenario-details">
	<div class="scenario-detail-row">
	<span class="detail-label">Component:</span>
	<span class="detail-value">{scenario.get('component', 'Unknown')}</span>
	</div>
	<div class="scenario-detail-row">
	<span class="detail-label">Impact Radius:</span>
	<span class="detail-value">{scenario.get('impact_radius', 'Unknown')}</span>
	</div>
	<div class="scenario-detail-row">
	<span class="detail-label">Revenue Risk:</span>
	<span class="detail-value revenue-risk">${impact.get('revenue_loss_per_hour', 0):,}/hour</span>
	</div>
	<div class="scenario-detail-row">
	<span class="detail-label">Detection Time:</span>
	<span class="detail-value">{scenario.get('detection_time', 'Unknown')}</span>
	</div>
	<div class="scenario-tags">
	{''.join([f'<span class="scenario-tag">{tag}</span>' for tag in scenario.get('tags', ['incident', 'demo'])])}
	</div>
	</div>
	</div>
	"""

	# Create visualizations
	telemetry_plot = create_telemetry_plot(scenario_name)
	impact_plot = create_impact_plot(scenario_name)
	timeline_plot = create_timeline_plot(scenario_name)

	return {
	"scenario_html": scenario_html,
	"telemetry_plot": telemetry_plot,
	"impact_plot": impact_plot,
	"timeline_plot": timeline_plot
	}

	# ===========================================
	# CREATE DEMO INTERFACE - UPDATED FOR ENHANCED TAB 1
	# ===========================================
	def create_demo_interface():
	"""Create demo interface using modular components"""

	import gradio as gr

	# Initialize components
	viz_engine = EnhancedVisualizationEngine()
	roi_calculator = EnhancedROICalculator()
	audit_manager = AuditTrailManager()
	orchestrator = DemoOrchestrator()

	# Get CSS styles
	css_styles = get_styles()

	with gr.Blocks(
	title="🚀 ARF Investor Demo v3.8.0",
	theme=gr.themes.Soft(primary_hue="blue"),
	css=css_styles
	) as demo:

	# Header
	header_html = create_header("3.3.6", False)

	# Status bar
	status_html = create_status_bar()

	# ============ 5 TABS ============
	with gr.Tabs(elem_classes="tab-nav"):

	# TAB 1: Live Incident Demo - ENHANCED
	with gr.TabItem("🔥 Live Incident Demo", id="tab1"):
	# Get components from UI module
	(scenario_dropdown, scenario_card, telemetry_viz, impact_viz,
	workflow_header, detection_agent, recall_agent, decision_agent,
	oss_section, enterprise_section, oss_btn, enterprise_btn,
	approval_toggle, mcp_mode, timeline_viz,
	detection_time, mttr, auto_heal, savings,
	oss_results_display, enterprise_results_display, approval_display, demo_btn) = create_tab1_incident_demo()

	# ... [Tabs 2-5 remain the same as before] ...
	with gr.TabItem("💰 Business Impact & ROI", id="tab2"):
	(dashboard_output, roi_scenario_dropdown, monthly_slider, team_slider,
	calculate_btn, roi_output, roi_chart) = create_tab2_business_roi(INCIDENT_SCENARIOS)

	with gr.TabItem("🏢 Enterprise Features", id="tab3"):
	(license_display, validate_btn, trial_btn, upgrade_btn,
	mcp_mode_tab3, mcp_mode_info, features_table, integrations_table) = create_tab3_enterprise_features()

	with gr.TabItem("📜 Audit Trail & History", id="tab4"):
	(refresh_btn, clear_btn, export_btn, execution_table,
	incident_table, export_text) = create_tab4_audit_trail()

	with gr.TabItem("🧠 Learning Engine", id="tab5"):
	(learning_graph, graph_type, show_labels, search_query, search_btn,
	clear_btn_search, search_results, stats_display, patterns_display,
	performance_display) = create_tab5_learning_engine()

	# Footer
	footer_html = create_footer()

	# ============ EVENT HANDLERS FOR ENHANCED TAB 1 ============

	# Update scenario display when dropdown changes
	scenario_dropdown.change(
	fn=update_scenario_display,
	inputs=[scenario_dropdown],
	outputs={
	scenario_card: gr.HTML(),
	telemetry_viz: gr.Plot(),
	impact_viz: gr.Plot(),
	timeline_viz: gr.Plot()
	}
	)

	# Run OSS Analysis
	async def run_oss_analysis(scenario_name):
	scenario = INCIDENT_SCENARIOS.get(scenario_name, {})

	# Use orchestrator
	analysis = await orchestrator.analyze_incident(scenario_name, scenario)

	# Add to audit trail
	audit_manager.add_incident(scenario_name, scenario.get("severity", "HIGH"))

	# Update incident table
	incident_table_data = audit_manager.get_incident_table()

	# Enhanced OSS results
	oss_results = {
	"status": "✅ OSS Analysis Complete",
	"scenario": scenario_name,
	"confidence": 0.85,
	"agents_executed": ["Detection", "Recall", "Decision"],
	"findings": [
	"Anomaly detected with 99.8% confidence",
	"3 similar incidents found in RAG memory",
	"Historical success rate for similar actions: 87%"
	],
	"recommendations": [
	"Scale resources based on historical patterns",
	"Implement circuit breaker pattern",
	"Add enhanced monitoring for key metrics"
	],
	"healing_intent": {
	"action": "scale_out",
	"component": scenario.get("component", "unknown"),
	"parameters": {"nodes": "3→5", "region": "auto-select"},
	"confidence": 0.94,
	"requires_enterprise": True,
	"advisory_only": True,
	"safety_check": "✅ Passed (blast radius: 2 services)"
	}
	}

	# Update agent status
	detection_html = """
	<div class="agent-card detection">
	<div class="agent-icon">🕵️‍♂️</div>
	<div class="agent-content">
	<h4>Detection Agent</h4>
	<p class="agent-status-text">Analysis complete: <strong>99.8% confidence</strong></p>
	<div class="agent-metrics">
	<span class="agent-metric">Time: 45s</span>
	<span class="agent-metric">Accuracy: 98.7%</span>
	</div>
	<div class="agent-status completed">COMPLETE</div>
	</div>
	</div>
	"""

	recall_html = """
	<div class="agent-card recall">
	<div class="agent-icon">🧠</div>
	<div class="agent-content">
	<h4>Recall Agent</h4>
	<p class="agent-status-text"><strong>3 similar incidents</strong> retrieved from memory</p>
	<div class="agent-metrics">
	<span class="agent-metric">Recall: 92%</span>
	<span class="agent-metric">Patterns: 5</span>
	</div>
	<div class="agent-status completed">COMPLETE</div>
	</div>
	</div>
	"""

	decision_html = """
	<div class="agent-card decision">
	<div class="agent-icon">🎯</div>
	<div class="agent-content">
	<h4>Decision Agent</h4>
	<p class="agent-status-text">HealingIntent created with <strong>94% confidence</strong></p>
	<div class="agent-metrics">
	<span class="agent-metric">Success Rate: 87%</span>
	<span class="agent-metric">Safety: 100%</span>
	</div>
	<div class="agent-status completed">COMPLETE</div>
	</div>
	</div>
	"""

	return (
	detection_html, recall_html, decision_html,
	oss_results, incident_table_data
	)

	oss_btn.click(
	fn=run_oss_analysis,
	inputs=[scenario_dropdown],
	outputs=[
	detection_agent, recall_agent, decision_agent,
	oss_results_display, incident_table
	]
	)

	# Execute Enterprise Healing
	def execute_enterprise_healing(scenario_name, approval_required, mcp_mode_value):
	scenario = INCIDENT_SCENARIOS.get(scenario_name, {})

	# Determine mode
	mode = "Approval" if approval_required else "Autonomous"
	if "Advisory" in mcp_mode_value:
	return gr.HTML.update(value="<div class='approval-status'><p>❌ Cannot execute in Advisory mode. Switch to Approval or Autonomous mode.</p></div>"), {}, []

	# Calculate savings
	impact = scenario.get("business_impact", {})
	revenue_loss = impact.get("revenue_loss_per_hour", 5000)
	savings = int(revenue_loss * 0.85) # 85% savings

	# Add to audit trail
	audit_manager.add_execution(scenario_name, mode, savings=savings)

	# Create approval display
	if approval_required:
	approval_html = f"""
	<div class="approval-status">
	<div class="approval-header">
	<h4>👤 Human Approval Required</h4>
	<span class="approval-badge pending">PENDING</span>
	</div>
	<div class="approval-content">
	<p><strong>Scenario:</strong> {scenario_name}</p>
	<p><strong>Action:</strong> Scale Redis cluster from 3 to 5 nodes</p>
	<p><strong>Estimated Savings:</strong> <span class='savings-highlight'>${savings:,}</span></p>
	<div class="approval-workflow">
	<div class="workflow-step">✅ 1. ARF generated intent (94% confidence)</div>
	<div class="workflow-step">⏳ 2. Awaiting human review...</div>
	<div class="workflow-step">3. ARF will execute upon approval</div>
	</div>
	</div>
	</div>
	"""
	else:
	approval_html = f"""
	<div class="approval-status">
	<div class="approval-header">
	<h4>⚡ Autonomous Execution Complete</h4>
	<span class="approval-badge not-required">AUTO-EXECUTED</span>
	</div>
	<div class="approval-content">
	<p><strong>Scenario:</strong> {scenario_name}</p>
	<p><strong>Mode:</strong> Autonomous</p>
	<p><strong>Action Executed:</strong> Scaled Redis cluster from 3 to 5 nodes</p>
	<p><strong>Recovery Time:</strong> 12 minutes (vs 45 min manual)</p>
	<p><strong>Cost Saved:</strong> <span class='savings-highlight'>${savings:,}</span></p>
	<div class="approval-workflow">
	<div class="workflow-step">✅ 1. ARF generated intent</div>
	<div class="workflow-step">✅ 2. Safety checks passed</div>
	<div class="workflow-step">✅ 3. Autonomous execution completed</div>
	</div>
	</div>
	</div>
	"""

	# Enterprise results
	enterprise_results = {
	"execution_mode": mode,
	"scenario": scenario_name,
	"timestamp": datetime.datetime.now().isoformat(),
	"actions_executed": [
	"✅ Scaled resources based on ML recommendations",
	"✅ Implemented circuit breaker pattern",
	"✅ Deployed enhanced monitoring",
	"✅ Updated RAG memory with outcome"
	],
	"business_impact": {
	"recovery_time": "60 min → 12 min",
	"cost_saved": f"${savings:,}",
	"users_impacted": "45,000 → 0",
	"mttr_reduction": "73% faster"
	},
	"safety_checks": {
	"blast_radius": "2 services (within limit)",
	"business_hours": "Compliant",
	"action_type": "Approved",
	"circuit_breaker": "Active"
	}
	}

	# Update execution table
	execution_table_data = audit_manager.get_execution_table()

	return approval_html, enterprise_results, execution_table_data

	enterprise_btn.click(
	fn=execute_enterprise_healing,
	inputs=[scenario_dropdown, approval_toggle, mcp_mode],
	outputs=[approval_display, enterprise_results_display, execution_table]
	)

	# Run Complete Demo
	def run_complete_demo(scenario_name):
	"""Run a complete demo walkthrough"""
	import time

	# Step 1: Update scenario
	update_result = update_scenario_display(scenario_name)

	# Simulate OSS analysis
	time.sleep(1)

	# Step 2: Run OSS analysis
	oss_result = asyncio.run(run_oss_analysis(scenario_name))

	# Step 3: Execute Enterprise (simulated)
	time.sleep(2)

	scenario = INCIDENT_SCENARIOS.get(scenario_name, {})
	impact = scenario.get("business_impact", {})
	revenue_loss = impact.get("revenue_loss_per_hour", 5000)
	savings = int(revenue_loss * 0.85)

	enterprise_results = {
	"demo_mode": "Complete Walkthrough",
	"scenario": scenario_name,
	"steps_completed": [
	"1. Incident detected (45s)",
	"2. OSS analysis completed",
	"3. HealingIntent created (94% confidence)",
	"4. Enterprise license validated",
	"5. Autonomous execution simulated",
	"6. Outcome recorded in RAG memory"
	],
	"outcome": {
	"recovery_time": "12 minutes",
	"manual_comparison": "45 minutes",
	"cost_saved": f"${savings:,}",
	"users_protected": "45,000",
	"learning": "Pattern added to RAG memory"
	}
	}

	# Create demo completion message
	demo_message = f"""
	<div class="scenario-card" style="background: linear-gradient(135deg, #f0fdf4 0%, #dcfce7 100%);">
	<div class="scenario-header">
	<h3>✅ Demo Complete</h3>
	<span class="severity-badge low">SUCCESS</span>
	</div>
	<div class="scenario-details">
	<p><strong>Scenario:</strong> {scenario_name}</p>
	<p><strong>Workflow:</strong> OSS Analysis → Enterprise Execution</p>
	<p><strong>Time Saved:</strong> 33 minutes (73% faster)</p>
	<p><strong>Cost Avoided:</strong> ${savings:,}</p>
	<p><em>This demonstrates the complete ARF value proposition from detection to autonomous healing.</em></p>
	</div>
	</div>
	"""

	return (
	update_result["scenario_html"],
	update_result["telemetry_plot"],
	update_result["impact_plot"],
	update_result["timeline_plot"],
	oss_result[0], oss_result[1], oss_result[2], # Agent updates
	oss_result[3], # OSS results
	demo_message, # Demo message
	enterprise_results # Enterprise results
	)

	demo_btn.click(
	fn=run_complete_demo,
	inputs=[scenario_dropdown],
	outputs=[
	scenario_card, telemetry_viz, impact_viz, timeline_viz,
	detection_agent, recall_agent, decision_agent,
	oss_results_display, approval_display, enterprise_results_display
	]
	)

	# ... [Rest of the event handlers remain the same] ...

	# Initialize scenario display
	demo.load(
	fn=lambda: update_scenario_display("Cache Miss Storm"),
	outputs=[scenario_card, telemetry_viz, impact_viz, timeline_viz]
	)

	# Initialize dashboard
	def initialize_dashboard():
	try:
	chart = viz_engine.create_executive_dashboard()
	return chart
	except Exception as e:
	logger.error(f"Dashboard initialization failed: {e}")
	import plotly.graph_objects as go
	fig = go.Figure(go.Indicator(
	mode="number+gauge",
	value=5.2,
	title={"text": "<b>Executive Dashboard</b><br>ROI Multiplier"},
	domain={'x': [0, 1], 'y': [0, 1]},
	gauge={
	'axis': {'range': [0, 10]},
	'bar': {'color': "#4ECDC4"},
	'steps': [
	{'range': [0, 2], 'color': 'lightgray'},
	{'range': [2, 4], 'color': 'gray'},
	{'range': [4, 6], 'color': 'lightgreen'},
	{'range': [6, 10], 'color': "#4ECDC4"}
	]
	}
	))
	fig.update_layout(height=700, paper_bgcolor="rgba(0,0,0,0)")
	return fig

	demo.load(
	fn=initialize_dashboard,
	outputs=[dashboard_output]
	)

	return demo