# Scraped Hugging Face file-view header (not part of the program):
# petter2025 | "Update app.py" | commit 49795df (verified) | 23.3 kB
"""
🚀 ARF Investor Demo - COMPLETE STANDALONE VERSION
No module dependencies - Everything in one file
Works on Hugging Face Spaces
"""
import logging
import datetime
import random
import uuid
from typing import Dict, List, Optional, Any
import gradio as gr
import plotly.graph_objects as go
import plotly.express as px
import pandas as pd
import numpy as np
from plotly.subplots import make_subplots
# Configure logging BEFORE the optional import below. Until basicConfig runs,
# the root logger sits at WARNING with no handler, so the INFO-level
# "successfully imported" message would be dropped.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Import ARF OSS if available; otherwise define minimal stand-ins with the
# same names so the rest of the file can run in simulation mode.
try:
    from agentic_reliability_framework.arf_core.models.healing_intent import (
        HealingIntent,
        create_scale_out_intent
    )
    from agentic_reliability_framework.arf_core.engine.simple_mcp_client import OSSMCPClient
    ARF_OSS_AVAILABLE = True
    logger.info("✅ ARF OSS v3.3.6 successfully imported")
except ImportError as e:
    ARF_OSS_AVAILABLE = False
    logger.warning(f"⚠️ ARF OSS not available: {e}. Running in simulation mode.")

    # Mock classes
    class HealingIntent:
        """Simulation stand-in that stores an intent type and its parameters."""

        def __init__(self, **kwargs):
            # Missing keys fall back to a scale-out intent with no parameters.
            self.intent_type = kwargs.get("intent_type", "scale_out")
            self.parameters = kwargs.get("parameters", {})

        def to_dict(self):
            """Return the intent as a plain dict stamped with the current local time."""
            return {
                "intent_type": self.intent_type,
                "parameters": self.parameters,
                "created_at": datetime.datetime.now().isoformat()
            }

    def create_scale_out_intent(resource_type: str, scale_factor: float = 2.0):
        """Build a mock scale-out HealingIntent for ``resource_type``."""
        return HealingIntent(
            intent_type="scale_out",
            parameters={
                "resource_type": resource_type,
                "scale_factor": scale_factor,
                "action": "Increase capacity"
            }
        )

    class OSSMCPClient:
        """Simulation stand-in that returns a canned analysis result."""

        def analyze_incident(self, metrics: Dict, pattern: str = "") -> Dict:
            """Return a fixed set of recommendations; both arguments are ignored."""
            return {
                "status": "analysis_complete",
                "recommendations": [
                    "Increase resource allocation",
                    "Implement monitoring",
                    "Add circuit breakers",
                    "Optimize configuration"
                ],
                "confidence": 0.92
            }
# ===========================================
# DATA - Everything in one place
# ===========================================
# Static demo data, keyed by the scenario name shown in the dropdown.
# Every scenario provides "metrics" and "impact" (display strings only).
# "oss_analysis" and "enterprise_results" are optional: run_oss_analysis and
# execute_enterprise_healing substitute a generic payload when they are absent.
INCIDENT_SCENARIOS = {
    # The only scenario with hand-written analysis and healing results.
    "Cache Miss Storm": {
        "metrics": {
            "Cache Hit Rate": "18.5% (Critical)",
            "Database Load": "92% (Overloaded)",
            "Response Time": "1850ms (Slow)",
            "Affected Users": "45,000"
        },
        "impact": {
            "Revenue Loss": "$8,500/hour",
            "Page Load Time": "+300%",
            "Users Impacted": "45,000"
        },
        # Payload returned by the "Run OSS Analysis" action.
        "oss_analysis": {
            "status": "✅ ARF OSS Analysis Complete",
            "recommendations": [
                "Increase Redis cache memory allocation",
                "Implement cache warming strategy",
                "Optimize key patterns (TTL adjustments)",
                "Add circuit breaker for database fallback"
            ],
            "estimated_time": "60+ minutes",
            "engineers_needed": "2-3 SREs",
            "manual_effort": "High",
            "arf_oss": True,
            "healing_intent_created": True
        },
        # Payload returned by the "Execute Enterprise Healing" action.
        "enterprise_results": {
            "actions_completed": [
                "✅ Auto-scaled Redis: 4GB → 8GB",
                "✅ Deployed cache warming service",
                "✅ Optimized 12 key patterns",
                "✅ Implemented circuit breaker"
            ],
            "metrics_improvement": {
                "Cache Hit Rate": "18.5% → 72%",
                "Response Time": "1850ms → 450ms",
                "Database Load": "92% → 45%"
            },
            "business_impact": {
                "Recovery Time": "60 min → 12 min",
                "Cost Saved": "$7,200",
                "Users Impacted": "45,000 → 0"
            }
        }
    },
    # No scenario-specific analysis/results: the generic fallbacks are used.
    "Database Connection Pool Exhaustion": {
        "metrics": {
            "Active Connections": "98/100 (Critical)",
            "API Latency": "2450ms",
            "Error Rate": "15.2%",
            "Queue Depth": "1250"
        },
        "impact": {
            "Revenue Loss": "$4,200/hour",
            "Affected Services": "API Gateway, User Service",
            "SLA Violation": "Yes"
        }
    },
    # No scenario-specific analysis/results: the generic fallbacks are used.
    "Memory Leak in Production": {
        "metrics": {
            "Memory Usage": "96% (Critical)",
            "GC Pause Time": "4500ms",
            "Error Rate": "28.5%",
            "Restart Frequency": "12/hour"
        },
        "impact": {
            "Revenue Loss": "$5,500/hour",
            "Session Loss": "8,500 users",
            "Customer Impact": "High"
        }
    }
}
# ===========================================
# VISUALIZATION FUNCTIONS
# ===========================================
def create_timeline_visualization():
    """Build the incident timeline figure.

    Five fixed milestones are drawn as labelled markers on a single
    horizontal line (y == 1). Each milestone is its own trace so it gets
    its own legend entry, named after its phase.

    Returns:
        The configured plotly ``go.Figure``.
    """
    # Marker colour per phase.
    palette = {
        "problem": "red",
        "detection": "blue",
        "analysis": "purple",
        "action": "green",
        "recovery": "lightgreen",
    }
    # (time label, caption, phase) in chronological order.
    milestones = (
        ("T-5m", "📉 Cache hit rate drops", "problem"),
        ("T-3m", "🤖 ARF detects pattern", "detection"),
        ("T-2m", "🧠 Analysis complete", "analysis"),
        ("T-1m", "⚡ Healing executed", "action"),
        ("Now", "✅ System recovered", "recovery"),
    )

    timeline = go.Figure()
    for when, caption, phase in milestones:
        point = go.Scatter(
            x=[when],
            y=[1],
            mode="markers+text",
            marker={"size": 15, "color": palette[phase], "symbol": "circle"},
            text=[caption],
            textposition="top center",
            name=phase.capitalize(),
        )
        timeline.add_trace(point)

    # Transparent backgrounds; the y axis carries no information, so its
    # tick labels are hidden and its range is pinned around the marker row.
    timeline.update_layout(
        title="<b>Incident Timeline</b>",
        height=400,
        showlegend=True,
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)",
        yaxis={"showticklabels": False, "range": [0.5, 1.5]},
    )
    return timeline
def create_business_dashboard():
    """Build the executive dashboard: a 2x2 grid of business charts.

    Panels (row, col):
        (1, 1) cost impact bars, (1, 2) team-time before/after bars,
        (2, 1) MTTR comparison bars, (2, 2) ROI multiplier gauge.

    All values are fixed demo numbers.

    Returns:
        The configured plotly figure.
    """
    fig = make_subplots(
        rows=2, cols=2,
        subplot_titles=('Cost Impact', 'Team Time', 'MTTR Comparison', 'ROI'),
        # Without `specs` every cell is a cartesian ("xy") subplot, and adding
        # a go.Indicator to an "xy" cell raises ValueError. Declare the gauge
        # cell explicitly.
        specs=[
            [{"type": "xy"}, {"type": "xy"}],
            [{"type": "xy"}, {"type": "indicator"}],
        ],
        vertical_spacing=0.15
    )

    # 1. Cost Impact
    categories = ['Without ARF', 'With ARF Enterprise', 'Savings']
    values = [2.96, 1.0, 1.96]
    fig.add_trace(
        go.Bar(
            x=categories,
            y=values,
            marker_color=['#FF6B6B', '#4ECDC4', '#45B7D1'],
            # Unnamed single-series trace: keep "trace 0" out of the legend.
            showlegend=False,
        ),
        row=1, col=1
    )

    # 2. Team Time (share of time per activity, before vs. after)
    activities = ['Firefighting', 'Innovation', 'Strategic']
    before = [60, 20, 20]
    after = [10, 60, 30]
    fig.add_trace(go.Bar(x=activities, y=before, name='Before', marker_color='#FF6B6B'), row=1, col=2)
    fig.add_trace(go.Bar(x=activities, y=after, name='After', marker_color='#4ECDC4'), row=1, col=2)

    # 3. MTTR Comparison
    mttr_methods = ['Manual', 'Traditional', 'ARF OSS', 'ARF Enterprise']
    mttr_times = [120, 45, 25, 8]
    fig.add_trace(
        go.Bar(
            x=mttr_methods,
            y=mttr_times,
            marker_color=['#FF6B6B', '#FFE66D', '#45B7D1', '#4ECDC4'],
            # Unnamed single-series trace: keep it out of the legend.
            showlegend=False,
        ),
        row=2, col=1
    )

    # 4. ROI Gauge
    fig.add_trace(
        go.Indicator(
            mode="gauge+number",
            value=5.2,
            title={'text': "ROI Multiplier"},
            gauge={
                'axis': {'range': [0, 10]},
                'bar': {'color': "#4ECDC4"},
                'steps': [
                    {'range': [0, 2], 'color': "lightgray"},
                    {'range': [2, 4], 'color': "gray"},
                    {'range': [4, 6], 'color': "lightgreen"},
                    {'range': [6, 10], 'color': "green"}
                ]
            }
        ),
        row=2, col=2
    )

    fig.update_layout(
        height=700,
        showlegend=True,
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(0,0,0,0)',
        title_text="<b>Executive Business Dashboard</b>"
    )
    return fig
# ===========================================
# BUSINESS LOGIC
# ===========================================
def run_oss_analysis(scenario_name: str):
    """Return the OSS (advisory) analysis payload for a scenario.

    Args:
        scenario_name: Key into ``INCIDENT_SCENARIOS``. Unknown names, and
            scenarios without an ``"oss_analysis"`` entry, get a generic
            analysis.

    Returns:
        A new dict with the analysis fields plus an ``"arf_context"`` entry
        describing whether the real ARF OSS package is loaded.
    """
    scenario = INCIDENT_SCENARIOS.get(scenario_name, {})
    # Copy before annotating: the stored dict belongs to module-level data
    # shared by every caller, and must not gain keys as a side effect.
    analysis = dict(scenario.get("oss_analysis", {}))
    if not analysis:
        analysis = {
            "status": "✅ Analysis Complete",
            "recommendations": [
                "Increase resource allocation",
                "Implement monitoring",
                "Add circuit breakers",
                "Optimize configuration"
            ],
            "estimated_time": "45-60 minutes",
            "engineers_needed": "2-3",
            "manual_effort": "Required",
            "arf_oss": ARF_OSS_AVAILABLE
        }

    # Add ARF context
    analysis["arf_context"] = {
        "oss_available": ARF_OSS_AVAILABLE,
        "version": "3.3.6",
        "mode": "advisory_only",
        "healing_intent": "created" if ARF_OSS_AVAILABLE else "simulated"
    }
    return analysis
def execute_enterprise_healing(scenario_name: str, approval_required: bool):
    """Simulate enterprise healing for a scenario.

    Args:
        scenario_name: Key into ``INCIDENT_SCENARIOS``. Unknown names, and
            scenarios without ``"enterprise_results"``, get generic results
            with randomised savings figures.
        approval_required: Selects the "approved" or the "auto-executed"
            status panel and is echoed in the returned configuration.

    Returns:
        A 3-tuple ``(approval_html, config, results)``: an HTML status panel,
        the enterprise configuration dict, and a new results dict that
        includes an ``"enterprise_context"`` entry.
    """
    from html import escape

    scenario = INCIDENT_SCENARIOS.get(scenario_name, {})
    # Copy before annotating: the stored dict belongs to module-level data
    # shared by every caller, and must not gain keys as a side effect.
    results = dict(scenario.get("enterprise_results", {}))
    if not results:
        results = {
            "status": "✅ Auto-Executed" if not approval_required else "✅ Approved and Executed",
            "actions_completed": [
                "✅ Auto-scaled resources",
                "✅ Implemented optimization",
                "✅ Deployed monitoring",
                "✅ Validated recovery"
            ],
            "metrics_improvement": {
                "Performance": "Improved",
                "Recovery": "Complete"
            },
            "business_impact": {
                "Cost Saved": f"${random.randint(2000, 8000):,}",
                "Time Saved": f"{random.randint(30, 60)} min → {random.randint(5, 15)} min"
            }
        }

    # The name is interpolated into markup, so neutralise HTML metacharacters.
    # str() keeps a missing (None) selection from raising inside escape().
    safe_name = escape(str(scenario_name))

    # Add approval info
    if approval_required:
        approval_html = f"""
<div style='padding: 15px; background: #f8f9fa; border-radius: 8px; border-left: 4px solid #007bff; margin: 10px 0;'>
<h4 style='margin: 0 0 10px 0;'>🛡️ Approval Required</h4>
<p><b>Action:</b> Scale resources for {safe_name}</p>
<p><b>Risk:</b> Low (auto-rollback available)</p>
<p><b>Status:</b> ✅ <span style='color: green;'>Approved & Executed</span></p>
</div>
"""
    else:
        approval_html = f"""
<div style='padding: 15px; background: #e8f5e8; border-radius: 8px; border-left: 4px solid #28a745; margin: 10px 0;'>
<h4 style='margin: 0 0 10px 0;'>⚡ Auto-Executed</h4>
<p><b>Action:</b> Autonomous healing for {safe_name}</p>
<p><b>Mode:</b> Fully autonomous (guardrails active)</p>
<p><b>Status:</b> ✅ <span style='color: green;'>Successfully completed</span></p>
</div>
"""

    # Add enterprise context
    results["enterprise_context"] = {
        "approval_required": approval_required,
        "compliance_mode": "strict",
        "audit_trail": "created",
        "learning_applied": True,
        "roi_measured": True
    }
    return approval_html, {"approval_required": approval_required, "compliance_mode": "strict"}, results
def calculate_roi(
    monthly_incidents: int,
    avg_impact: int,
    team_size: int,
    *,
    cost_per_engineer: float = 150000,
    savings_rate: float = 0.82,
):
    """Estimate the ROI of adopting ARF from three user-supplied figures.

    Args:
        monthly_incidents: Number of incidents per month.
        avg_impact: Average cost of one incident, in dollars.
        team_size: Number of engineers on the reliability team.
        cost_per_engineer: Annual cost of one engineer, in dollars.
        savings_rate: Fraction of the annual incident impact counted as saved.

    Returns:
        ``{"analysis": {...}}`` with formatted display strings on success, or
        ``{"error": "..."}`` if the inputs cannot be used in the calculation.
    """
    # Deliberately broad: this is a UI callback, so a bad input becomes an
    # error payload for display instead of an unhandled exception.
    try:
        annual_impact = monthly_incidents * 12 * avg_impact
        team_cost = team_size * cost_per_engineer
        savings = annual_impact * savings_rate
        # A team cost of zero would divide by zero; report a 0x multiplier.
        roi_multiplier = savings / team_cost if team_cost > 0 else 0

        if roi_multiplier >= 5.0:
            recommendation = "🚀 Excellent fit for ARF Enterprise"
        elif roi_multiplier >= 2.0:
            recommendation = "✅ Good ROI with ARF Enterprise"
        elif roi_multiplier >= 1.0:
            recommendation = "⚠️ Consider ARF OSS edition first"
        else:
            recommendation = "🆓 Start with ARF OSS (free)"

        return {
            "analysis": {
                "your_annual_impact": f"${annual_impact:,.0f}",
                "your_team_cost": f"${team_cost:,.0f}",
                "potential_savings": f"${savings:,.0f}",
                "your_roi_multiplier": f"{roi_multiplier:.1f}×",
                "vs_industry_average": "5.2× average ROI",
                "recommendation": recommendation,
                # Months of savings needed to cover the team cost.
                "payback_period": f"{(team_cost / (savings / 12)):.1f} months" if savings > 0 else "N/A"
            }
        }
    except Exception as e:
        return {"error": f"Calculation error: {str(e)}"}
# ===========================================
# MAIN INTERFACE
# ===========================================
def create_interface():
    """Assemble the Gradio Blocks app and wire up its callbacks.

    Layout: a header, two tabs ("Live Incident Demo" and "Business Impact &
    ROI"), a contact footer and a closing status line.

    Returns:
        The ``gr.Blocks`` instance, not yet launched.
    """
    # Multi-line string bodies are flush-left so their content is exactly
    # the text written here, with no indentation added.
    custom_css = """
.gradio-container { max-width: 1200px; margin: auto; }
h1, h2, h3 { color: #1a365d !important; }
"""
    # Scenario pre-selected in the dropdown and shown before any interaction.
    default_name = "Cache Miss Storm"
    default_scenario = INCIDENT_SCENARIOS[default_name]

    if ARF_OSS_AVAILABLE:
        arf_status = "✅ ARF OSS v3.3.6"
        integration_note = "✅ Integrated with ARF OSS v3.3.6"
    else:
        arf_status = "⚠️ Simulation Mode"
        integration_note = "⚠️ Running in simulation mode"

    with gr.Blocks(
        title="🚀 ARF Investor Demo v3.6.0",
        theme=gr.themes.Soft(),
        css=custom_css,
    ) as demo:
        # ---------------- Header ----------------
        gr.Markdown(f"""
# 🚀 Agentic Reliability Framework - Investor Demo v3.6.0
## From Cost Center to Profit Engine: 5.2× ROI with Autonomous Reliability
<div style='color: #666; font-size: 16px; margin-top: 10px;'>
{arf_status} | Experience: <b>OSS (Advisory)</b> ↔ <b>Enterprise (Autonomous)</b>
</div>
""")

        # ---------------- Tabs ----------------
        with gr.Tabs():
            # Tab 1: live incident demo
            with gr.TabItem("🔥 Live Incident Demo"):
                with gr.Row():
                    # Left panel: scenario picker and its static figures
                    with gr.Column(scale=1):
                        gr.Markdown("### 🎬 Incident Scenario")
                        scenario_dropdown = gr.Dropdown(
                            choices=list(INCIDENT_SCENARIOS.keys()),
                            value=default_name,
                            label="Select critical incident:",
                        )
                        gr.Markdown("### 📊 Current Crisis Metrics")
                        metrics_display = gr.JSON(value=default_scenario["metrics"])
                        gr.Markdown("### 💰 Business Impact")
                        impact_display = gr.JSON(value=default_scenario["impact"])

                    # Right panel: timeline, actions and their output
                    with gr.Column(scale=2):
                        gr.Markdown("### 📈 Incident Timeline")
                        timeline_output = gr.Plot()

                        with gr.Row():
                            oss_btn = gr.Button("🆓 Run OSS Analysis", variant="secondary")
                            enterprise_btn = gr.Button("🚀 Execute Enterprise Healing", variant="primary")

                        approval_toggle = gr.Checkbox(
                            label="🔐 Require Manual Approval",
                            value=True,
                            info="Toggle to show approval workflow vs auto-execution",
                        )
                        approval_display = gr.HTML(
                            value="<div style='padding: 10px; background: #f8f9fa; border-radius: 5px;'>Approval status will appear here</div>"
                        )
                        config_display = gr.JSON(
                            label="⚙️ Enterprise Configuration",
                            value={"approval_required": True, "compliance_mode": "strict"},
                        )
                        results_display = gr.JSON(
                            label="🎯 Execution Results",
                            value={"status": "Ready for execution..."},
                        )

            # Tab 2: business impact and ROI
            with gr.TabItem("💰 Business Impact & ROI"):
                with gr.Column():
                    gr.Markdown("### 📊 Business Health Dashboard")
                    dashboard_output = gr.Plot()

                    gr.Markdown("### 🧮 Interactive ROI Calculator")
                    with gr.Row():
                        with gr.Column(scale=1):
                            monthly_slider = gr.Slider(
                                minimum=1, maximum=100, value=15, step=1,
                                label="Monthly incidents",
                            )
                            impact_slider = gr.Slider(
                                minimum=1000, maximum=50000, value=8500, step=500,
                                label="Avg incident impact ($)",
                            )
                            team_slider = gr.Slider(
                                minimum=1, maximum=20, value=5, step=1,
                                label="Reliability team size",
                            )
                            calculate_btn = gr.Button("Calculate My ROI", variant="primary")
                        with gr.Column(scale=2):
                            roi_output = gr.JSON(
                                label="Your ROI Analysis",
                                value={"analysis": "Adjust sliders and click 'Calculate My ROI'"},
                            )

                    gr.Markdown("### 📋 Capability Comparison")
                    with gr.Row():
                        with gr.Column():
                            gr.Markdown("""
**OSS Edition (Free)**
- Advisory recommendations only
- Manual implementation required
- No auto-healing
- Community support
- No ROI measurement
""")
                        with gr.Column():
                            gr.Markdown("""
**Enterprise Edition**
- Autonomous execution
- 81.7% auto-heal rate
- Full audit trails & compliance
- 24/7 enterprise support
- 5.2× average ROI
- 2-3 month payback
""")

        # ---------------- Footer ----------------
        gr.Markdown("---")
        with gr.Row():
            with gr.Column(scale=2):
                gr.Markdown("""
**📞 Contact & Demo**
📧 petter2025us@outlook.com
🌐 [https://arf.dev](https://arf.dev)
📚 [Documentation](https://docs.arf.dev)
💻 [GitHub](https://github.com/petterjuan/agentic-reliability-framework)
""")
            with gr.Column(scale=1):
                gr.Markdown("""
**🎯 Schedule a Demo**
(https://calendly.com/petter2025us/30min)
""")

        # ---------------- Event handlers ----------------
        def update_scenario(scenario_name: str):
            """Return metrics, impact and a fresh timeline for the chosen scenario."""
            chosen = INCIDENT_SCENARIOS.get(scenario_name, {})
            metrics = chosen.get("metrics", {})
            impact = chosen.get("impact", {})
            return metrics, impact, create_timeline_visualization()

        # Selecting a scenario refreshes the left panel and the timeline.
        scenario_dropdown.change(
            fn=update_scenario,
            inputs=[scenario_dropdown],
            outputs=[metrics_display, impact_display, timeline_output],
        )

        # Both action buttons write into the shared results panel.
        oss_btn.click(
            fn=run_oss_analysis,
            inputs=[scenario_dropdown],
            outputs=[results_display],
        )
        enterprise_btn.click(
            fn=execute_enterprise_healing,
            inputs=[scenario_dropdown, approval_toggle],
            outputs=[approval_display, config_display, results_display],
        )

        # Flipping the approval checkbox is mirrored in the config panel.
        approval_toggle.change(
            fn=lambda needs_approval: {"approval_required": needs_approval, "compliance_mode": "strict"},
            inputs=[approval_toggle],
            outputs=[config_display],
        )

        calculate_btn.click(
            fn=calculate_roi,
            inputs=[monthly_slider, impact_slider, team_slider],
            outputs=[roi_output],
        )

        # ---------------- Initial load ----------------
        def load_initial():
            """Return the default scenario's data plus both figures."""
            return (
                default_scenario["metrics"],
                default_scenario["impact"],
                create_timeline_visualization(),
                create_business_dashboard(),
            )

        demo.load(
            fn=load_initial,
            outputs=[metrics_display, impact_display, timeline_output, dashboard_output],
        )

        # ---------------- Closing status line ----------------
        gr.Markdown(f"""
<div style='margin-top: 40px; padding-top: 20px; border-top: 1px solid #e2e8f0; color: #718096; font-size: 14px;'>
🚀 <b>ARF Ultimate Investor Demo v3.6.0</b> | {integration_note}
<i>From Cost Center to Profit Engine: 5.2× ROI with Autonomous Reliability</i>
</div>
""")

    return demo
# ===========================================
# MAIN
# ===========================================
if __name__ == "__main__":
    # Startup banner, one log record per line.
    separator = "=" * 80
    startup_lines = (
        separator,
        "🚀 Launching ARF Investor Demo v3.6.0",
        f"✅ ARF OSS Available: {ARF_OSS_AVAILABLE}",
        "✅ Standalone version - No module dependencies",
        separator,
    )
    for line in startup_lines:
        logger.info(line)

    demo = create_interface()
    # Listen on all interfaces, port 7860, without a public share link.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "debug": False,
        "show_error": True,
    }
    demo.launch(**launch_options)