petter2025 committed on
Commit
e992ee3
·
verified ·
1 Parent(s): adb7e74

Update demo/orchestrator.py

Browse files
Files changed (1) hide show
  1. demo/orchestrator.py +163 -146
demo/orchestrator.py CHANGED
@@ -1,163 +1,180 @@
1
  """
2
- Demo Orchestrator - Integrates with ARF OSS framework
3
  """
4
-
5
- import asyncio
6
  import json
7
- import datetime
8
- from typing import Dict, List, Any, Optional, Tuple
9
- import logging
10
-
11
- logger = logging.getLogger(__name__)
12
 
 
 
 
 
 
 
 
 
13
 
14
class DemoOrchestrator:
    """Orchestrates the demo workflow using ARF OSS.

    Holds purely in-memory demo state: a list of executed healing actions,
    a list of incidents, and a small dictionary of learning counters. When
    no ARF client is supplied (or the client lacks ``analyze_and_recommend``)
    the orchestrator falls back to canned mock data.
    """

    def __init__(self, arf_client=None):
        """Create an orchestrator.

        Args:
            arf_client: Optional client object. It is only used when it is
                truthy and exposes an ``analyze_and_recommend`` coroutine.
        """
        self.arf_client = arf_client
        self.incident_history = []
        self.execution_history = []
        self.learning_stats = self._initial_learning_stats()

    @staticmethod
    def _initial_learning_stats() -> Dict:
        """Return a fresh, zeroed learning-statistics dictionary."""
        return {
            "patterns_detected": 0,
            "similar_incidents_found": 0,
            "healing_intents_created": 0
        }

    async def analyze_incident(self, scenario_name: str, scenario_data: Dict) -> Dict:
        """Analyze an incident, using the ARF client when one is available.

        Args:
            scenario_name: Name of the scenario being analyzed.
            scenario_data: Scenario description; the ``"component"`` and
                ``"metrics"`` keys are read when the ARF client is used.

        Returns:
            A dict with ``"status"`` set to ``"success"`` plus an
            ``"analysis"`` section (and ``"healing_intent"`` when the ARF
            client produced one), or ``"status": "error"`` with a
            ``"message"`` if anything raised.
        """
        try:
            client = self.arf_client
            if client and hasattr(client, 'analyze_and_recommend'):
                # Use actual ARF OSS analysis
                healing_intent = await client.analyze_and_recommend(
                    tool_name="analyze",
                    component=scenario_data.get("component", "unknown"),
                    parameters=scenario_data.get("metrics", {}),
                    context={"scenario": scenario_name}
                )

                self.learning_stats["healing_intents_created"] += 1

                return {
                    "status": "success",
                    "healing_intent": healing_intent.to_enterprise_request(),
                    "analysis": {
                        "confidence": healing_intent.confidence,
                        "similar_incidents": healing_intent.similar_incidents,
                        "recommendation": healing_intent.justification
                    }
                }

            # Fallback to mock analysis
            return {
                "status": "success",
                "analysis": {
                    "confidence": 0.85,
                    "similar_incidents": [
                        {"id": "inc_001", "similarity": 0.78, "component": "redis"},
                        {"id": "inc_045", "similarity": 0.65, "component": "database"}
                    ],
                    "recommendation": f"Based on 2 similar incidents, recommend action for {scenario_name}"
                }
            }

        except Exception as e:
            # Deliberate best-effort boundary for the demo: report the failure
            # to the caller instead of raising, but keep the traceback in the log.
            logger.exception("Analysis failed: %s", e)
            return {
                "status": "error",
                "message": str(e)
            }

    def execute_healing(self, scenario_name: str, healing_intent: Dict,
                        mode: str = "autonomous") -> Dict:
        """Record a (simulated) healing execution and return its record.

        Args:
            scenario_name: Name of the scenario being healed.
            healing_intent: Intent payload stored verbatim in the record.
            mode: Execution mode label stored in the record.

        Returns:
            The execution record that was appended to ``execution_history``.
            The ``"results"`` section is fixed demo data.
        """
        execution_record = {
            # Sequential id derived from the number of prior executions.
            "id": f"exec_{len(self.execution_history):03d}",
            "scenario": scenario_name,
            "timestamp": datetime.datetime.now().isoformat(),
            "mode": mode,
            "healing_intent": healing_intent,
            "status": "completed",
            "results": {
                "recovery_time_minutes": 12,
                "cost_saved": 7200,
                "users_impacted": "45,000 β†’ 0"
            }
        }

        self.execution_history.append(execution_record)

        # Update learning stats
        self.learning_stats["patterns_detected"] += 1

        return execution_record

    def get_similar_incidents(self, query: str, limit: int = 5) -> List[Dict]:
        """Find similar incidents (canned demo data).

        Args:
            query: Search text. Currently unused; the result set is fixed.
            limit: Maximum number of incidents to return. Values below zero
                are treated as zero.

        Returns:
            Up to ``limit`` incident dicts, most similar first.
        """
        # This would integrate with ARF's RAG memory
        candidates = [
            {
                "id": "inc_001",
                "similarity": 0.92,
                "scenario": "Cache Miss Storm",
                "resolution": "Scaled Redis cluster + circuit breaker",
                "recovery_time": "12 minutes"
            },
            {
                "id": "inc_045",
                "similarity": 0.78,
                "scenario": "Database Connection Pool",
                "resolution": "Increased pool size + monitoring",
                "recovery_time": "18 minutes"
            }
        ]
        # A negative slice bound would count from the end of the list and
        # return items when the caller asked for fewer than none.
        matches = candidates[:max(limit, 0)]
        self.learning_stats["similar_incidents_found"] += len(matches)
        return matches

    def calculate_roi(self, company_data: Dict) -> Dict:
        """Calculate ROI figures from company data.

        Args:
            company_data: May contain ``"monthly_incidents"`` (default 10),
                ``"avg_cost_per_incident"`` (default 5000) and
                ``"team_size"`` (default 3).

        Returns:
            A dict with ``annual_impact``, ``team_cost``,
            ``potential_savings``, ``roi_multiplier``, ``payback_months``
            and a textual ``recommendation``.
        """
        monthly_incidents = company_data.get("monthly_incidents", 10)
        avg_cost_per_incident = company_data.get("avg_cost_per_incident", 5000)
        team_size = company_data.get("team_size", 3)

        annual_impact = monthly_incidents * 12 * avg_cost_per_incident
        team_cost = team_size * 150000  # $150k per engineer
        savings = annual_impact * 0.82  # 82% savings with ARF
        # Guard both ratios against a zero (or negative) denominator.
        roi_multiplier = savings / team_cost if team_cost > 0 else 0
        payback_months = (team_cost / (savings / 12)) if savings > 0 else 0

        return {
            "annual_impact": annual_impact,
            "team_cost": team_cost,
            "potential_savings": savings,
            "roi_multiplier": roi_multiplier,
            "payback_months": payback_months,
            "recommendation": self._get_roi_recommendation(roi_multiplier)
        }

    def _get_roi_recommendation(self, roi_multiplier: float) -> str:
        """Map an ROI multiplier to a recommendation string.

        Thresholds are checked from highest to lowest: 5.0, 2.0, 1.0.
        """
        if roi_multiplier >= 5.0:
            return "πŸš€ Excellent fit for ARF Enterprise"
        if roi_multiplier >= 2.0:
            return "βœ… Good ROI with ARF Enterprise"
        if roi_multiplier >= 1.0:
            return "⚠️ Consider ARF OSS edition first"
        return "πŸ†“ Start with ARF OSS (free)"

    def get_audit_trail(self) -> Dict:
        """Return the recorded incidents, executions and stats with an export timestamp."""
        return {
            "incidents": self.incident_history,
            "executions": self.execution_history,
            "learning_stats": self.learning_stats,
            "exported_at": datetime.datetime.now().isoformat()
        }

    def reset_demo(self):
        """Reset demo state: clear both histories and zero the learning stats."""
        self.incident_history = []
        self.execution_history = []
        self.learning_stats = self._initial_learning_stats()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  """
2
+ Enhanced demo orchestrator with real ARF integration patterns
3
  """
4
+ import streamlit as st
5
+ import time
6
  import json
7
+ from datetime import datetime
8
+ from typing import Dict, Any, List, Optional
 
 
 
9
 
10
+ # Import mock ARF components
11
+ from .scenarios import get_scenario_data
12
+ from .mock_arf import (
13
+ create_mock_healing_intent,
14
+ run_rag_similarity_search,
15
+ calculate_pattern_confidence,
16
+ simulate_arf_analysis
17
+ )
18
 
19
# Execution modes the action buttons know how to branch on.
_EXECUTION_MODES = ("advisory", "approval", "autonomous")


def _render_current_metrics(metrics: Dict[str, Any]) -> None:
    """Render every scenario metric as an ``st.metric`` in a two-column grid."""
    grid = st.columns(2)
    for idx, (key, value) in enumerate(metrics.items()):
        label = key.replace('_', ' ').title()
        is_number = isinstance(value, (int, float))
        with grid[idx % 2]:
            if is_number and key == "cache_hit_rate":
                st.metric(label=label,
                          value=f"{value}%",
                          delta="-65%" if value < 20 else None)
            elif is_number and key == "database_load":
                st.metric(label=label,
                          value=f"{value}%",
                          delta="+40%" if value > 80 else None)
            else:
                # Catch-all so no metric is silently dropped, whatever its
                # key or value type.
                st.metric(label=label, value=str(value))


def _render_business_impact(impact: Dict[str, Any]) -> None:
    """Render revenue loss, SLA violation and affected-user indicators, when present."""
    if impact.get('revenue_loss_per_hour'):
        st.metric(
            label="Revenue Loss/Hour",
            value=f"${impact['revenue_loss_per_hour']:,.0f}",
            delta_color="inverse"
        )

    if impact.get('sla_violation'):
        st.error("⚠️ SLA Violation Detected")

    if impact.get('affected_users'):
        st.metric(
            label="Affected Users",
            value=f"{impact['affected_users']:,.0f}",
            delta_color="inverse"
        )


def _render_action_buttons(mode: str) -> None:
    """Render the three action buttons; the Enterprise button branches on ``mode``.

    Any mode other than ``"advisory"`` or ``"approval"`` is treated as
    autonomous. Message text is written flush-left so Markdown does not
    interpret indented lines as a code block.
    """
    oss_col, enterprise_col, approval_col = st.columns(3)

    with oss_col:
        if st.button("πŸ†“ Run OSS Analysis", use_container_width=True):
            st.info(
                "\n"
                "**OSS Analysis Results:**\n"
                "- Incident identified: Cache miss storm\n"
                "- Recommended action: Scale Redis cluster\n"
                "- Confidence: 85%\n"
                "- Similar incidents found: 3\n"
                "\n"
                "*Note: OSS edition provides analysis only.*\n"
            )

    with enterprise_col:
        if st.button("πŸš€ Execute Enterprise Healing", use_container_width=True):
            if mode == "advisory":
                st.warning(
                    "\n"
                    "**Enterprise Upgrade Required**\n"
                    "\n"
                    "To execute healing actions, upgrade to Enterprise Edition:\n"
                    "- Autonomous healing capabilities\n"
                    "- Approval workflows\n"
                    "- Audit trails\n"
                    "- Compliance reporting\n"
                    "\n"
                    "[Upgrade Now](https://arf.dev/enterprise)\n"
                )
            elif mode == "approval":
                st.success(
                    "\n"
                    "**Healing Action Submitted for Approval**\n"
                    "\n"
                    "βœ… HealingIntent created\n"
                    "πŸ“‹ Sent to approval workflow\n"
                    "πŸ‘€ Awaiting human review\n"
                    "πŸ• Estimated approval time: 2-5 minutes\n"
                )
            else:  # autonomous
                st.success(
                    "\n"
                    "**Autonomous Healing Executed**\n"
                    "\n"
                    "βœ… Redis cluster scaled from 3 to 5 nodes\n"
                    "βœ… Cache TTL adjusted to 300s\n"
                    "βœ… Database connections optimized\n"
                    "⚑ Resolution time: 8.2 minutes\n"
                    "πŸ’° Cost avoided: $7,225\n"
                )

    with approval_col:
        if st.button("πŸ” Require Manual Approval", use_container_width=True):
            st.info(
                "\n"
                "**Approval Workflow Enabled**\n"
                "\n"
                "This incident will require manual approval before execution:\n"
                "1. SRE team notified via PagerDuty\n"
                "2. Approval required from team lead\n"
                "3. Audit trail recorded\n"
                "4. Compliance checks run\n"
                "\n"
                "*Enterprise feature: Human-in-the-loop safety*\n"
            )


def run_enhanced_incident_demo(scenario_name: str, execution_mode: str = "advisory"):
    """
    Run enhanced incident demo with ARF integration

    Renders, in order: the incident header, current metrics and business
    impact, the (simulated) ARF analysis results, the execution-mode toggle,
    and the action buttons.

    Args:
        scenario_name: Key passed to ``get_scenario_data``. If no scenario is
            returned, an error is shown and the function returns early.
        execution_mode: Initial execution mode handed to the mode toggle and
            used as the fallback when the toggle does not yield one of
            ``"advisory"``, ``"approval"`` or ``"autonomous"``.

    Returns:
        None. All output is rendered through Streamlit.
    """
    # Get scenario data
    scenario = get_scenario_data(scenario_name)
    if not scenario:
        st.error(f"Scenario '{scenario_name}' not found")
        return

    # Display incident header
    st.markdown(f"### πŸ”₯ {scenario['name']}")
    st.caption(scenario['description'])

    # Create columns for metrics and business impact
    metrics_col, impact_col = st.columns(2)

    with metrics_col:
        st.markdown("#### πŸ“Š Current Metrics")
        _render_current_metrics(scenario.get('metrics', {}))

    with impact_col:
        st.markdown("#### πŸ’° Business Impact")
        _render_business_impact(scenario.get('business_impact', {}))

    # Run ARF analysis
    with st.spinner("🧠 ARF Analysis in progress..."):
        # Artificial pause so the spinner is visible during the demo.
        time.sleep(1.5)

        # Simulate ARF analysis pipeline. The result is not consumed here;
        # the call is kept in case the helper has side effects.
        simulate_arf_analysis(scenario)

        # Run RAG similarity search
        similar_incidents = run_rag_similarity_search(scenario)

        # Calculate pattern confidence
        pattern_confidence = calculate_pattern_confidence(scenario, similar_incidents)

        # Create HealingIntent
        healing_intent = create_mock_healing_intent(
            scenario=scenario,
            similar_incidents=similar_incidents,
            confidence=pattern_confidence
        )

    # UI components are imported lazily, at first use, as in the original.
    from ..ui.components import (
        create_arf_enhanced_timeline,
        create_execution_mode_toggle,
        create_healing_intent_visualizer,
        create_rag_similarity_panel,
    )

    # Display enhanced timeline with ARF integration
    create_arf_enhanced_timeline(scenario, [healing_intent])

    # Show HealingIntent visualizer
    create_healing_intent_visualizer(healing_intent)

    # Show RAG similarity panel
    create_rag_similarity_panel(
        query=f"{scenario['name']} - {scenario['description']}",
        similar_incidents=similar_incidents
    )

    # Show execution mode differences
    selected_mode = create_execution_mode_toggle(execution_mode)
    # Honour the user's toggle choice when it is a recognised mode; otherwise
    # keep the caller-supplied mode.
    # NOTE(review): assumes the toggle returns the selected mode string - confirm.
    active_mode = selected_mode if selected_mode in _EXECUTION_MODES else execution_mode

    # Action buttons based on mode
    st.markdown("---")
    st.markdown("### ⚑ Take Action")

    _render_action_buttons(active_mode)