petter2025 committed on
Commit
644fff6
·
verified ·
1 Parent(s): 00342ac

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +314 -170
app.py CHANGED
@@ -1,59 +1,141 @@
1
  import os
2
  import json
3
- import random
4
- import time
5
- import datetime
6
  import numpy as np
7
  import gradio as gr
8
  import requests
9
- from sentence_transformers import SentenceTransformer
10
- import faiss
 
 
11
 
12
- # === Config ===
13
- HF_TOKEN = os.getenv("HF_TOKEN", "").strip()
14
- if not HF_TOKEN:
15
- print("⚠️ No Hugging Face token found. Running in fallback/local mode.")
16
- else:
17
- print("βœ… Hugging Face token loaded successfully.")
18
 
 
 
19
  HF_API_URL = "https://router.huggingface.co/hf-inference/v1/completions"
20
  HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
21
 
22
- # === FAISS Setup ===
23
- VECTOR_DIM = 384
24
- INDEX_FILE = "incident_vectors.index"
25
- TEXTS_FILE = "incident_texts.json"
26
- model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
27
-
28
- if os.path.exists(INDEX_FILE):
29
- index = faiss.read_index(INDEX_FILE)
30
- with open(TEXTS_FILE, "r") as f:
31
- incident_texts = json.load(f)
32
- else:
33
- index = faiss.IndexFlatL2(VECTOR_DIM)
 
 
 
 
 
 
 
 
 
34
  incident_texts = []
35
 
36
  def save_index():
37
- faiss.write_index(index, INDEX_FILE)
38
- with open(TEXTS_FILE, "w") as f:
39
- json.dump(incident_texts, f)
 
 
 
 
 
 
40
 
41
- # === Event Memory ===
42
- events = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
- def detect_anomaly(event):
45
- """Adaptive threshold-based anomaly detection."""
46
- latency = event["latency"]
47
- error_rate = event["error_rate"]
48
 
49
- # Remove random forcing for production - use actual thresholds only
50
- latency_anomaly = latency > 150
51
- error_anomaly = error_rate > 0.05
52
 
53
- return latency_anomaly or error_anomaly
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
 
55
- def call_huggingface_analysis(prompt):
56
- """Use HF Inference API or fallback simulation."""
 
 
57
  if not HF_TOKEN:
58
  # Enhanced fallback analysis
59
  fallback_insights = [
@@ -63,10 +145,10 @@ def call_huggingface_analysis(prompt):
63
  "Intermittent failures indicate potential dependency service degradation",
64
  "Performance degradation detected - consider scaling compute resources"
65
  ]
 
66
  return random.choice(fallback_insights)
67
 
68
  try:
69
- # Enhanced prompt for better analysis
70
  enhanced_prompt = f"""
71
  As a senior reliability engineer, analyze this telemetry event and provide a concise root cause analysis:
72
 
@@ -91,159 +173,221 @@ def call_huggingface_analysis(prompt):
91
  if response.status_code == 200:
92
  result = response.json()
93
  analysis_text = result.get("choices", [{}])[0].get("text", "").strip()
94
- # Clean up any extra formatting from the response
95
  if analysis_text and len(analysis_text) > 10:
96
- return analysis_text.split('\n')[0] # Take first line if multiple
97
  return analysis_text
98
  else:
99
  return f"API Error {response.status_code}: Service temporarily unavailable"
100
  except Exception as e:
101
  return f"Analysis service error: {str(e)}"
102
 
103
- def simulate_healing(event):
104
- actions = [
105
- "Restarted container",
106
- "Scaled up instance",
107
- "Cleared queue backlog",
108
- "No actionable step detected."
109
- ]
110
- return random.choice(actions)
111
-
112
- def analyze_event(component, latency, error_rate):
113
- # Ensure unique timestamps with higher precision
114
- event = {
115
- "timestamp": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")[:-3],
116
- "component": component,
117
- "latency": latency,
118
- "error_rate": error_rate
119
- }
120
-
121
- is_anomaly = detect_anomaly(event)
122
- event["anomaly"] = is_anomaly
123
- event["status"] = "Anomaly" if is_anomaly else "Normal"
124
-
125
- # Build enhanced textual prompt
126
  prompt = (
127
  f"Component: {component}\nLatency: {latency:.2f}ms\nError Rate: {error_rate:.3f}\n"
128
- f"Status: {event['status']}\n\n"
 
129
  "Provide a one-line reliability insight or root cause analysis."
130
  )
131
 
132
- # Analysis
133
  analysis = call_huggingface_analysis(prompt)
134
- event["analysis"] = analysis
135
-
136
- # Healing simulation
137
- healing_action = simulate_healing(event)
138
- event["healing_action"] = healing_action
139
-
140
- # === Vector learning ===
141
- vector_text = f"{component} {latency} {error_rate} {analysis}"
142
- vec = model.encode([vector_text])
143
- index.add(np.array(vec, dtype=np.float32))
144
- incident_texts.append(vector_text)
145
- save_index()
146
-
147
- # Find similar incidents
148
- if len(incident_texts) > 1:
149
- D, I = index.search(vec, k=min(3, len(incident_texts)))
150
- similar = [incident_texts[i] for i in I[0] if i < len(incident_texts)]
151
- if similar:
152
- # Extract meaningful part from similar incident
153
- similar_preview = similar[0][:100] + "..." if len(similar[0]) > 100 else similar[0]
154
- event["healing_action"] += f" Found {len(similar)} similar incidents (e.g., {similar_preview})."
155
- else:
156
- event["healing_action"] += " - Not enough incidents stored yet."
157
-
158
- events.append(event)
159
- return json.dumps(event, indent=2)
160
-
161
- # === UI ===
162
- def submit_event(component, latency, error_rate):
163
- result = analyze_event(component, latency, error_rate)
164
- parsed = json.loads(result)
165
-
166
- # Display last 15 events to keep table manageable
167
- table = [
168
- [e["timestamp"], e["component"], e["latency"], e["error_rate"],
169
- e["status"], e["analysis"], e["healing_action"]]
170
- for e in events[-15:]
171
- ]
172
-
173
- return (
174
- f"βœ… Event Processed ({parsed['status']})",
175
- gr.Dataframe(
176
- headers=["timestamp", "component", "latency", "error_rate", "status", "analysis", "healing_action"],
177
- value=table
178
- )
179
- )
180
 
181
- with gr.Blocks(title="🧠 Agentic Reliability Framework MVP", theme="soft") as demo:
182
- gr.Markdown("""
183
- # 🧠 Agentic Reliability Framework MVP
184
- **Adaptive anomaly detection + AI-driven self-healing + persistent FAISS memory**
185
-
186
- *Monitor your services in real-time with AI-powered reliability engineering*
187
- """)
188
-
189
- with gr.Row():
190
- with gr.Column(scale=1):
191
- gr.Markdown("### πŸ“Š Telemetry Input")
192
- component = gr.Textbox(
193
- label="Component",
194
- value="api-service",
195
- info="Name of the service being monitored"
196
- )
197
- latency = gr.Slider(
198
- minimum=10,
199
- maximum=400,
200
- value=100,
201
- step=1,
202
- label="Latency (ms)",
203
- info="Alert threshold: >150ms"
204
- )
205
- error_rate = gr.Slider(
206
- minimum=0,
207
- maximum=0.2,
208
- value=0.02,
209
- step=0.001,
210
- label="Error Rate",
211
- info="Alert threshold: >0.05"
212
- )
213
- submit = gr.Button("πŸš€ Submit Telemetry Event", variant="primary")
214
-
215
- with gr.Column(scale=2):
216
- gr.Markdown("### πŸ” Live Analysis")
217
- output_text = gr.Textbox(
218
- label="Detection Output",
219
- placeholder="Submit an event to see analysis results...",
220
- lines=2
221
- )
222
- gr.Markdown("### πŸ“ˆ Recent Events")
223
- table_output = gr.Dataframe(
224
- headers=["timestamp", "component", "latency", "error_rate", "status", "analysis", "healing_action"],
225
- label="Event History",
226
  wrap=True
227
  )
228
-
229
- # Add some explanation
230
- with gr.Accordion("ℹ️ How it works", open=False):
 
 
 
 
 
231
  gr.Markdown("""
232
- - **Anomaly Detection**: Flags events with latency >150ms or error rate >5%
233
- - **AI Analysis**: Uses Mistral-8x7B for root cause analysis via Hugging Face
234
- - **Vector Memory**: Stores incidents in FAISS for similarity search
235
- - **Self-Healing**: Simulates automated recovery actions based on historical patterns
236
  """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
237
 
238
- submit.click(
239
- fn=submit_event,
240
- inputs=[component, latency, error_rate],
241
- outputs=[output_text, table_output]
242
- )
243
 
244
  if __name__ == "__main__":
 
245
  demo.launch(
246
- server_name="0.0.0.0",
247
  server_port=7860,
248
  share=False
249
  )
 
1
  import os
2
  import json
 
 
 
3
  import numpy as np
4
  import gradio as gr
5
  import requests
6
+ import pandas as pd
7
+ import datetime
8
+ from typing import List, Dict, Any
9
+ import hashlib
10
 
11
+ # Import our new modules
12
+ from models import ReliabilityEvent, EventSeverity, AnomalyResult, HealingAction
13
+ from healing_policies import PolicyEngine
 
 
 
14
 
15
# === Configuration ===
# HF_TOKEN is optional: without it the analysis path falls back to canned text.
HF_TOKEN = os.getenv("HF_TOKEN", "").strip()
# Hugging Face Inference API endpoint used for root-cause-analysis completions.
HF_API_URL = "https://router.huggingface.co/hf-inference/v1/completions"
# Bearer auth only when a token is present; empty dict means unauthenticated calls.
HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
19
 
20
# === FAISS & Embeddings Setup ===
# Optional dependency block: the app degrades gracefully (index = None) when
# faiss / sentence-transformers are not installed.
try:
    from sentence_transformers import SentenceTransformer
    import faiss

    VECTOR_DIM = 384  # embedding size of all-MiniLM-L6-v2
    INDEX_FILE = "incident_vectors.index"
    TEXTS_FILE = "incident_texts.json"
    model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

    # Reload persisted incident memory if present; otherwise start empty.
    # NOTE(review): assumes TEXTS_FILE exists whenever INDEX_FILE does —
    # a missing texts file would raise here; confirm the files are always
    # written together (see save_index).
    if os.path.exists(INDEX_FILE):
        index = faiss.read_index(INDEX_FILE)
        with open(TEXTS_FILE, "r") as f:
            incident_texts = json.load(f)
    else:
        index = faiss.IndexFlatL2(VECTOR_DIM)
        incident_texts = []

except ImportError as e:
    print(f"Warning: FAISS or SentenceTransformers not available: {e}")
    index = None  # sentinel checked before every vector operation
    incident_texts = []
42
 
43
def save_index():
    """Persist the FAISS index and its parallel incident-text list to disk."""
    if index is None:
        # Vector support is unavailable (optional imports failed) — no-op.
        return
    faiss.write_index(index, INDEX_FILE)
    with open(TEXTS_FILE, "w") as f:
        json.dump(incident_texts, f)
49
+
50
# === Core Engine Components ===
# Shared session singletons: the policy engine that maps events to healing
# actions, and an in-memory log of every event processed this session.
policy_engine = PolicyEngine()
events_history: List[ReliabilityEvent] = []
53
 
54
class BusinessImpactCalculator:
    """Estimate the business impact (revenue, users) of an anomalous event.

    The figures are deliberately rough, first-order estimates intended for
    triage and prioritization, not accounting.
    """

    def __init__(self, revenue_per_request: float = 0.01,
                 normal_throughput: float = 1000):
        # Average revenue attributed to a single successful request.
        self.revenue_per_request = revenue_per_request
        # Baseline req/sec used to estimate throughput loss.  Previously a
        # hard-coded constant inside calculate_impact; ideally this would come
        # from a historical per-component baseline.
        self.normal_throughput = normal_throughput

    def calculate_impact(self, event: ReliabilityEvent, duration_minutes: int = 5) -> Dict[str, Any]:
        """Calculate business impact of an anomaly.

        Returns a dict with a revenue-loss estimate, an affected-users
        estimate, a severity label, and the throughput reduction percentage.
        """
        # Fraction of normal throughput that has been lost (clamped at 0 so
        # above-baseline traffic never yields a negative "loss").
        throughput_reduction = max(0, 1 - (event.throughput / self.normal_throughput))

        # Revenue impact over the assumed anomaly duration.
        # NOTE(review): (duration_minutes / 60) converts to hours while
        # throughput reads as per-second — the scale is heuristic; confirm
        # the intended units before relying on the dollar figure.
        revenue_loss = (event.throughput * throughput_reduction *
                        self.revenue_per_request * (duration_minutes / 60))

        # User impact: prefer the event's own figure, else a rough estimate.
        affected_users = event.user_impact or int(event.throughput * duration_minutes * 0.1)

        # Severity from whichever impact signal is worse.
        if revenue_loss > 1000 or affected_users > 10000:
            severity = "CRITICAL"
        elif revenue_loss > 100 or affected_users > 1000:
            severity = "HIGH"
        elif revenue_loss > 10 or affected_users > 100:
            severity = "MEDIUM"
        else:
            severity = "LOW"

        return {
            'revenue_loss_estimate': round(revenue_loss, 2),
            'affected_users_estimate': affected_users,
            'severity_level': severity,
            'throughput_reduction_pct': round(throughput_reduction * 100, 1)
        }

business_calculator = BusinessImpactCalculator()
 
 
 
92
 
93
class AdvancedAnomalyDetector:
    """Threshold-based anomaly detector whose latency threshold adapts to
    recently observed traffic."""

    def __init__(self):
        self.historical_data = []  # rolling window of recent events (max 100)
        self.adaptive_thresholds = {
            'latency_p99': 150,  # ms; re-derived from recent history below
            'error_rate': 0.05
        }

    def detect_anomaly(self, event: ReliabilityEvent) -> bool:
        """Return True when the event breaches the latency, error-rate, or
        resource-utilization thresholds."""
        thresholds = self.adaptive_thresholds
        over_latency = event.latency_p99 > thresholds['latency_p99']
        over_errors = event.error_rate > thresholds['error_rate']

        # Either CPU or memory above 90% counts as a resource anomaly.
        # Truthiness check mirrors the original: None (or 0) never triggers.
        over_resources = any(
            util and util > 0.9
            for util in (event.cpu_util, event.memory_util)
        )

        # Record the event and adapt thresholds *after* evaluating it, so the
        # current event is judged against the pre-update thresholds.
        self._update_thresholds(event)

        return over_latency or over_errors or over_resources

    def _update_thresholds(self, event: ReliabilityEvent):
        """Fold the event into the rolling window and refresh the latency
        threshold to the 90th percentile of the last 20 samples."""
        history = self.historical_data
        history.append(event)

        # Bound the window at 100 events.
        if len(history) > 100:
            del history[0]

        # Only adapt once there are enough samples to be meaningful.
        if len(history) > 10:
            recent = [e.latency_p99 for e in history[-20:]]
            self.adaptive_thresholds['latency_p99'] = np.percentile(recent, 90)

anomaly_detector = AdvancedAnomalyDetector()
136
+
137
+ def call_huggingface_analysis(prompt: str) -> str:
138
+ """Use HF Inference API or fallback simulation"""
139
  if not HF_TOKEN:
140
  # Enhanced fallback analysis
141
  fallback_insights = [
 
145
  "Intermittent failures indicate potential dependency service degradation",
146
  "Performance degradation detected - consider scaling compute resources"
147
  ]
148
+ import random
149
  return random.choice(fallback_insights)
150
 
151
  try:
 
152
  enhanced_prompt = f"""
153
  As a senior reliability engineer, analyze this telemetry event and provide a concise root cause analysis:
154
 
 
173
  if response.status_code == 200:
174
  result = response.json()
175
  analysis_text = result.get("choices", [{}])[0].get("text", "").strip()
 
176
  if analysis_text and len(analysis_text) > 10:
177
+ return analysis_text.split('\n')[0]
178
  return analysis_text
179
  else:
180
  return f"API Error {response.status_code}: Service temporarily unavailable"
181
  except Exception as e:
182
  return f"Analysis service error: {str(e)}"
183
 
184
def analyze_event(component: str, latency: float, error_rate: float,
                  throughput: float = 1000, cpu_util: float = None,
                  memory_util: float = None) -> Dict[str, Any]:
    """Analyze one telemetry event end-to-end.

    Pipeline: build a ReliabilityEvent -> anomaly detection -> LLM analysis ->
    healing-policy evaluation -> business-impact estimate -> vector-memory
    learning.  Appends the event to events_history and returns a plain dict
    for the UI layer.
    """
    # Create enhanced event; api-service is assumed to depend on auth + DB.
    event = ReliabilityEvent(
        component=component,
        latency_p99=latency,
        error_rate=error_rate,
        throughput=throughput,
        cpu_util=cpu_util,
        memory_util=memory_util,
        upstream_deps=["auth-service", "database"] if component == "api-service" else []
    )

    # Detect anomaly and map it onto a coarse severity.
    is_anomaly = anomaly_detector.detect_anomaly(event)
    event.severity = EventSeverity.HIGH if is_anomaly else EventSeverity.LOW

    # Build analysis prompt
    prompt = (
        f"Component: {component}\nLatency: {latency:.2f}ms\nError Rate: {error_rate:.3f}\n"
        f"Throughput: {throughput:.0f}\nCPU: {cpu_util or 'N/A'}\nMemory: {memory_util or 'N/A'}\n"
        f"Status: {'ANOMALY' if is_anomaly else 'NORMAL'}\n\n"
        "Provide a one-line reliability insight or root cause analysis."
    )

    # Get AI analysis
    analysis = call_huggingface_analysis(prompt)
    # Bug fix: store the analysis on the event itself — the UI table reads it
    # back via getattr(event, 'analysis', 'N/A'), which previously always saw
    # the 'N/A' default because nothing ever set the attribute.
    event.analysis = analysis

    # Evaluate healing policies
    healing_actions = policy_engine.evaluate_policies(event)

    # Calculate business impact (only meaningful for anomalies).
    business_impact = business_calculator.calculate_impact(event) if is_anomaly else None

    # Vector memory learning: persist anomalous incidents for similarity search.
    if index is not None and is_anomaly:
        vector_text = f"{component} {latency} {error_rate} {analysis}"
        vec = model.encode([vector_text])
        index.add(np.array(vec, dtype=np.float32))
        incident_texts.append(vector_text)
        save_index()

    # Prepare result
    result = {
        "timestamp": event.timestamp,
        "component": component,
        "latency_p99": latency,
        "error_rate": error_rate,
        "throughput": throughput,
        "status": "ANOMALY" if is_anomaly else "NORMAL",
        "analysis": analysis,
        "healing_actions": [action.value for action in healing_actions],
        "business_impact": business_impact,
        "severity": event.severity.value,
        "similar_incidents_count": len(incident_texts) if is_anomaly else 0
    }

    events_history.append(event)
    return result
 
 
 
 
 
 
 
 
 
 
 
 
 
 
246
 
247
+ # === Gradio UI ===
248
def submit_event(component, latency, error_rate, throughput, cpu_util, memory_util):
    """Handle event submission from the UI.

    Coerces raw widget values, runs the analysis pipeline, and returns a
    (status message, Dataframe of the last 15 events) pair.  Any failure is
    caught and surfaced as an error message rather than crashing the UI.
    """
    try:
        # Convert inputs; empty/zero throughput falls back to the default.
        latency = float(latency)
        error_rate = float(error_rate)
        throughput = float(throughput) if throughput else 1000
        cpu_util = float(cpu_util) if cpu_util else None
        memory_util = float(memory_util) if memory_util else None

        result = analyze_event(component, latency, error_rate, throughput, cpu_util, memory_util)

        # Prepare table rows for the last 15 events.
        table_data = []
        for event in events_history[-15:]:
            analysis = getattr(event, 'analysis', 'N/A') or 'N/A'
            # Bug fix: append the ellipsis only when the text was actually
            # truncated — the old ternary bound "..." unconditionally, so even
            # short analyses were suffixed with "...".
            if len(analysis) > 50:
                analysis = analysis[:50] + "..."
            table_data.append([
                event.timestamp[:19],  # trim sub-second precision for display
                event.component,
                event.latency_p99,
                f"{event.error_rate:.3f}",
                event.throughput,
                event.severity.value.upper(),
                analysis
            ])

        # Format output message
        status_emoji = "🚨" if result["status"] == "ANOMALY" else "✅"
        output_msg = f"{status_emoji} {result['status']} - {result['analysis']}"

        if result["business_impact"]:
            impact = result["business_impact"]
            output_msg += f"\n💰 Business Impact: ${impact['revenue_loss_estimate']} | 👥 {impact['affected_users_estimate']} users"

        if result["healing_actions"]:
            actions = ", ".join(result["healing_actions"])
            output_msg += f"\n🔧 Auto-Actions: {actions}"

        return (
            output_msg,
            gr.Dataframe(
                headers=["Timestamp", "Component", "Latency", "Error Rate", "Throughput", "Severity", "Analysis"],
                value=table_data,
                wrap=True
            )
        )

    except Exception as e:
        return f"❌ Error processing event: {str(e)}", gr.Dataframe(value=[])
296
+
297
def create_ui():
    """Create the Gradio interface: input widgets, output panels, info
    accordions, and the submit wiring.  Returns the Blocks app."""
    with gr.Blocks(title="🧠 Agentic Reliability Framework v2", theme="soft") as demo:
        gr.Markdown("""
        # 🧠 Agentic Reliability Framework v2
        **Production-Grade Self-Healing AI Systems**

        *Advanced anomaly detection + AI-driven root cause analysis + Business impact quantification*
        """)

        with gr.Row():
            # Left column: telemetry input widgets.
            with gr.Column(scale=1):
                gr.Markdown("### 📊 Telemetry Input")
                component = gr.Dropdown(
                    choices=["api-service", "auth-service", "payment-service", "database", "cache-service"],
                    value="api-service",
                    label="Component",
                    info="Select the service being monitored"
                )
                latency = gr.Slider(
                    minimum=10, maximum=1000, value=100, step=1,
                    label="Latency P99 (ms)",
                    info="Alert threshold: >150ms (adaptive)"
                )
                error_rate = gr.Slider(
                    minimum=0, maximum=0.5, value=0.02, step=0.001,
                    label="Error Rate",
                    info="Alert threshold: >0.05"
                )
                throughput = gr.Number(
                    value=1000,
                    label="Throughput (req/sec)",
                    info="Current request rate"
                )
                cpu_util = gr.Slider(
                    minimum=0, maximum=1, value=0.4, step=0.01,
                    label="CPU Utilization",
                    info="0.0 - 1.0 scale"
                )
                memory_util = gr.Slider(
                    minimum=0, maximum=1, value=0.3, step=0.01,
                    label="Memory Utilization",
                    info="0.0 - 1.0 scale"
                )
                submit_btn = gr.Button("🚀 Submit Telemetry Event", variant="primary", size="lg")

            # Right column: analysis output and rolling event table.
            with gr.Column(scale=2):
                gr.Markdown("### 🔍 Live Analysis & Healing")
                output_text = gr.Textbox(
                    label="Analysis Results",
                    placeholder="Submit an event to see AI-powered analysis...",
                    lines=4
                )
                gr.Markdown("### 📈 Recent Events (Last 15)")
                events_table = gr.Dataframe(
                    headers=["Timestamp", "Component", "Latency", "Error Rate", "Throughput", "Severity", "Analysis"],
                    label="Event History",
                    wrap=True,
                    max_height="400px"
                )

        # Information sections
        with gr.Accordion("ℹ️ Framework Capabilities", open=False):
            gr.Markdown("""
            - **🤖 AI-Powered Analysis**: Mistral-8x7B for intelligent root cause analysis
            - **🔧 Policy-Based Healing**: Automated recovery actions based on severity and context
            - **💰 Business Impact**: Revenue and user impact quantification
            - **🎯 Adaptive Detection**: ML-powered thresholds that learn from your environment
            - **📚 Vector Memory**: FAISS-based incident memory for similarity detection
            - **⚡ Production Ready**: Circuit breakers, cooldowns, and enterprise features
            """)

        with gr.Accordion("🔧 Healing Policies", open=False):
            # One markdown line per *enabled* policy with its actions/priority.
            policy_info = []
            for policy in policy_engine.policies:
                if policy.enabled:
                    actions = ", ".join([action.value for action in policy.actions])
                    policy_info.append(f"**{policy.name}**: {actions} (Priority: {policy.priority})")

            gr.Markdown("\n\n".join(policy_info))

        # Event handling
        submit_btn.click(
            fn=submit_event,
            inputs=[component, latency, error_rate, throughput, cpu_util, memory_util],
            outputs=[output_text, events_table]
        )

    return demo
 
 
 
 
386
 
387
if __name__ == "__main__":
    # Build the UI and serve on all interfaces at the standard HF Spaces port.
    demo = create_ui()
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False
    )