petter2025 commited on
Commit
1eb0dc5
·
verified ·
1 Parent(s): 0374c62

Update app.py

Browse files

import datetime
import hmac
import json
import os
import random

import faiss
import gradio as gr
import numpy as np
import requests
from fastapi import FastAPI, Body, Header, HTTPException
from filelock import FileLock
from pydantic import BaseModel
from pydantic import Field
from sentence_transformers import SentenceTransformer

# === Config ===
HF_TOKEN = os.getenv("HF_TOKEN", "").strip()
API_KEY = os.getenv("API_KEY", "").strip()

HF_API_URL = "https://router.huggingface.co/hf-inference/v1/completions"
HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}

# === FAISS Setup ===
VECTOR_DIM = 384
INDEX_FILE = "incident_vectors.index"
TEXTS_FILE = "incident_texts.json"
LOCK_FILE = "faiss_save.lock"

model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

if os.path.exists(INDEX_FILE):
index = faiss.read_index(INDEX_FILE)
with open(TEXTS_FILE, "r") as f:
incident_texts = json.load(f)
else:
index = faiss.IndexFlatL2(VECTOR_DIM)
incident_texts = []


# === Safe persistence ===
def save_index():
with FileLock(LOCK_FILE):
faiss.write_index(index, INDEX_FILE)
with open(TEXTS_FILE, "w") as f:
json.dump(incident_texts, f)


# === Core logic ===
events = []


def detect_anomaly(event):
"""Adaptive threshold-based anomaly detection."""
latency = event["latency"]
error_rate = event["error_rate"]

# Occasionally flag random anomaly for testing
if random.random() < 0.25:
return True

return latency > 150 or error_rate > 0.05


def call_huggingface_analysis(prompt):
"""Uses HF Inference API or local fallback."""
if not HF_TOKEN:
return "Offline mode: simulated analysis."

try:
payload = {
"model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
"prompt": prompt,
"max_tokens": 200,
"temperature": 0.3,
}
response = requests.post(HF_API_URL, headers=HEADERS, json=payload, timeout=10)
if response.status_code == 200:
result = response.json()
return result.get("choices", [{}])[0].get("text", "").strip()
else:
return f"Error {response.status_code}: {response.text}"
except Exception as e:
return f"Error generating analysis: {e}"


def simulate_healing(event):
actions = [
"Restarted container",
"Scaled up instance",
"Cleared queue backlog",
"No actionable step detected.",
]
return random.choice(actions)


def analyze_event(component, latency, error_rate):
event = {
"timestamp": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
"component": component,
"latency": latency,
"error_rate": error_rate,
}

is_anomaly = detect_anomaly(event)
event["anomaly"] = is_anomaly
event["status"] = "Anomaly" if is_anomaly else "Normal"

prompt = (
f"Component: {component}\nLatency: {latency:.2f}ms\nError Rate: {error_rate:.3f}\n"
f"Status: {event['status']}\n\n"
"Provide a one-line reliability insight or root cause analysis."
)

# AI Reliability analysis
analysis = call_huggingface_analysis(prompt)
event["analysis"] = analysis

# Simulated self-healing
healing_action = simulate_healing(event)
event["healing_action"] = healing_action

# === Vector learning & persistence ===
vector_text = f"{component} {latency} {error_rate} {analysis}"
vec = model.encode([vector_text])
index.add(np.array(vec, dtype=np.float32))
incident_texts.append(vector_text)
save_index()

# Similar incident lookup
if len(incident_texts) > 1:
D, I = index.search(vec, k=min(3, len(incident_texts)))
similar = [incident_texts[i] for i in I[0] if i < len(incident_texts)]
if similar:
event["healing_action"] += f" Found {len(similar)} similar incidents (e.g., {similar[0][:100]}...)."
else:
event["healing_action"] += " - Not enough incidents stored yet."

events.append(event)
return event


# === FastAPI backend ===
app = FastAPI(title="Agentic Reliability Framework API")


class AddEventModel(BaseModel):
component: str
latency: float
error_rate: float


def verify_api_key(provided_key: str):
if not API_KEY:
return True # dev mode
return provided_key == API_KEY


@app .post("/add-event")
def add_event(
payload: AddEventModel = Body(...),
x_api_key: str = Header(None, alias="X-API-Key"),
):
"""Add a telemetry event (secured via API key)."""
if not verify_api_key(x_api_key):
raise HTTPException(status_code=401, detail="Unauthorized: invalid API key.")

try:
event = analyze_event(payload.component, payload.latency, payload.error_rate)
return {"status": "ok", "event": event}
except Exception as e:
raise HTTPException(status_code=500, detail=f"Failed to add event: {e}")


# === Gradio Dashboard ===
def submit_event(component, latency, error_rate):
event = analyze_event(component, latency, error_rate)

table = [
[
e["timestamp"],
e["component"],
e["latency"],
e["error_rate"],
e["status"],
e["analysis"],
e["healing_action"],
]
for e in events[-20:]
]

return (
f"✅ Event Processed ({event['status']})",
gr.Dataframe(
headers=[
"timestamp",
"component",
"latency",
"error_rate",
"status",
"analysis",
"healing_action",
],
value=table,
),
)


with gr.Blocks(title="🧠 Agentic Reliability Framework MVP") as demo:
gr.Markdown(
"## 🧠 Agentic Reliability Framework MVP\n"
"Adaptive anomaly detection + AI-driven self-healing + persistent FAISS memory"
)
with gr.Row():
component = gr.Textbox(label="Component", value="api-service")
latency = gr.Slider(10, 400, value=100, step=1, label="Latency (ms)")
error_rate = gr.Slider(0, 0.2, value=0.02, step=0.001, label="Error Rate")
submit = gr.Button("🚀 Submit Telemetry Event")
output_text = gr.Textbox(label="Detection Output")
table_output = gr.Dataframe(
headers=[
"timestamp",
"component",
"latency",
"error_rate",
"status",
"analysis",
"healing_action",
]
)
submit.click(fn=submit_event, inputs=[component, latency, error_rate], outputs=[output_text, table_output])


if __name__ == "__main__":
demo.launch(server_name="0.0.0.0", server_port=7860)

Files changed (1) hide show
  1. app.py +130 -159
app.py CHANGED
@@ -1,92 +1,68 @@
1
- # app.py - Agentic Reliability Framework MVP
2
- # Drop-in replacement: supports Gradio UI + FastAPI REST endpoints (/semantic-search, /add-event, /recent-events)
3
  import os
4
  import json
5
  import random
6
  import datetime
7
- import threading
8
  import numpy as np
9
  import gradio as gr
10
  import requests
11
  import faiss
12
- from fastapi import FastAPI, Query, Body, HTTPException
13
- from fastapi.middleware.cors import CORSMiddleware
14
  from sentence_transformers import SentenceTransformer
15
  from filelock import FileLock
16
- import uvicorn
17
- from pydantic import BaseModel, Field
18
 
19
  # === Config ===
20
  HF_TOKEN = os.getenv("HF_TOKEN", "").strip()
 
 
21
  HF_API_URL = "https://router.huggingface.co/hf-inference/v1/completions"
22
  HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
23
 
24
- print("✅ Hugging Face token loaded." if HF_TOKEN else "⚠️ No HF token found, using local analysis mode.")
25
-
26
- # === Persistence / FAISS config ===
27
  VECTOR_DIM = 384
28
  INDEX_FILE = "incident_vectors.index"
29
  TEXTS_FILE = "incident_texts.json"
30
- LOCK_FILE = "incident.lock"
31
 
32
- # Sentence-transformers model (small and fast)
33
  model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
34
 
35
- def load_faiss_index():
36
- if os.path.exists(INDEX_FILE) and os.path.exists(TEXTS_FILE):
37
- try:
38
- idx = faiss.read_index(INDEX_FILE)
39
- with open(TEXTS_FILE, "r") as f:
40
- texts = json.load(f)
41
- return idx, texts
42
- except Exception as e:
43
- print(f"⚠️ Failed to load index/texts: {e} — creating new in-memory index.")
44
- return faiss.IndexFlatL2(VECTOR_DIM), []
45
 
46
- index, incident_texts = load_faiss_index()
47
 
 
48
  def save_index():
49
- """Persist FAISS + metadata atomically using a file lock."""
50
  with FileLock(LOCK_FILE):
51
- try:
52
- faiss.write_index(index, INDEX_FILE)
53
- with open(TEXTS_FILE, "w") as f:
54
- json.dump(incident_texts, f)
55
- except Exception as e:
56
- print(f"⚠️ Error saving index/texts: {e}")
57
-
58
- # === In-memory events list ===
59
- events = []
60
 
61
  # === Core logic ===
 
 
 
62
  def detect_anomaly(event):
 
63
  latency = event["latency"]
64
  error_rate = event["error_rate"]
65
- # occasional forced anomaly for testing
 
66
  if random.random() < 0.25:
67
  return True
 
68
  return latency > 150 or error_rate > 0.05
69
 
70
- def local_reliability_analysis(prompt: str):
71
- """Local fallback analysis using semantic similarity and simple heuristic text reply."""
72
- try:
73
- embedding = model.encode([prompt])
74
- # store the prompt as a data point (so local memory grows)
75
- index.add(np.array(embedding, dtype=np.float32))
76
- incident_texts.append(prompt)
77
- save_index()
78
- if len(incident_texts) > 1:
79
- D, I = index.search(np.array(embedding, dtype=np.float32), k=min(3, len(incident_texts)))
80
- similar = [incident_texts[i] for i in I[0] if i < len(incident_texts)]
81
- return f"Local insight: found {len(similar)} similar incident(s)."
82
- return "Local insight: first incident stored."
83
- except Exception as e:
84
- return f"Local analysis error: {e}"
85
 
86
- def call_huggingface_analysis(prompt: str):
87
- """Try HF router -> on failure fall back to local analysis."""
88
  if not HF_TOKEN:
89
- return local_reliability_analysis(prompt)
90
 
91
  try:
92
  payload = {
@@ -95,168 +71,163 @@ def call_huggingface_analysis(prompt: str):
95
  "max_tokens": 200,
96
  "temperature": 0.3,
97
  }
98
- resp = requests.post(HF_API_URL, headers=HEADERS, json=payload, timeout=12)
99
- if resp.status_code == 200:
100
- result = resp.json()
101
- # router output shapes vary; try to be defensive
102
- text = ""
103
- if isinstance(result, dict):
104
- # common HF completion shape
105
- choices = result.get("choices") or []
106
- if choices:
107
- text = choices[0].get("text") or choices[0].get("message", {}).get("content", "")
108
- else:
109
- text = result.get("generated_text") or ""
110
- elif isinstance(result, list) and result:
111
- text = result[0].get("text", "")
112
- return (text or local_reliability_analysis(prompt)).strip()
113
  else:
114
- print(f"⚠️ HF router returned {resp.status_code}: {resp.text[:200]}")
115
- return local_reliability_analysis(prompt)
116
  except Exception as e:
117
- print(f"⚠️ HF inference call error: {e}")
118
- return local_reliability_analysis(prompt)
119
 
120
  def simulate_healing(event):
121
  actions = [
122
  "Restarted container",
123
  "Scaled up instance",
124
  "Cleared queue backlog",
125
- "No actionable step detected."
126
  ]
127
  return random.choice(actions)
128
 
129
- def analyze_event(component: str, latency: float, error_rate: float):
130
- """Process one event end-to-end and persist vector memory."""
131
  event = {
132
  "timestamp": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
133
  "component": component,
134
- "latency": float(latency),
135
- "error_rate": float(error_rate),
136
  }
137
- event["anomaly"] = detect_anomaly(event)
138
- event["status"] = "Anomaly" if event["anomaly"] else "Normal"
 
 
139
 
140
  prompt = (
141
  f"Component: {component}\nLatency: {latency:.2f}ms\nError Rate: {error_rate:.3f}\n"
142
- f"Status: {event['status']}\n\nProvide a one-line reliability insight or likely root cause."
 
143
  )
144
 
 
145
  analysis = call_huggingface_analysis(prompt)
146
  event["analysis"] = analysis
147
- event["healing_action"] = simulate_healing(event)
148
 
149
- # persist vector memory (text + embedding)
150
- vec_text = f"{component} {latency} {error_rate} {analysis}"
151
- try:
152
- vec = model.encode([vec_text])
153
- index.add(np.array(vec, dtype=np.float32))
154
- incident_texts.append(vec_text)
155
- save_index()
156
- except Exception as e:
157
- print(f"⚠️ Error encoding or saving vector: {e}")
158
-
159
- # find similar incidents and append a friendly snippet to healing_action
160
- try:
161
- if len(incident_texts) > 1:
162
- D, I = index.search(vec, k=min(3, len(incident_texts)))
163
- similar = [incident_texts[i] for i in I[0] if i < len(incident_texts)]
164
- if similar:
165
- event["healing_action"] += f" Found {len(similar)} similar incidents (e.g., {similar[0][:120]}...)."
166
- else:
167
- event["healing_action"] += " - Not enough incidents stored yet."
168
- except Exception as e:
169
- print(f"⚠️ Error searching index: {e}")
170
 
171
  events.append(event)
172
- # keep events bounded to reasonable size
173
- if len(events) > 1000:
174
- events.pop(0)
175
  return event
176
 
177
- # === FastAPI app + models ===
178
- app = FastAPI(title="Agentic Reliability API", version="0.3")
179
 
180
- app.add_middleware(
181
- CORSMiddleware,
182
- allow_origins=["*"],
183
- allow_credentials=True,
184
- allow_methods=["*"],
185
- allow_headers=["*"],
186
- )
187
 
188
  class AddEventModel(BaseModel):
189
- component: str = Field(..., example="api-service")
190
- latency: float = Field(..., ge=0, example=120.5)
191
- error_rate: float = Field(..., ge=0, le=1.0, example=0.03)
 
 
 
 
 
 
 
192
 
193
  @app.post("/add-event")
194
- def add_event(payload: AddEventModel = Body(...)):
195
- """
196
- Add a telemetry event programmatically.
197
- Body: { "component": "api-service", "latency": 120, "error_rate": 0.03 }
198
- """
 
 
 
199
  try:
200
  event = analyze_event(payload.component, payload.latency, payload.error_rate)
201
  return {"status": "ok", "event": event}
202
  except Exception as e:
203
  raise HTTPException(status_code=500, detail=f"Failed to add event: {e}")
204
 
205
- @app.get("/recent-events")
206
- def recent_events(n: int = Query(20, ge=1, le=200, description="Number of recent events to return")):
207
- """Return the most recent processed events (default: 20)."""
208
- sliced = events[-n:]
209
- return {"count": len(sliced), "events": sliced[::-1]} # newest first
210
-
211
- @app.get("/semantic-search")
212
- def semantic_search(query: str = Query(..., description="Search query for reliability memory"), k: int = 3):
213
- """Perform semantic similarity search over stored reliability incidents."""
214
- if not incident_texts:
215
- return {"results": [], "message": "No incidents in memory yet."}
216
- try:
217
- embedding = model.encode([query])
218
- D, I = index.search(np.array(embedding, dtype=np.float32), k=min(k, len(incident_texts)))
219
- results = []
220
- for rank, idx in enumerate(I[0]):
221
- if idx < len(incident_texts):
222
- results.append({"text": incident_texts[idx], "distance": float(D[0][rank])})
223
- return {"query": query, "results": results}
224
- except Exception as e:
225
- raise HTTPException(status_code=500, detail=f"Semantic search failed: {e}")
226
 
227
- # === Gradio frontend ===
228
  def submit_event(component, latency, error_rate):
229
- ev = analyze_event(component, latency, error_rate)
 
230
  table = [
231
- [e["timestamp"], e["component"], e["latency"], e["error_rate"],
232
- e["status"], e["analysis"], e["healing_action"]]
 
 
 
 
 
 
 
233
  for e in events[-20:]
234
  ]
 
235
  return (
236
- f"✅ Event Processed ({ev['status']})",
237
  gr.Dataframe(
238
- headers=["timestamp", "component", "latency", "error_rate", "status", "analysis", "healing_action"],
239
- value=table
240
- )
 
 
 
 
 
 
 
 
241
  )
242
 
 
243
  with gr.Blocks(title="🧠 Agentic Reliability Framework MVP") as demo:
244
- gr.Markdown("## 🧠 Agentic Reliability Framework MVP\nAdaptive anomaly detection + AI-driven self-healing + FAISS persistent vector memory.")
 
 
 
245
  with gr.Row():
246
  component = gr.Textbox(label="Component", value="api-service")
247
  latency = gr.Slider(10, 400, value=100, step=1, label="Latency (ms)")
248
  error_rate = gr.Slider(0, 0.2, value=0.02, step=0.001, label="Error Rate")
249
  submit = gr.Button("🚀 Submit Telemetry Event")
250
  output_text = gr.Textbox(label="Detection Output")
251
- table_output = gr.Dataframe(headers=["timestamp", "component", "latency", "error_rate", "status", "analysis", "healing_action"])
 
 
 
 
 
 
 
 
 
 
252
  submit.click(fn=submit_event, inputs=[component, latency, error_rate], outputs=[output_text, table_output])
253
 
254
- # === Launch both servers (Gradio UI + FastAPI) in same process ===
255
- def start_gradio():
256
- demo.launch(server_name="0.0.0.0", server_port=7860, share=False)
257
 
258
  if __name__ == "__main__":
259
- # run Gradio in a thread and uvicorn for FastAPI in main thread
260
- t = threading.Thread(target=start_gradio, daemon=True)
261
- t.start()
262
- uvicorn.run(app, host="0.0.0.0", port=8000)
 
 
 
1
  import os
2
  import json
3
  import random
4
  import datetime
 
5
  import numpy as np
6
  import gradio as gr
7
  import requests
8
  import faiss
9
+ from fastapi import FastAPI, Body, Header, HTTPException
10
+ from pydantic import BaseModel
11
  from sentence_transformers import SentenceTransformer
12
  from filelock import FileLock
 
 
13
 
14
  # === Config ===
15
  HF_TOKEN = os.getenv("HF_TOKEN", "").strip()
16
+ API_KEY = os.getenv("API_KEY", "").strip()
17
+
18
  HF_API_URL = "https://router.huggingface.co/hf-inference/v1/completions"
19
  HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
20
 
21
+ # === FAISS Setup ===
 
 
22
  VECTOR_DIM = 384
23
  INDEX_FILE = "incident_vectors.index"
24
  TEXTS_FILE = "incident_texts.json"
25
+ LOCK_FILE = "faiss_save.lock"
26
 
 
27
  model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
28
 
29
+ if os.path.exists(INDEX_FILE):
30
+ index = faiss.read_index(INDEX_FILE)
31
+ with open(TEXTS_FILE, "r") as f:
32
+ incident_texts = json.load(f)
33
+ else:
34
+ index = faiss.IndexFlatL2(VECTOR_DIM)
35
+ incident_texts = []
 
 
 
36
 
 
37
 
38
+ # === Safe persistence ===
39
  def save_index():
 
40
  with FileLock(LOCK_FILE):
41
+ faiss.write_index(index, INDEX_FILE)
42
+ with open(TEXTS_FILE, "w") as f:
43
+ json.dump(incident_texts, f)
44
+
 
 
 
 
 
45
 
46
  # === Core logic ===
47
+ events = []
48
+
49
+
50
  def detect_anomaly(event):
51
+ """Adaptive threshold-based anomaly detection."""
52
  latency = event["latency"]
53
  error_rate = event["error_rate"]
54
+
55
+ # Occasionally flag random anomaly for testing
56
  if random.random() < 0.25:
57
  return True
58
+
59
  return latency > 150 or error_rate > 0.05
60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
+ def call_huggingface_analysis(prompt):
63
+ """Uses HF Inference API or local fallback."""
64
  if not HF_TOKEN:
65
+ return "Offline mode: simulated analysis."
66
 
67
  try:
68
  payload = {
 
71
  "max_tokens": 200,
72
  "temperature": 0.3,
73
  }
74
+ response = requests.post(HF_API_URL, headers=HEADERS, json=payload, timeout=10)
75
+ if response.status_code == 200:
76
+ result = response.json()
77
+ return result.get("choices", [{}])[0].get("text", "").strip()
 
 
 
 
 
 
 
 
 
 
 
78
  else:
79
+ return f"Error {response.status_code}: {response.text}"
 
80
  except Exception as e:
81
+ return f"Error generating analysis: {e}"
82
+
83
 
84
  def simulate_healing(event):
85
  actions = [
86
  "Restarted container",
87
  "Scaled up instance",
88
  "Cleared queue backlog",
89
+ "No actionable step detected.",
90
  ]
91
  return random.choice(actions)
92
 
93
+
94
+ def analyze_event(component, latency, error_rate):
95
  event = {
96
  "timestamp": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
97
  "component": component,
98
+ "latency": latency,
99
+ "error_rate": error_rate,
100
  }
101
+
102
+ is_anomaly = detect_anomaly(event)
103
+ event["anomaly"] = is_anomaly
104
+ event["status"] = "Anomaly" if is_anomaly else "Normal"
105
 
106
  prompt = (
107
  f"Component: {component}\nLatency: {latency:.2f}ms\nError Rate: {error_rate:.3f}\n"
108
+ f"Status: {event['status']}\n\n"
109
+ "Provide a one-line reliability insight or root cause analysis."
110
  )
111
 
112
+ # AI Reliability analysis
113
  analysis = call_huggingface_analysis(prompt)
114
  event["analysis"] = analysis
 
115
 
116
+ # Simulated self-healing
117
+ healing_action = simulate_healing(event)
118
+ event["healing_action"] = healing_action
119
+
120
+ # === Vector learning & persistence ===
121
+ vector_text = f"{component} {latency} {error_rate} {analysis}"
122
+ vec = model.encode([vector_text])
123
+ index.add(np.array(vec, dtype=np.float32))
124
+ incident_texts.append(vector_text)
125
+ save_index()
126
+
127
+ # Similar incident lookup
128
+ if len(incident_texts) > 1:
129
+ D, I = index.search(vec, k=min(3, len(incident_texts)))
130
+ similar = [incident_texts[i] for i in I[0] if i < len(incident_texts)]
131
+ if similar:
132
+ event["healing_action"] += f" Found {len(similar)} similar incidents (e.g., {similar[0][:100]}...)."
133
+ else:
134
+ event["healing_action"] += " - Not enough incidents stored yet."
 
 
135
 
136
  events.append(event)
 
 
 
137
  return event
138
 
 
 
139
 
140
+ # === FastAPI backend ===
141
+ app = FastAPI(title="Agentic Reliability Framework API")
142
+
 
 
 
 
143
 
144
  class AddEventModel(BaseModel):
145
+ component: str
146
+ latency: float
147
+ error_rate: float
148
+
149
+
150
+ def verify_api_key(provided_key: str):
151
+ if not API_KEY:
152
+ return True # dev mode
153
+ return provided_key == API_KEY
154
+
155
 
156
  @app.post("/add-event")
157
+ def add_event(
158
+ payload: AddEventModel = Body(...),
159
+ x_api_key: str = Header(None, alias="X-API-Key"),
160
+ ):
161
+ """Add a telemetry event (secured via API key)."""
162
+ if not verify_api_key(x_api_key):
163
+ raise HTTPException(status_code=401, detail="Unauthorized: invalid API key.")
164
+
165
  try:
166
  event = analyze_event(payload.component, payload.latency, payload.error_rate)
167
  return {"status": "ok", "event": event}
168
  except Exception as e:
169
  raise HTTPException(status_code=500, detail=f"Failed to add event: {e}")
170
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
 
172
+ # === Gradio Dashboard ===
173
  def submit_event(component, latency, error_rate):
174
+ event = analyze_event(component, latency, error_rate)
175
+
176
  table = [
177
+ [
178
+ e["timestamp"],
179
+ e["component"],
180
+ e["latency"],
181
+ e["error_rate"],
182
+ e["status"],
183
+ e["analysis"],
184
+ e["healing_action"],
185
+ ]
186
  for e in events[-20:]
187
  ]
188
+
189
  return (
190
+ f"✅ Event Processed ({event['status']})",
191
  gr.Dataframe(
192
+ headers=[
193
+ "timestamp",
194
+ "component",
195
+ "latency",
196
+ "error_rate",
197
+ "status",
198
+ "analysis",
199
+ "healing_action",
200
+ ],
201
+ value=table,
202
+ ),
203
  )
204
 
205
+
206
  with gr.Blocks(title="🧠 Agentic Reliability Framework MVP") as demo:
207
+ gr.Markdown(
208
+ "## 🧠 Agentic Reliability Framework MVP\n"
209
+ "Adaptive anomaly detection + AI-driven self-healing + persistent FAISS memory"
210
+ )
211
  with gr.Row():
212
  component = gr.Textbox(label="Component", value="api-service")
213
  latency = gr.Slider(10, 400, value=100, step=1, label="Latency (ms)")
214
  error_rate = gr.Slider(0, 0.2, value=0.02, step=0.001, label="Error Rate")
215
  submit = gr.Button("🚀 Submit Telemetry Event")
216
  output_text = gr.Textbox(label="Detection Output")
217
+ table_output = gr.Dataframe(
218
+ headers=[
219
+ "timestamp",
220
+ "component",
221
+ "latency",
222
+ "error_rate",
223
+ "status",
224
+ "analysis",
225
+ "healing_action",
226
+ ]
227
+ )
228
  submit.click(fn=submit_event, inputs=[component, latency, error_rate], outputs=[output_text, table_output])
229
 
 
 
 
230
 
231
  if __name__ == "__main__":
232
+ demo.launch(server_name="0.0.0.0", server_port=7860)
233
+