petter2025 committed on
Commit
6a3df22
·
verified ·
1 Parent(s): 414407c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +157 -68
app.py CHANGED
@@ -1,13 +1,20 @@
 
 
1
  import os
2
  import json
3
  import random
4
  import datetime
 
5
  import numpy as np
6
  import gradio as gr
7
  import requests
8
  import faiss
 
 
9
  from sentence_transformers import SentenceTransformer
10
  from filelock import FileLock
 
 
11
 
12
  # === Config ===
13
  HF_TOKEN = os.getenv("HF_TOKEN", "").strip()
@@ -16,58 +23,68 @@ HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
16
 
17
  print("✅ Hugging Face token loaded." if HF_TOKEN else "⚠️ No HF token found, using local analysis mode.")
18
 
19
- # === Persistent FAISS Setup ===
20
  VECTOR_DIM = 384
21
  INDEX_FILE = "incident_vectors.index"
22
  TEXTS_FILE = "incident_texts.json"
23
  LOCK_FILE = "incident.lock"
 
 
24
  model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
25
 
26
  def load_faiss_index():
27
  if os.path.exists(INDEX_FILE) and os.path.exists(TEXTS_FILE):
28
- index = faiss.read_index(INDEX_FILE)
29
- with open(TEXTS_FILE, "r") as f:
30
- texts = json.load(f)
31
- return index, texts
32
- else:
33
- return faiss.IndexFlatL2(VECTOR_DIM), []
 
 
34
 
35
  index, incident_texts = load_faiss_index()
36
 
37
  def save_index():
38
- """Persist FAISS + metadata safely."""
39
  with FileLock(LOCK_FILE):
40
- faiss.write_index(index, INDEX_FILE)
41
- with open(TEXTS_FILE, "w") as f:
42
- json.dump(incident_texts, f)
 
 
 
43
 
44
- # === Event Memory ===
45
  events = []
46
 
47
- # === Core Logic ===
48
  def detect_anomaly(event):
49
  latency = event["latency"]
50
  error_rate = event["error_rate"]
51
- # Occasional forced anomaly for testing
52
  if random.random() < 0.25:
53
  return True
54
  return latency > 150 or error_rate > 0.05
55
 
56
  def local_reliability_analysis(prompt: str):
57
- """Local semantic fallback analysis via vector similarity."""
58
- embedding = model.encode([prompt])
59
- index.add(np.array(embedding, dtype=np.float32))
60
- incident_texts.append(prompt)
61
- save_index()
62
- if len(incident_texts) > 1:
63
- D, I = index.search(np.array(embedding, dtype=np.float32), k=min(3, len(incident_texts)))
64
- similar = [incident_texts[i] for i in I[0] if i < len(incident_texts)]
65
- return f"Local insight: {len(similar)} similar reliability events detected."
66
- else:
67
- return "Local insight: Initial incident stored."
68
-
69
- def call_huggingface_analysis(prompt):
70
- """Hybrid HF/local analysis with graceful fallback."""
 
 
 
71
  if not HF_TOKEN:
72
  return local_reliability_analysis(prompt)
73
 
@@ -78,15 +95,26 @@ def call_huggingface_analysis(prompt):
78
  "max_tokens": 200,
79
  "temperature": 0.3,
80
  }
81
- response = requests.post(HF_API_URL, headers=HEADERS, json=payload, timeout=10)
82
- if response.status_code == 200:
83
- result = response.json()
84
- return result.get("choices", [{}])[0].get("text", "").strip() or local_reliability_analysis(prompt)
 
 
 
 
 
 
 
 
 
 
 
85
  else:
86
- print(f"⚠️ HF router error {response.status_code}: {response.text[:80]}...")
87
  return local_reliability_analysis(prompt)
88
  except Exception as e:
89
- print(f"⚠️ HF inference error: {e}")
90
  return local_reliability_analysis(prompt)
91
 
92
  def simulate_healing(event):
@@ -98,76 +126,137 @@ def simulate_healing(event):
98
  ]
99
  return random.choice(actions)
100
 
101
- def analyze_event(component, latency, error_rate):
 
102
  event = {
103
  "timestamp": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
104
  "component": component,
105
- "latency": latency,
106
- "error_rate": error_rate,
107
  }
108
-
109
  event["anomaly"] = detect_anomaly(event)
110
  event["status"] = "Anomaly" if event["anomaly"] else "Normal"
111
 
112
  prompt = (
113
  f"Component: {component}\nLatency: {latency:.2f}ms\nError Rate: {error_rate:.3f}\n"
114
- f"Status: {event['status']}\n\n"
115
- "Provide a short reliability insight or root cause."
116
  )
117
 
118
  analysis = call_huggingface_analysis(prompt)
119
  event["analysis"] = analysis
120
  event["healing_action"] = simulate_healing(event)
121
 
122
- # Vector memory persistence
123
  vec_text = f"{component} {latency} {error_rate} {analysis}"
124
- vec = model.encode([vec_text])
125
- index.add(np.array(vec, dtype=np.float32))
126
- incident_texts.append(vec_text)
127
- save_index()
128
-
129
- # Retrieve similar
130
- if len(incident_texts) > 1:
131
- D, I = index.search(vec, k=min(3, len(incident_texts)))
132
- similar = [incident_texts[i] for i in I[0] if i < len(incident_texts)]
133
- if similar:
134
- event["healing_action"] += f" Found {len(similar)} similar incidents (e.g., {similar[0][:120]}...)."
135
- else:
136
- event["healing_action"] += " - Not enough incidents stored yet."
 
 
 
 
 
 
137
 
138
  events.append(event)
139
- return json.dumps(event, indent=2)
 
 
 
140
 
141
- # === UI ===
142
- def submit_event(component, latency, error_rate):
143
- result = analyze_event(component, latency, error_rate)
144
- parsed = json.loads(result)
 
 
 
 
 
 
145
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
  table = [
147
  [e["timestamp"], e["component"], e["latency"], e["error_rate"],
148
  e["status"], e["analysis"], e["healing_action"]]
149
  for e in events[-20:]
150
  ]
151
-
152
  return (
153
- f"✅ Event Processed ({parsed['status']})",
154
  gr.Dataframe(
155
  headers=["timestamp", "component", "latency", "error_rate", "status", "analysis", "healing_action"],
156
- value=table,
157
- ),
158
  )
159
 
160
  with gr.Blocks(title="🧠 Agentic Reliability Framework MVP") as demo:
161
- gr.Markdown("## 🧠 Agentic Reliability Framework MVP\nAdaptive anomaly detection + AI-driven self-healing + persistent FAISS memory.")
162
  with gr.Row():
163
  component = gr.Textbox(label="Component", value="api-service")
164
  latency = gr.Slider(10, 400, value=100, step=1, label="Latency (ms)")
165
  error_rate = gr.Slider(0, 0.2, value=0.02, step=0.001, label="Error Rate")
166
  submit = gr.Button("🚀 Submit Telemetry Event")
167
  output_text = gr.Textbox(label="Detection Output")
168
- table_output = gr.Dataframe(
169
- headers=["timestamp", "component", "latency", "error_rate", "status", "analysis", "healing_action"]
170
- )
171
  submit.click(fn=submit_event, inputs=[component, latency, error_rate], outputs=[output_text, table_output])
172
 
173
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
 
 
 
 
 
 
 
 
1
+ # app.py - Agentic Reliability Framework MVP
2
+ # Drop-in replacement: supports Gradio UI + FastAPI REST endpoints (/semantic-search, /add-event, /recent-events)
3
  import os
4
  import json
5
  import random
6
  import datetime
7
+ import threading
8
  import numpy as np
9
  import gradio as gr
10
  import requests
11
  import faiss
12
+ from fastapi import FastAPI, Query, Body, HTTPException
13
+ from fastapi.middleware.cors import CORSMiddleware
14
  from sentence_transformers import SentenceTransformer
15
  from filelock import FileLock
16
+ import uvicorn
17
+ from pydantic import BaseModel, Field
18
 
19
  # === Config ===
20
  HF_TOKEN = os.getenv("HF_TOKEN", "").strip()
 
23
 
24
  print("✅ Hugging Face token loaded." if HF_TOKEN else "⚠️ No HF token found, using local analysis mode.")
25
 
26
+ # === Persistence / FAISS config ===
27
  VECTOR_DIM = 384
28
  INDEX_FILE = "incident_vectors.index"
29
  TEXTS_FILE = "incident_texts.json"
30
  LOCK_FILE = "incident.lock"
31
+
32
+ # Sentence-transformers model (small and fast)
33
  model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
34
 
35
def load_faiss_index():
    """Load the persisted FAISS index and its parallel incident-text list.

    Returns:
        (faiss.Index, list[str]) — the restored index and texts, or a fresh
        empty ``IndexFlatL2(VECTOR_DIM)`` with ``[]`` when the files are
        missing, unreadable, or inconsistent, so startup never fails on
        corrupt persistence.
    """
    if os.path.exists(INDEX_FILE) and os.path.exists(TEXTS_FILE):
        try:
            idx = faiss.read_index(INDEX_FILE)
            with open(TEXTS_FILE, "r") as f:
                texts = json.load(f)
            # Guard against stale files written under a different config:
            # a dimension mismatch would make every later add()/search()
            # fail, and a vector/text count mismatch would misalign search
            # results with their texts.
            if idx.d != VECTOR_DIM:
                raise ValueError(f"index dim {idx.d} != expected {VECTOR_DIM}")
            if idx.ntotal != len(texts):
                raise ValueError(f"index holds {idx.ntotal} vectors but {len(texts)} texts")
            return idx, texts
        except Exception as e:
            print(f"⚠️ Failed to load index/texts: {e} — creating new in-memory index.")
    return faiss.IndexFlatL2(VECTOR_DIM), []
45
 
46
  index, incident_texts = load_faiss_index()
47
 
48
def save_index():
    """Persist the FAISS index and incident texts to disk.

    Serialized under a file lock so concurrent writers (Gradio callback and
    FastAPI handlers) cannot interleave partial writes. Failures are logged
    rather than raised so a persistence problem never crashes a request.
    """
    with FileLock(LOCK_FILE):
        try:
            # Index first, then metadata — matches the load-side expectation.
            faiss.write_index(index, INDEX_FILE)
            with open(TEXTS_FILE, "w") as fh:
                json.dump(incident_texts, fh)
        except Exception as e:
            print(f"⚠️ Error saving index/texts: {e}")
57
 
58
+ # === In-memory events list ===
59
  events = []
60
 
61
+ # === Core logic ===
62
def detect_anomaly(event):
    """Classify a telemetry event as anomalous.

    A random 25% of events are force-flagged (demo/testing aid); otherwise
    an event is anomalous when latency exceeds 150 ms or the error rate
    exceeds 5%.
    """
    # occasional forced anomaly for testing (25% chance)
    if random.random() < 0.25:
        return True
    high_latency = event["latency"] > 150
    high_errors = event["error_rate"] > 0.05
    return high_latency or high_errors
69
 
70
def local_reliability_analysis(prompt: str):
    """Local fallback analysis using semantic similarity over stored incidents.

    Searches the vector memory for incidents similar to *prompt*, then stores
    the prompt itself so local memory grows. The search happens BEFORE the
    add — the original order (add, then search) guaranteed the nearest
    neighbour was always the prompt itself, inflating the similarity count.

    Returns a one-line "Local insight" string; on any failure returns an
    error string instead of raising, so callers always get text.
    """
    try:
        embedding = np.array(model.encode([prompt]), dtype=np.float32)
        prior = len(incident_texts)
        if prior > 0:
            D, I = index.search(embedding, k=min(3, prior))
            # FAISS pads missing neighbours with -1; accept only real hits.
            similar = [incident_texts[i] for i in I[0] if 0 <= i < prior]
            message = f"Local insight: found {len(similar)} similar incident(s)."
        else:
            message = "Local insight: first incident stored."
        # Store the prompt as a data point (so local memory grows).
        index.add(embedding)
        incident_texts.append(prompt)
        save_index()
        return message
    except Exception as e:
        return f"Local analysis error: {e}"
85
+
86
+ def call_huggingface_analysis(prompt: str):
87
+ """Try HF router -> on failure fall back to local analysis."""
88
  if not HF_TOKEN:
89
  return local_reliability_analysis(prompt)
90
 
 
95
  "max_tokens": 200,
96
  "temperature": 0.3,
97
  }
98
+ resp = requests.post(HF_API_URL, headers=HEADERS, json=payload, timeout=12)
99
+ if resp.status_code == 200:
100
+ result = resp.json()
101
+ # router output shapes vary; try to be defensive
102
+ text = ""
103
+ if isinstance(result, dict):
104
+ # common HF completion shape
105
+ choices = result.get("choices") or []
106
+ if choices:
107
+ text = choices[0].get("text") or choices[0].get("message", {}).get("content", "")
108
+ else:
109
+ text = result.get("generated_text") or ""
110
+ elif isinstance(result, list) and result:
111
+ text = result[0].get("text", "")
112
+ return (text or local_reliability_analysis(prompt)).strip()
113
  else:
114
+ print(f"⚠️ HF router returned {resp.status_code}: {resp.text[:200]}")
115
  return local_reliability_analysis(prompt)
116
  except Exception as e:
117
+ print(f"⚠️ HF inference call error: {e}")
118
  return local_reliability_analysis(prompt)
119
 
120
  def simulate_healing(event):
 
126
  ]
127
  return random.choice(actions)
128
 
129
def analyze_event(component: str, latency: float, error_rate: float):
    """Process one telemetry event end-to-end and persist vector memory.

    Pipeline: anomaly detection → analysis (HF router or local fallback) →
    simulated healing action → FAISS vector persistence → similar-incident
    annotation → bounded in-memory history.

    Returns the fully-populated event dict.
    """
    event = {
        "timestamp": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "component": component,
        "latency": float(latency),
        "error_rate": float(error_rate),
    }
    event["anomaly"] = detect_anomaly(event)
    event["status"] = "Anomaly" if event["anomaly"] else "Normal"

    prompt = (
        f"Component: {component}\nLatency: {latency:.2f}ms\nError Rate: {error_rate:.3f}\n"
        f"Status: {event['status']}\n\nProvide a one-line reliability insight or likely root cause."
    )

    analysis = call_huggingface_analysis(prompt)
    event["analysis"] = analysis
    event["healing_action"] = simulate_healing(event)

    # Persist vector memory (text + embedding). Search BEFORE adding so the
    # similarity lookup cannot match the event against itself — the original
    # add-then-search order always reported the event's own vector as the
    # top "similar incident". Encoding, search, and save share one try so
    # `vec` can never be referenced unbound after a failed encode.
    vec_text = f"{component} {latency} {error_rate} {analysis}"
    try:
        vec = np.array(model.encode([vec_text]), dtype=np.float32)
        prior = len(incident_texts)
        if prior > 0:
            D, I = index.search(vec, k=min(3, prior))
            # FAISS pads missing neighbours with -1; accept only real hits.
            similar = [incident_texts[i] for i in I[0] if 0 <= i < prior]
            if similar:
                event["healing_action"] += f" Found {len(similar)} similar incidents (e.g., {similar[0][:120]}...)."
            else:
                event["healing_action"] += " - Not enough incidents stored yet."
        index.add(vec)
        incident_texts.append(vec_text)
        save_index()
    except Exception as e:
        print(f"⚠️ Error encoding or saving vector: {e}")

    events.append(event)
    # keep the in-memory history bounded to a reasonable size
    if len(events) > 1000:
        events.pop(0)
    return event
176
 
177
+ # === FastAPI app + models ===
178
+ app = FastAPI(title="Agentic Reliability API", version="0.3")
179
+
180
+ app.add_middleware(
181
+ CORSMiddleware,
182
+ allow_origins=["*"],
183
+ allow_credentials=True,
184
+ allow_methods=["*"],
185
+ allow_headers=["*"],
186
+ )
187
 
188
class AddEventModel(BaseModel):
    """Request body schema for POST /add-event."""
    # Validation mirrors the Gradio sliders: non-negative latency,
    # error_rate as a fraction in [0, 1].
    component: str = Field(..., example="api-service")
    latency: float = Field(..., ge=0, example=120.5)
    error_rate: float = Field(..., ge=0, le=1.0, example=0.03)
192
+
193
@app.post("/add-event")
def add_event(payload: AddEventModel = Body(...)):
    """
    Add a telemetry event programmatically.
    Body: { "component": "api-service", "latency": 120, "error_rate": 0.03 }
    """
    try:
        processed = analyze_event(payload.component, payload.latency, payload.error_rate)
    except Exception as e:
        # Surface pipeline failures as a 500 rather than an unhandled crash.
        raise HTTPException(status_code=500, detail=f"Failed to add event: {e}")
    return {"status": "ok", "event": processed}
204
+
205
@app.get("/recent-events")
def recent_events(n: int = Query(20, ge=1, le=200, description="Number of recent events to return")):
    """Return the most recent processed events (default: 20), newest first."""
    recent = events[-n:]
    recent.reverse()  # slice copies, so reversing never mutates `events`
    return {"count": len(recent), "events": recent}
210
+
211
@app.get("/semantic-search")
def semantic_search(query: str = Query(..., description="Search query for reliability memory"), k: int = 3):
    """Perform semantic similarity search over stored reliability incidents.

    Returns up to ``k`` nearest stored incident texts with their L2
    distances, or an empty result set when memory is empty. Raises a 500
    HTTPException on encoding/search failures.
    """
    if not incident_texts:
        return {"results": [], "message": "No incidents in memory yet."}
    # A non-positive k would make FAISS raise an opaque error → clamp to 1.
    if k < 1:
        k = 1
    try:
        embedding = model.encode([query])
        D, I = index.search(np.array(embedding, dtype=np.float32), k=min(k, len(incident_texts)))
        results = []
        for rank, idx in enumerate(I[0]):
            # FAISS pads missing neighbours with -1; the original
            # `idx < len(...)` check accepted -1 and silently returned the
            # LAST stored incident. Require a real, in-range id.
            if 0 <= idx < len(incident_texts):
                results.append({"text": incident_texts[idx], "distance": float(D[0][rank])})
        return {"query": query, "results": results}
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Semantic search failed: {e}")
226
+
227
+ # === Gradio frontend ===
228
def submit_event(component, latency, error_rate):
    """Gradio callback: process one event and refresh the results table."""
    processed = analyze_event(component, latency, error_rate)
    columns = ["timestamp", "component", "latency", "error_rate", "status", "analysis", "healing_action"]
    # Last 20 events, projected into table rows in column order.
    rows = [[e[col] for col in columns] for e in events[-20:]]
    return (
        f"✅ Event Processed ({processed['status']})",
        gr.Dataframe(headers=columns, value=rows),
    )
242
 
243
# === Gradio frontend layout ===
with gr.Blocks(title="🧠 Agentic Reliability Framework MVP") as demo:
    gr.Markdown(
        "## 🧠 Agentic Reliability Framework MVP\n"
        "Adaptive anomaly detection + AI-driven self-healing + FAISS persistent vector memory."
    )
    # Input row: component name plus the two telemetry sliders.
    with gr.Row():
        component = gr.Textbox(label="Component", value="api-service")
        latency = gr.Slider(10, 400, value=100, step=1, label="Latency (ms)")
        error_rate = gr.Slider(0, 0.2, value=0.02, step=0.001, label="Error Rate")
    submit = gr.Button("🚀 Submit Telemetry Event")
    # Outputs: one-line status text and the rolling 20-event table.
    output_text = gr.Textbox(label="Detection Output")
    table_output = gr.Dataframe(
        headers=["timestamp", "component", "latency", "error_rate", "status", "analysis", "healing_action"]
    )
    submit.click(fn=submit_event, inputs=[component, latency, error_rate], outputs=[output_text, table_output])
253
 
254
# === Launch both servers (Gradio UI + FastAPI) in same process ===
def start_gradio():
    """Launch the Gradio UI on :7860 (blocking; run in a background thread)."""
    demo.launch(server_name="0.0.0.0", server_port=7860, share=False)

if __name__ == "__main__":
    # Gradio runs on :7860 in a daemon thread; uvicorn serves the FastAPI
    # app on :8000 in the main thread and keeps the process alive. The
    # daemon flag means the UI thread dies with the process.
    ui_thread = threading.Thread(target=start_gradio, daemon=True)
    ui_thread.start()
    uvicorn.run(app, host="0.0.0.0", port=8000)