petter2025 committed on
Commit
414407c
·
verified ·
1 Parent(s): cd4a63c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -47
app.py CHANGED
@@ -1,61 +1,75 @@
1
  import os
2
  import json
3
  import random
4
- import time
5
  import datetime
6
  import numpy as np
7
  import gradio as gr
8
  import requests
9
- from sentence_transformers import SentenceTransformer
10
  import faiss
 
 
11
 
12
  # === Config ===
13
  HF_TOKEN = os.getenv("HF_TOKEN", "").strip()
14
- if not HF_TOKEN:
15
- print("⚠️ No Hugging Face token found. Running in fallback/local mode.")
16
- else:
17
- print("✅ Hugging Face token loaded successfully.")
18
-
19
  HF_API_URL = "https://router.huggingface.co/hf-inference/v1/completions"
20
  HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
21
 
22
- # === FAISS Setup ===
 
 
23
  VECTOR_DIM = 384
24
  INDEX_FILE = "incident_vectors.index"
25
  TEXTS_FILE = "incident_texts.json"
 
26
  model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
27
 
28
- if os.path.exists(INDEX_FILE):
29
- index = faiss.read_index(INDEX_FILE)
30
- with open(TEXTS_FILE, "r") as f:
31
- incident_texts = json.load(f)
32
- else:
33
- index = faiss.IndexFlatL2(VECTOR_DIM)
34
- incident_texts = []
 
 
 
35
 
36
  def save_index():
37
- faiss.write_index(index, INDEX_FILE)
38
- with open(TEXTS_FILE, "w") as f:
39
- json.dump(incident_texts, f)
 
 
40
 
41
  # === Event Memory ===
42
  events = []
43
 
 
44
  def detect_anomaly(event):
45
- """Adaptive threshold-based anomaly detection."""
46
  latency = event["latency"]
47
  error_rate = event["error_rate"]
48
-
49
- # Force random anomaly occasionally for testing
50
  if random.random() < 0.25:
51
  return True
52
-
53
  return latency > 150 or error_rate > 0.05
54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  def call_huggingface_analysis(prompt):
56
- """Use HF Inference API or fallback simulation."""
57
  if not HF_TOKEN:
58
- return "Offline mode: simulated analysis."
59
 
60
  try:
61
  payload = {
@@ -67,11 +81,13 @@ def call_huggingface_analysis(prompt):
67
  response = requests.post(HF_API_URL, headers=HEADERS, json=payload, timeout=10)
68
  if response.status_code == 200:
69
  result = response.json()
70
- return result.get("choices", [{}])[0].get("text", "").strip()
71
  else:
72
- return f"Error {response.status_code}: {response.text}"
 
73
  except Exception as e:
74
- return f"Error generating analysis: {e}"
 
75
 
76
  def simulate_healing(event):
77
  actions = [
@@ -87,36 +103,30 @@ def analyze_event(component, latency, error_rate):
87
  "timestamp": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
88
  "component": component,
89
  "latency": latency,
90
- "error_rate": error_rate
91
  }
92
 
93
- is_anomaly = detect_anomaly(event)
94
- event["anomaly"] = is_anomaly
95
- event["status"] = "Anomaly" if is_anomaly else "Normal"
96
 
97
- # Build textual prompt
98
  prompt = (
99
  f"Component: {component}\nLatency: {latency:.2f}ms\nError Rate: {error_rate:.3f}\n"
100
  f"Status: {event['status']}\n\n"
101
- "Provide a one-line reliability insight or root cause analysis."
102
  )
103
 
104
- # Analysis
105
  analysis = call_huggingface_analysis(prompt)
106
  event["analysis"] = analysis
 
107
 
108
- # Healing simulation
109
- healing_action = simulate_healing(event)
110
- event["healing_action"] = healing_action
111
-
112
- # === Vector learning ===
113
- vector_text = f"{component} {latency} {error_rate} {analysis}"
114
- vec = model.encode([vector_text])
115
  index.add(np.array(vec, dtype=np.float32))
116
- incident_texts.append(vector_text)
117
  save_index()
118
 
119
- # Find similar incidents
120
  if len(incident_texts) > 1:
121
  D, I = index.search(vec, k=min(3, len(incident_texts)))
122
  similar = [incident_texts[i] for i in I[0] if i < len(incident_texts)]
@@ -143,19 +153,21 @@ def submit_event(component, latency, error_rate):
143
  f"✅ Event Processed ({parsed['status']})",
144
  gr.Dataframe(
145
  headers=["timestamp", "component", "latency", "error_rate", "status", "analysis", "healing_action"],
146
- value=table
147
- )
148
  )
149
 
150
  with gr.Blocks(title="🧠 Agentic Reliability Framework MVP") as demo:
151
- gr.Markdown("## 🧠 Agentic Reliability Framework MVP\nAdaptive anomaly detection + AI-driven self-healing + vector memory (FAISS persistent)")
152
  with gr.Row():
153
  component = gr.Textbox(label="Component", value="api-service")
154
  latency = gr.Slider(10, 400, value=100, step=1, label="Latency (ms)")
155
  error_rate = gr.Slider(0, 0.2, value=0.02, step=0.001, label="Error Rate")
156
  submit = gr.Button("🚀 Submit Telemetry Event")
157
  output_text = gr.Textbox(label="Detection Output")
158
- table_output = gr.Dataframe(headers=["timestamp", "component", "latency", "error_rate", "status", "analysis", "healing_action"])
 
 
159
  submit.click(fn=submit_event, inputs=[component, latency, error_rate], outputs=[output_text, table_output])
160
 
161
  demo.launch(server_name="0.0.0.0", server_port=7860)
 
1
  import os
2
  import json
3
  import random
 
4
  import datetime
5
  import numpy as np
6
  import gradio as gr
7
  import requests
 
8
  import faiss
9
+ from sentence_transformers import SentenceTransformer
10
+ from filelock import FileLock
11
 
12
  # === Config ===
13
  HF_TOKEN = os.getenv("HF_TOKEN", "").strip()
 
 
 
 
 
14
  HF_API_URL = "https://router.huggingface.co/hf-inference/v1/completions"
15
  HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
16
 
17
+ print("✅ Hugging Face token loaded." if HF_TOKEN else "⚠️ No HF token found, using local analysis mode.")
18
+
19
+ # === Persistent FAISS Setup ===
20
  VECTOR_DIM = 384
21
  INDEX_FILE = "incident_vectors.index"
22
  TEXTS_FILE = "incident_texts.json"
23
+ LOCK_FILE = "incident.lock"
24
  model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
25
 
26
def load_faiss_index():
    """Load the persisted FAISS index and its text list, or start empty.

    Returns:
        An ``(index, texts)`` pair. When both INDEX_FILE and TEXTS_FILE
        exist, the index is read from INDEX_FILE and the texts are decoded
        from the JSON stored in TEXTS_FILE. If either file is missing, a new
        ``faiss.IndexFlatL2(VECTOR_DIM)`` and an empty list are returned.
    """
    have_both = os.path.exists(INDEX_FILE) and os.path.exists(TEXTS_FILE)
    if not have_both:
        # One file without the other is not usable on its own: start fresh.
        return faiss.IndexFlatL2(VECTOR_DIM), []

    stored_index = faiss.read_index(INDEX_FILE)
    with open(TEXTS_FILE, "r") as handle:
        stored_texts = json.load(handle)
    return stored_index, stored_texts
34
+
35
+ index, incident_texts = load_faiss_index()
36
 
37
def save_index():
    """Persist the FAISS index and the incident texts to disk.

    Both artefacts are first written to sibling ``.tmp`` files and only then
    moved over INDEX_FILE and TEXTS_FILE with ``os.replace``. Writing in place
    would leave a truncated index or JSON file behind if the process were
    interrupted mid-write, and the loader would then try to read that file at
    the next start.

    The whole sequence runs while holding ``FileLock(LOCK_FILE)``.
    """
    index_tmp = f"{INDEX_FILE}.tmp"
    texts_tmp = f"{TEXTS_FILE}.tmp"

    with FileLock(LOCK_FILE):
        # Finish both writes before replacing either target, so the two
        # files on disk are swapped as close together as possible.
        faiss.write_index(index, index_tmp)
        with open(texts_tmp, "w") as handle:
            json.dump(incident_texts, handle)

        os.replace(index_tmp, INDEX_FILE)
        os.replace(texts_tmp, TEXTS_FILE)
43
 
44
  # === Event Memory ===
45
  events = []
46
 
47
+ # === Core Logic ===
48
def detect_anomaly(event):
    """Decide whether a telemetry event counts as an anomaly.

    Args:
        event: Mapping with ``"latency"`` and ``"error_rate"`` entries.

    Returns:
        ``True`` when the latency is above 150 or the error rate is above
        0.05. Independently of the readings, roughly one call in four is
        reported as an anomaly on purpose (a forced anomaly for testing).

    Raises:
        KeyError: If either entry is missing from ``event``.
    """
    observed_latency, observed_error_rate = event["latency"], event["error_rate"]

    # Threshold path: taken whenever the random draw does not force an anomaly.
    if random.random() >= 0.25:
        return observed_latency > 150 or observed_error_rate > 0.05

    # Occasional forced anomaly for testing.
    return True
55
 
56
def local_reliability_analysis(prompt: str):
    """Produce a fallback insight from vector similarity, without the HF API.

    The prompt is embedded with the module-level ``model``, added to the
    FAISS ``index``, appended to ``incident_texts`` and persisted through
    ``save_index()``. If more than one text is stored afterwards, the index
    is searched for up to three nearest neighbours of the prompt.

    Args:
        prompt: Text describing the event to analyse.

    Returns:
        ``"Local insight: Initial incident stored."`` when the prompt is the
        only stored text, otherwise ``"Local insight: N similar reliability
        events detected."``. Because the search runs after the prompt has
        been added, N counts over a result set that can contain the prompt
        itself.
    """
    # Convert once and reuse the same array for both add and search.
    query = np.array(model.encode([prompt]), dtype=np.float32)
    index.add(query)
    incident_texts.append(prompt)
    save_index()

    stored = len(incident_texts)
    if stored <= 1:
        return "Local insight: Initial incident stored."

    _, neighbour_ids = index.search(query, k=min(3, stored))
    # FAISS pads missing results with -1. Without the lower bound, -1 would
    # pass the range check and silently refer to the last stored text.
    matches = sum(1 for i in neighbour_ids[0] if 0 <= i < stored)
    return f"Local insight: {matches} similar reliability events detected."
68
+
69
  def call_huggingface_analysis(prompt):
70
+ """Hybrid HF/local analysis with graceful fallback."""
71
  if not HF_TOKEN:
72
+ return local_reliability_analysis(prompt)
73
 
74
  try:
75
  payload = {
 
81
  response = requests.post(HF_API_URL, headers=HEADERS, json=payload, timeout=10)
82
  if response.status_code == 200:
83
  result = response.json()
84
+ return result.get("choices", [{}])[0].get("text", "").strip() or local_reliability_analysis(prompt)
85
  else:
86
+ print(f"⚠️ HF router error {response.status_code}: {response.text[:80]}...")
87
+ return local_reliability_analysis(prompt)
88
  except Exception as e:
89
+ print(f"⚠️ HF inference error: {e}")
90
+ return local_reliability_analysis(prompt)
91
 
92
  def simulate_healing(event):
93
  actions = [
 
103
  "timestamp": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
104
  "component": component,
105
  "latency": latency,
106
+ "error_rate": error_rate,
107
  }
108
 
109
+ event["anomaly"] = detect_anomaly(event)
110
+ event["status"] = "Anomaly" if event["anomaly"] else "Normal"
 
111
 
 
112
  prompt = (
113
  f"Component: {component}\nLatency: {latency:.2f}ms\nError Rate: {error_rate:.3f}\n"
114
  f"Status: {event['status']}\n\n"
115
+ "Provide a short reliability insight or root cause."
116
  )
117
 
 
118
  analysis = call_huggingface_analysis(prompt)
119
  event["analysis"] = analysis
120
+ event["healing_action"] = simulate_healing(event)
121
 
122
+ # Vector memory persistence
123
+ vec_text = f"{component} {latency} {error_rate} {analysis}"
124
+ vec = model.encode([vec_text])
 
 
 
 
125
  index.add(np.array(vec, dtype=np.float32))
126
+ incident_texts.append(vec_text)
127
  save_index()
128
 
129
+ # Retrieve similar
130
  if len(incident_texts) > 1:
131
  D, I = index.search(vec, k=min(3, len(incident_texts)))
132
  similar = [incident_texts[i] for i in I[0] if i < len(incident_texts)]
 
153
  f"✅ Event Processed ({parsed['status']})",
154
  gr.Dataframe(
155
  headers=["timestamp", "component", "latency", "error_rate", "status", "analysis", "healing_action"],
156
+ value=table,
157
+ ),
158
  )
159
 
160
  with gr.Blocks(title="🧠 Agentic Reliability Framework MVP") as demo:
161
+ gr.Markdown("## 🧠 Agentic Reliability Framework MVP\nAdaptive anomaly detection + AI-driven self-healing + persistent FAISS memory.")
162
  with gr.Row():
163
  component = gr.Textbox(label="Component", value="api-service")
164
  latency = gr.Slider(10, 400, value=100, step=1, label="Latency (ms)")
165
  error_rate = gr.Slider(0, 0.2, value=0.02, step=0.001, label="Error Rate")
166
  submit = gr.Button("🚀 Submit Telemetry Event")
167
  output_text = gr.Textbox(label="Detection Output")
168
+ table_output = gr.Dataframe(
169
+ headers=["timestamp", "component", "latency", "error_rate", "status", "analysis", "healing_action"]
170
+ )
171
  submit.click(fn=submit_event, inputs=[component, latency, error_rate], outputs=[output_text, table_output])
172
 
173
  demo.launch(server_name="0.0.0.0", server_port=7860)