petter2025 committed on
Commit
0b2d10e
·
verified ·
1 Parent(s): ba59239

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -37
app.py CHANGED
@@ -8,6 +8,7 @@ import requests
8
  from datetime import datetime
9
  from sklearn.metrics.pairwise import cosine_similarity
10
  from sentence_transformers import SentenceTransformer
 
11
  import gradio as gr
12
 
13
  # ============================
@@ -27,38 +28,50 @@ else:
27
  print("⚠️ No Hugging Face token found. Running in fallback/local mode.")
28
 
29
  # ============================
30
- # GLOBAL CONFIG
31
  # ============================
32
  HF_API_URL = "https://router.huggingface.co/hf-inference"
33
  headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
 
 
34
 
35
- # Load a lightweight sentence transformer for embedding incidents
 
 
36
  model = SentenceTransformer("all-MiniLM-L6-v2")
 
 
 
37
 
38
- # Vector memory store (in-memory for now)
39
- incident_memory = []
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
  # ============================
42
  # ANOMALY DETECTION
43
  # ============================
44
  def detect_anomaly(event):
45
- """
46
- Detects anomalies based on latency/error_rate thresholds.
47
- Forces an anomaly randomly for validation.
48
- """
49
- force_anomaly = random.random() < 0.25 # ~25% forced anomaly rate
50
  if force_anomaly or event["latency"] > 150 or event["error_rate"] > 0.05:
51
  return True
52
  return False
53
 
54
-
55
  # ============================
56
  # AI ANALYSIS + HEALING
57
  # ============================
58
  def analyze_event(event):
59
- """
60
- Send event to HF Inference API for analysis, fallback locally if needed.
61
- """
62
  prompt = (
63
  f"Analyze this telemetry event and suggest a healing action:\n"
64
  f"Component: {event['component']}\n"
@@ -90,12 +103,10 @@ def analyze_event(event):
90
  except Exception as e:
91
  return f"Error generating analysis: {e}", "No actionable step detected."
92
 
93
-
94
  # ============================
95
  # HEALING SIMULATION
96
  # ============================
97
  def choose_healing_action(event, analysis_text):
98
- """Simulates an automated healing response."""
99
  possible_actions = [
100
  "Restarted container",
101
  "Scaled service replicas",
@@ -111,38 +122,37 @@ def choose_healing_action(event, analysis_text):
111
  return "Invalidated cache"
112
  return random.choice(possible_actions)
113
 
114
-
115
  # ============================
116
- # VECTOR SIMILARITY ENGINE
117
  # ============================
118
  def record_and_search_similar(event, analysis_text):
119
- """
120
- Store each event as a vector and retrieve similar past incidents.
121
- """
122
  description = (
123
  f"Component: {event['component']} | "
124
  f"Latency: {event['latency']} | "
125
  f"ErrorRate: {event['error_rate']} | "
126
  f"Analysis: {analysis_text}"
127
  )
128
- embedding = model.encode(description)
129
 
130
  similar_info = ""
131
- if incident_memory:
132
- existing_embeddings = np.array([e["embedding"] for e in incident_memory])
133
- sims = cosine_similarity([embedding], existing_embeddings)[0]
134
- top_indices = sims.argsort()[-3:][::-1]
135
- similar = [
136
- incident_memory[i]["description"]
137
- for i in top_indices
138
- if sims[i] > 0.7
139
- ]
140
  if similar:
141
- similar_info = f"Found {len(similar)} similar incidents (e.g., {similar[0][:150]}...)."
142
 
143
- incident_memory.append({"embedding": embedding, "description": description})
144
- return similar_info
 
145
 
 
 
 
 
 
 
146
 
147
  # ============================
148
  # EVENT HANDLER
@@ -156,6 +166,7 @@ def process_event(component, latency, error_rate):
156
  "latency": latency,
157
  "error_rate": error_rate,
158
  }
 
159
  event["anomaly"] = detect_anomaly(event)
160
  status = "Anomaly" if event["anomaly"] else "Normal"
161
  analysis, healing = analyze_event(event)
@@ -170,15 +181,12 @@ def process_event(component, latency, error_rate):
170
  df = pd.DataFrame(event_log[-20:])
171
  return f"✅ Event Processed ({status})", df
172
 
173
-
174
  # ============================
175
  # GRADIO UI
176
  # ============================
177
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
178
  gr.Markdown("## 🧠 Agentic Reliability Framework MVP")
179
- gr.Markdown(
180
- "Adaptive anomaly detection + AI-driven self-healing + vector memory"
181
- )
182
 
183
  component = gr.Textbox(label="Component", value="api-service")
184
  latency = gr.Slider(10, 400, value=100, label="Latency (ms)")
 
8
  from datetime import datetime
9
  from sklearn.metrics.pairwise import cosine_similarity
10
  from sentence_transformers import SentenceTransformer
11
+ import faiss
12
  import gradio as gr
13
 
14
  # ============================
 
28
  print("⚠️ No Hugging Face token found. Running in fallback/local mode.")
29
 
30
  # ============================
31
+ # CONFIG
32
  # ============================
33
  HF_API_URL = "https://router.huggingface.co/hf-inference"
34
  headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
35
+ DATA_DIR = "./data"
36
+ os.makedirs(DATA_DIR, exist_ok=True)
37
 
38
+ # ============================
39
+ # MODEL + FAISS SETUP
40
+ # ============================
41
  model = SentenceTransformer("all-MiniLM-L6-v2")
42
+ VECTOR_DIM = model.get_sentence_embedding_dimension()
43
+ FAISS_PATH = os.path.join(DATA_DIR, "incident_memory.faiss")
44
+ META_PATH = os.path.join(DATA_DIR, "incidents.json")
45
 
46
# ----------------------------------------------------------------------
# Load the persisted FAISS index + incident metadata, or start fresh.
# Defines module globals: `index` (faiss.IndexFlatL2) and
# `incident_memory` (list of {"description": str} dicts, parallel to the
# vectors stored in `index`).
# ----------------------------------------------------------------------
index = None
incident_memory = []
if os.path.exists(FAISS_PATH):
    try:
        index = faiss.read_index(FAISS_PATH)
        # META_PATH holds the JSON descriptions matching the index rows.
        with open(META_PATH, "r") as f:
            incident_memory = json.load(f)
        print(f"✅ Loaded {len(incident_memory)} past incidents from FAISS.")
    except Exception as e:
        # Surface the actual error instead of swallowing it — a corrupt
        # index vs. a missing META_PATH need different operator action.
        print(f"⚠️ Failed to load FAISS index ({e}). Starting fresh.")
        index = None
        incident_memory = []
# Single fallback path replaces the two duplicated init branches.
if index is None:
    index = faiss.IndexFlatL2(VECTOR_DIM)
60
 
61
  # ============================
62
  # ANOMALY DETECTION
63
  # ============================
64
def detect_anomaly(event):
    """Return True when the event breaches latency/error-rate thresholds.

    Roughly 25% of events are additionally forced anomalous at random so
    the downstream healing pipeline always has anomalies to exercise.
    """
    forced = random.random() < 0.25
    breached = event["latency"] > 150 or event["error_rate"] > 0.05
    return forced or breached
70
 
 
71
  # ============================
72
  # AI ANALYSIS + HEALING
73
  # ============================
74
  def analyze_event(event):
 
 
 
75
  prompt = (
76
  f"Analyze this telemetry event and suggest a healing action:\n"
77
  f"Component: {event['component']}\n"
 
103
  except Exception as e:
104
  return f"Error generating analysis: {e}", "No actionable step detected."
105
 
 
106
  # ============================
107
  # HEALING SIMULATION
108
  # ============================
109
  def choose_healing_action(event, analysis_text):
 
110
  possible_actions = [
111
  "Restarted container",
112
  "Scaled service replicas",
 
122
  return "Invalidated cache"
123
  return random.choice(possible_actions)
124
 
 
125
  # ============================
126
+ # VECTOR SIMILARITY + FAISS PERSISTENCE
127
  # ============================
128
def record_and_search_similar(event, analysis_text):
    """Embed the event, search FAISS for similar past incidents, then persist it.

    Args:
        event: dict with at least 'component', 'latency', 'error_rate' keys.
        analysis_text: the AI analysis string for this event.

    Returns:
        A short human-readable summary of similar past incidents, or an
        empty string when none are close enough.
    """
    description = (
        f"Component: {event['component']} | "
        f"Latency: {event['latency']} | "
        f"ErrorRate: {event['error_rate']} | "
        f"Analysis: {analysis_text}"
    )
    # FAISS expects a float32 matrix of shape (n_queries, dim).
    embedding = model.encode(description).astype("float32").reshape(1, -1)

    similar_info = ""
    if len(incident_memory) > 0 and index.ntotal > 0:
        k = min(3, len(incident_memory))
        distances, indices = index.search(embedding, k)
        # BUG FIX: the original filtered every hit on D[0][0] (the best
        # distance only), so all k hits passed or failed together. Pair
        # each hit with its own distance, and skip FAISS's -1 sentinel
        # indices returned when fewer than k results exist.
        similar = [
            incident_memory[i]["description"]
            for i, dist in zip(indices[0], distances[0])
            if i >= 0 and dist < 0.5
        ]
        if similar:
            similar_info = f"Found {len(similar)} similar incidents (e.g., {similar[0][:120]}...)."

    # Store new entry (metadata list stays parallel to the index rows).
    incident_memory.append({"description": description})
    index.add(embedding)

    # Persist FAISS + metadata so memory survives restarts.
    faiss.write_index(index, FAISS_PATH)
    with open(META_PATH, "w") as f:
        json.dump(incident_memory, f)

    return similar_info
156
 
157
  # ============================
158
  # EVENT HANDLER
 
166
  "latency": latency,
167
  "error_rate": error_rate,
168
  }
169
+
170
  event["anomaly"] = detect_anomaly(event)
171
  status = "Anomaly" if event["anomaly"] else "Normal"
172
  analysis, healing = analyze_event(event)
 
181
  df = pd.DataFrame(event_log[-20:])
182
  return f"✅ Event Processed ({status})", df
183
 
 
184
  # ============================
185
  # GRADIO UI
186
  # ============================
187
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
188
  gr.Markdown("## 🧠 Agentic Reliability Framework MVP")
189
+ gr.Markdown("Adaptive anomaly detection + AI-driven self-healing + vector memory (FAISS persistent)")
 
 
190
 
191
  component = gr.Textbox(label="Component", value="api-service")
192
  latency = gr.Slider(10, 400, value=100, label="Latency (ms)")