Pragthedon committed
Commit c7893c0 · 1 Parent(s): fbb0228

Fix: Backend OOM crashes via Vector Cache and worker reduction
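The core of the fix: each fetcher already encodes a candidate text once when it checks relevance, so is_relevant() now returns that vector, save_evidence() persists it with the evidence document, and build_faiss() reads the stored vectors back instead of re-encoding the whole corpus in one large batch. Combined with halving the gunicorn workers, this keeps the peak resident memory of each process down. A minimal sketch of the flow after this commit (names taken from model.py and project/database.py; error handling and the surrounding fetch loops are omitted):

    import numpy as np
    import faiss

    # At fetch time: encode a candidate once, keep the vector (is_relevant returns (bool, list)).
    claim_emb = embed_model.encode(["<claim text>"], normalize_embeddings=True)[0]
    relevant, emb = is_relevant(claim_emb, "<candidate headline>")
    if relevant:
        save_evidence("<candidate headline>", "RSS", embedding=emb)  # vector stored with the text

    # At index-build time: reuse stored vectors, no re-encoding of the corpus.
    rows = load_all_evidence()  # (id, text, source, embedding) tuples
    vectors = np.array([r[3] for r in rows if r[3]], dtype="float32")
    index = faiss.IndexFlatIP(vectors.shape[1])
    index.add(vectors)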

Files changed (3)
  1. Dockerfile +2 -2
  2. model.py +46 -25
  3. project/database.py +5 -4
Dockerfile CHANGED
@@ -26,5 +26,5 @@ COPY --chown=user . .
 # Expose port (HF Spaces routes traffic to 7860 by default)
 EXPOSE 7860
 
-# Start Flask — 4 workers so heavy AI requests don't starve auth/ping endpoints
-CMD ["gunicorn", "--bind", "0.0.0.0:7860", "--workers", "4", "--timeout", "120", "--keep-alive", "5", "--max-requests", "500", "--max-requests-jitter", "50", "app:app"]
+# Start Flask — 2 workers is best for 16GB RAM + AI models; 300s timeout for heavy peaks
+CMD ["gunicorn", "--bind", "0.0.0.0:7860", "--workers", "2", "--timeout", "300", "--keep-alive", "5", "--max-requests", "500", "--max-requests-jitter", "50", "app:app"]
model.py CHANGED
@@ -32,10 +32,12 @@ nli_model = pipeline(
 # RELEVANCE CHECK
 # ==========================================
 def is_relevant(claim_emb, text, threshold=0.15):
+    """Encodes text and checks similarity against claim.
+    Returns (bool_is_relevant, embedding_as_list)."""
     emb = embed_model.encode([text], normalize_embeddings=True)
     sim = float(np.dot(claim_emb, emb[0]))
     print(f"[DEBUG] Checking relevance for: '{text[:50]}...' Score: {sim:.4f}")
-    return sim >= threshold
+    return sim >= threshold, emb[0].tolist()
 
 def get_search_query(claim):
     stop_words = set(["is", "am", "are", "was", "were", "be", "been", "being",
@@ -72,10 +74,11 @@ def fetch_rss(claim_emb):
             print(f"[RSS] Parsed {url}, found {len(feed.entries)} entries")
             for entry in feed.entries[:5]:
                 title = entry.title
-                # Removed arbitrary 50-char length filter — valid short titles were being dropped
-                if title and is_relevant(claim_emb, title):
-                    save_evidence(title, "RSS")
-                    count += 1
+                if title:
+                    relevant, emb = is_relevant(claim_emb, title)
+                    if relevant:
+                        save_evidence(title, "RSS", embedding=emb)
+                        count += 1
         except Exception as e:
             print(f"[RSS] Error parsing {url}: {e}")
     print(f"[RSS] Saved {count} items.")
@@ -104,10 +107,11 @@ def fetch_gdelt(claim, claim_emb):
 
         for art in articles:
             title = art.get("title", "")
-            # Removed arbitrary 80-char length filter — valid short titles were being dropped
-            if title and is_relevant(claim_emb, title):
-                save_evidence(title, "GDELT")
-                added += 1
+            if title:
+                relevant, emb = is_relevant(claim_emb, title)
+                if relevant:
+                    save_evidence(title, "GDELT", embedding=emb)
+                    added += 1
     except Exception as e:
         print("[WARNING] GDELT failed:", e)
 
@@ -151,9 +155,11 @@ def fetch_newsapi(claim, claim_emb):
             description = art.get("description", "") or ""
             content = f"{title}. {description}".strip(". ")
 
-            if content and is_relevant(claim_emb, content, threshold=0.05):
-                save_evidence(content, f"NewsAPI: {art.get('source', {}).get('name', 'Unknown')}")
-                added += 1
+            if content:
+                relevant, emb = is_relevant(claim_emb, content, threshold=0.05)
+                if relevant:
+                    save_evidence(content, f"NewsAPI: {art.get('source', {}).get('name', 'Unknown')}", embedding=emb)
+                    added += 1
     except Exception as e:
         print("[WARNING] NewsAPI failed:", e)
 
@@ -185,8 +191,9 @@ def fetch_wikipedia(claim):
             extract = r2.json().get("extract", "")
             if len(extract) > 20:
                 claim_emb_wiki = embed_model.encode([claim], normalize_embeddings=True)
-                if is_relevant(claim_emb_wiki, extract, threshold=0.05):
-                    save_evidence(extract, f"Wikipedia: {title}")
+                relevant, emb = is_relevant(claim_emb_wiki[0], extract, threshold=0.05)
+                if relevant:
+                    save_evidence(extract, f"Wikipedia: {title}", embedding=emb)
                     saved += 1
     print(f"[Wikipedia] Saved {saved} items.")
 
@@ -204,10 +211,10 @@ def fetch_knowledge_base(claim, claim_emb, threshold=0.30):
     for entry in KNOWLEDGE_BASE:
         text = entry["text"]
         source = entry["source"]
-        emb = embed_model.encode([text], normalize_embeddings=True)
-        sim = float(np.dot(claim_emb, emb[0]))
+        emb_text = embed_model.encode([text], normalize_embeddings=True)
+        sim = float(np.dot(claim_emb, emb_text[0]))
         if sim >= threshold:
-            save_evidence(text, source)
+            save_evidence(text, source, embedding=emb_text[0].tolist())
             saved += 1
     print(f"[KnowledgeBase] Saved {saved} matching entries (threshold={threshold}).")
     return saved
@@ -243,8 +250,9 @@ def fetch_wikidata(claim, claim_emb, threshold=0.10):
         label = item.get("label", "")
         if description and label:
            text = f"{label}: {description}"
-            if is_relevant(claim_emb, text, threshold=threshold):
-                save_evidence(text, "Wikidata")
+            relevant, emb = is_relevant(claim_emb, text, threshold=threshold)
+            if relevant:
+                save_evidence(text, "Wikidata", embedding=emb)
                 saved += 1
     print(f"[Wikidata] Saved {saved} items.")
     return saved
@@ -273,9 +281,11 @@ def fetch_duckduckgo(claim, claim_emb):
         saved = 0
         for res in results:
             text = res.get_text()
-            if len(text) > 30 and is_relevant(claim_emb, text, 0.05):
-                save_evidence(text, "DuckDuckGo")
-                saved += 1
+            if len(text) > 30:
+                relevant, emb = is_relevant(claim_emb, text, 0.05)
+                if relevant:
+                    save_evidence(text, "DuckDuckGo", embedding=emb)
+                    saved += 1
         print(f"[DuckDuckGo] Saved {saved} items")
     except Exception as e:
         print("[WARNING] DuckDuckGo failed:", e)
@@ -284,15 +294,26 @@ def fetch_duckduckgo(claim, claim_emb):
 # BUILD FAISS
 # ==========================================
 def build_faiss():
+    """Loads pre-calculated embeddings from Database and builds index.
+    No re-encoding performed here — drastically reduces RAM peaks."""
     rows = load_all_evidence()
     if not rows:
         return False
 
-    texts = [row[1] for row in rows]
-    embeddings = embed_model.encode(texts, normalize_embeddings=True)
+    # Filter rows that actually have embeddings
+    texts = []
+    embeddings_list = []
+    for row in rows:
+        if row[3]:  # row[3] is the embedding
+            texts.append(row[1])
+            embeddings_list.append(row[3])
+
+    if not embeddings_list:
+        return False
 
+    embeddings = np.array(embeddings_list).astype('float32')
     index = faiss.IndexFlatIP(embeddings.shape[1])
-    index.add(np.array(embeddings))
+    index.add(embeddings)
 
     faiss.write_index(index, FAISS_FILE)
     return True
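The stored vectors are produced with normalize_embeddings=True, so the inner-product index still yields cosine-similarity scores. The read side is not part of this diff; a lookup against the rebuilt index would look roughly like this (a sketch, assuming FAISS_FILE and embed_model as defined in model.py; the returned ids follow the order in which build_faiss() added vectors, i.e. only rows that had a stored embedding):

    import faiss
    import numpy as np

    def search_evidence(claim, k=5):
        # Encode the claim the same way the evidence was encoded, then query the index.
        query = embed_model.encode([claim], normalize_embeddings=True).astype("float32")
        index = faiss.read_index(FAISS_FILE)
        scores, ids = index.search(query, k)  # inner product equals cosine for normalized vectors
        return scores[0], ids[0]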
project/database.py CHANGED
@@ -88,20 +88,21 @@ def init_db():
 def clear_db():
     get_db().evidence.delete_many({})
 
-def save_evidence(text, source):
+def save_evidence(text, source, embedding=None):
     try:
         get_db().evidence.insert_one({
             "text": text,
             "source": source,
+            "embedding": embedding,  # Store the vector list directly
             "created_at": datetime.now(timezone.utc)
         })
     except Exception as e:
         print(f"[DB] save_evidence error: {e}")
 
 def load_all_evidence():
-    """Returns list of (id, text, source) — same shape the FAISS pipeline expects."""
-    docs = list(get_db().evidence.find({}, {"_id": 1, "text": 1, "source": 1}))
-    return [(str(d["_id"]), d["text"], d["source"]) for d in docs]
+    """Returns list of (id, text, source, embedding) — same shape the FAISS pipeline expects."""
+    docs = list(get_db().evidence.find({}, {"_id": 1, "text": 1, "source": 1, "embedding": 1}))
+    return [(str(d["_id"]), d["text"], d["source"], d.get("embedding")) for d in docs]
 
 def get_total_evidence_count():
     return get_db().evidence.count_documents({})
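Evidence documents written before this commit have no embedding field, which is why load_all_evidence() uses d.get("embedding") and build_faiss() skips rows whose fourth element is empty. A quick usage sketch of the new contract (assumes the project package is importable and MongoDB is reachable through get_db(); the short vector is illustrative only, real ones come from embed_model):

    from project.database import save_evidence, load_all_evidence

    save_evidence("Example headline", "RSS", embedding=[0.12, -0.03, 0.88])

    for _id, text, source, embedding in load_all_evidence():
        # embedding is None for pre-existing rows; build_faiss() ignores those.
        dims = len(embedding) if embedding else 0
        print(_id, source, dims, text[:40])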