Rajhuggingface4253 committed on
Commit
ef040cf
·
verified ·
1 Parent(s): 641c35a

Update vector.py

Browse files
Files changed (1) hide show
  1. vector.py +49 -1
vector.py CHANGED
@@ -191,7 +191,7 @@ class VectorDatabase:
191
 
192
  # ==================== ENHANCED STORAGE WITH CACHE INVALIDATION ====================
193
 
194
- def store_session_document(self, text: str, filename: str, user_id: str, chat_id: str) -> bool:
195
  """Store extracted file content with enhanced chunking and cache invalidation"""
196
  if not text or len(text) < 10 or not user_id:
197
  logger.warning(f"Invalid input for {filename}")
@@ -233,6 +233,7 @@ class VectorDatabase:
233
  final_meta.append({
234
  "text": chunk["text"],
235
  "source": filename,
 
236
  "type": "file",
237
  "subtype": chunk.get("type", "general"),
238
  "name": chunk.get("name", "unknown"),
@@ -249,6 +250,7 @@ class VectorDatabase:
249
  "text": whole_file_text,
250
  "actual_content": text,
251
  "source": filename,
 
252
  "type": "file",
253
  "subtype": "whole_file",
254
  "is_whole_file": True,
@@ -295,6 +297,52 @@ class VectorDatabase:
295
  self._rollback_partial_storage(user_id, chat_id)
296
  return False
297
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
298
  # ==================== UPDATED BM25 SEARCH WITH LAZY LOADING ====================
299
 
300
  def bm25_search(self, query: str, user_id: str, chat_id: str,
 
191
 
192
  # ==================== ENHANCED STORAGE WITH CACHE INVALIDATION ====================
193
 
194
+ def store_session_document(self, text: str, filename: str, user_id: str, chat_id: str, file_id: str = None) -> bool:
195
  """Store extracted file content with enhanced chunking and cache invalidation"""
196
  if not text or len(text) < 10 or not user_id:
197
  logger.warning(f"Invalid input for {filename}")
 
233
  final_meta.append({
234
  "text": chunk["text"],
235
  "source": filename,
236
+ "file_id": file_id,
237
  "type": "file",
238
  "subtype": chunk.get("type", "general"),
239
  "name": chunk.get("name", "unknown"),
 
250
  "text": whole_file_text,
251
  "actual_content": text,
252
  "source": filename,
253
+ "file_id": file_id,
254
  "type": "file",
255
  "subtype": "whole_file",
256
  "is_whole_file": True,
 
297
  self._rollback_partial_storage(user_id, chat_id)
298
  return False
299
 
300
def delete_file(self, user_id: str, chat_id: str, file_id: str) -> bool:
    """Remove every stored chunk that belongs to one uploaded file.

    A chunk is removed only when its metadata matches all three of
    ``user_id``, ``chat_id`` and ``file_id``. The FAISS index is then
    rebuilt by re-encoding the text of every remaining chunk, the result
    is persisted with ``_save_index`` and the BM25 cache for this
    user/chat is invalidated. The whole operation runs while holding
    ``self.memory_lock``.

    Args:
        user_id: Owner of the chunks to delete.
        chat_id: Chat session the file was uploaded to.
        file_id: Identifier the chunks were stored under. Must be
            non-empty.

    Returns:
        True when at least one chunk was removed and the index was
        rebuilt; False when ``file_id`` is empty, nothing matched, or the
        rebuild failed (in which case index and metadata are untouched).
    """
    # Chunks stored without a file id carry "file_id": None (or no key at
    # all), so a None/empty file_id here would match and wipe all of them.
    if not file_id:
        logger.warning("delete_file called without a file_id; nothing removed")
        return False

    with self.memory_lock:
        # Keep everything that does NOT match user + chat + file id.
        new_metadata = [
            meta
            for meta in self.metadata
            if not (
                meta.get("user_id") == user_id
                and meta.get("chat_id") == chat_id
                and meta.get("file_id") == file_id
            )
        ]
        removed_count = len(self.metadata) - len(new_metadata)

        if removed_count == 0:
            logger.info(f"ℹ️ No vectors found for file_id {file_id}")
            return False

        logger.info(f"🧹 Surgically removing {removed_count} vectors for file {file_id}...")

        # Build the replacement index fully before touching self.index so a
        # failure leaves the previous index and metadata in place.
        try:
            if new_metadata:
                surviving_texts = [m["text"] for m in new_metadata]
                embeddings = self.embedder.encode(surviving_texts, show_progress_bar=False)
                # normalize_L2 works in place on a float32 array, so convert
                # first and then add the very same array to the index.
                embeddings = np.ascontiguousarray(embeddings, dtype="float32")
                faiss.normalize_L2(embeddings)

                new_index = faiss.IndexFlatIP(embeddings.shape[1])
                new_index.add(embeddings)
            else:
                # Nothing left: start an empty index with the same dimension
                # as the current one (384 only as a fallback).
                new_index = faiss.IndexFlatIP(getattr(self.index, "d", 384))
        except Exception as e:
            logger.error(f"❌ Rebuild failed during file deletion: {e}")
            return False

        self.index = new_index
        self.metadata = new_metadata
        self._save_index()

        # Cached BM25 data for this chat still references the removed chunks.
        self._invalidate_bm25_cache(user_id, chat_id)

        logger.info(f"✅ Successfully deleted file {file_id}")
        return True
346
  # ==================== UPDATED BM25 SEARCH WITH LAZY LOADING ====================
347
 
348
  def bm25_search(self, query: str, user_id: str, chat_id: str,