NavyDevilDoc committed on
Commit
f6e4ae6
·
verified ·
1 Parent(s): 83d8092

Update src/rag_engine.py

Browse files
Files changed (1) hide show
  1. src/rag_engine.py +57 -1
src/rag_engine.py CHANGED
@@ -312,4 +312,60 @@ def reset_knowledge_base(username: str) -> Tuple[bool, str]:
312
  # Pinecone delete_all is index-wide usually.
313
  # For safety in namespace-based multi-tenancy, we usually skip this
314
  # or implement a delete_all(delete_all=True, namespace=username)
315
- return False, "Resetting entire DB via API is disabled for safety. Use Delete."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
312
  # Pinecone delete_all is index-wide usually.
313
  # For safety in namespace-based multi-tenancy, we usually skip this
314
  # or implement a delete_all(delete_all=True, namespace=username)
315
+ return False, "Resetting entire DB via API is disabled for safety. Use Delete."
316
+
317
def _natural_sort_key(value) -> list:
    """Return a sort key that orders embedded integers numerically.

    Splitting on digit runs yields alternating text / number parts, so
    "doc_chunk_2" sorts before "doc_chunk_10". Parts at the same index are
    always the same type, which keeps the keys mutually comparable.
    """
    import re  # local import: the file's top-level import block is not visible here

    parts = re.split(r"([0-9]+)", str(value))
    return [int(part) if index % 2 else part for index, part in enumerate(parts)]


def _safe_restored_filename(source, fallback: str) -> str:
    """Reduce a metadata ``source`` value to a bare filename.

    Metadata comes from the vector store and must not be able to steer the
    write outside the user's directory, so any directory components
    (POSIX or Windows style) are dropped. Empty or dot-only names fall back
    to *fallback*.
    """
    normalized = str(source or "").replace("\\", "/").rstrip("/")
    name = os.path.basename(normalized)
    if name in ("", ".", ".."):
        return fallback
    return name


def rebuild_cache_from_pinecone(username: str, index_name: str) -> Tuple[bool, str]:
    """
    Downloads text from Pinecone and reconstructs local source files.
    Crucial for Quiz Mode after a container restart.

    Args:
        username: Namespace passed to the Pinecone helper calls; also the
            name of the sub-directory of UPLOAD_DIR the files are written to.
        index_name: Name of the Pinecone index to read from.

    Returns:
        (True, message) when at least one file was written, otherwise
        (False, reason). Exceptions are logged and reported as (False, str(e))
        rather than propagated.
    """
    if not PINECONE_KEY or not index_name:
        return False, "Pinecone config missing."

    fallback_name = "unknown_restored.txt"
    # Pinecone fetch limit is often 1000, but we play safe.
    batch_size = 100

    try:
        pm = PineconeManager(PINECONE_KEY)

        # 1. Get all vector IDs for this user.
        ids = pm.get_all_ids(index_name, username)
        if not ids:
            return False, "No data found in Pinecone for this user."

        # 2. Fetch content in batches and group it by target filename.
        #    { "filename.txt": [(vector_id, "chunk text"), ...] }
        reconstructed_files = {}
        for start in range(0, len(ids), batch_size):
            batch_ids = ids[start:start + batch_size]
            # NOTE(review): assumes fetch_vectors returns a dict-like object
            # with a 'vectors' mapping, as the original code did - confirm
            # against PineconeManager.
            response = pm.fetch_vectors(index_name, batch_ids, username)

            for vec_id, vec_data in (response.get('vectors') or {}).items():
                # `or {}` also covers a metadata key that is present but None.
                meta = vec_data.get('metadata') or {}
                text = meta.get('text') or meta.get('page_content') or ''
                if not text:
                    # Vectors without stored text add nothing but blank separators.
                    continue

                filename = _safe_restored_filename(meta.get('source'), fallback_name)
                reconstructed_files.setdefault(filename, []).append((vec_id, str(text)))

        if not reconstructed_files:
            return False, "No text content found in Pinecone metadata for this user."

        # 3. Write to disk.
        user_dir = os.path.join(UPLOAD_DIR, username)
        os.makedirs(user_dir, exist_ok=True)

        for filename, chunks in reconstructed_files.items():
            # Chunk order is not tracked explicitly. Sorting by vector ID makes
            # the output deterministic, and readable IDs with a numeric chunk
            # suffix come back in document order. Random UUIDs still give an
            # arbitrary (but stable) order.
            chunks.sort(key=lambda item: _natural_sort_key(item[0]))
            full_text = "\n\n".join(text for _, text in chunks)

            file_path = os.path.join(user_dir, filename)
            with open(file_path, "w", encoding="utf-8") as f:
                f.write(full_text)

        return True, f"Restored {len(reconstructed_files)} files from Pinecone!"

    except Exception as e:
        # Top-level boundary: callers expect a (bool, message) tuple, so log
        # the traceback and report the failure instead of raising.
        logger.exception("Cache rebuild failed: %s", e)
        return False, str(e)