Spaces:

NavyDevilDoc
/

AI_Toolkit

Sleeping

App Files Files Community

NavyDevilDoc committed on Dec 22, 2025

Commit

f6e4ae6

verified ·

1 Parent(s): 83d8092

Update src/rag_engine.py

Browse files

Files changed (1) hide show

src/rag_engine.py +57 -1

src/rag_engine.py CHANGED Viewed

@@ -312,4 +312,60 @@ def reset_knowledge_base(username: str) -> Tuple[bool, str]:
     # Pinecone delete_all is index-wide usually.
     # For safety in namespace-based multi-tenancy, we usually skip this
     # or implement a delete_all(delete_all=True, namespace=username)
-    return False, "Resetting entire DB via API is disabled for safety. Use Delete."

     # Pinecone delete_all is index-wide usually.
     # For safety in namespace-based multi-tenancy, we usually skip this
     # or implement a delete_all(delete_all=True, namespace=username)
+    return False, "Resetting entire DB via API is disabled for safety. Use Delete."
+def rebuild_cache_from_pinecone(username: str, index_name: str) -> Tuple[bool, str]:
+    """
+    Downloads text from Pinecone and reconstructs local source files.
+    Crucial for Quiz Mode after a container restart.
+    """
+    if not PINECONE_KEY or not index_name:
+        return False, "Pinecone config missing."
+    try:
+        pm = PineconeManager(PINECONE_KEY)
+        # 1. Get all Vector IDs for this user
+        ids = pm.get_all_ids(index_name, username)
+        if not ids:
+            return False, "No data found in Pinecone for this user."
+        # 2. Fetch content (Batching by 100 for safety)
+        # Pinecone fetch limit is often 1000, but we play safe.
+        batch_size = 100
+        reconstructed_files = {} # { "filename.txt": ["chunk1", "chunk2"] }
+        for i in range(0, len(ids), batch_size):
+            batch_ids = ids[i : i + batch_size]
+            response = pm.fetch_vectors(index_name, batch_ids, username)
+            for vec_id, vec_data in response.get('vectors', {}).items():
+                meta = vec_data.get('metadata', {})
+                source = meta.get('source', 'unknown_restored.txt')
+                text = meta.get('text', '') or vec_data.get('metadata', {}).get('page_content', '')
+                if source not in reconstructed_files:
+                    reconstructed_files[source] = []
+                reconstructed_files[source].append(text)
+        # 3. Write to Disk
+        user_dir = os.path.join(UPLOAD_DIR, username)
+        os.makedirs(user_dir, exist_ok=True)
+        count = 0
+        for filename, chunks in reconstructed_files.items():
+            # Join chunks. Since we don't track order perfectly in UUIDs,
+            # we just join them. For the Quizzer's sliding window, this is usually fine.
+            # (If you used the readable ID update from previous turn, they might sort better).
+            full_text = "\n\n".join(chunks)
+            file_path = os.path.join(user_dir, filename)
+            with open(file_path, "w", encoding="utf-8") as f:
+                f.write(full_text)
+            count += 1
+        return True, f"Restored {count} files from Pinecone!"
+    except Exception as e:
+        logger.error(f"Cache rebuild failed: {e}")
+        return False, str(e)