# CapStoneRAG10 / recover_collections.py
# Developer
# Initial commit for HuggingFace Spaces - RAG Capstone Project with Qdrant Cloud
# 1d10b0a
"""Recovery script to restore ChromaDB collections after sqlite3 deletion."""
import chromadb
from chromadb.config import Settings
import os
import json
from pathlib import Path
def recover_collections(chroma_db_path: str = "./chroma_db"):
    """Scan a ChromaDB directory for orphaned collection folders and report on them.

    Meant for the situation where ``chroma.sqlite3`` was deleted and the
    collections no longer appear. The function is diagnostic only: it looks
    for sub-directories whose name is 36 characters of lowercase hex digits
    and hyphens (the UUID-style folder names), checks each one for
    ``data_level0.bin`` and ``header.bin``, and prints what it found together
    with suggested next steps. It never re-registers a collection itself.

    Args:
        chroma_db_path: Directory of the persistent ChromaDB store to inspect.

    Returns:
        None. All results are printed to stdout.
    """
    # Function-scope import: the module's import block does not provide ``re``.
    # It lives here (once per call) instead of inside the directory loop.
    import re

    print("🔧 ChromaDB Collection Recovery Tool")
    print("=" * 50)

    # Initialize client.
    # NOTE(review): the client object is never used below. It is still
    # constructed because opening a PersistentClient presumably (re)creates the
    # store's metadata on disk -- confirm before removing this call.
    _client = chromadb.PersistentClient(
        path=chroma_db_path,
        settings=Settings(anonymized_telemetry=False, allow_reset=True)
    )

    # Scan for collection UUID folders
    print(f"\n📁 Scanning {chroma_db_path} for collections...")
    # fullmatch() makes the whole name match; match() with "$" would also
    # accept a name that ends in a newline.
    uuid_pattern = re.compile(r"[a-f0-9\-]{36}")
    collection_folders = []
    # isdir() rather than exists(): a plain file at this path must not reach
    # os.listdir(), which would raise NotADirectoryError.
    if os.path.isdir(chroma_db_path):
        # os.listdir() returns entries in arbitrary order; sort so the report
        # is stable from run to run.
        collection_folders = sorted(
            entry
            for entry in os.listdir(chroma_db_path)
            if uuid_pattern.fullmatch(entry)
            and os.path.isdir(os.path.join(chroma_db_path, entry))
        )

    print(f"✅ Found {len(collection_folders)} collection folders")
    if not collection_folders:
        print("❌ No collection folders found!")
        return

    # Try to recover each collection
    # ``recovered`` stays 0: nothing below re-registers a collection, the loop
    # only classifies folders. It is kept so the summary line is unchanged.
    recovered = 0
    failed = []
    for uuid_folder in collection_folders:
        # Best-effort per folder: an unexpected error on one folder is recorded
        # as a failure and must not abort the scan of the remaining ones.
        try:
            print(f"\n🔄 Attempting to recover: {uuid_folder}")
            folder_path = os.path.join(chroma_db_path, uuid_folder)

            # Check if folder has collection data files
            has_data = os.path.exists(os.path.join(folder_path, "data_level0.bin"))
            has_index = os.path.exists(os.path.join(folder_path, "header.bin"))

            if has_data and has_index:
                print(" ✅ Data files found")
                print(" ⚠️ Note: Manual recovery requires ChromaDB API access")
                print(" 💡 Suggestion: Try deleting entire chroma_db and re-importing")
            else:
                print(" ❌ Data files missing or corrupted")
                failed.append(uuid_folder)
        except Exception as e:
            print(f" ❌ Error: {e}")
            failed.append(uuid_folder)

    print("\n" + "=" * 50)
    print(f"Summary: {recovered} recovered, {len(failed)} failed")
    if failed:
        print("\n❌ Failed collections:")
        for uuid_folder in failed:
            print(f" - {uuid_folder}")

    print("\n💡 RECOMMENDED SOLUTION:")
    print(" The collection folders exist but ChromaDB's index is lost.")
    print(" Best approach:")
    print(" 1. Backup your chroma_db directory")
    print(" 2. Try the 'backup and restore' approach in docs/CHROMADB_RECOVERY.md")
    print(" 3. OR: Re-create collections using the UI")
# Script entry point: run the diagnostic against the default local store
# when this file is executed directly (not when it is imported).
if __name__ == "__main__":
    recover_collections(chroma_db_path="./chroma_db")