Spaces:
Sleeping
Sleeping
| """Recovery script to restore ChromaDB collections after sqlite3 deletion.""" | |
| import chromadb | |
| from chromadb.config import Settings | |
| import os | |
| import json | |
| from pathlib import Path | |
def recover_collections(chroma_db_path: str = "./chroma_db"):
    """Inspect a ChromaDB directory for orphaned collection folders and report.

    Intended for the situation where ``chroma.sqlite3`` was deleted and the
    collections no longer show up. The function:

    1. opens a ``PersistentClient`` on ``chroma_db_path``;
    2. lists the sub-directories whose names look like UUIDs
       (36 characters drawn from ``a-f``, ``0-9`` and ``-``);
    3. checks each one for ``data_level0.bin`` and ``header.bin``;
    4. prints a per-folder diagnosis, a summary and recovery advice.

    It does not re-register anything: nothing in this function increments
    the ``recovered`` counter, so the summary always reports 0 recovered.
    Folders that contain both files are reported but counted as neither
    recovered nor failed.

    Args:
        chroma_db_path: Path of the ChromaDB persistence directory to inspect.

    Returns:
        None. All results are written to stdout.
    """
    # ``re`` is not among the file-level imports, so import it once here
    # rather than on every iteration of the directory scan.
    import re

    print("π§ ChromaDB Collection Recovery Tool")
    print("=" * 50)

    # Initialize client.
    # NOTE(review): the client is never used below. Constructing it
    # presumably (re)creates the persistence directory / metadata store as a
    # side effect, so it is kept as-is -- confirm before removing.
    client = chromadb.PersistentClient(
        path=chroma_db_path,
        settings=Settings(anonymized_telemetry=False, allow_reset=True),
    )

    # Scan for collection UUID folders.
    print(f"\nπ Scanning {chroma_db_path} for collections...")
    # Compiled once; used with fullmatch() so the whole name must match
    # (a trailing newline can no longer slip past a ``$`` anchor).
    uuid_pattern = re.compile(r"[a-f0-9\-]{36}")

    collection_folders = []
    # isdir() instead of exists(): listing a path that exists but is not a
    # directory would raise NotADirectoryError.
    if os.path.isdir(chroma_db_path):
        # Sorted so the report order is deterministic across runs/platforms.
        collection_folders = sorted(
            entry
            for entry in os.listdir(chroma_db_path)
            if uuid_pattern.fullmatch(entry)
            and os.path.isdir(os.path.join(chroma_db_path, entry))
        )

    print(f"β Found {len(collection_folders)} collection folders")
    if not collection_folders:
        print("β No collection folders found!")
        return

    # Try to recover each collection (diagnosis only -- see docstring).
    recovered = 0
    failed = []
    for uuid_folder in collection_folders:
        # Best-effort per folder: an unexpected error on one folder is
        # reported and recorded, and the scan continues with the next one.
        try:
            print(f"\nπ Attempting to recover: {uuid_folder}")
            folder_path = os.path.join(chroma_db_path, uuid_folder)

            # Check if folder has collection data files.
            has_data = os.path.exists(os.path.join(folder_path, "data_level0.bin"))
            has_index = os.path.exists(os.path.join(folder_path, "header.bin"))

            if has_data and has_index:
                print(" β Data files found")
                print(" β οΈ Note: Manual recovery requires ChromaDB API access")
                print(" π‘ Suggestion: Try deleting entire chroma_db and re-importing")
            else:
                print(" β Data files missing or corrupted")
                failed.append(uuid_folder)
        except Exception as e:
            print(f" β Error: {e}")
            failed.append(uuid_folder)

    print("\n" + "=" * 50)
    print(f"Summary: {recovered} recovered, {len(failed)} failed")

    if failed:
        print("\nβ Failed collections:")
        for uuid_folder in failed:
            print(f" - {uuid_folder}")

    print("\nπ‘ RECOMMENDED SOLUTION:")
    print(" The collection folders exist but ChromaDB's index is lost.")
    print(" Best approach:")
    print(" 1. Backup your chroma_db directory")
    print(" 2. Try the 'backup and restore' approach in docs/CHROMADB_RECOVERY.md")
    print(" 3. OR: Re-create collections using the UI")
if __name__ == "__main__":
    # Script entry point: inspect the default ./chroma_db directory.
    recover_collections(chroma_db_path="./chroma_db")