# CapStoneRAG10/archived_scripts/cleanup_chroma.py
# Developer
# Initial commit for HuggingFace Spaces - RAG Capstone Project with Qdrant Cloud
# 1d10b0a
#!/usr/bin/env python3
"""Script to clean up ChromaDB collections and cache."""
import shutil
import os
from pathlib import Path
def _confirm(prompt: str) -> bool:
    """Return True only when the user explicitly answers yes to *prompt*.

    Surrounding whitespace and letter case are ignored, and both "y" and
    "yes" count as consent. When stdin is at end-of-file (piped, redirected
    or otherwise non-interactive runs) ``input`` raises ``EOFError``; that is
    treated as "no" so an unattended run never deletes the optional caches.
    """
    try:
        answer = input(prompt)
    except EOFError:
        print()  # finish the prompt line that input() left open
        return False
    return answer.strip().lower() in ("y", "yes")


def _remove_tree(path: Path, announcement: str, label: str) -> bool:
    """Recursively delete *path*, reporting progress and failures on stdout.

    Args:
        path: Directory to remove.
        announcement: Text printed (followed by the path) before deleting.
        label: Noun used in the success and failure messages.

    Returns:
        True if the tree was removed, False if ``shutil.rmtree`` failed.
    """
    print(f"{announcement}: {path}")
    try:
        shutil.rmtree(path)
    except OSError as e:
        # Best effort: report the failure and let the remaining steps run.
        print(f"❌ Error deleting {label}: {e}")
        return False
    print(f"✅ Deleted {label}: {path}")
    return True


def _clear_collections_via_manager(persist_directory: str) -> None:
    """Best-effort removal of collections through the project's ChromaDBManager.

    Any failure (``vector_store`` not importable, error raised by the
    manager, ...) is reported and swallowed rather than propagated.
    """
    print("\n📋 Attempting to connect to ChromaDB...")
    try:
        # Imported inside the try so an import failure is reported like any
        # other manager error instead of aborting the whole script.
        from vector_store import ChromaDBManager

        manager = ChromaDBManager(persist_directory=persist_directory)

        collections = manager.list_collections()
        print(f"📊 Found {len(collections)} collection(s):")
        for col in collections:
            print(f" - {col}")

        if collections:
            print("\n🗑️ Clearing all collections...")
            deleted = manager.clear_all_collections()
            print(f"✅ Deleted {deleted} collection(s)")
        else:
            print("\n✅ No collections to delete")
    except Exception as e:
        print(f"⚠️ Could not connect to ChromaDB via manager: {e}")
        print("ℹ️ This is OK - the directory has been deleted already.")


def cleanup_chroma_db() -> None:
    """Clean up ChromaDB collections and cache.

    Steps, in order:

    1. Delete ``./chroma_db`` and ``./.chroma`` in the current working
       directory if they exist.
    2. Ask whether to delete ``~/.cache/huggingface/datasets`` and
       ``~/.chroma``; each is removed only on an explicit yes.
    3. Try to clear any remaining collections through ``ChromaDBManager``.

    Every step is best effort: failures are printed and the function carries
    on. Progress is reported on stdout; nothing is returned.
    """
    banner = "=" * 60
    print(banner)
    print("ChromaDB Cleanup Utility")
    print(banner)

    # Primary on-disk store.
    chroma_path = Path("./chroma_db")
    if chroma_path.exists():
        _remove_tree(chroma_path, "\n🗑️ Removing chroma_db directory", "directory")
    else:
        print(f"\n✅ chroma_db directory not found: {chroma_path}")

    # Alternative store location.
    chroma_alt_path = Path("./.chroma")
    if chroma_alt_path.exists():
        _remove_tree(chroma_alt_path, "\n🗑️ Removing .chroma directory", "directory")

    # Optional: HuggingFace dataset cache in the user's home directory.
    if _confirm("\n🗑️ Clear HuggingFace dataset cache? (y/n): "):
        cache_path = Path.home() / ".cache" / "huggingface" / "datasets"
        if cache_path.exists():
            _remove_tree(cache_path, "🗑️ Removing HF cache", "HF cache")
        else:
            print("ℹ️ HuggingFace cache not found")

    # Optional: ChromaDB cache in the user's home directory.
    if _confirm("\n🗑️ Clear ChromaDB chroma cache? (y/n): "):
        chroma_cache = Path.home() / ".chroma"
        if chroma_cache.exists():
            _remove_tree(chroma_cache, "🗑️ Removing ChromaDB cache", "ChromaDB cache")

    # NOTE(review): this runs after ./chroma_db has been removed. If
    # ChromaDBManager creates its persist directory on construction, an empty
    # store is re-created here - confirm, and consider running this first.
    _clear_collections_via_manager("./chroma_db")

    print("\n" + banner)
    print("✅ Cleanup completed! You can now start fresh.")
    print(banner)
# Run the cleanup only when this file is executed as a script, not on import.
if __name__ == "__main__":
    cleanup_chroma_db()