File size: 1,599 Bytes
import sqlite3
import shutil
from pathlib import Path
import sys
# Add the project root to the import path. The root is computed as the parent
# of the directory that contains this script; presumably this is what lets the
# `config` package imported below resolve when the script is run directly.
project_root = Path(__file__).parent.parent
sys.path.append(str(project_root))
from config.settings import Settings
def cleanup_old_embeddings():
    """Interactively delete embedding directories no active collection uses.

    Lists every UUID-shaped directory under the path returned by
    ``Settings.get_chroma_path()``, reads the ids stored in the
    ``collections`` table of ``chroma.sqlite3`` in that same path, and for
    each directory whose name is not one of those ids asks on stdin whether
    to delete it. Only an answer of ``y`` (case-insensitive) deletes the
    directory; any other answer skips it.

    NOTE(review): this treats directory names as collection ids. Some Chroma
    versions appear to name these directories after vector *segment* ids
    instead -- confirm the on-disk layout of the installed version before
    trusting the "unused" verdict, since a mismatch would flag live data.

    Returns:
        None. Progress is reported on stdout.

    Raises:
        sqlite3.Error: If the database cannot be queried (for example, the
            ``collections`` table does not exist).
        OSError: If a directory confirmed for deletion cannot be removed.
    """
    chroma_path = Settings.get_chroma_path()

    # The glob pattern matches plain files as well; keep directories only,
    # because shutil.rmtree() raises on a regular file.
    collection_dirs = [
        path for path in chroma_path.glob("*-*-*-*-*") if path.is_dir()
    ]
    print("\nFound collection directories:")
    for dir_path in collection_dirs:
        print(f"- {dir_path.name}")

    sqlite_file = chroma_path / "chroma.sqlite3"
    if not sqlite_file.is_file():
        # sqlite3.connect() would silently create an empty database file
        # here and then fail on the query, leaving a stray file behind.
        print(f"\nDatabase not found: {sqlite_file}")
        return

    # Read the active ids up front and release the connection before any
    # interactive prompt, so the database is not held open while waiting
    # for user input.
    conn = sqlite3.connect(sqlite_file)
    try:
        cursor = conn.execute("SELECT id FROM collections")
        active_ids = {row[0] for row in cursor.fetchall()}
    finally:
        conn.close()

    print("\nActive collection IDs:")
    for collection_id in active_ids:
        print(f"- {collection_id}")

    # Offer every directory that matches no active collection for deletion.
    for dir_path in collection_dirs:
        if dir_path.name in active_ids:
            continue
        print(f"\nFound unused collection directory: {dir_path.name}")
        response = input("Delete this directory? (y/N): ")
        # Tolerate surrounding whitespace; anything but "y" keeps the data.
        if response.strip().lower() == "y":
            shutil.rmtree(dir_path)
            print(f"Deleted: {dir_path}")
        else:
            print("Skipped deletion")
# Run the interactive cleanup only when executed as a script, not on import.
if __name__ == "__main__":
    cleanup_old_embeddings()