File size: 1,599 Bytes
d147321
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import sqlite3
import shutil
from pathlib import Path
import sys

# Make the directory two levels above this file importable, so that the
# `config` package imported below can be resolved when this file is run
# directly as a script (presumably `config/` lives at the project root).
project_root = Path(__file__).parent.parent
sys.path.append(str(project_root))

from config.settings import Settings


def cleanup_old_embeddings():
    """Interactively delete UUID-named directories not listed as collections.

    Looks inside the directory returned by ``Settings.get_chroma_path()`` for
    sub-directories whose names look like UUIDs (``*-*-*-*-*``), reads the
    ``id`` column of the ``collections`` table in ``chroma.sqlite3``, and for
    every directory whose name is not one of those IDs asks the user on stdin
    whether to remove it. Only an explicit ``y``/``Y`` answer deletes.

    If ``chroma.sqlite3`` does not exist, a message is printed and nothing is
    deleted.

    Raises:
        sqlite3.Error: If the database cannot be read (for example, the
            ``collections`` table is missing).
        OSError: If a confirmed directory cannot be removed.
    """
    chroma_path = Settings.get_chroma_path()

    # The glob pattern also matches regular files with dashes in their name;
    # keep directories only, since shutil.rmtree() fails on plain files.
    collection_dirs = sorted(
        entry for entry in chroma_path.glob("*-*-*-*-*") if entry.is_dir()
    )

    print("\nFound collection directories:")
    for dir_path in collection_dirs:
        print(f"- {dir_path.name}")

    sqlite_file = chroma_path / "chroma.sqlite3"
    if not sqlite_file.is_file():
        # sqlite3.connect() would silently create an empty database file here
        # and the query below would then fail with "no such table".
        print(f"\nDatabase not found: {sqlite_file}")
        print("Cannot determine active collections; nothing was deleted.")
        return

    # Read the active IDs and release the database before prompting, so the
    # connection is not held open while waiting for user input.
    conn = sqlite3.connect(sqlite_file)
    try:
        cursor = conn.cursor()
        cursor.execute("SELECT id FROM collections")
        active_ids = {row[0] for row in cursor.fetchall()}
    finally:
        conn.close()

    print("\nActive collection IDs:")
    for collection_id in active_ids:
        print(f"- {collection_id}")

    # NOTE(review): this assumes the on-disk directory names equal the
    # collection IDs. Some Chroma versions may name these directories after
    # segment IDs instead -- confirm against the installed version before
    # trusting an "unused" verdict.
    for dir_path in collection_dirs:
        if dir_path.name in active_ids:
            continue

        print(f"\nFound unused collection directory: {dir_path.name}")
        response = input("Delete this directory? (y/N): ")
        if response.strip().lower() == 'y':
            shutil.rmtree(dir_path)
            print(f"Deleted: {dir_path}")
        else:
            print("Skipped deletion")


# Run the interactive cleanup only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    cleanup_old_embeddings()