File size: 2,092 Bytes
1d10b0a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
"""Query SQLite to find existing collection names."""
import sqlite3

# Open the database read-only. This script only inspects the store, and a
# plain sqlite3.connect(path) creates a new, empty database file when none
# exists at that path. With mode=ro a missing file raises
# sqlite3.OperationalError instead.
conn = sqlite3.connect('file:chroma_db/chroma.sqlite3?mode=ro', uri=True)
cursor = conn.cursor()

# Report banner.
print("=" * 80)
print("📊 ChromaDB Collection Names Audit")
print("=" * 80)

# Check segments table schema
print("\n📋 Segments Table Schema:")
try:
    # Only the database calls are guarded; a failure here is reported and the
    # script moves on to the next section.
    cursor.execute('PRAGMA table_info(segments)')
    cols = cursor.fetchall()
except sqlite3.Error as e:
    print(f"  Error: {e}")
else:
    if not cols:
        # PRAGMA table_info yields no rows (and no error) for an unknown
        # table, so say so explicitly instead of printing nothing.
        print("  (no columns reported - table may not exist)")
    # Each table_info row is (cid, name, type, notnull, dflt_value, pk).
    for col in cols[:5]:  # First 5 columns
        print(f"  {col[1]} ({col[2]})")

# Check segment_metadata
# NOTE(review): the query assumes segments.collection_id and
# segment_metadata.id / segment_metadata.metadata exist - confirm against the
# real schema; if they do not, the section reports the SQLite error.
print("\n📖 Segment Metadata (sample):")
try:
    cursor.execute("""
        SELECT DISTINCT s.id, s.collection_id, sm.metadata
        FROM segments s
        LEFT JOIN segment_metadata sm ON s.id = sm.id
        LIMIT 4
    """)
    results = cursor.fetchall()
except sqlite3.Error as e:
    print(f"  Error: {e}")
else:
    for i, (seg_id, coll_id, metadata) in enumerate(results, 1):
        # str() keeps the preview working for non-text values (SQLite columns
        # are dynamically typed), so one odd row cannot abort the listing.
        print(f"\n  {i}. Segment ID: {str(seg_id)[:20]}...")
        if coll_id:
            print(f"     Collection: {coll_id}")
        # The LEFT JOIN leaves metadata as NULL/None for unmatched segments.
        if metadata:
            print(f"     Metadata: {str(metadata)[:80]}...")

# Check collections table
print("\n📚 Collections Table:")
try:
    cursor.execute('SELECT id, name, config_json_str FROM collections')
    results = cursor.fetchall()
except sqlite3.Error as e:
    print(f"  Error: {e}")
else:
    if not results:
        # Make an empty table visible rather than printing nothing.
        print("  (no collections found)")
    for i, (coll_id, name, config) in enumerate(results, 1):
        print(f"\n  {i}. Name: {name}")
        print(f"     ID: {coll_id}")
        if config:
            # Show only a 100-character preview; str() guards against a
            # non-text value in the column.
            print(f"     Config: {str(config)[:100]}...")

# Try to find real names by checking what was previously stored
# NOTE(review): assumes the embeddings table has a coll_id column - confirm
# against the real schema; if it does not, the SQLite error is reported below.
print("\n🔍 Searching for original collection names...")
try:
    # GROUP BY already yields exactly one row per coll_id, so no DISTINCT.
    cursor.execute("""
        SELECT coll_id, COUNT(*) as doc_count
        FROM embeddings
        GROUP BY coll_id
    """)
    results = cursor.fetchall()
except sqlite3.Error as e:
    print(f"  Could not query embeddings: {e}")
else:
    print("\n  Collections with embeddings:")
    for coll_id, count in results:
        print(f"    {coll_id}: {count} embeddings")

# Release the database handle, then print the closing rule of the report.
conn.close()
print(f"\n{'=' * 80}")