Spaces:
Sleeping
Sleeping
File size: 2,092 Bytes
"""Query SQLite to find existing collection names."""
import sqlite3
# Open the Chroma SQLite file; `conn` and `cursor` are reused by the audit
# queries that follow in this script.
conn = sqlite3.connect('chroma_db/chroma.sqlite3')
cursor = conn.cursor()

# Report banner: title framed by two 80-character rules.
# NOTE(review): the leading "๐" looks like a mis-decoded emoji; confirm the
# intended glyph before changing the string.
rule = "=" * 80
print(rule, "๐ ChromaDB Collection Names Audit", rule, sep="\n")
# Segments table schema: list the name and declared type of the first five
# columns reported by PRAGMA table_info (row[1] is the name, row[2] the type).
# NOTE(review): the leading "๐" in the heading looks like a mis-decoded emoji.
print("\n๐ Segments Table Schema:")
try:
    schema_rows = cursor.execute('PRAGMA table_info(segments)').fetchall()
    for column in schema_rows[:5]:
        print(f" {column[1]} ({column[2]})")
except Exception as exc:
    # Best-effort audit: report the failure and carry on with the next check.
    print(f" Error: {exc}")
# Segment metadata: print up to four segments joined with their metadata row.
# NOTE(review): the query assumes `segments.collection_id`,
# `segment_metadata.id` and `segment_metadata.metadata` exist; confirm against
# the actual schema. A mismatch is reported by the except branch, not raised.
# NOTE(review): the leading "๐" in the heading looks like a mis-decoded emoji.
print("\n๐ Segment Metadata (sample):")
try:
    sample_query = (
        "\n"
        "SELECT DISTINCT s.id, s.collection_id, sm.metadata\n"
        "FROM segments s\n"
        "LEFT JOIN segment_metadata sm ON s.id = sm.id\n"
        "LIMIT 4\n"
    )
    cursor.execute(sample_query)
    sample_rows = cursor.fetchall()
    for position, row in enumerate(sample_rows, start=1):
        segment_id, collection_id, meta = row
        # Only the first 20 characters of the segment id are shown.
        print(f"\n {position}. Segment ID: {segment_id[:20]}...")
        if collection_id:
            print(f" Collection: {collection_id}")
        if meta:
            # Metadata is truncated to 80 characters to keep the report short.
            print(f" Metadata: {meta[:80]}...")
except Exception as exc:
    # Best-effort audit: report the failure and carry on with the next check.
    print(f" Error: {exc}")
# Collections table: print every collection's name, id and, when present,
# the first 100 characters of its JSON config string.
# NOTE(review): the leading "๐" in the heading looks like a mis-decoded emoji.
print("\n๐ Collections Table:")
try:
    collection_rows = cursor.execute(
        'SELECT id, name, config_json_str FROM collections'
    ).fetchall()
    for ordinal, record in enumerate(collection_rows, start=1):
        collection_id, collection_name, config_text = record
        print(f"\n {ordinal}. Name: {collection_name}")
        print(f" ID: {collection_id}")
        if config_text:
            print(f" Config: {config_text[:100]}...")
except Exception as exc:
    # Best-effort audit: report the failure and carry on with the next check.
    print(f" Error: {exc}")
# Embeddings: count rows per `coll_id` to see which collections hold data.
# NOTE(review): assumes the `embeddings` table has a `coll_id` column; confirm
# against the actual schema. A mismatch is reported by the except branch.
# NOTE(review): the leading "๐" in the heading looks like a mis-decoded emoji.
print("\n๐ Searching for original collection names...")
try:
    count_query = (
        "\n"
        "SELECT DISTINCT coll_id, COUNT(*) as doc_count\n"
        "FROM embeddings\n"
        "GROUP BY coll_id\n"
    )
    cursor.execute(count_query)
    grouped_counts = cursor.fetchall()
    # The sub-heading is printed only after the query has succeeded.
    print("\n Collections with embeddings:")
    for row in grouped_counts:
        collection_key, embedding_total = row
        print(f" {collection_key}: {embedding_total} embeddings")
except Exception as exc:
    # Best-effort audit: report the failure instead of aborting the script.
    print(f" Could not query embeddings: {exc}")
# Release the SQLite connection, then end the report with a blank line
# followed by an 80-character rule.
conn.close()
print("", "=" * 80, sep="\n")
|