"""Query SQLite to find existing collection names.""" import sqlite3 conn = sqlite3.connect('chroma_db/chroma.sqlite3') cursor = conn.cursor() print("=" * 80) print("šŸ“Š ChromaDB Collection Names Audit") print("=" * 80) # Check segments table schema print("\nšŸ“‹ Segments Table Schema:") try: cursor.execute('PRAGMA table_info(segments)') cols = cursor.fetchall() for col in cols[:5]: # First 5 columns print(f" {col[1]} ({col[2]})") except Exception as e: print(f" Error: {e}") # Check segment_metadata print("\nšŸ“– Segment Metadata (sample):") try: cursor.execute(""" SELECT DISTINCT s.id, s.collection_id, sm.metadata FROM segments s LEFT JOIN segment_metadata sm ON s.id = sm.id LIMIT 4 """) results = cursor.fetchall() for i, (seg_id, coll_id, metadata) in enumerate(results, 1): print(f"\n {i}. Segment ID: {seg_id[:20]}...") if coll_id: print(f" Collection: {coll_id}") if metadata: print(f" Metadata: {metadata[:80]}...") except Exception as e: print(f" Error: {e}") # Check collections table print("\nšŸ“š Collections Table:") try: cursor.execute('SELECT id, name, config_json_str FROM collections') results = cursor.fetchall() for i, (coll_id, name, config) in enumerate(results, 1): print(f"\n {i}. Name: {name}") print(f" ID: {coll_id}") if config: print(f" Config: {config[:100]}...") except Exception as e: print(f" Error: {e}") # Try to find real names by checking what was previously stored print("\nšŸ” Searching for original collection names...") try: cursor.execute(""" SELECT DISTINCT coll_id, COUNT(*) as doc_count FROM embeddings GROUP BY coll_id """) results = cursor.fetchall() print(f"\n Collections with embeddings:") for coll_id, count in results: print(f" {coll_id}: {count} embeddings") except Exception as e: print(f" Could not query embeddings: {e}") conn.close() print("\n" + "=" * 80)