Spaces:
Sleeping
Sleeping
| """Query SQLite to find existing collection names.""" | |
import sqlite3
from contextlib import closing
from pathlib import Path

# Location of the Chroma SQLite file, relative to the current working directory.
DEFAULT_DB_PATH = 'chroma_db/chroma.sqlite3'

_RULE = "=" * 80


def _print_segments_schema(cursor: sqlite3.Cursor) -> None:
    """Print name and declared type of the first five `segments` columns."""
    print("\nπ Segments Table Schema:")
    try:
        cursor.execute('PRAGMA table_info(segments)')
        # table_info rows are (cid, name, type, notnull, dflt_value, pk).
        for col in cursor.fetchall()[:5]:  # First 5 columns
            print(f" {col[1]} ({col[2]})")
    except Exception as e:  # best-effort diagnostic: report and keep going
        print(f" Error: {e}")


def _print_segment_metadata(cursor: sqlite3.Cursor) -> None:
    """Print up to four segments with their collection id and metadata."""
    print("\nπ Segment Metadata (sample):")
    try:
        # NOTE(review): this assumes `segment_metadata` has `id` and
        # `metadata` columns; confirm against the schema of the Chroma
        # version in use, otherwise this section always reports an error.
        cursor.execute("""
            SELECT DISTINCT s.id, s.collection_id, sm.metadata
            FROM segments s
            LEFT JOIN segment_metadata sm ON s.id = sm.id
            LIMIT 4
        """)
        for i, (seg_id, coll_id, metadata) in enumerate(cursor.fetchall(), 1):
            print(f"\n {i}. Segment ID: {seg_id[:20]}...")
            if coll_id:
                print(f" Collection: {coll_id}")
            if metadata:
                print(f" Metadata: {metadata[:80]}...")
    except Exception as e:  # best-effort diagnostic: report and keep going
        print(f" Error: {e}")


def _print_collections(cursor: sqlite3.Cursor) -> None:
    """Print id, name and (truncated) config of every row in `collections`."""
    print("\nπ Collections Table:")
    try:
        cursor.execute('SELECT id, name, config_json_str FROM collections')
        for i, (coll_id, name, config) in enumerate(cursor.fetchall(), 1):
            print(f"\n {i}. Name: {name}")
            print(f" ID: {coll_id}")
            if config:
                print(f" Config: {config[:100]}...")
    except Exception as e:  # best-effort diagnostic: report and keep going
        print(f" Error: {e}")


def _print_embedding_counts(cursor: sqlite3.Cursor) -> None:
    """Print the number of `embeddings` rows per `coll_id`."""
    # Try to find real names by checking what was previously stored
    print("\nπ Searching for original collection names...")
    try:
        # NOTE(review): this assumes `embeddings` has a `coll_id` column;
        # confirm against the schema of the Chroma version in use.
        cursor.execute("""
            SELECT DISTINCT coll_id, COUNT(*) as doc_count
            FROM embeddings
            GROUP BY coll_id
        """)
        results = cursor.fetchall()
        # Header is printed only once the query has succeeded.
        print("\n Collections with embeddings:")
        for coll_id, count in results:
            print(f" {coll_id}: {count} embeddings")
    except Exception as e:  # best-effort diagnostic: report and keep going
        print(f" Could not query embeddings: {e}")


def main(db_path: str = DEFAULT_DB_PATH) -> None:
    """Print an audit of the collection-related tables in a Chroma SQLite file.

    Each section (segments schema, segment metadata sample, collections,
    per-collection embedding counts) is queried independently; a failure in
    one section is printed and does not stop the others.

    Args:
        db_path: Path to the SQLite database file to inspect.
    """
    print(_RULE)
    print("π ChromaDB Collection Names Audit")
    print(_RULE)

    if not Path(db_path).is_file():
        # sqlite3.connect() would create an empty database at a missing path,
        # leaving a stray file behind; an audit must not do that.
        print(f"\n Error: database file not found: {db_path}")
    else:
        # closing() releases the cursor and connection even if something
        # escapes the per-section handlers (e.g. KeyboardInterrupt).
        with closing(sqlite3.connect(db_path)) as conn, \
                closing(conn.cursor()) as cursor:
            _print_segments_schema(cursor)
            _print_segment_metadata(cursor)
            _print_collections(cursor)
            _print_embedding_counts(cursor)

    print("\n" + _RULE)


if __name__ == "__main__":
    main()