"""Reset ChromaDB SQLite index while preserving all collection data.""" import chromadb from chromadb.config import Settings import os import sys def reset_sqlite_index(): """Reset SQLite index while preserving collection data. This script: 1. Detects existing collection UUID folders 2. Forces ChromaDB to rebuild the sqlite3 index 3. Verifies collections are properly indexed 4. Reports status """ print("\n" + "=" * 70) print("šŸ”§ ChromaDB SQLite Index Reset Tool") print("=" * 70) chroma_path = "./chroma_db" # Step 1: Verify collection folders exist print("\nšŸ“ Step 1: Scanning for collection folders...") print("-" * 70) if not os.path.exists(chroma_path): print(f"āŒ ERROR: {chroma_path} directory not found!") return False # Find all UUID folders uuid_folders = [] try: for item in os.listdir(chroma_path): item_path = os.path.join(chroma_path, item) # Check if it's a directory and matches UUID pattern (36 chars, 4 hyphens) if os.path.isdir(item_path) and len(item) == 36 and item.count('-') == 4: uuid_folders.append(item) except Exception as e: print(f"āŒ Error scanning directory: {e}") return False print(f"āœ… Found {len(uuid_folders)} collection folder(s)") if len(uuid_folders) > 0: for i, folder in enumerate(uuid_folders, 1): folder_path = os.path.join(chroma_path, folder) # Check what files are in the collection files = os.listdir(folder_path) print(f" {i}. {folder}") print(f" Files: {', '.join(files)}") else: print("āš ļø WARNING: No collection folders found!") # Step 2: Check SQLite status print("\nšŸ“Š Step 2: Checking SQLite status...") print("-" * 70) sqlite_path = os.path.join(chroma_path, "chroma.sqlite3") if os.path.exists(sqlite_path): sqlite_size = os.path.getsize(sqlite_path) print(f"āœ… chroma.sqlite3 exists (size: {sqlite_size:,} bytes)") if sqlite_size < 100000: # Less than 100KB is likely empty print("āš ļø SQLite file is very small (likely empty/corrupted)") else: print("āœ… chroma.sqlite3 does not exist (will be created)") # Step 3: Reset by creating new client print("\nšŸ”„ Step 3: Rebuilding SQLite index...") print("-" * 70) try: print("Creating fresh ChromaDB PersistentClient...") client = chromadb.PersistentClient( path=chroma_path, settings=Settings( anonymized_telemetry=False, allow_reset=True ) ) print("āœ… Client created successfully") except Exception as e: print(f"āŒ ERROR creating client: {e}") return False # Step 4: Verify collections are indexed print("\nšŸ” Step 4: Verifying collection index...") print("-" * 70) try: collections = client.list_collections() print(f"āœ… ChromaDB found {len(collections)} collection(s)") if len(collections) > 0: print("\nIndexed Collections:") for i, collection in enumerate(collections, 1): doc_count = collection.count() metadata = collection.metadata print(f"\n {i}. {collection.name}") print(f" Document count: {doc_count}") print(f" Metadata: {metadata}") print("\nāœ… SUCCESS! All collections are properly indexed!") return True elif len(uuid_folders) > 0: # Collections folders exist but not indexed print("āš ļø WARNING: Collection folders exist but not indexed in SQLite") print("\nThis can happen if:") print(" - SQLite file was deleted and recreated without scanning folders") print(" - Collection data is corrupted") print(" - Permission issues prevent reading collection folders") print("\nšŸ“‹ Troubleshooting steps:") print(" 1. Check file permissions in chroma_db directory") print(" 2. Try deleting chroma.sqlite3 and restarting application") print(" 3. See docs/CHROMADB_RECOVERY.md for more options") return False else: print("āœ… No collections currently indexed (database is clean)") return True except Exception as e: print(f"āŒ ERROR reading collections: {e}") return False def main(): """Main entry point.""" try: success = reset_sqlite_index() print("\n" + "=" * 70) if success: print("āœ… RESET COMPLETE - Collections are properly indexed!") print("\nšŸ“ Next steps:") print(" 1. Start Streamlit: streamlit run streamlit_app.py") print(" 2. Check 'Existing Collections' dropdown") print(" 3. Load a collection and verify it works") exit_code = 0 else: print("āŒ RESET INCOMPLETE - See messages above for details") print("\nšŸ“ Next steps:") print(" 1. Review error messages above") print(" 2. Check docs/CHROMADB_RECOVERY.md for solutions") print(" 3. Contact support if issues persist") exit_code = 1 print("=" * 70 + "\n") sys.exit(exit_code) except KeyboardInterrupt: print("\n\nāš ļø Script interrupted by user") sys.exit(1) except Exception as e: print(f"\nāŒ FATAL ERROR: {e}") import traceback traceback.print_exc() sys.exit(1) if __name__ == "__main__": main()