Spaces:
Running
Running
| """Reset ChromaDB SQLite index while preserving all collection data.""" | |
| import chromadb | |
| from chromadb.config import Settings | |
| import os | |
| import sys | |
def _is_uuid_name(name):
    """Return True if *name* is a canonically formatted UUID string.

    "Canonical" means the 8-4-4-4-12 hexadecimal form; braces, URN prefixes
    and misplaced hyphens are rejected even though ``uuid.UUID`` parses them.
    """
    import uuid  # local import so this block does not depend on file-level imports

    try:
        return str(uuid.UUID(name)) == name.lower()
    except ValueError:
        return False


def _find_collection_folders(chroma_path):
    """Return the sorted names of UUID-named subdirectories of *chroma_path*.

    Raises:
        OSError: if *chroma_path* cannot be listed.
    """
    return sorted(
        entry
        for entry in os.listdir(chroma_path)
        if _is_uuid_name(entry)
        and os.path.isdir(os.path.join(chroma_path, entry))
    )


def reset_sqlite_index(chroma_path="./chroma_db"):
    """Inspect a ChromaDB directory and report whether its collections are indexed.

    Steps performed (progress is printed to stdout):

    1. Scan ``chroma_path`` for UUID-named collection folders.
    2. Report whether ``chroma.sqlite3`` exists and how large it is.
    3. Open a ``chromadb.PersistentClient`` on the directory.
    4. List the collections the client knows about and print their counts.

    NOTE(review): this function never calls an explicit rebuild/reset API; it
    only opens a client. Whether that re-indexes orphaned folders depends on
    ChromaDB itself -- confirm against the ChromaDB version in use.

    Args:
        chroma_path: Directory holding the ChromaDB data. Defaults to
            ``"./chroma_db"``, the location the script originally hard-coded.

    Returns:
        True if the client lists at least one collection, or if there are
        neither collections nor collection folders. False if the directory is
        missing or unreadable, the client cannot be created, collections
        cannot be read, or folders exist on disk while the client lists no
        collections.
    """
    # Below this size the SQLite file is assumed to hold no real index data.
    small_sqlite_bytes = 100000

    print("\n" + "=" * 70)
    print("π§ ChromaDB SQLite Index Reset Tool")
    print("=" * 70)

    # Step 1: Verify collection folders exist
    print("\nπ Step 1: Scanning for collection folders...")
    print("-" * 70)
    if not os.path.isdir(chroma_path):
        print(f"β ERROR: {chroma_path} directory not found!")
        return False

    try:
        uuid_folders = _find_collection_folders(chroma_path)
    except OSError as e:
        print(f"β Error scanning directory: {e}")
        return False

    print(f"β Found {len(uuid_folders)} collection folder(s)")
    if uuid_folders:
        for i, folder in enumerate(uuid_folders, 1):
            print(f" {i}. {folder}")
            try:
                files = os.listdir(os.path.join(chroma_path, folder))
            except OSError as e:
                # One unreadable folder should not abort the whole report.
                print(f" Files: <unreadable: {e}>")
                continue
            print(f" Files: {', '.join(files)}")
    else:
        print("β οΈ WARNING: No collection folders found!")

    # Step 2: Check SQLite status
    print("\nπ Step 2: Checking SQLite status...")
    print("-" * 70)
    sqlite_path = os.path.join(chroma_path, "chroma.sqlite3")
    if os.path.exists(sqlite_path):
        sqlite_size = os.path.getsize(sqlite_path)
        print(f"β chroma.sqlite3 exists (size: {sqlite_size:,} bytes)")
        if sqlite_size < small_sqlite_bytes:
            print("β οΈ SQLite file is very small (likely empty/corrupted)")
    else:
        print("β chroma.sqlite3 does not exist (will be created)")

    # Step 3: Reset by creating new client
    print("\nπ Step 3: Rebuilding SQLite index...")
    print("-" * 70)
    try:
        print("Creating fresh ChromaDB PersistentClient...")
        client = chromadb.PersistentClient(
            path=chroma_path,
            settings=Settings(
                anonymized_telemetry=False,
                allow_reset=True,
            ),
        )
        print("β Client created successfully")
    except Exception as e:  # library boundary: exact exception types are unknown
        print(f"β ERROR creating client: {e}")
        return False

    # Step 4: Verify collections are indexed
    print("\nπ Step 4: Verifying collection index...")
    print("-" * 70)
    try:
        collections = client.list_collections()
        print(f"β ChromaDB found {len(collections)} collection(s)")
        if collections:
            print("\nIndexed Collections:")
            for i, collection in enumerate(collections, 1):
                # Some ChromaDB releases return bare collection names here
                # instead of Collection objects; resolve names to objects.
                if isinstance(collection, str):
                    collection = client.get_collection(collection)
                doc_count = collection.count()
                metadata = collection.metadata
                print(f"\n {i}. {collection.name}")
                print(f" Document count: {doc_count}")
                print(f" Metadata: {metadata}")
            print("\nβ SUCCESS! All collections are properly indexed!")
            return True

        if uuid_folders:
            # Collection folders exist but the client does not list them.
            print("β οΈ WARNING: Collection folders exist but not indexed in SQLite")
            print("\nThis can happen if:")
            print(" - SQLite file was deleted and recreated without scanning folders")
            print(" - Collection data is corrupted")
            print(" - Permission issues prevent reading collection folders")
            print("\nπ Troubleshooting steps:")
            print(" 1. Check file permissions in chroma_db directory")
            print(" 2. Try deleting chroma.sqlite3 and restarting application")
            print(" 3. See docs/CHROMADB_RECOVERY.md for more options")
            return False

        print("β No collections currently indexed (database is clean)")
        return True
    except Exception as e:  # library boundary: exact exception types are unknown
        print(f"β ERROR reading collections: {e}")
        return False
def main():
    """Run the index check, print a summary, and exit the process.

    Exits with status 0 when ``reset_sqlite_index()`` returns a truthy value
    and 1 otherwise. A Ctrl-C or any unexpected exception also exits with 1;
    for unexpected exceptions the traceback is printed first.
    """
    rule = "=" * 70
    try:
        ok = reset_sqlite_index()
        print("\n" + rule)

        if ok:
            summary = (
                "β RESET COMPLETE - Collections are properly indexed!",
                "\nπ Next steps:",
                " 1. Start Streamlit: streamlit run streamlit_app.py",
                " 2. Check 'Existing Collections' dropdown",
                " 3. Load a collection and verify it works",
            )
        else:
            summary = (
                "β RESET INCOMPLETE - See messages above for details",
                "\nπ Next steps:",
                " 1. Review error messages above",
                " 2. Check docs/CHROMADB_RECOVERY.md for solutions",
                " 3. Contact support if issues persist",
            )
        for line in summary:
            print(line)

        print(rule + "\n")
        # SystemExit is not an Exception subclass, so the handlers below
        # do not intercept this exit.
        sys.exit(0 if ok else 1)
    except KeyboardInterrupt:
        print("\n\nβ οΈ Script interrupted by user")
        sys.exit(1)
    except Exception as err:
        print(f"\nβ FATAL ERROR: {err}")
        import traceback

        traceback.print_exc()
        sys.exit(1)
# Run the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()