Spaces:
Sleeping
Sleeping
File size: 5,930 Bytes
1d10b0a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 |
"""Reset ChromaDB SQLite index while preserving all collection data."""
import chromadb
from chromadb.config import Settings
import os
import sys
def _find_collection_folders(chroma_path):
    """Return the sorted names of UUID-looking sub-directories of *chroma_path*.

    A name qualifies when it is 36 characters long and contains exactly four
    hyphens (the shape of a canonical UUID string) and the entry is a directory.

    Raises:
        OSError: if *chroma_path* cannot be listed.
    """
    return sorted(
        entry
        for entry in os.listdir(chroma_path)
        if len(entry) == 36
        and entry.count("-") == 4
        and os.path.isdir(os.path.join(chroma_path, entry))
    )


def reset_sqlite_index(chroma_path="./chroma_db"):
    """Inspect a ChromaDB directory and report whether its collections are indexed.

    The function prints a four-step report:

    1. Lists the UUID-named sub-folders found in ``chroma_path``.
    2. Reports whether ``chroma.sqlite3`` exists and how large it is.
    3. Opens a ``chromadb.PersistentClient`` on the directory.
    4. Lists the collections the client reports, with document counts.

    NOTE(review): despite the name, no explicit reset/rebuild call is made
    here (``allow_reset=True`` is set but ``client.reset()`` is never called);
    the function only opens a client and reads back what it reports.

    Args:
        chroma_path: Directory holding the ChromaDB data. Defaults to
            ``"./chroma_db"``, the location the script has always used.

    Returns:
        True when the client lists at least one collection, or when it lists
        none and no UUID folders exist on disk. False when the directory is
        missing or unreadable, the client cannot be created, collections
        cannot be read, or UUID folders exist but no collection is listed.
    """
    # Below this size the SQLite file is reported as probably empty.
    small_sqlite_bytes = 100000
    chroma_path = os.fspath(chroma_path)

    print("\n" + "=" * 70)
    print("🔧 ChromaDB SQLite Index Reset Tool")
    print("=" * 70)

    # Step 1: Verify collection folders exist
    print("\n📁 Step 1: Scanning for collection folders...")
    print("-" * 70)

    if not os.path.exists(chroma_path):
        print(f"❌ ERROR: {chroma_path} directory not found!")
        return False

    try:
        uuid_folders = _find_collection_folders(chroma_path)
    except OSError as e:
        print(f"❌ Error scanning directory: {e}")
        return False

    print(f"✅ Found {len(uuid_folders)} collection folder(s)")

    if uuid_folders:
        for i, folder in enumerate(uuid_folders, 1):
            print(f" {i}. {folder}")
            # An unreadable folder should not abort the whole report.
            try:
                files = os.listdir(os.path.join(chroma_path, folder))
            except OSError as e:
                print(f" Files: <unreadable: {e}>")
            else:
                print(f" Files: {', '.join(files)}")
    else:
        print("⚠️ WARNING: No collection folders found!")

    # Step 2: Check SQLite status
    print("\n📊 Step 2: Checking SQLite status...")
    print("-" * 70)

    sqlite_path = os.path.join(chroma_path, "chroma.sqlite3")
    if os.path.exists(sqlite_path):
        sqlite_size = os.path.getsize(sqlite_path)
        print(f"✅ chroma.sqlite3 exists (size: {sqlite_size:,} bytes)")
        if sqlite_size < small_sqlite_bytes:
            print("⚠️ SQLite file is very small (likely empty/corrupted)")
    else:
        print("✅ chroma.sqlite3 does not exist (will be created)")

    # Step 3: Open a client on the directory
    print("\n🔄 Step 3: Rebuilding SQLite index...")
    print("-" * 70)

    try:
        print("Creating fresh ChromaDB PersistentClient...")
        client = chromadb.PersistentClient(
            path=chroma_path,
            settings=Settings(
                anonymized_telemetry=False,
                allow_reset=True,
            ),
        )
        print("✅ Client created successfully")
    except Exception as e:
        print(f"❌ ERROR creating client: {e}")
        return False

    # Step 4: Verify collections are indexed
    print("\n🔍 Step 4: Verifying collection index...")
    print("-" * 70)

    try:
        collections = client.list_collections()
        print(f"✅ ChromaDB found {len(collections)} collection(s)")

        if collections:
            print("\nIndexed Collections:")
            for i, entry in enumerate(collections, 1):
                # chromadb 0.6.x returns plain collection names here, other
                # releases return Collection objects; resolve names so that
                # .count() / .metadata work in both cases.
                if isinstance(entry, str):
                    collection = client.get_collection(entry)
                else:
                    collection = entry
                doc_count = collection.count()
                metadata = collection.metadata
                print(f"\n {i}. {collection.name}")
                print(f" Document count: {doc_count}")
                print(f" Metadata: {metadata}")
            print("\n✅ SUCCESS! All collections are properly indexed!")
            return True

        if uuid_folders:
            # Collection folders exist on disk but the client lists nothing.
            print("⚠️ WARNING: Collection folders exist but not indexed in SQLite")
            print("\nThis can happen if:")
            print(" - SQLite file was deleted and recreated without scanning folders")
            print(" - Collection data is corrupted")
            print(" - Permission issues prevent reading collection folders")
            print("\n📋 Troubleshooting steps:")
            print(" 1. Check file permissions in chroma_db directory")
            print(" 2. Try deleting chroma.sqlite3 and restarting application")
            print(" 3. See docs/CHROMADB_RECOVERY.md for more options")
            return False

        print("✅ No collections currently indexed (database is clean)")
        return True
    except Exception as e:
        # Boundary around third-party calls whose exception types are not
        # known here; report and signal failure instead of crashing.
        print(f"❌ ERROR reading collections: {e}")
        return False
def main():
    """Run the index check, print a summary banner, and exit.

    Exits with status 0 when ``reset_sqlite_index()`` returns a truthy value
    and 1 otherwise. A keyboard interrupt or any unexpected exception also
    exits with status 1; for unexpected exceptions the traceback is printed.
    """
    try:
        success = reset_sqlite_index()

        print("\n" + "=" * 70)
        if success:
            print("✅ RESET COMPLETE - Collections are properly indexed!")
            print("\n📋 Next steps:")
            print(" 1. Start Streamlit: streamlit run streamlit_app.py")
            print(" 2. Check 'Existing Collections' dropdown")
            print(" 3. Load a collection and verify it works")
            exit_code = 0
        else:
            print("❌ RESET INCOMPLETE - See messages above for details")
            print("\n📋 Next steps:")
            print(" 1. Review error messages above")
            print(" 2. Check docs/CHROMADB_RECOVERY.md for solutions")
            print(" 3. Contact support if issues persist")
            exit_code = 1
        print("=" * 70 + "\n")

        # SystemExit is not an Exception subclass, so the handlers below
        # do not intercept this call.
        sys.exit(exit_code)
    except KeyboardInterrupt:
        print("\n\n⚠️ Script interrupted by user")
        sys.exit(1)
    except Exception as e:
        print(f"\n❌ FATAL ERROR: {e}")
        import traceback

        traceback.print_exc()
        sys.exit(1)
# Run the tool only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|