Spaces:
Running
Running
Asish Karthikeya Gogineni committed on
Commit ·
3508757
1
Parent(s): fe03c86
fix: Add ChromaDB auto-recovery from corruption
Browse files
- code_chatbot/indexer.py +30 -6
- code_chatbot/indexing_progress.py +4 -1
code_chatbot/indexer.py
CHANGED
|
@@ -16,21 +16,45 @@ logger = logging.getLogger(__name__)
|
|
| 16 |
# Global ChromaDB client cache to avoid "different settings" error
|
| 17 |
_chroma_clients = {}
|
| 18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
def get_chroma_client(persist_directory: str):
|
| 20 |
"""Get or create a shared ChromaDB client for a given path."""
|
| 21 |
global _chroma_clients
|
| 22 |
|
|
|
|
|
|
|
|
|
|
| 23 |
if persist_directory not in _chroma_clients:
|
| 24 |
import chromadb
|
| 25 |
from chromadb.config import Settings
|
| 26 |
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
)
|
| 33 |
-
)
|
| 34 |
|
| 35 |
return _chroma_clients[persist_directory]
|
| 36 |
|
|
|
|
| 16 |
# Global ChromaDB client cache to avoid "different settings" error
|
| 17 |
_chroma_clients = {}
|
| 18 |
|
| 19 |
+
def reset_chroma_clients():
    """Drop every cached ChromaDB client.

    Call this when database corruption is detected so that the next
    ``get_chroma_client`` call builds a fresh client instead of reusing a
    cached one.

    The cache dict is emptied in place rather than rebound to a new dict.
    Rebinding would leave any code that already holds a reference to the
    cache (for example via ``from ... import _chroma_clients``) looking at
    the old, still-populated dict.
    """
    _chroma_clients.clear()
    logger.info("Reset ChromaDB client cache")
|
| 24 |
+
|
| 25 |
def get_chroma_client(persist_directory: str):
    """Get or create a shared ChromaDB client for a given path.

    Clients are cached per ``persist_directory`` in the module-level
    ``_chroma_clients`` dict so that repeated calls for the same path return
    the same client object.

    If the first attempt to open the store raises, the store is treated as
    unrecoverable: the whole ``persist_directory`` is deleted, recreated
    empty, and the client is built once more. Any data previously persisted
    there is lost, which is why that path is logged as a warning.

    Args:
        persist_directory: Filesystem path of the ChromaDB store. It is
            created if it does not exist.

    Returns:
        The cached or newly created ``chromadb.PersistentClient``.

    Raises:
        Exception: Whatever the second construction attempt raises if
            recovery also fails, chained to the original failure. Errors
            from deleting or recreating the directory propagate as well.
    """
    # Ensure directory exists (done on every call, including cache hits).
    os.makedirs(persist_directory, exist_ok=True)

    if persist_directory in _chroma_clients:
        return _chroma_clients[persist_directory]

    import shutil

    import chromadb
    from chromadb.config import Settings

    def _build_client():
        # Single place that defines the client settings, so the first
        # attempt and the recovery attempt can never drift apart.
        return chromadb.PersistentClient(
            path=persist_directory,
            settings=Settings(
                anonymized_telemetry=False,
                allow_reset=True,
            ),
        )

    try:
        client = _build_client()
    except Exception as first_error:
        # logger.exception keeps the traceback of the original failure.
        logger.exception("Failed to create ChromaDB client: %s", first_error)
        logger.warning(
            "Deleting ChromaDB directory %s and recreating it empty",
            persist_directory,
        )
        # Try to reset and create fresh.
        if os.path.exists(persist_directory):
            shutil.rmtree(persist_directory)
        os.makedirs(persist_directory, exist_ok=True)
        try:
            client = _build_client()
        except Exception as retry_error:
            # Same exception object as before; only the cause link is added.
            raise retry_error from first_error

    _chroma_clients[persist_directory] = client
    return client
|
| 60 |
|
code_chatbot/indexing_progress.py
CHANGED
|
@@ -145,7 +145,10 @@ def index_with_progress(
|
|
| 145 |
progress_bar.progress(1.0)
|
| 146 |
|
| 147 |
else: # Chroma
|
| 148 |
-
from code_chatbot.indexer import get_chroma_client
|
|
|
|
|
|
|
|
|
|
| 149 |
chroma_client = get_chroma_client(indexer.persist_directory)
|
| 150 |
|
| 151 |
vectordb = Chroma(
|
|
|
|
| 145 |
progress_bar.progress(1.0)
|
| 146 |
|
| 147 |
else: # Chroma
|
| 148 |
+
from code_chatbot.indexer import get_chroma_client, reset_chroma_clients
|
| 149 |
+
|
| 150 |
+
# Reset client cache to avoid stale/corrupt connections
|
| 151 |
+
reset_chroma_clients()
|
| 152 |
chroma_client = get_chroma_client(indexer.persist_directory)
|
| 153 |
|
| 154 |
vectordb = Chroma(
|