Spaces:
Sleeping
Sleeping
faiss error
Browse files- app/ml/retriever.py +73 -7
app/ml/retriever.py
CHANGED
|
@@ -288,12 +288,9 @@ def load_retriever() -> CustomRetrieverModel:
|
|
| 288 |
|
| 289 |
# return RETRIEVER_MODEL
|
| 290 |
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
# =============================================================================================
|
| 294 |
# Latest version given by perplexity, should work, if not then use one of the other versions.
|
| 295 |
-
#
|
| 296 |
-
|
| 297 |
def load_faiss_index():
|
| 298 |
"""
|
| 299 |
Load FAISS index + knowledge base from pickle file.
|
|
@@ -321,9 +318,27 @@ def load_faiss_index():
|
|
| 321 |
print(f"Loading FAISS index from {settings.FAISS_INDEX_PATH}...")
|
| 322 |
|
| 323 |
try:
|
| 324 |
-
# Load pickled
|
| 325 |
with open(settings.FAISS_INDEX_PATH, 'rb') as f:
|
| 326 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 327 |
|
| 328 |
print(f"✅ FAISS index loaded: {FAISS_INDEX.ntotal} vectors")
|
| 329 |
print(f"✅ Knowledge base loaded: {len(KB_DATA)} documents")
|
|
@@ -334,11 +349,62 @@ def load_faiss_index():
|
|
| 334 |
raise
|
| 335 |
except Exception as e:
|
| 336 |
print(f"❌ Failed to load FAISS index: {e}")
|
|
|
|
|
|
|
| 337 |
raise
|
| 338 |
|
| 339 |
return FAISS_INDEX, KB_DATA
|
| 340 |
|
| 341 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 342 |
|
| 343 |
# ===========================================================================
|
| 344 |
# This version is used in the code, at least for localhost testing
|
|
|
|
| 288 |
|
| 289 |
# return RETRIEVER_MODEL
|
| 290 |
|
| 291 |
+
# ==================================================================================================
|
|
|
|
|
|
|
| 292 |
# Latest version given by perplexity, should work, if not then use one of the other versions.
|
| 293 |
+
# ==================================================================================================
|
|
|
|
| 294 |
def load_faiss_index():
|
| 295 |
"""
|
| 296 |
Load FAISS index + knowledge base from pickle file.
|
|
|
|
| 318 |
print(f"Loading FAISS index from {settings.FAISS_INDEX_PATH}...")
|
| 319 |
|
| 320 |
try:
|
| 321 |
+
# Load pickled data
|
| 322 |
with open(settings.FAISS_INDEX_PATH, 'rb') as f:
|
| 323 |
+
loaded_data = pickle.load(f)
|
| 324 |
+
|
| 325 |
+
# ✅ Handle both formats: (index, kb_data) OR (index_bytes, kb_data)
|
| 326 |
+
if isinstance(loaded_data, tuple) and len(loaded_data) == 2:
|
| 327 |
+
first_item, KB_DATA = loaded_data
|
| 328 |
+
|
| 329 |
+
# Check if first item is bytes (new format) or FAISS index (old format)
|
| 330 |
+
if isinstance(first_item, bytes):
|
| 331 |
+
# New format: deserialize bytes
|
| 332 |
+
print("📦 Detected new format (serialized bytes)")
|
| 333 |
+
FAISS_INDEX = faiss.deserialize_index(first_item)
|
| 334 |
+
elif hasattr(first_item, 'ntotal'):
|
| 335 |
+
# Old format: direct FAISS index object
|
| 336 |
+
print("📦 Detected old format (direct index)")
|
| 337 |
+
FAISS_INDEX = first_item
|
| 338 |
+
else:
|
| 339 |
+
raise ValueError(f"Unknown FAISS index format: {type(first_item)}")
|
| 340 |
+
else:
|
| 341 |
+
raise ValueError(f"Invalid pickle format: expected tuple, got {type(loaded_data)}")
|
| 342 |
|
| 343 |
print(f"✅ FAISS index loaded: {FAISS_INDEX.ntotal} vectors")
|
| 344 |
print(f"✅ Knowledge base loaded: {len(KB_DATA)} documents")
|
|
|
|
| 349 |
raise
|
| 350 |
except Exception as e:
|
| 351 |
print(f"❌ Failed to load FAISS index: {e}")
|
| 352 |
+
import traceback
|
| 353 |
+
traceback.print_exc()
|
| 354 |
raise
|
| 355 |
|
| 356 |
return FAISS_INDEX, KB_DATA
|
| 357 |
|
| 358 |
|
| 359 |
+
# ==================================================================================================
|
| 360 |
+
# Second Latest version given by perplexity, should work, if not then use one of the other versions.
|
| 361 |
+
# ==================================================================================================
|
| 362 |
+
|
| 363 |
+
# def load_faiss_index():
|
| 364 |
+
# """
|
| 365 |
+
# Load FAISS index + knowledge base from pickle file.
|
| 366 |
+
# Downloads from HuggingFace Hub if not present locally.
|
| 367 |
+
# Uses module-level caching - loaded once on startup.
|
| 368 |
+
|
| 369 |
+
# Returns:
|
| 370 |
+
# tuple: (faiss.Index, List[Dict]) - FAISS index and KB data
|
| 371 |
+
# """
|
| 372 |
+
# global FAISS_INDEX, KB_DATA
|
| 373 |
+
|
| 374 |
+
# if FAISS_INDEX is None or KB_DATA is None:
|
| 375 |
+
# # Download FAISS index from HF Hub if needed (for deployment)
|
| 376 |
+
# settings.download_model_if_needed(
|
| 377 |
+
# hf_filename="models/faiss_index.pkl",
|
| 378 |
+
# local_path=settings.FAISS_INDEX_PATH
|
| 379 |
+
# )
|
| 380 |
+
|
| 381 |
+
# # Download knowledge base from HF Hub if needed (for deployment)
|
| 382 |
+
# settings.download_model_if_needed(
|
| 383 |
+
# hf_filename="data/final_knowledge_base.jsonl",
|
| 384 |
+
# local_path=settings.KB_PATH
|
| 385 |
+
# )
|
| 386 |
+
|
| 387 |
+
# print(f"Loading FAISS index from {settings.FAISS_INDEX_PATH}...")
|
| 388 |
+
|
| 389 |
+
# try:
|
| 390 |
+
# # Load pickled FAISS index + KB data
|
| 391 |
+
# with open(settings.FAISS_INDEX_PATH, 'rb') as f:
|
| 392 |
+
# FAISS_INDEX, KB_DATA = pickle.load(f)
|
| 393 |
+
|
| 394 |
+
# print(f"✅ FAISS index loaded: {FAISS_INDEX.ntotal} vectors")
|
| 395 |
+
# print(f"✅ Knowledge base loaded: {len(KB_DATA)} documents")
|
| 396 |
+
|
| 397 |
+
# except FileNotFoundError:
|
| 398 |
+
# print(f"❌ FAISS index file not found: {settings.FAISS_INDEX_PATH}")
|
| 399 |
+
# print(f"⚠️ Make sure models are uploaded to HuggingFace Hub: {settings.HF_MODEL_REPO}")
|
| 400 |
+
# raise
|
| 401 |
+
# except Exception as e:
|
| 402 |
+
# print(f"❌ Failed to load FAISS index: {e}")
|
| 403 |
+
# raise
|
| 404 |
+
|
| 405 |
+
# return FAISS_INDEX, KB_DATA
|
| 406 |
+
|
| 407 |
+
|
| 408 |
|
| 409 |
# ===========================================================================
|
| 410 |
# This version is used in the code, at least for localhost testing
|