# Source: LP_2-test / components.py (uploaded by DocUA)
# Commit 461adca: "Clean deployment without large index files"
from typing import Dict, Any, Optional
from pathlib import Path
from llama_index.core import Settings
from llama_index.core.storage.docstore import SimpleDocumentStore
from llama_index.retrievers.bm25 import BM25Retriever
from llama_index.core.retrievers import QueryFusionRetriever
class SearchComponents:
    """Singleton registry holding the search/retrieval components.

    Every ``SearchComponents()`` call returns the same object, and its
    component table is created only once.  The table is empty until
    ``initialize_components`` completes successfully.
    """

    _instance = None

    def __new__(cls):
        # Hand back the cached instance when there is one; otherwise create
        # it and mark it as not yet set up so __init__ does its work once.
        cached = cls._instance
        if cached is not None:
            return cached
        fresh = super(SearchComponents, cls).__new__(cls)
        fresh._initialized = False
        cls._instance = fresh
        return fresh

    def __init__(self):
        # __init__ runs on every SearchComponents() call; without this guard
        # a second call would wipe the already-registered components.
        if self._initialized:
            return
        self._components = {}
        self._initialized = True

    def initialize_components(self, local_dir: Path) -> bool:
        """Load the persisted indexes and register the search components.

        Reads ``docstore_es_filter.json``, ``bm25_retriever`` and
        ``bm25_retriever_short`` from ``local_dir``, wraps the short-text
        BM25 retriever in a ``QueryFusionRetriever`` and registers the
        results under ``'docstore'``, ``'bm25_retriever'`` and
        ``'fusion_retriever'``.

        Args:
            local_dir: Directory containing the persisted docstore and BM25
                indexes.

        Returns:
            True when every component was loaded and registered.  False when
            any step raised; the error and its traceback are printed and the
            registry keeps whatever it held before the call.
        """
        try:
            # Document store.
            docstore_file = local_dir / 'docstore_es_filter.json'
            print(f"Loading docstore from {docstore_file}")
            document_store = SimpleDocumentStore.from_persist_path(str(docstore_file))
            print("Docstore loaded successfully")

            # BM25 index over the full texts.
            full_index_dir = local_dir / 'bm25_retriever'
            print(f"Loading BM25 retriever from {full_index_dir}")
            full_text_bm25 = BM25Retriever.from_persist_dir(str(full_index_dir))
            print("BM25 retriever loaded successfully")

            # BM25 index over the short texts.
            short_index_dir = local_dir / 'bm25_retriever_short'
            print(f"Loading BM25 retriever (short) from {short_index_dir}")
            short_text_bm25 = BM25Retriever.from_persist_dir(str(short_index_dir))
            print("BM25 retriever (short) loaded successfully")

            # For the short texts, build a hybrid (fusion) retriever.
            # NOTE: only the short-text index feeds the fusion retriever; the
            # full-text retriever is registered by name but is not fused.
            print("Creating QueryFusionRetriever...")
            # Ask for twice the configured top-k so that enough results are
            # available ahead of deduplication.
            candidate_count = Settings.similarity_top_k * 2
            hybrid_retriever = QueryFusionRetriever(
                [short_text_bm25],
                similarity_top_k=candidate_count,
                num_queries=1,
                use_async=True,
            )
            print("QueryFusionRetriever created successfully")

            # Register everything only after all loads have succeeded.
            self._components.update(
                {
                    'docstore': document_store,
                    'bm25_retriever': full_text_bm25,
                    'fusion_retriever': hybrid_retriever,
                }
            )
            return True
        except Exception as exc:
            print(f"Error initializing components: {str(exc)}")
            import traceback

            traceback.print_exc()
            return False

    def get_component(self, name: str) -> Optional[Any]:
        """Return the component registered under ``name``, or None if absent."""
        registry = self._components
        return registry.get(name)

    def get_retriever(self) -> Optional[QueryFusionRetriever]:
        """Return the fusion retriever, or None if it is not registered."""
        return self.get_component('fusion_retriever')
# Module-level shared instance. SearchComponents caches its instance in
# __new__, so calling SearchComponents() elsewhere returns this same object.
# Its component table is empty until initialize_components() is called.
search_components = SearchComponents()