from typing import Dict, Any, Optional
from pathlib import Path

from llama_index.core import Settings
from llama_index.core.storage.docstore import SimpleDocumentStore
from llama_index.retrievers.bm25 import BM25Retriever
from llama_index.core.retrievers import QueryFusionRetriever


class SearchComponents:
    """Singleton registry of the search objects loaded from disk.

    Every ``SearchComponents()`` call returns the same object. Components
    are kept in a name -> object mapping that is filled by
    :meth:`initialize_components` and read through :meth:`get_component`
    and :meth:`get_retriever`.
    """

    # The one shared instance; created lazily on first construction.
    _instance = None

    def __new__(cls):
        instance = cls._instance
        if instance is None:
            instance = super().__new__(cls)
            # Tells __init__ that the component mapping is not set up yet.
            instance._initialized = False
            cls._instance = instance
        return instance

    def __init__(self):
        # __init__ runs on every SearchComponents() call; only the first
        # one may create the mapping, otherwise loaded components are lost.
        if self._initialized:
            return
        self._components: Dict[str, Any] = {}
        self._initialized = True

    def initialize_components(self, local_dir: Path) -> bool:
        """Load the docstore and BM25 retrievers and build the fusion retriever.

        Args:
            local_dir: Directory containing ``docstore_es_filter.json`` and
                the ``bm25_retriever`` / ``bm25_retriever_short`` persist
                directories.

        Returns:
            ``True`` when every component was loaded and registered;
            ``False`` when any step raised. In that case the error and its
            traceback are printed and nothing new is registered.
        """
        try:
            # Document store.
            print(f"Loading docstore from {local_dir / 'docstore_es_filter.json'}")
            docstore_path = str(local_dir / "docstore_es_filter.json")
            docstore = SimpleDocumentStore.from_persist_path(docstore_path)
            print("Docstore loaded successfully")

            # BM25 retriever (registered under 'bm25_retriever').
            print(f"Loading BM25 retriever from {local_dir / 'bm25_retriever'}")
            bm25_dir = str(local_dir / "bm25_retriever")
            bm25_retriever = BM25Retriever.from_persist_dir(bm25_dir)
            print("BM25 retriever loaded successfully")

            # BM25 retriever for short texts (used only inside the fusion
            # retriever below; it is not registered under its own name).
            print(f"Loading BM25 retriever (short) from {local_dir / 'bm25_retriever_short'}")
            bm25_short_dir = str(local_dir / "bm25_retriever_short")
            bm25_retriever_short = BM25Retriever.from_persist_dir(bm25_short_dir)
            print("BM25 retriever (short) loaded successfully")

            # For short texts, build a hybrid (fusion) retriever.
            print("Creating QueryFusionRetriever...")
            # Ask for twice the configured top-k so that enough results
            # remain after deduplication.
            widened_top_k = Settings.similarity_top_k * 2
            fusion_retriever = QueryFusionRetriever(
                [bm25_retriever_short],
                similarity_top_k=widened_top_k,
                num_queries=1,
                use_async=True
            )
            print("QueryFusionRetriever created successfully")

            # Register everything only after all steps succeeded.
            self._components.update({
                'docstore': docstore,
                'bm25_retriever': bm25_retriever,
                'fusion_retriever': fusion_retriever,
            })
            return True

        except Exception as e:
            print(f"Error initializing components: {str(e)}")
            import traceback
            traceback.print_exc()
            return False

    def get_component(self, name: str) -> Optional[Any]:
        """Return the component registered under *name*, or ``None`` if absent."""
        return self._components.get(name)

    def get_retriever(self) -> Optional[QueryFusionRetriever]:
        """Return the registered fusion retriever, or ``None`` if not initialized."""
        return self.get_component('fusion_retriever')


# Global instance
search_components = SearchComponents()