import traceback
from pathlib import Path
from typing import Any, Dict, Optional, Union

from llama_index.core import Settings
from llama_index.core.retrievers import QueryFusionRetriever
from llama_index.core.storage.docstore import SimpleDocumentStore
from llama_index.retrievers.bm25 import BM25Retriever


class SearchComponents:
    """Singleton registry holding the retrieval objects used for search.

    Every ``SearchComponents()`` call returns the same instance, and the
    component registry is created only on the first construction.
    :meth:`initialize_components` fills the registry under the keys
    ``'docstore'``, ``'bm25_retriever'`` and ``'fusion_retriever'``.
    """

    # The single shared instance; created by the first constructor call.
    _instance = None

    def __new__(cls):
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            # Python re-runs __init__ on every SearchComponents() call; this
            # flag lets __init__ tell the first call from later ones.
            cls._instance._initialized = False
        return cls._instance

    def __init__(self):
        # Guarded so that repeated construction does not wipe the registry.
        if not self._initialized:
            self._components: Dict[str, Any] = {}
            self._initialized = True

    def initialize_components(self, local_dir: Union[str, Path]) -> bool:
        """Load the persisted search components found under ``local_dir``.

        Reads ``docstore_es_filter.json``, ``bm25_retriever`` and
        ``bm25_retriever_short`` from ``local_dir``, builds a
        ``QueryFusionRetriever`` over the short BM25 retriever, and registers
        the docstore, the full BM25 retriever and the fusion retriever.

        Args:
            local_dir: Directory containing the persisted artifacts. A plain
                string is accepted as well as a ``Path``.

        Returns:
            ``True`` when everything loaded and was registered. ``False`` if
            any step raised; the error and traceback are printed and the
            registry is left as it was, because it is only written after all
            loads succeed.
        """
        try:
            # Coerce inside the try block so a bad argument (e.g. None) is
            # reported as a failed initialization instead of raising.
            base_dir = Path(local_dir)

            docstore_path = base_dir / "docstore_es_filter.json"
            print(f"Loading docstore from {docstore_path}")
            docstore = SimpleDocumentStore.from_persist_path(str(docstore_path))
            print("Docstore loaded successfully")

            bm25_dir = base_dir / "bm25_retriever"
            print(f"Loading BM25 retriever from {bm25_dir}")
            bm25_retriever = BM25Retriever.from_persist_dir(str(bm25_dir))
            print("BM25 retriever loaded successfully")

            bm25_short_dir = base_dir / "bm25_retriever_short"
            print(f"Loading BM25 retriever (short) from {bm25_short_dir}")
            bm25_retriever_short = BM25Retriever.from_persist_dir(
                str(bm25_short_dir)
            )
            print("BM25 retriever (short) loaded successfully")

            print("Creating QueryFusionRetriever...")
            # Only the "short" BM25 retriever is fused; the full one is
            # registered on its own below.
            # NOTE(review): Settings.similarity_top_k is not assigned in this
            # file - presumably set on Settings elsewhere; if it is missing,
            # the AttributeError is caught below and this returns False.
            fusion_retriever = QueryFusionRetriever(
                [bm25_retriever_short],
                similarity_top_k=Settings.similarity_top_k * 2,
                num_queries=1,
                use_async=True,
            )
            print("QueryFusionRetriever created successfully")

            # Publish only after every load succeeded.
            self._components.update(
                {
                    "docstore": docstore,
                    "bm25_retriever": bm25_retriever,
                    "fusion_retriever": fusion_retriever,
                }
            )
            return True
        except Exception as e:
            # Deliberate catch-all: callers rely on the boolean result.
            print(f"Error initializing components: {e}")
            traceback.print_exc()
            return False

    def get_component(self, name: str) -> Optional[Any]:
        """Return the component registered under ``name``, or ``None``."""
        return self._components.get(name)

    def get_retriever(self) -> Optional[QueryFusionRetriever]:
        """Return the registered fusion retriever, or ``None`` if absent."""
        return self.get_component("fusion_retriever")


# Shared module-level instance. Because the class is a singleton, calling
# SearchComponents() anywhere else yields this same object.
search_components = SearchComponents()