File size: 3,133 Bytes
461adca
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
from pathlib import Path
from typing import Any, Dict, Optional, Union

from llama_index.core import Settings
from llama_index.core.retrievers import QueryFusionRetriever
from llama_index.core.storage.docstore import SimpleDocumentStore
from llama_index.retrievers.bm25 import BM25Retriever


class SearchComponents:
    """Process-wide registry of the retrieval components used for search.

    The class is a singleton: every ``SearchComponents()`` call returns the
    same object, so components registered by :meth:`initialize_components`
    are visible to every caller that constructs the class afterwards.
    """

    # The one shared instance; created by the first constructor call.
    _instance = None

    def __new__(cls):
        """Return the shared instance, creating it on first use."""
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            # __init__ runs on every SearchComponents() call; this flag lets
            # it tell the very first call apart from later ones.
            cls._instance._initialized = False
        return cls._instance

    def __init__(self):
        """Create the empty registry once; later constructor calls are no-ops."""
        if self._initialized:
            return
        self._components: Dict[str, Any] = {}
        self._initialized = True

    def initialize_components(self, local_dir: Union[str, Path]) -> bool:
        """Load every search component from ``local_dir`` and register it.

        Reads, relative to ``local_dir``:

        * ``docstore_es_filter.json`` -> registered as ``'docstore'``
        * ``bm25_retriever`` -> registered as ``'bm25_retriever'``
        * ``bm25_retriever_short`` -> wrapped in a ``QueryFusionRetriever``
          that is registered as ``'fusion_retriever'``

        Components are registered only after all of them have loaded, so a
        failed call leaves the registry exactly as it was before the call.

        Args:
            local_dir: Directory holding the persisted artifacts. A plain
                string is accepted and converted to ``Path``.

        Returns:
            ``True`` on success. ``False`` if anything raised; the error
            message and traceback are printed instead of propagating.
        """
        try:
            # Coerce inside the try so an unusable argument is reported
            # through the same False-returning path as any load failure.
            base_dir = Path(local_dir)
            docstore_path = base_dir / "docstore_es_filter.json"
            bm25_dir = base_dir / "bm25_retriever"
            bm25_short_dir = base_dir / "bm25_retriever_short"

            print(f"Loading docstore from {docstore_path}")
            docstore = SimpleDocumentStore.from_persist_path(str(docstore_path))
            print("Docstore loaded successfully")

            print(f"Loading BM25 retriever from {bm25_dir}")
            bm25_retriever = BM25Retriever.from_persist_dir(str(bm25_dir))
            print("BM25 retriever loaded successfully")

            print(f"Loading BM25 retriever (short) from {bm25_short_dir}")
            bm25_retriever_short = BM25Retriever.from_persist_dir(
                str(bm25_short_dir)
            )
            print("BM25 retriever (short) loaded successfully")

            # For short texts, build a hybrid (fusion) retriever. Only the
            # short-text index feeds it; the full index is registered
            # separately under 'bm25_retriever'.
            print("Creating QueryFusionRetriever...")
            fusion_retriever = QueryFusionRetriever(
                [bm25_retriever_short],
                # Fetch twice as many results so enough remain after
                # deduplication.
                # NOTE(review): similarity_top_k does not look like a stock
                # llama_index Settings field; presumably it is assigned
                # elsewhere in the project - confirm.
                similarity_top_k=Settings.similarity_top_k * 2,
                num_queries=1,
                use_async=True,
            )
            print("QueryFusionRetriever created successfully")

            # Publish everything in one step, only after every load succeeded.
            self._components.update(
                {
                    'docstore': docstore,
                    'bm25_retriever': bm25_retriever,
                    'fusion_retriever': fusion_retriever,
                }
            )
            return True
        except Exception as e:
            # Deliberate best-effort boundary: report the failure and let the
            # caller decide what to do with the False result.
            print(f"Error initializing components: {e}")
            import traceback
            traceback.print_exc()
            return False

    def get_component(self, name: str) -> Optional[Any]:
        """Return the component registered under ``name``, or ``None``."""
        return self._components.get(name)

    def get_retriever(self) -> "Optional[QueryFusionRetriever]":
        """Return the ``'fusion_retriever'`` component, or ``None`` if unset."""
        return self.get_component('fusion_retriever')

# Module-level shared instance. SearchComponents is a singleton, so any later
# SearchComponents() call returns this same object.
search_components = SearchComponents()