File size: 2,760 Bytes
f896763
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import os
from typing import Any

from dotenv import find_dotenv, load_dotenv
from huggingface_hub import login
from llama_index.core import VectorStoreIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.vector_stores.milvus import MilvusVectorStore

from src.agent_hackathon.consts import PROJECT_ROOT_DIR
from src.agent_hackathon.logger import get_logger

# Module-level logger shared by everything in this file; it is created with the
# name "query_vector_db" and a log_dir of PROJECT_ROOT_DIR / "logs".
logger = get_logger(log_name="query_vector_db", log_dir=PROJECT_ROOT_DIR / "logs")


class RetrieverEngineBuilder:
    """
    Builds a LlamaIndex retriever on top of an existing Milvus vector store.

    The HuggingFace embedding model is instantiated once in ``__init__`` and
    reused by every retriever produced by ``build_retriever_engine``.
    """

    def __init__(
        self,
        hf_token_env: str = "HF_TOKEN",
        embedding_model: str = "Qwen/Qwen3-Embedding-0.6B",
        # Quoted so the union is never evaluated at runtime, whatever the
        # interpreter version.
        vector_store: "MilvusVectorStore | None" = None,
        device: str = "cpu",
    ) -> None:
        """
        Initialize the RetrieverEngineBuilder.

        Args:
            hf_token_env: Environment variable name for HuggingFace token.
            embedding_model: Name of the embedding model.
            vector_store: An instance of MilvusVectorStore. May be left as
                ``None`` at construction time, but must be set before
                ``build_retriever_engine`` is called.
            device: Device to run the embedding model on.
        """
        self.hf_token_env = hf_token_env
        self.embedding_model = embedding_model
        self.vector_store = vector_store
        self.device = device

        logger.info("Initializing RetrieverEngineBuilder.")
        # NOTE(review): both helpers are currently disabled. If they are
        # re-enabled, _load_env() should presumably run before
        # _login_huggingface(), since the latter reads the token from the
        # process environment that the former populates.
        # self._login_huggingface()
        # self._load_env()

        self.embed_model = HuggingFaceEmbedding(
            model_name=self.embedding_model, device=self.device
        )
        logger.info("RetrieverEngineBuilder initialized.")

    def _login_huggingface(self) -> None:
        """Login to HuggingFace using the token from environment variable."""
        logger.info("Logging in to HuggingFace.")
        # NOTE(review): os.getenv returns None when the variable is unset, so
        # login() is then called with token=None - confirm that is acceptable
        # in non-interactive deployments.
        login(token=os.getenv(key=self.hf_token_env))
        logger.info("Logged in to HuggingFace.")

    def _load_env(self) -> None:
        """Load environment variables from .env file."""
        logger.info("Loading environment variables.")
        # raise_error_if_not_found=False: a missing .env file is not an error.
        _ = load_dotenv(dotenv_path=find_dotenv(raise_error_if_not_found=False))
        logger.info("Environment variables loaded.")

    def build_retriever_engine(
        self,
        similarity_top_k: int = 5,
        vector_store_query_mode: str = "hybrid",
    ) -> Any:
        """
        Build and return the retriever engine.

        Args:
            similarity_top_k: Forwarded to ``index.as_retriever``; defaults to
                the previously hard-coded value of 5.
            vector_store_query_mode: Forwarded to ``index.as_retriever``;
                defaults to the previously hard-coded ``"hybrid"``.

        Returns:
            Retriever engine object.

        Raises:
            ValueError: If no vector store was supplied to the builder.
        """
        # Fail fast with an explicit message instead of letting a None store
        # surface as an unrelated error inside the index constructor.
        if self.vector_store is None:
            raise ValueError(
                "A vector_store must be provided before building the retriever engine."
            )

        logger.info("Building retriever engine.")
        index = VectorStoreIndex.from_vector_store(
            vector_store=self.vector_store, embed_model=self.embed_model
        )
        retriever = index.as_retriever(
            vector_store_query_mode=vector_store_query_mode,
            similarity_top_k=similarity_top_k,
        )
        logger.info("Retriever engine built.")
        return retriever