shamik
feat: adding project files.
f896763 unverified
import os
from typing import Any
from dotenv import find_dotenv, load_dotenv
from huggingface_hub import login
from llama_index.core import VectorStoreIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.vector_stores.milvus import MilvusVectorStore
from src.agent_hackathon.consts import PROJECT_ROOT_DIR
from src.agent_hackathon.logger import get_logger
# Module-level logger shared by everything in this file; get_logger is handed
# PROJECT_ROOT_DIR / "logs" as its log directory.
logger = get_logger(log_name="query_vector_db", log_dir=PROJECT_ROOT_DIR / "logs")
class RetrieverEngineBuilder:
    """
    Build a LlamaIndex retriever on top of an existing vector store.

    The builder owns a HuggingFace embedding model (created eagerly in
    ``__init__``) and wraps the supplied vector store in a
    ``VectorStoreIndex`` when ``build_retriever_engine`` is called.
    """

    def __init__(
        self,
        hf_token_env: str = "HF_TOKEN",
        embedding_model: str = "Qwen/Qwen3-Embedding-0.6B",
        vector_store: "MilvusVectorStore | None" = None,
        device: str = "cpu",
    ) -> None:
        """
        Initialize the RetrieverEngineBuilder.

        Args:
            hf_token_env: Name of the environment variable that holds the
                HuggingFace token (read by ``_login_huggingface``).
            embedding_model: Name of the embedding model handed to
                ``HuggingFaceEmbedding``.
            vector_store: The vector store to retrieve from. May be omitted
                at construction time, but must be set before calling
                ``build_retriever_engine``.
            device: Device to run the embedding model on.
        """
        self.hf_token_env = hf_token_env
        self.embedding_model = embedding_model
        self.vector_store = vector_store
        self.device = device
        logger.info("Initializing RetrieverEngineBuilder.")
        # Environment loading and Hub login are left disabled, as in the
        # original code. If they are re-enabled, load the .env file *before*
        # logging in so a token defined there is visible to os.getenv.
        # self._load_env()
        # self._login_huggingface()
        self.embed_model = HuggingFaceEmbedding(
            model_name=self.embedding_model, device=self.device
        )
        logger.info("RetrieverEngineBuilder initialized.")

    def _login_huggingface(self) -> None:
        """Log in to HuggingFace with the token read from ``self.hf_token_env``."""
        logger.info("Logging in to HuggingFace.")
        login(token=os.getenv(key=self.hf_token_env))
        logger.info("Logged in to HuggingFace.")

    def _load_env(self) -> None:
        """Load environment variables from a .env file, if one can be found."""
        logger.info("Loading environment variables.")
        # raise_error_if_not_found=False: a missing .env file is not an error.
        _ = load_dotenv(dotenv_path=find_dotenv(raise_error_if_not_found=False))
        logger.info("Environment variables loaded.")

    def build_retriever_engine(
        self,
        similarity_top_k: int = 5,
        vector_store_query_mode: str = "hybrid",
    ) -> Any:
        """
        Build and return the retriever engine.

        Args:
            similarity_top_k: Number of results the retriever should return
                per query. Must be at least 1. Defaults to 5.
            vector_store_query_mode: Query mode forwarded to
                ``index.as_retriever``. Defaults to ``"hybrid"``.

        Returns:
            The retriever produced by ``VectorStoreIndex.as_retriever``.

        Raises:
            ValueError: If no vector store has been configured, or if
                ``similarity_top_k`` is smaller than 1.
        """
        # Validate up front so misconfiguration surfaces as a clear error
        # instead of an opaque failure inside the index construction.
        if self.vector_store is None:
            raise ValueError(
                "A vector store is required to build the retriever engine; "
                "pass `vector_store` when constructing RetrieverEngineBuilder."
            )
        if similarity_top_k < 1:
            raise ValueError(
                f"similarity_top_k must be >= 1, got {similarity_top_k}."
            )

        logger.info("Building retriever engine.")
        index = VectorStoreIndex.from_vector_store(
            vector_store=self.vector_store, embed_model=self.embed_model
        )
        retriever = index.as_retriever(
            vector_store_query_mode=vector_store_query_mode,
            similarity_top_k=similarity_top_k,
        )
        logger.info("Retriever engine built.")
        return retriever