import os

from dotenv import load_dotenv
from langchain_core.documents import Document
from langchain_mongodb import MongoDBAtlasVectorSearch
from langchain_openai import OpenAIEmbeddings

from src.database.connection import DatabaseConnection
from src.utils.constants import Constants
from src.utils.logging import get_logger


class VectorSearch:
    """
    Class to handle vector search functionality using MongoDB Atlas Vector Search
    and OpenAI embeddings.

    Instances can be used as context managers; leaving the ``with`` block calls
    :meth:`close`.
    """

    def __init__(self):
        """
        Initialize the vector search with OpenAI embeddings and MongoDB Atlas
        Vector Search.

        Environment variables are (re)loaded with ``load_dotenv(override=True)``
        before ``OPENAI_API_KEY`` and ``MONGODB_URI`` are read.

        Raises:
            ValueError: If ``OPENAI_API_KEY`` is unset or empty.
            Exception: Any error raised while creating the database connection,
                the embeddings client or the vector store is logged and
                re-raised unchanged.
        """
        self.logger = get_logger()
        self.logger.info("Initializing vector search")

        load_dotenv(override=True)

        self.openai_api_key = os.getenv("OPENAI_API_KEY")
        if not self.openai_api_key:
            self.logger.error("OPENAI_API_KEY environment variable is not set")
            raise ValueError("OPENAI_API_KEY environment variable is not set")

        # Defined up front so the failure path below (and close()) can tell
        # whether a connection was actually opened.
        self.db_connection = None
        try:
            self.db_connection = DatabaseConnection()
            self.collection = self.db_connection.get_collection()

            self.logger.info(f"Initializing OpenAI embeddings with model: {Constants.EMBEDDING_MODEL}")
            self.embeddings = OpenAIEmbeddings(
                model=Constants.EMBEDDING_MODEL,
                openai_api_key=self.openai_api_key,
            )

            self.logger.info(f"Initializing MongoDB Atlas Vector Search with index: {Constants.ATLAS_VECTOR_SEARCH_INDEX_NAME}")
            # NOTE(review): MONGODB_URI is passed through unvalidated (None when
            # unset), and from_connection_string presumably opens its own client
            # separate from self.db_connection, which close() does not release.
            # Confirm, and consider building the store from self.collection.
            self.vector_search = MongoDBAtlasVectorSearch.from_connection_string(
                connection_string=os.getenv("MONGODB_URI"),
                namespace=f"{Constants.DB_NAME}.{Constants.COLLECTION_NAME}",
                embedding=self.embeddings,
                index_name=Constants.ATLAS_VECTOR_SEARCH_INDEX_NAME,
                text_key="text",
            )

            self.logger.info("Vector search initialized successfully")
        except Exception as e:
            self.logger.error(f"Failed to initialize vector search: {str(e)}")
            # The caller never receives the half-built instance, so release the
            # connection here instead of leaking it.
            self._release_connection()
            raise

    def __enter__(self):
        """Return this instance for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close resources on leaving a ``with`` block; never suppresses exceptions."""
        self.close()
        return False

    def search(self, query, limit=5):
        """
        Search for documents similar to the query.

        Args:
            query (str): The search query.
            limit (int, optional): Maximum number of results to return.
                Defaults to 5.

        Returns:
            list: List of result dictionaries as produced by
                :meth:`_format_results`.

        Raises:
            Exception: Any error from the similarity search or from result
                formatting is logged and re-raised unchanged.
        """
        self.logger.info(f"Performing vector search with query: '{query}' (limit: {limit})")
        try:
            results = self.vector_search.similarity_search(query, k=limit)
            self.logger.info(f"Vector search returned {len(results)} results")
            return self._format_results(results)
        except Exception as e:
            self.logger.error(f"Error during vector search: {str(e)}")
            raise

    def _format_results(self, results):
        """
        Format the search results into a standardized format.

        Args:
            results (list): List of Document objects from the vector search.

        Returns:
            list: List of dictionaries with the keys ``id``, ``title``, ``text``,
                ``price``, ``thumbnail`` and ``product_page_url``.

        Raises:
            Exception: Any error raised while formatting is logged and re-raised.
        """
        self.logger.debug(f"Formatting {len(results)} search results")
        try:
            formatted_results = [self._format_document(doc) for doc in results]
            self.logger.debug("Results formatted successfully")
            return formatted_results
        except Exception as e:
            self.logger.error(f"Error formatting search results: {str(e)}")
            raise

    @staticmethod
    def _format_document(doc):
        """Convert one search-result document into the standardized dictionary."""
        metadata = doc.metadata

        # A missing or empty thumbnail is replaced by the placeholder image.
        thumbnail = metadata.get("thumbnail")
        if thumbnail is None or thumbnail == "":
            thumbnail = Constants.PLACEHOLDER_IMAGE_URL

        return {
            "id": metadata.get("id", ""),
            "title": metadata.get("title", ""),
            "text": doc.page_content,
            "price": metadata.get("price", ""),
            "thumbnail": thumbnail,
            "product_page_url": metadata.get("product_page_url", ""),
        }

    def _release_connection(self):
        """Best-effort close of the database connection; returns True on success."""
        connection = getattr(self, "db_connection", None)
        if connection is None:
            return False
        try:
            connection.close_connection()
        except Exception as e:
            # Cleanup is best-effort: log and carry on so the caller's own
            # error (if any) is the one that propagates.
            self.logger.error(f"Error closing vector search resources: {str(e)}")
            return False
        return True

    def close(self):
        """
        Close the database connection.

        Errors raised while closing are logged and not propagated. If no
        connection was opened, the call does nothing beyond logging.
        """
        self.logger.info("Closing vector search resources")
        if getattr(self, "db_connection", None) is None:
            self.logger.debug("No database connection to close")
            return
        if self._release_connection():
            self.logger.info("Vector search resources closed successfully")