"""Vector store module for document embedding and retrieval""" from typing import List from langchain_community.vectorstores import FAISS from langchain_openai import OpenAIEmbeddings from langchain_community.embeddings import HuggingFaceEmbeddings # from langchain.schema import Document from langchain_core.documents import Document class VectorStore: """Manages vector store operations""" def __init__(self): """Initialize vector store with OpenAI embeddings""" # self.embedding = OpenAIEmbeddings() # ✅ Hugging Face Embeddings (LOCAL / FREE) self.embeddings = HuggingFaceEmbeddings( model_name="sentence-transformers/all-MiniLM-L6-v2", model_kwargs={"device": "cpu"}, # change to "cuda" if GPU available encode_kwargs={"normalize_embeddings": True} ) self.vectorstore = None self.retriever = None def create_vectorstore(self, documents: List[Document]): """ Create vector store from documents Args: documents: List of documents to embed """ self.vectorstore = FAISS.from_documents(documents, self.embedding) self.retriever = self.vectorstore.as_retriever(search_type="mmr", search_kwargs={"k": 5, "lambda_mult":0.25}) def get_retriever(self): """ Get the retriever instance Returns: Retriever instance """ if self.retriever is None: raise ValueError("Vector store not initialized. Call create_vectorstore first.") return self.retriever def retrieve(self, query: str, k: int = 4) -> List[Document]: """ Retrieve relevant documents for a query Args: query: Search query k: Number of documents to retrieve Returns: List of relevant documents """ if self.retriever is None: raise ValueError("Vector store not initialized. Call create_vectorstore first.") return self.retriever.invoke(query)