demo2 / src /vectorstore /vectorstore.py
Dinesh310's picture
Update src/vectorstore/vectorstore.py
b9fd15d verified
"""Vector store module for document embedding and retrieval"""
from typing import List
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain_community.embeddings import HuggingFaceEmbeddings
# from langchain.schema import Document
from langchain_core.documents import Document
class VectorStore:
    """Manage a FAISS vector store and the retriever built on top of it.

    Documents are embedded with a local Hugging Face sentence-transformers
    model and indexed in FAISS by :meth:`create_vectorstore`. Retrieval uses
    maximal marginal relevance (``search_type="mmr"``).
    """

    # Search settings shared by the stored retriever and ``retrieve`` so the
    # two code paths cannot drift apart.
    _SEARCH_TYPE = "mmr"
    _LAMBDA_MULT = 0.25
    _DEFAULT_RETRIEVER_K = 5

    def __init__(self):
        """Initialize the embedding model; no index exists until documents are added."""
        # Hugging Face embeddings run locally and need no API key.
        self.embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2",
            model_kwargs={"device": "cpu"},  # change to "cuda" if GPU available
            encode_kwargs={"normalize_embeddings": True},
        )
        self.vectorstore = None
        self.retriever = None

    def _build_retriever(self, k: int):
        """Return an MMR retriever over the current vector store yielding ``k`` documents."""
        return self.vectorstore.as_retriever(
            search_type=self._SEARCH_TYPE,
            search_kwargs={"k": k, "lambda_mult": self._LAMBDA_MULT},
        )

    def create_vectorstore(self, documents: List[Document]):
        """
        Create the vector store and default retriever from documents.

        Args:
            documents: List of documents to embed

        Raises:
            ValueError: If ``documents`` is empty.
        """
        # Fail early with a clear message instead of letting index
        # construction fail on an empty batch.
        if not documents:
            raise ValueError("Cannot create a vector store from an empty document list.")
        # The attribute set in __init__ is ``embeddings`` (plural).
        self.vectorstore = FAISS.from_documents(documents, self.embeddings)
        self.retriever = self._build_retriever(self._DEFAULT_RETRIEVER_K)

    def get_retriever(self):
        """
        Get the retriever instance.

        Returns:
            Retriever instance

        Raises:
            ValueError: If ``create_vectorstore`` has not been called yet.
        """
        if self.retriever is None:
            raise ValueError("Vector store not initialized. Call create_vectorstore first.")
        return self.retriever

    def retrieve(self, query: str, k: int = 4) -> List[Document]:
        """
        Retrieve relevant documents for a query.

        Args:
            query: Search query
            k: Number of documents to retrieve

        Returns:
            List of relevant documents

        Raises:
            ValueError: If the vector store is not initialized or ``k`` is
                not a positive integer.
        """
        if self.vectorstore is None:
            raise ValueError("Vector store not initialized. Call create_vectorstore first.")
        if k < 1:
            raise ValueError("k must be a positive integer.")
        # Build a retriever for the requested k; the stored retriever has a
        # fixed k and would silently ignore this argument.
        return self._build_retriever(k).invoke(query)