File size: 2,105 Bytes
dd31a47 349188f dd31a47 349188f dd31a47 b9fd15d dd31a47 8d56dc3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
"""Vector store module for document embedding and retrieval"""
from typing import List
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain_community.embeddings import HuggingFaceEmbeddings
# from langchain.schema import Document
from langchain_core.documents import Document
class VectorStore:
    """Embed documents into a FAISS index and retrieve them by query.

    Documents are embedded with a Hugging Face sentence-transformers model
    and stored in an in-memory FAISS index. Retrieval uses maximal marginal
    relevance (MMR) search.
    """

    # MMR settings shared by the retriever built in ``create_vectorstore``
    # and by ``retrieve`` so both code paths search the same way.
    _SEARCH_TYPE = "mmr"
    _LAMBDA_MULT = 0.25  # per LangChain docs, lower values favour diversity
    _RETRIEVER_K = 5  # result count used by the retriever from get_retriever()

    def __init__(
        self,
        model_name: str = "sentence-transformers/all-MiniLM-L6-v2",
        device: str = "cpu",
    ):
        """Initialize the embedding model; the index itself is built later.

        Args:
            model_name: Hugging Face model used to embed text.
            device: Device handed to the embedding model, e.g. ``"cpu"`` or
                ``"cuda"`` when a GPU is available.
        """
        self.embeddings = HuggingFaceEmbeddings(
            model_name=model_name,
            model_kwargs={"device": device},
            encode_kwargs={"normalize_embeddings": True},
        )
        # Both stay None until create_vectorstore() has been called.
        self.vectorstore = None
        self.retriever = None

    def create_vectorstore(self, documents: List[Document]):
        """Build the FAISS index and its retriever from ``documents``.

        Calling this again replaces any previously built index.

        Args:
            documents: Documents to embed; must not be empty.

        Raises:
            ValueError: If ``documents`` is empty.
        """
        if not documents:
            # Fail early with a clear message instead of an opaque error
            # from inside the index construction.
            raise ValueError("Cannot create a vector store from an empty document list.")
        self.vectorstore = FAISS.from_documents(documents, self.embeddings)
        self.retriever = self.vectorstore.as_retriever(
            search_type=self._SEARCH_TYPE,
            search_kwargs={"k": self._RETRIEVER_K, "lambda_mult": self._LAMBDA_MULT},
        )

    def get_retriever(self):
        """Return the retriever built by ``create_vectorstore``.

        Returns:
            The retriever instance.

        Raises:
            ValueError: If ``create_vectorstore`` has not been called yet.
        """
        if self.retriever is None:
            raise ValueError("Vector store not initialized. Call create_vectorstore first.")
        return self.retriever

    def retrieve(self, query: str, k: int = 4) -> List[Document]:
        """Retrieve the documents most relevant to ``query``.

        Args:
            query: Search query.
            k: Number of documents to retrieve; must be at least 1.

        Returns:
            Up to ``k`` relevant documents.

        Raises:
            ValueError: If the vector store has not been created yet, or if
                ``k`` is smaller than 1.
        """
        if self.vectorstore is None:
            raise ValueError("Vector store not initialized. Call create_vectorstore first.")
        if k < 1:
            raise ValueError("k must be a positive integer.")
        # Query the store directly so the caller's ``k`` is honoured; the
        # shared retriever is fixed at its own configured result count.
        return self.vectorstore.max_marginal_relevance_search(
            query, k=k, lambda_mult=self._LAMBDA_MULT
        )