|
|
"""Vector store module for document embedding and retrieval""" |
|
|
|
|
|
from typing import List |
|
|
from langchain_community.vectorstores import FAISS |
|
|
from langchain_openai import OpenAIEmbeddings |
|
|
from langchain_community.embeddings import HuggingFaceEmbeddings |
|
|
|
|
|
from langchain_core.documents import Document |
|
|
|
|
|
|
|
|
class VectorStore:
    """Manages FAISS vector store creation and MMR-based document retrieval."""

    def __init__(self):
        """Initialize the vector store with HuggingFace embeddings.

        Uses the all-MiniLM-L6-v2 sentence-transformer on CPU with
        L2-normalized vectors. (The previous docstring claimed OpenAI
        embeddings, which did not match the code.)
        """
        self.embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2",
            model_kwargs={"device": "cpu"},
            encode_kwargs={"normalize_embeddings": True},
        )
        # Both populated by create_vectorstore(); None until then.
        self.vectorstore = None
        self.retriever = None

    def create_vectorstore(self, documents: List[Document]):
        """
        Create a FAISS vector store from documents and build an MMR retriever.

        Args:
            documents: List of documents to embed.
        """
        # BUG FIX: was `self.embedding`, which does not exist — __init__
        # sets `self.embeddings` — so this line raised AttributeError.
        self.vectorstore = FAISS.from_documents(documents, self.embeddings)
        # MMR balances relevance vs. diversity; lambda_mult=0.25 favors
        # diversity among the top-5 results.
        self.retriever = self.vectorstore.as_retriever(
            search_type="mmr",
            search_kwargs={"k": 5, "lambda_mult": 0.25},
        )

    def get_retriever(self):
        """
        Get the retriever instance.

        Returns:
            Retriever instance.

        Raises:
            ValueError: If create_vectorstore has not been called yet.
        """
        if self.retriever is None:
            raise ValueError("Vector store not initialized. Call create_vectorstore first.")
        return self.retriever

    def retrieve(self, query: str, k: int = 4) -> List[Document]:
        """
        Retrieve relevant documents for a query.

        Args:
            query: Search query.
            k: Number of documents to retrieve.
               NOTE(review): currently ignored — the retriever is built with
               a fixed k=5 in create_vectorstore. Honoring this parameter
               would change existing result counts, so behavior is left
               as-is; confirm intent with the caller before wiring it up.

        Returns:
            List of relevant documents.

        Raises:
            ValueError: If create_vectorstore has not been called yet.
        """
        if self.retriever is None:
            raise ValueError("Vector store not initialized. Call create_vectorstore first.")
        return self.retriever.invoke(query)