# Legalize_AI / vector_store.py
# hashirlodhi's picture
# Upload 11 files
# e46711a verified
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
import config
def get_embedding_model(device: str = "cpu"):
    """
    Initialize the embedding model.

    The model name is read from config.EMBEDDING_MODEL_NAME.

    Args:
        device: Device string forwarded to the underlying model through
            ``model_kwargs`` (for example "cpu" or "cuda"). Defaults to
            "cpu", which matches the previous hard-coded behavior.

    Returns:
        A HuggingFaceEmbeddings instance configured for the given device.
    """
    return HuggingFaceEmbeddings(
        model_name=config.EMBEDDING_MODEL_NAME,
        # Callers that pass no argument keep running on CPU as before.
        model_kwargs={"device": device},
    )
def create_vector_store(chunks, embedding_model):
    """
    Build a Chroma vector store from the given document chunks.

    The store is created with config.CHROMA_DB_DIR as its persist
    directory and the supplied embedding model as its embedding, and the
    resulting store object is returned to the caller.
    """
    persist_dir = config.CHROMA_DB_DIR
    return Chroma.from_documents(
        documents=chunks,
        embedding=embedding_model,
        persist_directory=persist_dir,
    )
def get_vector_store(embedding_model):
    """
    Open the vector store located at config.CHROMA_DB_DIR.

    The supplied embedding model is passed as the store's embedding
    function.
    """
    # No explicit load call is made: constructing Chroma with a
    # persist_directory is what attempts to load the existing store.
    store = Chroma(
        embedding_function=embedding_model,
        persist_directory=config.CHROMA_DB_DIR,
    )
    return store
def get_retriever(vectorstore):
    """
    Wrap the vector store in a similarity-search retriever.

    The ``k`` search setting is taken from config.RETRIEVER_K.
    """
    search_settings = {"k": config.RETRIEVER_K}
    return vectorstore.as_retriever(
        search_type="similarity",
        search_kwargs=search_settings,
    )