from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma

import config


def get_embedding_model():
    """Build the HuggingFace embedding model.

    The model name is taken from ``config.EMBEDDING_MODEL_NAME`` and the
    model is requested on the CPU device.

    Returns:
        A ``HuggingFaceEmbeddings`` instance.
    """
    # The device is fixed to CPU in this helper; it is not configurable here.
    device_options = {"device": "cpu"}
    return HuggingFaceEmbeddings(
        model_name=config.EMBEDDING_MODEL_NAME,
        model_kwargs=device_options,
    )


def create_vector_store(chunks, embedding_model):
    """Build a Chroma vector store from document chunks.

    The store is created with ``config.CHROMA_DB_DIR`` as its persist
    directory.

    Args:
        chunks: Documents handed to ``Chroma.from_documents``.
        embedding_model: Embedding object used to embed the documents.

    Returns:
        The vector store returned by ``Chroma.from_documents``.
    """
    store_options = {
        "documents": chunks,
        "embedding": embedding_model,
        "persist_directory": config.CHROMA_DB_DIR,
    }
    return Chroma.from_documents(**store_options)


def get_vector_store(embedding_model):
    """Open the Chroma vector store located at ``config.CHROMA_DB_DIR``.

    Args:
        embedding_model: Embedding object passed as the store's
            ``embedding_function``.

    Returns:
        A ``Chroma`` instance bound to the configured persist directory.
    """
    # No explicit load call is made: constructing Chroma with a
    # persist_directory is relied upon to pick up an existing store
    # (NOTE(review): confirm against the installed langchain_chroma version).
    db_dir = config.CHROMA_DB_DIR
    return Chroma(
        embedding_function=embedding_model,
        persist_directory=db_dir,
    )


def get_retriever(vectorstore):
    """Wrap a vector store in a similarity-search retriever.

    Args:
        vectorstore: Object exposing ``as_retriever``.

    Returns:
        Whatever ``vectorstore.as_retriever`` returns, configured for
        ``"similarity"`` search with ``k`` set to ``config.RETRIEVER_K``.
    """
    search_options = {"k": config.RETRIEVER_K}
    return vectorstore.as_retriever(
        search_type="similarity",
        search_kwargs=search_options,
    )