Spaces:
Sleeping
Sleeping
| from langchain_text_splitters import RecursiveCharacterTextSplitter | |
| from langchain_core.vectorstores import InMemoryVectorStore | |
| from langchain_community.docstore.document import Document | |
| from langchain_huggingface import HuggingFaceEmbeddings | |
| from src.utils.exceptions import CustomException | |
| from src.utils.functions import getConfig | |
| from src.utils.logging import logger | |
class VectorStore:
    """Turn raw text into a retriever backed by an in-memory vector store.

    All tunables are read from ``config.ini`` through ``getConfig``; the
    returned object is presumably ConfigParser-like, since only ``get``,
    ``getint`` and ``getboolean`` are called on it (verify against
    ``src.utils.functions``).

    Instance attributes set by ``__init__``:
        config: The configuration object returned by ``getConfig``.
        vectorEmbeddings: ``HuggingFaceEmbeddings`` built from the
            ``[EMBEDDINGS]`` section.
        splitter: ``RecursiveCharacterTextSplitter`` built from the
            ``[VECTORSTORE]`` section.
    """

    def __init__(self):
        """Load the configuration, then build the embedder and the splitter."""
        self.config = getConfig(path="config.ini")
        settings = self.config

        # Embedding model, target device and normalization flag all come
        # from the [EMBEDDINGS] section.
        embeddingOptions = {
            "model_name": settings.get("EMBEDDINGS", "embeddingModel"),
            "model_kwargs": {"device": settings.get("EMBEDDINGS", "device")},
            "encode_kwargs": {
                "normalize_embeddings": settings.getboolean(
                    "EMBEDDINGS", "normalize_embeddings"
                ),
            },
        }
        self.vectorEmbeddings = HuggingFaceEmbeddings(**embeddingOptions)

        # Chunking parameters come from the [VECTORSTORE] section.
        splitterOptions = {
            "chunk_size": settings.getint("VECTORSTORE", "chunkSize"),
            "chunk_overlap": settings.getint("VECTORSTORE", "chunkOverlap"),
            "add_start_index": settings.getboolean("VECTORSTORE", "addStartIndex"),
        }
        self.splitter = RecursiveCharacterTextSplitter(**splitterOptions)

    def setupStore(self, text: str):
        """
        Index ``text`` in a fresh in-memory store and expose it as a retriever.

        The text is wrapped in a single ``Document``, split into chunks by
        ``self.splitter``, and added to a new ``InMemoryVectorStore`` that
        uses ``self.vectorEmbeddings``. A new store is created on every call.

        Args:
            text (str): The text to chunk, embed and index.

        Returns:
            The retriever produced by ``store.as_retriever`` using the search
            type, ``k`` and ``fetch_k`` from the ``[RETRIEVER]`` section, or
            ``None`` if any step raised (the error is logged and printed,
            not propagated).
        """
        try:
            memoryStore = InMemoryVectorStore(self.vectorEmbeddings)
            chunks = self.splitter.split_documents([Document(page_content=text)])
            memoryStore.add_documents(documents=chunks)

            # Retrieval behaviour is configured in the [RETRIEVER] section.
            searchType = self.config.get("RETRIEVER", "searchType")
            searchOptions = {
                "k": self.config.getint("RETRIEVER", "k"),
                "fetch_k": self.config.getint("RETRIEVER", "fetchK"),
            }
            return memoryStore.as_retriever(
                search_type=searchType,
                search_kwargs=searchOptions,
            )
        except Exception as err:
            # Best-effort contract: report the failure on both the logger and
            # stdout, then hand back None instead of raising to the caller.
            logger.error(CustomException(err))
            print(CustomException(err))
            return None