Spaces:
Sleeping
Sleeping
| from langchain_text_splitters import RecursiveCharacterTextSplitter | |
| from langchain_core.vectorstores import InMemoryVectorStore | |
| from langchain_community.docstore.document import Document | |
| from langchain_huggingface import HuggingFaceEmbeddings | |
| from src.utils.exceptions import CustomException | |
| from src.utils.functions import getConfig | |
| from src.utils.logging import logger | |
class VectorStore:
    """Turn raw text into a retriever backed by an in-memory vector store.

    The embedding model, the text splitter and the retriever are all
    configured from values read out of ``config.ini``.
    """

    def __init__(self):
        """Load the configuration, then build the embedding model and splitter."""
        self.config = getConfig(path = "config.ini")
        cfg = self.config

        # [EMBEDDINGS] section: which model to load, where to run it, and
        # whether the produced vectors are normalised.
        modelName = cfg.get("EMBEDDINGS", "embeddingModel")
        modelOptions = {"device": cfg.get("EMBEDDINGS", "device")}
        encodeOptions = {
            "normalize_embeddings": cfg.getboolean("EMBEDDINGS", "normalize_embeddings")
        }
        self.vectorEmbeddings = HuggingFaceEmbeddings(
            model_name = modelName,
            model_kwargs = modelOptions,
            encode_kwargs = encodeOptions
        )

        # [VECTORSTORE] section: chunking parameters for the splitter.
        chunkSize = cfg.getint("VECTORSTORE", "chunkSize")
        chunkOverlap = cfg.getint("VECTORSTORE", "chunkOverlap")
        addStartIndex = cfg.getboolean("VECTORSTORE", "addStartIndex")
        self.splitter = RecursiveCharacterTextSplitter(
            chunk_size = chunkSize,
            chunk_overlap = chunkOverlap,
            add_start_index = addStartIndex
        )

    def setupStore(self, text: str):
        """Index ``text`` in a fresh in-memory store and return a retriever.

        The text is wrapped in a single ``Document``, split into chunks with
        ``self.splitter`` and added to a new ``InMemoryVectorStore`` that
        embeds with ``self.vectorEmbeddings``.

        Args:
            text: The raw text to index.

        Returns:
            The retriever produced by ``as_retriever`` using the [RETRIEVER]
            settings, or ``None`` when any step raised an exception.
        """
        try:
            vectorStore = InMemoryVectorStore(self.vectorEmbeddings)
            chunks = self.splitter.split_documents([Document(page_content = text)])
            vectorStore.add_documents(documents = chunks)

            # Retriever settings are read only after indexing succeeded.
            searchType = self.config.get("RETRIEVER", "searchType")
            searchOptions = {
                "k": self.config.getint("RETRIEVER", "k"),
                "fetch_k": self.config.getint("RETRIEVER", "fetchK"),
            }
            return vectorStore.as_retriever(
                search_type = searchType,
                search_kwargs = searchOptions
            )
        except Exception as e:
            # Best-effort: the failure is reported on stdout and in the log,
            # and the caller receives None instead of an exception.
            print(CustomException(e))
            logger.error(CustomException(e))
            return None