Spaces:
Runtime error
Runtime error
| from langchain_community.vectorstores import FAISS | |
| # from langchain_community.embeddings import HuggingFaceEmbeddings | |
| from langchain_huggingface import HuggingFaceEmbeddings | |
| from huggingface_hub import snapshot_download | |
| import logging | |
| log = logging.getLogger(__name__) | |
| from termcolor import cprint | |
class VectorStore:
    """FAISS-backed retriever that turns a query into a formatted context string.

    The FAISS index is loaded once in ``__init__`` (from a HuggingFace Hub
    repository or from a local path). ``get_context`` then runs a similarity
    search and formats the hits with the configured templates.
    """

    def __init__(self,
                 embeddings_model: str,
                 vs_local_path: str = None,
                 vs_hf_path: str = None,
                 # Retrieval parameters
                 number_of_contexts: int = 2,
                 embedding_score_threshold: float = None,
                 # Context formatting parameters
                 context_fmt: str = "Context document {num_document}:\n{document_content}",
                 join_str: str = "\n\n",
                 header_context_str: str = "",
                 footer_context_str: str = "",
                 no_context_str: str = "Answer 'no relevant context found'.",
                 ):
        """Store the retrieval/formatting settings and load the FAISS vectorstore.

        Arguments:
        ----------
        embeddings_model : str
            The name of the HuggingFace embeddings model to use.
        vs_local_path : str, optional
            Local path to the vectorstore. Only used when ``vs_hf_path`` is
            empty or None. Defaults to None.
        vs_hf_path : str, optional
            HuggingFace Hub repository id of the vectorstore. When given, it
            takes precedence over ``vs_local_path``. Defaults to None.
        number_of_contexts : int, optional
            Number of top similar contexts to retrieve. Defaults to 2.
        embedding_score_threshold : float, optional
            Value forwarded as ``score_threshold`` to the similarity search.
            Defaults to None.
        context_fmt : str, optional
            Template to format each retrieved document.
            Use only {document_content} or both {num_document} and
            {document_content} placeholders.
            Defaults to "Context document {num_document}:\\n{document_content}".
        join_str : str, optional
            String to join multiple retrieved documents. Defaults to "\\n\\n".
        header_context_str : str, optional
            String to prepend to the final context. Defaults to "".
        footer_context_str : str, optional
            String to append to the final context. Defaults to "".
        no_context_str : str, optional
            String to return if no documents are retrieved.
            Defaults to "Answer 'no relevant context found'.".

        Raises:
        -------
        ValueError
            If neither ``vs_hf_path`` nor ``vs_local_path`` is provided.
        """
        # Fail fast, before the (potentially slow) embeddings model is loaded,
        # instead of letting FAISS.load_local(None, ...) fail obscurely later.
        if not vs_hf_path and vs_local_path is None:
            raise ValueError(
                "Either 'vs_hf_path' or 'vs_local_path' must be provided to load the vectorstore."
            )

        log.info("Loading vectorstore...")

        # Retrieval parameters
        self.number_of_contexts = number_of_contexts
        self.embedding_score_threshold = embedding_score_threshold

        # Context formatting parameters
        self.context_fmt = context_fmt
        self.join_str = join_str
        self.header_context_str = header_context_str
        self.footer_context_str = footer_context_str
        self.no_context_str = no_context_str

        embeddings = HuggingFaceEmbeddings(model_name=embeddings_model)
        log.info("Loaded embeddings model: %s", embeddings_model)

        # NOTE(security): allow_dangerous_deserialization=True lets load_local
        # unpickle data shipped with the index. Only point vs_hf_path /
        # vs_local_path at sources you trust.
        if vs_hf_path:
            # Download the Hub repository and load the index from the snapshot
            # directory returned by snapshot_download.
            hf_vectorstore = snapshot_download(repo_id=vs_hf_path)
            self.vdb = FAISS.load_local(hf_vectorstore, embeddings, allow_dangerous_deserialization=True)
            log.info("Loaded vectorstore from %s", vs_hf_path)
        else:
            self.vdb = FAISS.load_local(vs_local_path, embeddings, allow_dangerous_deserialization=True)
            log.info("Loaded vectorstore from %s", vs_local_path)

    def get_context(self, query: str) -> str:
        """Retrieve the most similar documents for ``query`` and format them.

        Runs ``similarity_search_with_relevance_scores`` on the loaded
        vectorstore with ``k=number_of_contexts`` and
        ``score_threshold=embedding_score_threshold``, then passes the results
        to ``_beautiful_context``.

        Returns the formatted context string, or ``no_context_str`` when the
        search yields no results.
        """
        # Retrieve documents
        results = self.vdb.similarity_search_with_relevance_scores(
            query=query,
            k=self.number_of_contexts,
            score_threshold=self.embedding_score_threshold,
        )
        log.info("Retrieved %d documents from the vectorstore.", len(results))

        # Return formatted context
        return self._beautiful_context(results)

    def _beautiful_context(self, docs) -> str:
        """Format ``(document, score)`` pairs into a single context string.

        Each document's ``page_content`` is rendered with ``context_fmt``
        (numbered from 1 in the order given), the pieces are joined with
        ``join_str``, and the result is wrapped in ``header_context_str`` /
        ``footer_context_str``. Returns ``no_context_str`` if ``docs`` is empty.
        """
        log.info("Formatting %d contexts...", len(docs))

        # If no documents are retrieved, return the no_context_str
        if not docs:
            return self.no_context_str

        contexts = []
        for num_document, (doc, score) in enumerate(docs, start=1):
            # Lazy %-args: the preview repr is only built if INFO is enabled.
            log.info("Document %d (score: %.4f): %r...", num_document, score, doc.page_content[:100])
            # Format each context document using the provided template
            contexts.append(
                self.context_fmt.format(num_document=num_document, document_content=doc.page_content)
            )

        # Join all contexts into a single string and add header and footer
        return self.header_context_str + self.join_str.join(contexts) + self.footer_context_str