from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from huggingface_hub import snapshot_download
from typing import Optional
import logging

from termcolor import cprint

log = logging.getLogger(__name__)


class VectorStore:
    """Retrieval helper around a FAISS vectorstore with HuggingFace embeddings.

    Loads a persisted FAISS index (from a local path or a HuggingFace Hub
    repo), retrieves the top-k most similar documents for a query, and
    formats them into a single prompt-ready context string.
    """

    def __init__(
        self,
        embeddings_model: str,
        vs_local_path: Optional[str] = None,
        vs_hf_path: Optional[str] = None,
        # Retrieval parameters
        number_of_contexts: int = 2,
        embedding_score_threshold: Optional[float] = None,
        # Context formatting parameters
        context_fmt: str = "Context document {num_document}:\n{document_content}",
        join_str: str = "\n\n",
        header_context_str: str = "",
        footer_context_str: str = "",
        no_context_str: str = "Answer 'no relevant context found'.",
    ):
        """Initializes the VectorStore and loads the FAISS index.

        Arguments:
        ----------
        embeddings_model : str
            Name of the HuggingFace embeddings model to use.
        vs_local_path : str, optional
            Local path to the vectorstore. Defaults to None.
        vs_hf_path : str, optional
            HuggingFace Hub repo id of the vectorstore. Takes precedence over
            `vs_local_path` when both are given. Defaults to None.
        number_of_contexts : int, optional
            Number of top similar contexts to retrieve. Defaults to 2.
        embedding_score_threshold : float, optional
            Minimum similarity score for retrieved documents. Defaults to None
            (no threshold).
        context_fmt : str, optional
            Template used to format each retrieved document. May use only
            {document_content}, or both {num_document} and {document_content}.
            Defaults to "Context document {num_document}:\n{document_content}".
        join_str : str, optional
            String used to join multiple formatted documents. Defaults to "\n\n".
        header_context_str : str, optional
            String prepended to the final context. Defaults to "".
        footer_context_str : str, optional
            String appended to the final context. Defaults to "".
        no_context_str : str, optional
            String returned when no documents are retrieved. Defaults to
            "Answer 'no relevant context found'.".

        Raises:
        -------
        ValueError
            If neither `vs_local_path` nor `vs_hf_path` is provided.
        """
        log.info("Loading vectorstore...")

        # Retrieval parameters
        self.number_of_contexts = number_of_contexts
        self.embedding_score_threshold = embedding_score_threshold

        # Context formatting parameters
        self.context_fmt = context_fmt
        self.join_str = join_str
        self.header_context_str = header_context_str
        self.footer_context_str = footer_context_str
        self.no_context_str = no_context_str

        # Fail fast with a clear message instead of letting FAISS.load_local
        # choke on a None path.
        if not vs_hf_path and not vs_local_path:
            raise ValueError("Either vs_local_path or vs_hf_path must be provided.")

        embeddings = HuggingFaceEmbeddings(model_name=embeddings_model)
        log.info(f"Loaded embeddings model: {embeddings_model}")

        if vs_hf_path:
            # Download the serialized index from the Hub, then load it locally.
            hf_vectorstore = snapshot_download(repo_id=vs_hf_path)
            self.vdb = FAISS.load_local(hf_vectorstore, embeddings, allow_dangerous_deserialization=True)
            log.info(f"Loaded vectorstore from {vs_hf_path}")
        else:
            self.vdb = FAISS.load_local(vs_local_path, embeddings, allow_dangerous_deserialization=True)
            log.info(f"Loaded vectorstore from {vs_local_path}")

    def get_context(self, query: str) -> str:
        """Retrieve the top-k documents for `query` and return them as one formatted string."""
        results = self.vdb.similarity_search_with_relevance_scores(
            query=query,
            k=self.number_of_contexts,
            score_threshold=self.embedding_score_threshold,
        )
        log.info(f"Retrieved {len(results)} documents from the vectorstore.")
        return self._beautiful_context(results)

    def _beautiful_context(self, docs) -> str:
        """Format a list of (document, score) pairs into a single context string.

        Returns `no_context_str` when `docs` is empty; otherwise each document
        is rendered with `context_fmt`, joined with `join_str`, and wrapped in
        `header_context_str` / `footer_context_str`.
        """
        log.info(f"Formatting {len(docs)} contexts...")

        if not docs:
            return self.no_context_str

        contexts = []
        for i, (doc, score) in enumerate(docs):
            log.info(f"Document {i+1} (score: {score:.4f}): {repr(doc.page_content[:100])}...")
            # Format each context document using the provided template
            contexts.append(self.context_fmt.format(num_document=i + 1, document_content=doc.page_content))

        # Join all contexts into a single string and add header and footer
        return self.header_context_str + self.join_str.join(contexts) + self.footer_context_str