"""i2e Enterprise Chatbot — Streamlit front end over a FAISS-backed RAG pipeline.

Loads a local FAISS index, wraps it in a contextual-compression retriever
(re-split -> de-duplicate -> relevance-filter), and answers user questions with
a Groq-hosted LLM through a LangChain prompt | model | parser chain.
"""

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain.retrievers.document_compressors import DocumentCompressorPipeline
from langchain_community.document_transformers import EmbeddingsRedundantFilter
from langchain.retrievers.document_compressors import EmbeddingsFilter
from langchain.retrievers import ContextualCompressionRetriever
from langchain_groq import ChatGroq
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

import os
import re
import requests
import streamlit as st

# --- Embeddings --------------------------------------------------------------
# Sentence-transformers MiniLM on CPU; embeddings are NOT L2-normalized.
# (The original file built this object twice with identical settings; once
# is enough — the second construction was dead code.)
MODEL_PATH = "sentence-transformers/all-MiniLM-l6-v2"
embeddings = HuggingFaceEmbeddings(
    model_name=MODEL_PATH,
    model_kwargs={'device': 'cpu'},
    encode_kwargs={'normalize_embeddings': False},
)

# --- Retrieval ---------------------------------------------------------------
# allow_dangerous_deserialization: the index is a trusted local artifact
# produced by this project, not untrusted user input.
db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
retriever = db.as_retriever(search_kwargs={"k": 2})

# Compression pipeline: re-split retrieved docs into <=1000-char chunks, drop
# near-duplicate chunks, then keep only chunks embedding-similar to the query.
# EmbeddingsFilter records its score in Document.state['query_similarity_score'].
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
redundant_filter = EmbeddingsRedundantFilter(embeddings=embeddings)
relevant_filter = EmbeddingsFilter(embeddings=embeddings)
pipeline_compressor = DocumentCompressorPipeline(
    transformers=[text_splitter, redundant_filter, relevant_filter]
)
compression_retriever = ContextualCompressionRetriever(
    base_compressor=pipeline_compressor, base_retriever=retriever
)

# --- LLM & chain -------------------------------------------------------------
# SECURITY: the API keys previously hard-coded here were removed (committed
# secrets must be rotated). Supply the key via the GROQ_API_KEY env var.
chat = ChatGroq(
    temperature=0,
    groq_api_key=os.environ["GROQ_API_KEY"],
    model_name="llama3-70b-8192",
)

rag_template_str = ("""
Answer the following query based on the context given.
Stylization:
1)Do not include or reference quoted content verbatim in the answer. Don't say "According to context provided"
2)Include the source URLs
3)Include the Category it belongs to
Formatting:
1)Use bullet points
Restriction:
1)Only use context to answer the question
2)If you don't know the answer,reply with "No answer found, you can contact us on https://www.i2econsulting.com/contact-us/"
context: {context}
query:{query}
""")
rag_prompt = ChatPromptTemplate.from_template(rag_template_str)
rag_chain = rag_prompt | chat | StrOutputParser()

# --- UI ----------------------------------------------------------------------
st.title("i2e Enterprise Chatbot")
user_query = st.text_input("Ask Question")


def api_py_function(query):
    """Answer *query* from the compressed retrieval context.

    Retrieves compressed documents, keeps up to the first five whose
    query-similarity score exceeds 0.1, concatenates their page content and
    metadata into the prompt context, and invokes the RAG chain.

    Args:
        query: The user's question as a plain string.

    Returns:
        The LLM's answer string, or a fixed fallback message when no
        sufficiently relevant context was retrieved.
    """
    documents = compression_retriever.get_relevant_documents(query)
    # Keep content + metadata (metadata carries source URL / category, which
    # the prompt instructs the model to cite).
    relevant = [
        doc.page_content + str(doc.metadata)
        for doc in documents[:5]
        if doc.state['query_similarity_score'] > 0.1
    ]
    if not relevant:
        return ("No answer found, Please rephrase your question or you can "
                "contact us on https://www.i2econsulting.com/contact-us/")
    return rag_chain.invoke({"query": query, "context": ''.join(relevant)})


if user_query:
    print("processing request")  # server-side trace of incoming requests
    st.write(api_py_function(user_query))