"""Streamlit ChatPDF app: upload a PDF and chat with its contents.

Pipeline: PyPDFLoader -> RecursiveCharacterTextSplitter -> OpenAI
embeddings -> FAISS vector store -> ConversationalRetrievalChain
backed by gpt-3.5-turbo.
"""

import os
import subprocess  # NOTE(review): unused below — confirm before removing.

import streamlit as st
from dotenv import load_dotenv
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from llama_index.llama_pack import download_llama_pack

# Download and install the retriever pack's dependencies at import time.
# NOTE(review): the pack class itself is never used below — presumably only
# the download side effect is wanted; confirm before removing.
EmbeddedTablesUnstructuredRetrieverPack = download_llama_pack(
    "EmbeddedTablesUnstructuredRetrieverPack",
    "./embedded_tables_unstructured_pack",
)

# Load OPENAI_API_KEY (and any other settings) from a local .env file.
load_dotenv()


def read_pdf(file_path):
    """Load the PDF at *file_path* and return a list of per-page Documents.

    Uses PyPDFLoader.load_and_split(), so each Document carries page text
    plus metadata; image extraction is disabled.

    Raises:
        FileNotFoundError: if *file_path* does not exist.
    """
    loader = PyPDFLoader(file_path, extract_images=False)
    return loader.load_and_split()


def _build_chat_history(messages):
    """Pair consecutive user/assistant messages into (question, answer) tuples.

    ConversationalRetrievalChain expects chat_history as (human, ai) pairs;
    unanswered trailing user messages are dropped.
    """
    history = []
    pending_question = None
    for message in messages:
        if message["role"] == "user":
            pending_question = message["content"]
        elif pending_question is not None:
            history.append((pending_question, message["content"]))
            pending_question = None
    return history


def main():
    """Render the Streamlit UI: PDF upload, indexing, and the chat loop."""
    archivo_pdf = st.file_uploader("Cargar archivo PDF", type=["pdf"])

    with st.sidebar:
        st.title('🤗💬 ChatPDF')
        st.markdown('''
        ## Instrucciones
        Cargar un archivo PDF.
        Esperar unos segundos y aparecerá la ventana de chat.
        Finalmente, comenzar a chatear con el PDF.
        ''')

    if archivo_pdf is None:
        return

    # Persist the upload to the working directory so PyPDFLoader can read it
    # from a real filesystem path.
    file_path = os.path.join(os.getcwd(), archivo_pdf.name)
    with open(file_path, "wb") as f:
        f.write(archivo_pdf.getvalue())

    try:
        docs = read_pdf(file_path)
        # FIX: this message was a single literal broken across a raw newline
        # in the original source (a syntax error).
        st.info("The content of the PDF is hidden. Type your query in the chat window.")
    except FileNotFoundError:
        st.error(f"No se encontró el archivo: {file_path}")
        return
    except Exception as e:
        # Surface any other parsing failure to the user instead of crashing.
        st.error(f"Error durante la lectura del archivo: {e}")
        return

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=20,
        length_function=len,
        add_start_index=True,
    )
    documents = splitter.split_documents(docs)

    # Embed the chunks and build the FAISS index; keep both in session state
    # so reruns can reuse them.
    embeddings = OpenAIEmbeddings()
    vectorstore = FAISS.from_documents(documents, embedding=embeddings)
    st.session_state.processed_data = {
        "document_chunks": documents,
        "vectorstore": vectorstore,
    }

    llm = ChatOpenAI(temperature=0, max_tokens=1000, model_name="gpt-3.5-turbo")
    qa = ConversationalRetrievalChain.from_llm(llm, vectorstore.as_retriever())

    # Initialize and replay the chat transcript.
    if "messages" not in st.session_state:
        st.session_state.messages = []

    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    if prompt := st.chat_input("Haz tus preguntas..."):
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)

        # BUG FIX: the chain expects (human, ai) answer pairs, not
        # (role, content) tuples, and the current question must not be
        # duplicated into the history — hence messages[:-1].
        chat_history = _build_chat_history(st.session_state.messages[:-1])
        result = qa({"question": prompt, "chat_history": chat_history})

        with st.chat_message("assistant"):
            placeholder = st.empty()
            full_response = result["answer"]
            # Brief cursor effect, then the final answer.
            placeholder.markdown(full_response + "|")
            placeholder.markdown(full_response)

        st.session_state.messages.append(
            {"role": "assistant", "content": full_response}
        )


if __name__ == "__main__":
    main()