# ChatPDF — HuggingFace Spaces app: upload a PDF and chat with its contents
# (Streamlit UI, LangChain + FAISS retrieval, OpenAI chat model).
# Standard library
import os
import subprocess  # noqa: F401 — unused here, kept in case deployment scripts rely on it

# Third-party
import streamlit as st
from dotenv import load_dotenv
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from llama_index.llama_pack import download_llama_pack

# NOTE(review): this downloads and installs the pack at import time — a
# module-level side effect that runs on every cold start. Kept for
# compatibility; consider guarding it behind an existence check.
EmbeddedTablesUnstructuredRetrieverPack = download_llama_pack(
    "EmbeddedTablesUnstructuredRetrieverPack", "./embedded_tables_unstructured_pack"
)
def read_pdf(file_path):
    """Load the PDF at *file_path* and return it split into LangChain documents.

    Args:
        file_path: Filesystem path to a PDF file.

    Returns:
        A list of LangChain ``Document`` objects (PyPDFLoader's default
        page-wise split; image extraction disabled).

    Raises:
        Whatever PyPDFLoader raises for a missing or unreadable file
        (the caller handles FileNotFoundError and generic errors).
    """
    # Dropped the debug print and the redundant `pdf_link` alias.
    loader = PyPDFLoader(file_path, extract_images=False)
    return loader.load_and_split()
# Populate os.environ (e.g. OPENAI_API_KEY) from a local .env file, if one exists.
load_dotenv()
# Main Streamlit app
def main():
    """Streamlit entry point: upload a PDF, index it once, then chat against it.

    Side effects: writes the uploaded PDF next to the CWD, stores the FAISS
    vectorstore and chat transcript in ``st.session_state``, and renders the
    chat UI.
    """
    archivo_pdf = st.file_uploader("Cargar archivo PDF", type=["pdf"])

    with st.sidebar:
        st.title('🤗💬 ChatPDF')
        st.markdown('''
        ## Instrucciones
        Cargar un archivo PDF.
        Esperar unos segundos y aparecerá la ventana de chat.
        Finalmente, comenzar a chatear con el PDF.
        ''')

    # Guard clause: nothing to do until a PDF has been uploaded.
    if archivo_pdf is None:
        return

    # Persist the upload to disk because PyPDFLoader needs a file path.
    file_path = os.path.join(os.getcwd(), archivo_pdf.name)
    with open(file_path, "wb") as f:
        f.write(archivo_pdf.getvalue())

    try:
        text = read_pdf(file_path)
        st.info("The content of the PDF is hidden. Type your query in the chat window.")
    except FileNotFoundError:
        st.error(f"No se encontró el archivo: {file_path}")
        return
    except Exception as e:
        st.error(f"Error durante la lectura del archivo: {e}")
        return

    # BUGFIX: the original re-split and re-embedded the PDF on EVERY Streamlit
    # rerun (i.e. every chat message), paying embedding API cost each time even
    # though it stored the result in session_state. Reuse the cached
    # vectorstore when the same file is still loaded.
    cached = st.session_state.get("processed_data")
    if cached and cached.get("source") == archivo_pdf.name:
        vectorstore = cached["vectorstore"]
    else:
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=20,
            length_function=len,
            add_start_index=True,
        )
        documents = text_splitter.split_documents(text)
        embeddings = OpenAIEmbeddings()
        vectorstore = FAISS.from_documents(documents, embedding=embeddings)
        st.session_state.processed_data = {
            "source": archivo_pdf.name,  # new key; lets us detect a file change
            "document_chunks": documents,
            "vectorstore": vectorstore,
        }

    # Load the Langchain chatbot
    llm = ChatOpenAI(temperature=0, max_tokens=1000, model_name="gpt-3.5-turbo")
    qa = ConversationalRetrievalChain.from_llm(llm, vectorstore.as_retriever())

    # Initialize Streamlit chat UI
    if "messages" not in st.session_state:
        st.session_state.messages = []

    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    if prompt := st.chat_input("Haz tus preguntas..."):
        # BUGFIX: ConversationalRetrievalChain expects chat_history as
        # (question, answer) pairs of *prior* turns. The original passed raw
        # (role, content) tuples and included the just-asked question, so the
        # chain saw a malformed history. Build the pairs BEFORE appending the
        # new user turn.
        msgs = st.session_state.messages
        chat_history = [
            (msgs[i]["content"], msgs[i + 1]["content"])
            for i in range(0, len(msgs) - 1, 2)
            if msgs[i]["role"] == "user" and msgs[i + 1]["role"] == "assistant"
        ]

        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)

        result = qa({"question": prompt, "chat_history": chat_history})

        with st.chat_message("assistant"):
            full_response = result["answer"]
            st.markdown(full_response)

        st.session_state.messages.append({"role": "assistant", "content": full_response})


if __name__ == "__main__":
    main()