Spaces:
Sleeping
Sleeping
File size: 4,391 Bytes
810f6e5 d8fa018 fee4b6c a26048f 658d39c 810f6e5 56f4ff9 435e1e6 56f4ff9 a26048f 837924a 435e1e6 a26048f d1ad982 293d55c 658d39c a26048f 810f6e5 5dcf9d3 a5930f5 810f6e5 5dcf9d3 810f6e5 5dcf9d3 e8f6569 8c8901e e8f6569 810f6e5 a5930f5 810f6e5 a5930f5 810f6e5 a5930f5 810f6e5 a5930f5 658d39c 810f6e5 5dcf9d3 810f6e5 5dcf9d3 810f6e5 7dc4e70 810f6e5 286044f 810f6e5 e8f6569 810f6e5 750e91f 810f6e5 7dc4e70 810f6e5 5dcf9d3 810f6e5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 | import os
import streamlit as st
# from PyPDF2 import PdfReader
from dotenv import load_dotenv
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.vectorstores import FAISS
from langchain.document_loaders import PyPDFLoader
from llama_index.llama_pack import download_llama_pack
# Download and install the llama pack at import time (network side effect:
# this runs on every module import, before the Streamlit app starts).
# NOTE(review): `EmbeddedTablesUnstructuredRetrieverPack` is never referenced
# anywhere else in this file — confirm whether this download is still needed.
EmbeddedTablesUnstructuredRetrieverPack = download_llama_pack(
    "EmbeddedTablesUnstructuredRetrieverPack", "./embedded_tables_unstructured_pack"
)
# import requests
import subprocess
def read_pdf(file_path):
    """Load a PDF from disk and split it into LangChain page documents.

    Args:
        file_path: Filesystem path of the PDF to load.

    Returns:
        The list of ``Document`` chunks produced by
        ``PyPDFLoader.load_and_split()``.

    Raises:
        Whatever the loader raises for a missing/corrupt file — the caller
        catches ``FileNotFoundError`` and generic ``Exception``.
    """
    print(f"Parámetros: file_path: {file_path}")
    # `extract_images=False`: text-only extraction, no OCR of embedded images.
    loader = PyPDFLoader(file_path, extract_images=False)
    return loader.load_and_split()
# Load environment variables from a local .env file at import time
# (presumably OPENAI_API_KEY, read implicitly by OpenAIEmbeddings /
# ChatOpenAI below — verify the deployment actually ships a .env).
load_dotenv()
# Main Streamlit app
def main():
    """Streamlit entry point: upload a PDF, index it once, then chat with it.

    Side effects: writes the uploaded PDF into the current working directory,
    and keeps the vectorstore, the name of the indexed file, and the chat
    history in ``st.session_state`` so the expensive parse/embed step runs
    only once per uploaded file instead of on every Streamlit rerun.
    """
    archivo_pdf = st.file_uploader("Cargar archivo PDF", type=["pdf"])
    with st.sidebar:
        st.title('🤗💬 ChatPDF')
        st.markdown('''
        ## Instrucciones
        Cargar un archivo PDF.
        Esperar unos segundos y aparecerá la ventana de chat.
        Finalmente, comenzar a chatear con el PDF.
        ''')

    # Guard clause: nothing to do until a file has been uploaded.
    if archivo_pdf is None:
        return

    # Persist the upload to disk because PyPDFLoader needs a file path.
    file_path = os.path.join(os.getcwd(), archivo_pdf.name)
    with open(file_path, "wb") as f:
        f.write(archivo_pdf.getvalue())

    # Streamlit reruns this whole script on every user interaction.  Without
    # this guard the PDF would be re-parsed and re-embedded (a paid OpenAI
    # API call) on every single chat message.  Index only when the uploaded
    # file changes; otherwise reuse the cached vectorstore.
    if st.session_state.get("processed_file") != archivo_pdf.name:
        try:
            text = read_pdf(file_path)
        except FileNotFoundError:
            st.error(f"No se encontró el archivo: {file_path}")
            return
        except Exception as e:
            st.error(f"Error durante la lectura del archivo: {e}")
            return

        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=20,
            length_function=len,
            add_start_index=True,
        )
        documents = text_splitter.split_documents(text)

        # Vectorize the document chunks and cache the resulting store.
        embeddings = OpenAIEmbeddings()
        vectorstore = FAISS.from_documents(documents, embedding=embeddings)
        st.session_state.processed_data = {
            "document_chunks": documents,
            "vectorstore": vectorstore,
        }
        st.session_state.processed_file = archivo_pdf.name
        # A new file starts a fresh conversation.
        st.session_state.messages = []
    else:
        vectorstore = st.session_state.processed_data["vectorstore"]

    st.info("The content of the PDF is hidden. Type your query in the chat window.")

    # The chain itself is cheap to build, so recreating it each rerun is fine.
    llm = ChatOpenAI(temperature=0, max_tokens=1000, model_name="gpt-3.5-turbo")
    qa = ConversationalRetrievalChain.from_llm(llm, vectorstore.as_retriever())

    # Replay the stored conversation so it survives Streamlit reruns.
    if "messages" not in st.session_state:
        st.session_state.messages = []
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    if prompt := st.chat_input("Haz tus preguntas..."):
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)

        # ConversationalRetrievalChain expects chat_history as
        # (human_message, ai_message) pairs, not (role, content) tuples,
        # and the current question must NOT be part of the history.
        # Stored messages alternate user/assistant; the last element is the
        # prompt just appended above, so pair up everything before it.
        past = st.session_state.messages[:-1]
        chat_history = [
            (past[i]["content"], past[i + 1]["content"])
            for i in range(0, len(past) - 1, 2)
        ]
        result = qa({"question": prompt, "chat_history": chat_history})

        with st.chat_message("assistant"):
            full_response = result["answer"]
            st.markdown(full_response)
        st.session_state.messages.append({"role": "assistant", "content": full_response})
# Script entry point: launch the Streamlit app when executed directly.
if __name__ == "__main__":
    main()