Spaces:
Sleeping
Sleeping
| # Imports | |
| from langchain.document_loaders import PyPDFLoader | |
| import os | |
| from langchain.chains import RetrievalQA, ConversationalRetrievalChain | |
| from langchain.indexes import VectorstoreIndexCreator | |
| from langchain.text_splitter import CharacterTextSplitter, TokenTextSplitter | |
| from langchain.embeddings import OpenAIEmbeddings, HuggingFaceEmbeddings | |
| from langchain.vectorstores import Chroma | |
| from langchain import HuggingFacePipeline | |
| from langchain.chat_models import ChatOpenAI | |
| from dotenv import load_dotenv | |
| from langchain.memory import ConversationBufferMemory, ConversationTokenBufferMemory | |
| import gradio as gr | |
def process_key(api_key):
    """Store the user-supplied OpenAI API key in the process environment.

    LangChain's OpenAI clients read ``OPENAI_API_KEY`` from the environment,
    so exporting it here is enough for every later call.
    """
    os.environ['OPENAI_API_KEY'] = api_key
def load_pdf(file):
    """Load a PDF, embed it, and build the conversational QA chain.

    Parameters
    ----------
    file : a Gradio file object exposing ``.name`` (path to the uploaded PDF).

    Side effects
    ------------
    Sets the module-level globals ``vectorstore``, ``memory`` and ``qa``
    that ``answer_question``/``bot`` rely on.

    Returns
    -------
    str
        ``'Done'`` once indexing has finished (shown in the status textbox).
    """
    print(file.name)  # lightweight progress log in the server console
    loader = PyPDFLoader(file.name)
    documents = loader.load()
    # Split the text into chunks of 1024 characters with a 64-character overlap.
    text_splitter = CharacterTextSplitter(chunk_size=1024, chunk_overlap=64)
    texts = text_splitter.split_documents(documents)
    # HuggingFace embeddings are free to compute locally, unlike OpenAI's.
    embeddings = HuggingFaceEmbeddings()
    # Language model used both for answering and for memory token counting.
    llm = ChatOpenAI(model='gpt-3.5-turbo', temperature=0.0, max_tokens=1000)
    # Vector database over the embedded chunks.
    global vectorstore
    vectorstore = Chroma.from_documents(texts, embeddings)
    # Memory setup is fiddly: the keys below must match the chain's
    # input/output names ('question'/'answer') or the chain raises at runtime.
    global memory
    memory = ConversationTokenBufferMemory(llm=llm,
                                           memory_key="chat_history",
                                           input_key='question',
                                           output_key='answer',
                                           max_token_limit=1000,
                                           return_messages=False)
    # Conversational retrieval QA chain.
    global qa
    qa = ConversationalRetrievalChain.from_llm(
        llm,
        vectorstore.as_retriever(search_kwargs={'k': 3}),  # retrieve 3 chunks per query
        return_source_documents=True,
        verbose=True,
        chain_type='stuff',
        memory=memory,
        max_tokens_limit=2500,
        get_chat_history=lambda h: h)
    return 'Done'
def answer_question(question):
    """Run the QA chain on *question* built by ``load_pdf``.

    Returns a tuple ``(answer, pages)`` where ``pages`` lists the PDF page
    number of each retrieved source chunk.
    """
    result = qa(inputs={'question': question})
    # enumerate() was unnecessary here: only the documents are used.
    pages = [doc.metadata['page'] for doc in result['source_documents']]
    return result['answer'], pages
def bot(history):
    """Answer the newest user turn and write the reply into the chat history.

    ``history`` is a list of ``[user_text, bot_text]`` pairs; the last entry
    has ``bot_text`` still unset (``None``) when this runs.
    """
    question = history[-1][0]
    previous_turns = history[:-1]
    result = qa({'question': question, 'chat_history': previous_turns})
    history[-1][1] = result['answer']
    return history
def add_text(history, text):
    """Append the user's message (with no bot reply yet) to the chat history.

    Returns the extended history plus an empty string that clears the
    input textbox in the UI.
    """
    return history + [(text, None)], ""
# TODO(review): check how equations in the PDF are parsed/rendered.
# Top-level Gradio UI: one tab to upload/index a PDF, one tab to chat over it.
# NOTE(review): the `.style(...)` calls and float `scale` values are tied to
# the (older) gradio version this app was built against — confirm before upgrading.
with gr.Blocks() as demo:
    with gr.Tab(label='Load PDF'):
        with gr.Row():
            with gr.Column():
                # UI text stays in Spanish on purpose (user-facing string).
                open_ai_key = gr.Textbox(label='Ingresa tu api key de Open AI', type='password')
        with gr.Row():
            with gr.Column(scale=0.4):
                api_key_button = gr.Button('Enviar', variant='primary')
        with gr.Row():
            pdf_file = gr.File(label='PDF file')
            # Status textbox used to show that the embedding computation ran.
            emb = gr.Textbox(label='Calculo de Embeddings, por favor espere...')
            # send_pdf = gr.Button(label='Load PDF').style(full_width=False)
        with gr.Row():
            with gr.Column(scale=0.50):
                send_pdf = gr.Button(label='Load PDF')
                # Indexing the PDF writes its 'Done' status into `emb`.
                send_pdf.click(load_pdf, pdf_file, emb)
    with gr.Tab(label='Galicia QA Demo'):
        chatbot = gr.Chatbot([],
                             elem_id="chatbot",
                             label='Document GPT').style(height=500)
        with gr.Row():
            with gr.Column(scale=0.80):
                txt = gr.Textbox(
                    show_label=False,
                    placeholder="Enter text and press enter",
                ).style(container=False)
            with gr.Column(scale=0.10):
                submit_btn = gr.Button(
                    'Submit',
                    variant='primary'
                )
            with gr.Column(scale=0.10):
                clear_btn = gr.Button(
                    'Clear',
                    variant='stop'
                )
        # Pressing Enter in the textbox and clicking Submit trigger the same
        # two-step flow: first append the user text, then generate the answer.
        txt.submit(fn=add_text, inputs=[chatbot, txt], outputs=[chatbot, txt]
                   ).then(fn=bot, inputs=chatbot, outputs=chatbot)
        submit_btn.click(fn=add_text, inputs=[chatbot, txt], outputs=[chatbot, txt]
                         ).then(fn=bot, inputs=chatbot, outputs=chatbot)
        # Clear wipes the chatbot display (history only, not the vector store).
        clear_btn.click(lambda: None, None, chatbot, queue=False)
    # Export the API key to the environment when the user sends it.
    api_key_button.click(fn=process_key, inputs=[open_ai_key], outputs=None)

demo.launch(inline=False)