# Building_a_RAG_ChatBot / 4.1_RAG_chroma_pdf_qa.py
# (HuggingFace page residue converted to comments: author avatar caption
#  "EddyGiusepe's picture", commit message "Corrigir ainda ...", commit
#  hash 0b866d1 — kept here so the file is valid Python.)
"""
Data Scientist.: Dr. Eddy Giusepe Chirinos Isidro
Link de estudo --> https://sophiamyang.medium.com/building-a-retrieval-augmented-generation-chatbot-d567a24fcd14
"""
import os
import tempfile
import panel as pn
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
import panel as pn
from panel.chat import ChatInterface
pn.extension("perspective")
def initialize_chain():
    """Build (or fetch from cache) a RetrievalQA chain for the uploaded PDF.

    Reads the module-level widgets (``key_input``, ``pdf_input``,
    ``k_slider``, ``chain_select``) and returns a LangChain ``RetrievalQA``
    chain that answers questions against the uploaded document and also
    returns the source chunks it used.
    """
    if key_input.value:
        os.environ["OPENAI_API_KEY"] = key_input.value

    # One chain per (pdf bytes, k, chain type): re-asking questions must not
    # re-embed the whole document.
    selections = (pdf_input.value, k_slider.value, chain_select.value)
    if selections in pn.state.cache:
        return pn.state.cache[selections]

    chat_input.placeholder = "Ask questions here!"

    # PyPDFLoader needs a file path, so persist the uploaded bytes to a
    # temporary file; delete it once the pages are loaded.
    with tempfile.NamedTemporaryFile("wb", delete=False) as f:
        f.write(pdf_input.value)
        file_name = f.name
    try:
        documents = PyPDFLoader(file_name).load()
    finally:
        os.remove(file_name)  # fix: temp file was previously leaked

    # Split the document into ~1000-char chunks for embedding.
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    texts = text_splitter.split_documents(documents)

    # Embed the chunks and index them in an in-memory Chroma vector store.
    embeddings = OpenAIEmbeddings()
    db = Chroma.from_documents(texts, embeddings)

    # Expose the index through a similarity-search retriever returning the
    # top-k chunks chosen in the sidebar.
    retriever = db.as_retriever(
        search_type="similarity", search_kwargs={"k": k_slider.value}
    )

    # Combine the LLM with the retriever; keep source documents so the chat
    # callback can show which pages the answer came from.
    qa = RetrievalQA.from_chain_type(
        llm=OpenAI(),
        chain_type=chain_select.value,
        retriever=retriever,
        return_source_documents=True,
        verbose=True,
    )
    # fix: the cache was checked above but never populated, so every call
    # rebuilt the embeddings and vector store from scratch.
    pn.state.cache[selections] = qa
    return qa
async def respond(contents, user, chat_interface):
    """Chat callback: answer *contents* against the uploaded PDF."""
    # Guard: nothing can be answered until a PDF has been uploaded.
    if not pdf_input.value:
        chat_interface.send(
            {"user": "System", "value": "Please first upload a PDF!"}, respond=False
        )
        return

    # First turn after the upload: switch from the file widget (index 0)
    # to the question widget (index 1) and greet the user.
    if chat_interface.active == 0:
        chat_interface.active = 1
        chat_interface.active_widget.placeholder = "Ask questions here!"
        yield {"user": "OpenAI", "value": "Let's chat about the PDF!"}
        return

    chain = initialize_chain()
    result = chain({"query": contents})

    # Show the answer first, then a divider, then the supporting chunks
    # in reverse retrieval order.
    panel = pn.Column(result["result"], pn.layout.Divider())
    for source in reversed(result["source_documents"]):
        panel.append(f"**Page {source.metadata['page']}**:")
        panel.append(f"```\n{source.page_content}\n```")
    yield {"user": "OpenAI", "value": panel}
# --- UI widgets ---------------------------------------------------------
# PDF uploader; .value holds the raw bytes of the uploaded file (falsy
# until a file is chosen).
pdf_input = pn.widgets.FileInput(accept=".pdf", value="", height=50)
# OpenAI API key, masked in the UI; copied into OPENAI_API_KEY by
# initialize_chain() when set.
key_input = pn.widgets.PasswordInput(
    name="OpenAI Key",
    placeholder="sk-...",
)
# Number of retrieved chunks (k) passed to the retriever per question.
k_slider = pn.widgets.IntSlider(
    name="Number of Relevant Chunks", start=1, end=5, step=1, value=2
)
# LangChain combine-documents strategy passed to RetrievalQA.from_chain_type.
chain_select = pn.widgets.RadioButtonGroup(
    name="Chain Type", options=["stuff", "map_reduce", "refine", "map_rerank"]
)
# Question input box; its placeholder is updated once a PDF is loaded.
chat_input = pn.widgets.TextInput(placeholder="First, upload a PDF!")

# Chat UI: widget 0 is the PDF uploader, widget 1 the question box;
# respond() is invoked for every user message.
chat_interface = pn.chat.ChatInterface(
    callback=respond, sizing_mode="stretch_width", widgets=[pdf_input, chat_input]
)
# Seed the conversation with an instruction; respond=False avoids
# triggering the callback for this system message.
chat_interface.send(
    {"user": "System", "value": "Please first upload a PDF and click send!"},
    respond=False,
)

# Page layout: controls in the sidebar, chat interface in the main area.
template = pn.template.BootstrapTemplate(
    sidebar=[key_input, k_slider, chain_select], main=[chat_interface]
)
template.servable()