# chat-with-pdf / app.py
# (Hugging Face Spaces page header removed from the code; kept here as a
# comment so the module parses.)
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_community.vectorstores import Qdrant
from langchain_openai import OpenAIEmbeddings
from langchain.retrievers import MultiQueryRetriever
from langchain.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langchain.schema.runnable.config import RunnableConfig
from operator import itemgetter
from langchain_core.runnables import RunnablePassthrough
from langchain.schema import StrOutputParser
from dotenv import load_dotenv
import os
import chainlit as cl
load_dotenv(os.getenv("OPENAI_API_KEY"))
@cl.on_chat_start
async def init():
    """Chainlit session start: ingest an uploaded PDF and build a RAG chain.

    Waits for the user to upload a PDF, splits it into overlapping chunks,
    embeds the chunks into an in-memory Qdrant collection, and stores a
    retrieval-augmented QA chain on the user session under the key
    "runnable" for the on_message handler to use.
    """
    files = None
    # Block until the user actually uploads a file (AskFileMessage returns
    # None on timeout, so keep re-asking).
    while files is None:
        files = await cl.AskFileMessage(
            content="Please upload a PDF file to begin!",
            accept=["application/pdf"],
            max_size_mb=100,
            timeout=180,
        ).send()
    file = files[0]

    msg = cl.Message(content=f"Processing `{file.name}`...")
    await msg.send()

    # Load the PDF from the temporary path Chainlit saved the upload to.
    loader = PyMuPDFLoader(file.path)
    documents = loader.load()

    # 10% overlap between chunks so answers spanning a chunk boundary
    # remain retrievable.
    chunk_size = 2000
    chunk_overlap = int(0.1 * chunk_size)
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    documents = text_splitter.split_documents(documents)

    embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
    # In-memory vector store: the index lives only for this process.
    qdrant_vector_store = Qdrant.from_documents(
        documents,
        embeddings,
        location=":memory:",
        collection_name="generic-document-store",
    )
    retriever = qdrant_vector_store.as_retriever()

    primary_qa_llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0, streaming=True)
    # Wrap the base retriever so the LLM rephrases the user question into
    # multiple queries, improving recall.
    retriever = MultiQueryRetriever.from_llm(retriever=retriever, llm=primary_qa_llm)

    template = """Answer the question based only on the following context. If you cannot answer the question with the context, please respond with 'I don't know':
Context:
{context}
Question:
{question}
"""
    prompt = ChatPromptTemplate.from_template(template)

    runnable = (
        {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
        # NOTE(review): this assign() re-binds "context" to its own value and
        # looks redundant — the chain should behave identically without it;
        # kept as-is pending confirmation.
        | RunnablePassthrough.assign(context=itemgetter("context"))
        | prompt
        | primary_qa_llm
        | StrOutputParser()
    )
    cl.user_session.set("runnable", runnable)

    msg.content = f"`{file.name}` processed. You can now ask questions!"
    await msg.update()
@cl.on_message
async def main(message):
    """Answer one chat message by streaming the RAG chain's output.

    Pulls the chain built in init() from the user session, streams its
    tokens into a Chainlit message as they arrive, then finalizes the
    message.
    """
    runnable = cl.user_session.get("runnable")
    msg = cl.Message(content="")
    # Stream tokens to the UI as the LLM produces them; the callback handler
    # surfaces intermediate chain steps in the Chainlit UI.
    async for chunk in runnable.astream(
        {"question": message.content},
        config=RunnableConfig(callbacks=[cl.LangchainCallbackHandler()]),
    ):
        await msg.stream_token(chunk)
    await msg.send()