# chat-with-pdf / app.py
# (Hugging Face Spaces page header removed from the code; kept here as a
# comment so the module parses.)
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_community.vectorstores import Qdrant
from langchain_openai import OpenAIEmbeddings
from langchain.retrievers import MultiQueryRetriever
from langchain.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langchain.schema.runnable.config import RunnableConfig
from operator import itemgetter
from langchain_core.runnables import RunnablePassthrough
from langchain.schema import StrOutputParser
from dotenv import load_dotenv
import os
import chainlit as cl
load_dotenv(os.getenv("OPENAI_API_KEY"))
@cl.on_chat_start
async def init():
    """Chainlit session start: ingest an uploaded PDF and build a RAG chain.

    Waits for the user to upload a PDF, splits it into overlapping chunks,
    embeds the chunks into an in-memory Qdrant collection, and stores a
    retrieval-augmented QA chain on the user session under the key
    "runnable" for the on_message handler to use.
    """
    files = None
    # Block until the user actually uploads a file (AskFileMessage returns
    # None on timeout, so keep re-asking).
    while files is None:
        files = await cl.AskFileMessage(
            content="Please upload a PDF file to begin!",
            accept=["application/pdf"],
            max_size_mb=100,
            timeout=180,
        ).send()
    file = files[0]

    msg = cl.Message(content=f"Processing `{file.name}`...")
    await msg.send()

    # Load the PDF from the temporary path Chainlit saved the upload to.
    loader = PyMuPDFLoader(file.path)
    documents = loader.load()

    # 10% overlap between chunks so answers spanning a chunk boundary
    # remain retrievable.
    chunk_size = 2000
    chunk_overlap = int(0.1 * chunk_size)
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    documents = text_splitter.split_documents(documents)

    embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
    # In-memory vector store: the index lives only for this process.
    qdrant_vector_store = Qdrant.from_documents(
        documents,
        embeddings,
        location=":memory:",
        collection_name="generic-document-store",
    )
    retriever = qdrant_vector_store.as_retriever()

    primary_qa_llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0, streaming=True)
    # Wrap the base retriever so the LLM rephrases the user question into
    # multiple queries, improving recall.
    retriever = MultiQueryRetriever.from_llm(retriever=retriever, llm=primary_qa_llm)

    template = """Answer the question based only on the following context. If you cannot answer the question with the context, please respond with 'I don't know':
Context:
{context}
Question:
{question}
"""
    prompt = ChatPromptTemplate.from_template(template)

    runnable = (
        {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
        # NOTE(review): this assign() re-binds "context" to its own value and
        # looks redundant — the chain should behave identically without it;
        # kept as-is pending confirmation.
        | RunnablePassthrough.assign(context=itemgetter("context"))
        | prompt
        | primary_qa_llm
        | StrOutputParser()
    )
    cl.user_session.set("runnable", runnable)

    msg.content = f"`{file.name}` processed. You can now ask questions!"
    await msg.update()
@cl.on_message
async def main(message):
    """Answer one chat message by streaming the RAG chain's output.

    Pulls the chain built in init() from the user session, streams its
    tokens into a Chainlit message as they arrive, then finalizes the
    message.
    """
    runnable = cl.user_session.get("runnable")
    msg = cl.Message(content="")
    # Stream tokens to the UI as the LLM produces them; the callback handler
    # surfaces intermediate chain steps in the Chainlit UI.
    async for chunk in runnable.astream(
        {"question": message.content},
        config=RunnableConfig(callbacks=[cl.LangchainCallbackHandler()]),
    ):
        await msg.stream_token(chunk)
    await msg.send()