Spaces:
Sleeping
Sleeping
import os
from operator import itemgetter

import chainlit as cl
from dotenv import load_dotenv
from langchain.prompts import ChatPromptTemplate
from langchain.retrievers import MultiQueryRetriever
from langchain.schema import StrOutputParser
from langchain.schema.runnable.config import RunnableConfig
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_community.vectorstores import Qdrant
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
# Load environment variables (OPENAI_API_KEY, etc.) from a local .env file.
# BUG FIX: the original called load_dotenv(os.getenv("OPENAI_API_KEY")) —
# load_dotenv()'s first parameter is the *path* to the .env file, not a key
# value, so passing the API key there was meaningless. With no argument,
# python-dotenv searches for a .env file from the current directory upward.
load_dotenv()
# NOTE(review): the decorator was lost in the copy/paste mangling — Chainlit
# only runs this coroutine if it is registered as the chat-start hook, and the
# cl.AskFileMessage / cl.user_session usage shows that is what it is.
@cl.on_chat_start
async def init():
    """Set up a per-session RAG pipeline from a user-uploaded PDF.

    Flow: ask the user for a PDF, load and chunk it, embed the chunks into an
    in-memory Qdrant collection, wrap the retriever in a MultiQueryRetriever,
    and store the assembled LCEL chain in the Chainlit user session under the
    key "runnable" (consumed by the on-message handler).
    """
    files = None
    # Block until the user uploads a PDF (AskFileMessage returns None on
    # timeout, so we re-prompt in a loop).
    while files is None:
        files = await cl.AskFileMessage(
            content="Please upload a PDF file to begin!",
            accept=["application/pdf"],
            max_size_mb=100,
            timeout=180,
        ).send()
    file = files[0]

    msg = cl.Message(content=f"Processing `{file.name}`...")
    await msg.send()

    # Load the PDF from the temp path Chainlit saved it to.
    loader = PyMuPDFLoader(file.path)
    documents = loader.load()

    # Chunk with 10% overlap so context is not cut mid-sentence at boundaries.
    chunk_size = 2000
    chunk_overlap = int(0.1 * chunk_size)
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    documents = text_splitter.split_documents(documents)

    # Embed into an in-memory Qdrant store — vectors live only for this
    # process; nothing is persisted between sessions.
    embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
    qdrant_vector_store = Qdrant.from_documents(
        documents,
        embeddings,
        location=":memory:",
        collection_name="generic-document-store",
    )
    retriever = qdrant_vector_store.as_retriever()

    primary_qa_llm = ChatOpenAI(
        model_name="gpt-3.5-turbo", temperature=0, streaming=True
    )
    # MultiQueryRetriever has the LLM rephrase the question several ways and
    # unions the retrieved chunks, improving recall over a single query.
    retriever = MultiQueryRetriever.from_llm(retriever=retriever, llm=primary_qa_llm)

    template = """Answer the question based only on the following context. If you cannot answer the question with the context, please respond with 'I don't know':
Context:
{context}
Question:
{question}
"""
    prompt = ChatPromptTemplate.from_template(template)

    # LCEL chain: fan the question into (retrieved context, question), then
    # prompt -> LLM -> plain string. The assign() step re-binds "context" to
    # itself — a no-op kept for fidelity with the original chain shape.
    runnable = (
        {
            "context": itemgetter("question") | retriever,
            "question": itemgetter("question"),
        }
        | RunnablePassthrough.assign(context=itemgetter("context"))
        | prompt
        | primary_qa_llm
        | StrOutputParser()
    )
    cl.user_session.set("runnable", runnable)

    msg.content = f"`{file.name}` processed. You can now ask questions!"
    await msg.update()
# NOTE(review): decorator lost in the copy/paste mangling — without
# @cl.on_message Chainlit never dispatches chat messages to this coroutine.
@cl.on_message
async def main(message):
    """Answer one chat message by streaming the session's RAG chain.

    Retrieves the LCEL chain stored under "runnable" at chat start, streams
    its tokens into a single Chainlit message, then finalizes it.
    """
    runnable = cl.user_session.get("runnable")

    # Start with an empty message and append tokens as they arrive so the
    # user sees the answer build up live.
    msg = cl.Message(content="")
    async for chunk in runnable.astream(
        {"question": message.content},
        # LangchainCallbackHandler surfaces intermediate chain steps in the UI.
        config=RunnableConfig(callbacks=[cl.LangchainCallbackHandler()]),
    ):
        await msg.stream_token(chunk)
    await msg.send()