Spaces:
Sleeping
Sleeping
whymath
committed on
Commit
·
7010433
1
Parent(s):
0f173ea
Primary fixes for upload functionality
Browse files
- .gitignore +1 -0
- app.py +65 -11
- utils.py +50 -0
.gitignore
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
bkp/
|
| 2 |
.chainlit/
|
|
|
|
| 3 |
|
| 4 |
# Byte-compiled / optimized / DLL files
|
| 5 |
__pycache__/
|
|
|
|
| 1 |
bkp/
|
| 2 |
.chainlit/
|
| 3 |
+
.files/
|
| 4 |
|
| 5 |
# Byte-compiled / optimized / DLL files
|
| 6 |
__pycache__/
|
app.py
CHANGED
|
@@ -7,31 +7,85 @@ import utils
|
|
| 7 |
load_dotenv()
|
| 8 |
|
| 9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
@cl.on_chat_start
|
| 11 |
async def start_chat():
|
| 12 |
-
# Create the RAQA chain and store it in the user session
|
| 13 |
-
raqa_chain = utils.create_raqa_chain_from_docs()
|
| 14 |
-
settings = {
|
| 15 |
-
|
| 16 |
-
}
|
| 17 |
-
cl.user_session.set("settings", settings)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
|
| 20 |
@cl.on_message
|
| 21 |
async def main(message: cl.Message):
|
| 22 |
# Print the message content
|
| 23 |
user_query = message.content
|
| 24 |
-
print('
|
| 25 |
|
| 26 |
# Get the chain from the user session
|
| 27 |
-
|
| 28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
# Generate the response from the chain
|
| 31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
query_answer = query_response["response"].content
|
| 33 |
-
print('query_answer =', query_answer)
|
| 34 |
|
| 35 |
# Create and send the message stream
|
| 36 |
msg = cl.Message(content=query_answer)
|
| 37 |
await msg.send()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
load_dotenv()
|
| 8 |
|
| 9 |
|
| 10 |
+
start_msg = "Teach2Learn Virtual Student by Jerry Chiang and Yohan Mathew\n\nYou can choose to upload a PDF, or just start chatting"
|
| 11 |
+
|
| 12 |
+
# Create the RAQA chain and store it in the user session
|
| 13 |
+
raqa_chain = utils.create_raqa_chain_from_docs()
|
| 14 |
+
|
| 15 |
+
|
| 16 |
@cl.on_chat_start
|
| 17 |
async def start_chat():
|
| 18 |
+
# # Create the RAQA chain and store it in the user session
|
| 19 |
+
# raqa_chain = utils.create_raqa_chain_from_docs()
|
| 20 |
+
# settings = {
|
| 21 |
+
# "chain": raqa_chain
|
| 22 |
+
# }
|
| 23 |
+
# cl.user_session.set("settings", settings)
|
| 24 |
+
print("Chat started")
|
| 25 |
+
|
| 26 |
+
# Send a welcome message with an action button
|
| 27 |
+
actions = [
|
| 28 |
+
cl.Action(name="upload_pdf", value="upload_pdf_value", label="Upload a PDF", description="Upload a PDF")
|
| 29 |
+
]
|
| 30 |
+
await cl.Message(content=start_msg, actions=actions).send()
|
| 31 |
|
| 32 |
|
| 33 |
@cl.on_message
|
| 34 |
async def main(message: cl.Message):
|
| 35 |
# Print the message content
|
| 36 |
user_query = message.content
|
| 37 |
+
print('\nuser_query =', user_query)
|
| 38 |
|
| 39 |
# Get the chain from the user session
|
| 40 |
+
try:
|
| 41 |
+
settings = cl.user_session.get("settings")
|
| 42 |
+
raqa_chain_upload = settings["raqa_chain_upload"]
|
| 43 |
+
except Exception as e:
|
| 44 |
+
print("Error fetching chain from session, defaulting to base chain", e)
|
| 45 |
+
raqa_chain_upload = None
|
| 46 |
|
| 47 |
# Generate the response from the chain
|
| 48 |
+
if raqa_chain_upload:
|
| 49 |
+
print("\nUsing UPLOAD chain to answer query", user_query)
|
| 50 |
+
query_response = raqa_chain_upload.invoke({"question" : user_query})
|
| 51 |
+
else:
|
| 52 |
+
print("\nUsing DEFAULT chain to answer query", user_query)
|
| 53 |
+
query_response = raqa_chain.invoke({"question" : user_query})
|
| 54 |
query_answer = query_response["response"].content
|
| 55 |
+
print('query_answer =', query_answer, '\n')
|
| 56 |
|
| 57 |
# Create and send the message stream
|
| 58 |
msg = cl.Message(content=query_answer)
|
| 59 |
await msg.send()
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
@cl.action_callback("upload_pdf")
|
| 63 |
+
async def upload_pdf_fn(action: cl.Action):
|
| 64 |
+
print("\nThe user clicked on an action button!")
|
| 65 |
+
|
| 66 |
+
files = None
|
| 67 |
+
|
| 68 |
+
# Wait for the user to upload a file
|
| 69 |
+
while files == None:
|
| 70 |
+
files = await cl.AskFileMessage(
|
| 71 |
+
content="Processing your file",
|
| 72 |
+
accept=["application/pdf"],
|
| 73 |
+
max_size_mb=20,
|
| 74 |
+
timeout=180,
|
| 75 |
+
).send()
|
| 76 |
+
|
| 77 |
+
file_uploaded = files[0]
|
| 78 |
+
print("\nUploaded file:", file_uploaded, "\n")
|
| 79 |
+
|
| 80 |
+
# Create the RAQA chain and store it in the user session
|
| 81 |
+
filepath_uploaded = file_uploaded.path
|
| 82 |
+
filename_uploaded = file_uploaded.name
|
| 83 |
+
raqa_chain_upload = utils.create_raqa_chain_from_file(filepath_uploaded, filename_uploaded)
|
| 84 |
+
|
| 85 |
+
settings = {
|
| 86 |
+
"raqa_chain_upload": raqa_chain_upload
|
| 87 |
+
}
|
| 88 |
+
cl.user_session.set("settings", settings)
|
| 89 |
+
|
| 90 |
+
msg = cl.Message(content="Thank you for uploading!")
|
| 91 |
+
await msg.send()
|
utils.py
CHANGED
|
@@ -82,5 +82,55 @@ def create_raqa_chain_from_docs():
|
|
| 82 |
| RunnablePassthrough.assign(context=itemgetter("context"))
|
| 83 |
| {"response": rag_prompt | openai_chat_model, "context": itemgetter("context")}
|
| 84 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
|
| 86 |
return retrieval_augmented_qa_chain
|
|
|
|
| 82 |
| RunnablePassthrough.assign(context=itemgetter("context"))
|
| 83 |
| {"response": rag_prompt | openai_chat_model, "context": itemgetter("context")}
|
| 84 |
)
|
| 85 |
+
print("Created retrieval augmented QA chain from default PDF file")
|
| 86 |
+
|
| 87 |
+
return retrieval_augmented_qa_chain
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
def create_raqa_chain_from_file(filepath_uploaded, filename_uploaded):
|
| 91 |
+
|
| 92 |
+
# # Load the documents from a PDF file using PyMuPDFLoader
|
| 93 |
+
# docs = PyMuPDFLoader("https://d18rn0p25nwr6d.cloudfront.net/CIK-0001326801/c7318154-f6ae-4866-89fa-f0c589f2ee3d.pdf").load()
|
| 94 |
+
docs = PyMuPDFLoader(filepath_uploaded).load()
|
| 95 |
+
print("Loaded", len(docs), "documents")
|
| 96 |
+
print(docs[0])
|
| 97 |
+
|
| 98 |
+
# Create a Qdrant vector store from the split chunks and embedding model, and obtain its retriever
|
| 99 |
+
split_chunks = chunk_documents(docs, tiktoken_len)
|
| 100 |
+
embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")
|
| 101 |
+
qdrant_vectorstore = Qdrant.from_documents(
|
| 102 |
+
split_chunks,
|
| 103 |
+
embedding_model,
|
| 104 |
+
location=":memory:",
|
| 105 |
+
collection_name="LoadedPDF",
|
| 106 |
+
)
|
| 107 |
+
qdrant_retriever = qdrant_vectorstore.as_retriever()
|
| 108 |
+
|
| 109 |
+
# Define the RAG prompt template
|
| 110 |
+
# RAG_PROMPT = """
|
| 111 |
+
# Assume you are a virtual student being taught by the user. You can ask clarifying questions to better understand the user's explanation. Your goal is to ensure that the user understands the concept they are explaining. You can also ask questions to help the user elaborate on their explanation. You can ask questions like "Can you explain that in simpler terms?" or "Can you provide an example?".
|
| 112 |
+
|
| 113 |
+
# USER MESSAGE:
|
| 114 |
+
# {question}
|
| 115 |
+
# """
|
| 116 |
+
RAG_PROMPT = """
|
| 117 |
+
CONTEXT:
|
| 118 |
+
{context}
|
| 119 |
+
|
| 120 |
+
QUERY:
|
| 121 |
+
{question}
|
| 122 |
+
|
| 123 |
+
Use the provided context to answer the provided user query. Only use the provided context to answer the query. If you do not know the answer, respond with "I don't know".
|
| 124 |
+
"""
|
| 125 |
+
rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)
|
| 126 |
+
|
| 127 |
+
# Create the retrieval augmented QA chain using the Qdrant retriever, RAG prompt, and OpenAI chat model
|
| 128 |
+
openai_chat_model = ChatOpenAI(model="gpt-3.5-turbo")
|
| 129 |
+
retrieval_augmented_qa_chain = (
|
| 130 |
+
{"context": itemgetter("question") | qdrant_retriever, "question": itemgetter("question")}
|
| 131 |
+
| RunnablePassthrough.assign(context=itemgetter("context"))
|
| 132 |
+
| {"response": rag_prompt | openai_chat_model, "context": itemgetter("context")}
|
| 133 |
+
)
|
| 134 |
+
print("Created retrieval augmented QA chain from uploaded PDF file =", filename_uploaded, "\n")
|
| 135 |
|
| 136 |
return retrieval_augmented_qa_chain
|