Spaces:
Sleeping
Sleeping
whymath
committed on
Commit
·
c1a97fb
1
Parent(s):
0981b86
Reverting to initial code to resolve Assistants API file_ids error
Browse files
app.py
CHANGED
|
@@ -3,68 +3,19 @@ import chainlit as cl
|
|
| 3 |
from dotenv import load_dotenv
|
| 4 |
import utils
|
| 5 |
|
| 6 |
-
from openai import AsyncOpenAI
|
| 7 |
-
import time
|
| 8 |
-
from langchain.agents.openai_assistant import OpenAIAssistantRunnable
|
| 9 |
-
|
| 10 |
|
| 11 |
load_dotenv()
|
| 12 |
|
| 13 |
|
| 14 |
-
start_msg = "Teach2Learn Virtual Student by Jerry Chiang and Yohan Mathew\n\nYou can choose to upload a PDF, or just start chatting"
|
| 15 |
-
# instructions = "You are a helpful assistant"
|
| 16 |
-
instructions = "You are a virtual student being taught by the user. You can ask clarifying questions to better understand the user's explanation. Your goal is to ensure that the user understands the concept they are explaining. You can also ask questions to help the user elaborate on their explanation. You can ask questions like 'Can you explain that in simpler terms?' or 'Can you provide an example?'."
|
| 17 |
-
# client = AsyncOpenAI()
|
| 18 |
-
# assistant = client.beta.assistants.create(
|
| 19 |
-
# name="T2L Virtual Student",
|
| 20 |
-
# instructions=instructions,
|
| 21 |
-
# model="gpt-3.5-turbo",
|
| 22 |
-
# )
|
| 23 |
-
assistant = OpenAIAssistantRunnable.create_assistant(
|
| 24 |
-
name="T2L Virtual Student",
|
| 25 |
-
instructions=instructions,
|
| 26 |
-
model="gpt-3.5-turbo",
|
| 27 |
-
tools=[{"type": "code_interpreter"}],
|
| 28 |
-
tool_resources={
|
| 29 |
-
"code_interpreter": {
|
| 30 |
-
"file_ids": ["file-OKohewH4QI0GidqqJzxrkwpj"]
|
| 31 |
-
}
|
| 32 |
-
}
|
| 33 |
-
)
|
| 34 |
-
print("assistant =", assistant)
|
| 35 |
-
# thread = client.beta.threads.create()
|
| 36 |
-
# print("thread =", thread)
|
| 37 |
-
|
| 38 |
-
|
| 39 |
@cl.on_chat_start
|
| 40 |
async def start_chat():
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
# name="T2L Virtual Student",
|
| 47 |
-
# instructions=instructions,
|
| 48 |
-
# model="gpt-3.5-turbo",
|
| 49 |
-
# )
|
| 50 |
-
# thread = client.beta.threads.create()
|
| 51 |
-
|
| 52 |
-
# Store the assistant and thread in the user session
|
| 53 |
-
# settings = {
|
| 54 |
-
# "instructions": instructions,
|
| 55 |
-
# "client": client,
|
| 56 |
-
# "assistant": assistant,
|
| 57 |
-
# "thread": thread
|
| 58 |
-
# }
|
| 59 |
-
settings = {}
|
| 60 |
cl.user_session.set("settings", settings)
|
| 61 |
|
| 62 |
-
# Send a welcome message with an action button
|
| 63 |
-
# actions = [
|
| 64 |
-
# cl.Action(name="upload_pdf", value="upload_pdf_value", label="Upload a PDF", description="Upload a PDF")
|
| 65 |
-
# ]
|
| 66 |
-
# await cl.Message(content=start_msg, actions=actions).send()
|
| 67 |
-
|
| 68 |
|
| 69 |
@cl.on_message
|
| 70 |
async def main(message: cl.Message):
|
|
@@ -74,89 +25,13 @@ async def main(message: cl.Message):
|
|
| 74 |
|
| 75 |
# Get the chain from the user session
|
| 76 |
settings = cl.user_session.get("settings")
|
| 77 |
-
|
| 78 |
-
# client = settings["client"]
|
| 79 |
-
# assistant = settings["assistant"]
|
| 80 |
-
# thread = settings["thread"]
|
| 81 |
-
try:
|
| 82 |
-
raqa_chain = settings["raqa_chain"]
|
| 83 |
-
except KeyError:
|
| 84 |
-
raqa_chain = None
|
| 85 |
|
| 86 |
# Generate the response from the chain
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
query_answer = query_response["response"].content
|
| 91 |
-
else:
|
| 92 |
-
print("Using OpenAI assistant to generate response")
|
| 93 |
-
# message = client.beta.threads.messages.create(
|
| 94 |
-
# thread_id=thread.id,
|
| 95 |
-
# role="user",
|
| 96 |
-
# content=user_query
|
| 97 |
-
# )
|
| 98 |
-
# print("message =", message)
|
| 99 |
-
# run = client.beta.threads.runs.create(
|
| 100 |
-
# thread_id=thread.id,
|
| 101 |
-
# assistant_id=assistant.id,
|
| 102 |
-
# instructions=instructions
|
| 103 |
-
# )
|
| 104 |
-
# print("run =", run)
|
| 105 |
-
# while run.status == "in_progress" or run.status == "queued":
|
| 106 |
-
# time.sleep(1)
|
| 107 |
-
# run = client.beta.threads.runs.retrieve(
|
| 108 |
-
# thread_id=thread.id,
|
| 109 |
-
# run_id=run.id
|
| 110 |
-
# )
|
| 111 |
-
# print("run.status =", run.status)
|
| 112 |
-
# messages = client.beta.threads.messages.list(
|
| 113 |
-
# thread_id=thread.id
|
| 114 |
-
# )
|
| 115 |
-
# # print("messages =", messages)
|
| 116 |
-
# print("messages.data =", messages.data)
|
| 117 |
-
# query_answer = messages.data[0].content
|
| 118 |
-
|
| 119 |
-
query_response = assistant.invoke({"content": user_query})
|
| 120 |
-
query_answer = query_response["response"].content
|
| 121 |
-
|
| 122 |
-
# run = client.beta.threads.runs.create(
|
| 123 |
-
# thread_id=thread.id,
|
| 124 |
-
# assistant_id=assistant.id
|
| 125 |
-
# )
|
| 126 |
|
| 127 |
# Create and send the message stream
|
| 128 |
-
print('query_answer =', query_answer)
|
| 129 |
msg = cl.Message(content=query_answer)
|
| 130 |
await msg.send()
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
@cl.action_callback("upload_pdf")
|
| 134 |
-
async def upload_pdf_fn(action: cl.Action):
|
| 135 |
-
print("The user clicked on the action button!")
|
| 136 |
-
|
| 137 |
-
files = None
|
| 138 |
-
|
| 139 |
-
# Wait for the user to upload a file
|
| 140 |
-
while files == None:
|
| 141 |
-
files = await cl.AskFileMessage(
|
| 142 |
-
content="Waiting for file selection",
|
| 143 |
-
accept=["application/pdf"],
|
| 144 |
-
max_size_mb=20,
|
| 145 |
-
timeout=180,
|
| 146 |
-
).send()
|
| 147 |
-
|
| 148 |
-
file = files[0]
|
| 149 |
-
|
| 150 |
-
msg = cl.Message(
|
| 151 |
-
content=f"Processing `{file.name}`...", disable_human_feedback=True
|
| 152 |
-
)
|
| 153 |
-
await msg.send()
|
| 154 |
-
|
| 155 |
-
# Create the RAQA chain and store it in the user session
|
| 156 |
-
raqa_chain = utils.create_raqa_chain_from_docs(file)
|
| 157 |
-
settings = {
|
| 158 |
-
"raqa_chain": raqa_chain
|
| 159 |
-
}
|
| 160 |
-
cl.user_session.set("settings", settings)
|
| 161 |
-
|
| 162 |
-
return "Thank you for clicking on the action button!"
|
|
|
|
| 3 |
from dotenv import load_dotenv
|
| 4 |
import utils
|
| 5 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
load_dotenv()
|
| 8 |
|
| 9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
@cl.on_chat_start
|
| 11 |
async def start_chat():
|
| 12 |
+
# Create the RAQA chain and store it in the user session
|
| 13 |
+
raqa_chain = utils.create_raqa_chain_from_docs()
|
| 14 |
+
settings = {
|
| 15 |
+
"chain": raqa_chain
|
| 16 |
+
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
cl.user_session.set("settings", settings)
|
| 18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
@cl.on_message
|
| 21 |
async def main(message: cl.Message):
|
|
|
|
| 25 |
|
| 26 |
# Get the chain from the user session
|
| 27 |
settings = cl.user_session.get("settings")
|
| 28 |
+
raqa_chain = settings["chain"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
# Generate the response from the chain
|
| 31 |
+
query_response = raqa_chain.invoke({"question" : user_query})
|
| 32 |
+
query_answer = query_response["response"].content
|
| 33 |
+
print('query_answer =', query_answer)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
|
| 35 |
# Create and send the message stream
|
|
|
|
| 36 |
msg = cl.Message(content=query_answer)
|
| 37 |
await msg.send()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
utils.py
CHANGED
|
@@ -28,35 +28,38 @@ def chunk_documents(docs, tiktoken_len):
|
|
| 28 |
return split_chunks
|
| 29 |
|
| 30 |
|
| 31 |
-
def create_raqa_chain_from_docs(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
-
#
|
| 34 |
-
# docs = PyMuPDFLoader("https://d18rn0p25nwr6d.cloudfront.net/CIK-0001326801/c7318154-f6ae-4866-89fa-f0c589f2ee3d.pdf").load() # TODO: Update this to enable user to upload PDF
|
| 35 |
-
# print("Loaded", len(docs), "documents")
|
| 36 |
-
# print(docs[0])
|
| 37 |
-
|
| 38 |
-
# Create a Qdrant vector store from the split chunks and embedding model, and obtain its retriever
|
| 39 |
split_chunks = chunk_documents(docs, tiktoken_len)
|
| 40 |
embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")
|
| 41 |
qdrant_vectorstore = Qdrant.from_documents(
|
| 42 |
split_chunks,
|
| 43 |
embedding_model,
|
| 44 |
location=":memory:",
|
| 45 |
-
collection_name="
|
| 46 |
)
|
| 47 |
qdrant_retriever = qdrant_vectorstore.as_retriever()
|
| 48 |
|
| 49 |
# Define the RAG prompt template
|
|
|
|
| 50 |
RAG_PROMPT = """
|
| 51 |
-
|
|
|
|
| 52 |
|
| 53 |
-
|
| 54 |
{question}
|
|
|
|
|
|
|
| 55 |
"""
|
| 56 |
rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)
|
| 57 |
|
| 58 |
-
#
|
| 59 |
-
openai_chat_model = ChatOpenAI(model="gpt-3.5-turbo")
|
| 60 |
retrieval_augmented_qa_chain = (
|
| 61 |
{"context": itemgetter("question") | qdrant_retriever, "question": itemgetter("question")}
|
| 62 |
| RunnablePassthrough.assign(context=itemgetter("context"))
|
|
|
|
| 28 |
return split_chunks
|
| 29 |
|
| 30 |
|
| 31 |
+
def create_raqa_chain_from_docs():
|
| 32 |
+
# Load the documents from a PDF file using PyMuPDFLoader
|
| 33 |
+
# docs = PyMuPDFLoader("data/c7318154-f6ae-4866-89fa-f0c589f2ee3d.pdf").load()
|
| 34 |
+
docs = PyMuPDFLoader("https://d18rn0p25nwr6d.cloudfront.net/CIK-0001326801/c7318154-f6ae-4866-89fa-f0c589f2ee3d.pdf").load()
|
| 35 |
+
print("Loaded", len(docs), "documents")
|
| 36 |
+
print(docs[0])
|
| 37 |
|
| 38 |
+
# Chunk documents, load embedding model, create vectorstore and retriever
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
split_chunks = chunk_documents(docs, tiktoken_len)
|
| 40 |
embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")
|
| 41 |
qdrant_vectorstore = Qdrant.from_documents(
|
| 42 |
split_chunks,
|
| 43 |
embedding_model,
|
| 44 |
location=":memory:",
|
| 45 |
+
collection_name="Meta 10-k Filings",
|
| 46 |
)
|
| 47 |
qdrant_retriever = qdrant_vectorstore.as_retriever()
|
| 48 |
|
| 49 |
# Define the RAG prompt template
|
| 50 |
+
openai_chat_model = ChatOpenAI(model="gpt-3.5-turbo")
|
| 51 |
RAG_PROMPT = """
|
| 52 |
+
CONTEXT:
|
| 53 |
+
{context}
|
| 54 |
|
| 55 |
+
QUERY:
|
| 56 |
{question}
|
| 57 |
+
|
| 58 |
+
Use the provided context to answer the provided user query. Only use the provided context to answer the query. If you do not know the answer, respond with "I don't know".
|
| 59 |
"""
|
| 60 |
rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)
|
| 61 |
|
| 62 |
+
# Define the RAQA chain
|
|
|
|
| 63 |
retrieval_augmented_qa_chain = (
|
| 64 |
{"context": itemgetter("question") | qdrant_retriever, "question": itemgetter("question")}
|
| 65 |
| RunnablePassthrough.assign(context=itemgetter("context"))
|