Spaces:

whymath
/

Teach2LearnVirtualStudent

Sleeping

App Files Community

whymath committed on May 16, 2024

Commit

40e1b4d

1 Parent(s): 7010433

Creating base and RAG chains, adding upload and switch default buttons

Browse files

Files changed (2) hide show

app.py +49 -42
utils.py +23 -78

app.py CHANGED Viewed

@@ -2,30 +2,36 @@
 import chainlit as cl
 from dotenv import load_dotenv
 import utils
 load_dotenv()
-start_msg = "Teach2Learn Virtual Student by Jerry Chiang and Yohan Mathew\n\nYou can choose to upload a PDF, or just start chatting"
-# Create the RAQA chain and store it in the user session
-raqa_chain = utils.create_raqa_chain_from_docs()
 @cl.on_chat_start
 async def start_chat():
-    # # Create the RAQA chain and store it in the user session
-    # raqa_chain = utils.create_raqa_chain_from_docs()
-    # settings = {
-    #     "chain": raqa_chain
-    # }
-    # cl.user_session.set("settings", settings)
     print("Chat started")
-    # Send a welcome message with an action button
     actions = [
-        cl.Action(name="upload_pdf", value="upload_pdf_value", label="Upload a PDF", description="Upload a PDF")
     ]
     await cl.Message(content=start_msg, actions=actions).send()
@@ -34,58 +40,59 @@ async def start_chat():
 async def main(message: cl.Message):
     # Print the message content
     user_query = message.content
-    print('\nuser_query =', user_query)
-    # Get the chain from the user session
-    try:
-        settings = cl.user_session.get("settings")
-        raqa_chain_upload = settings["raqa_chain_upload"]
-    except Exception as e:
-        print("Error fetching chain from session, defaulting to base chain", e)
-        raqa_chain_upload = None
     # Generate the response from the chain
-    if raqa_chain_upload:
-        print("\nUsing UPLOAD chain to answer query", user_query)
-        query_response = raqa_chain_upload.invoke({"question" : user_query})
     else:
-        print("\nUsing DEFAULT chain to answer query", user_query)
-        query_response = raqa_chain.invoke({"question" : user_query})
-    query_answer = query_response["response"].content
-    print('query_answer =', query_answer, '\n')
-    # Create and send the message stream
     msg = cl.Message(content=query_answer)
     await msg.send()
 @cl.action_callback("upload_pdf")
 async def upload_pdf_fn(action: cl.Action):
-    print("\nThe user clicked on an action button!")
-    files = None
     # Wait for the user to upload a file
     while files == None:
         files = await cl.AskFileMessage(
-            content="Processing your file",
             accept=["application/pdf"],
             max_size_mb=20,
             timeout=180,
         ).send()
     file_uploaded = files[0]
     print("\nUploaded file:", file_uploaded, "\n")
-    # Create the RAQA chain and store it in the user session
-    filepath_uploaded = file_uploaded.path
-    filename_uploaded = file_uploaded.name
-    raqa_chain_upload = utils.create_raqa_chain_from_file(filepath_uploaded, filename_uploaded)
-    settings = {
-        "raqa_chain_upload": raqa_chain_upload
-    }
     cl.user_session.set("settings", settings)
-    msg = cl.Message(content="Thank you for uploading!")
     await msg.send()

 import chainlit as cl
 from dotenv import load_dotenv
 import utils
+from langchain_openai import ChatOpenAI
 load_dotenv()
+start_msg = "Hello! I'm Teach2Learn VirtualStudent, a virtual student peer by Jerry Chiang and Yohan Mathew\n\nYou can choose to upload a PDF, or just start chatting\n"
+base_instructions = """
+Assume you are a virtual student being taught by the user. Your goal is to ensure that the user understands the concept they are explaining.
+You should always first let the user know if they are correct or not, and then ask them questions to help them learn by teaching rather than explaining things to them.
+If they ask for feedback, you should provide constructive feedback on the whole conversation instead of asking another question.
+"""
+openai_chat_model = ChatOpenAI(model="gpt-3.5-turbo")
+base_chain = utils.create_base_chain(openai_chat_model, base_instructions)
 @cl.on_chat_start
 async def start_chat():
     print("Chat started")
+    # Set the user session settings
+    settings = {
+        "rag_chain_available": False
+    }
+    cl.user_session.set("settings", settings)
+    # Send a welcome message with action buttons
     actions = [
+        cl.Action(name="upload_pdf", value="upload_pdf_value", label="Upload a PDF", description="Upload a PDF"),
+        cl.Action(name="switch_default", value="switch_default_value", label="Switch back to default mode", description="Switch back to default mode")
     ]
     await cl.Message(content=start_msg, actions=actions).send()
 async def main(message: cl.Message):
     # Print the message content
     user_query = message.content
+    settings = cl.user_session.get("settings")
     # Generate the response from the chain
+    if settings["rag_chain_available"]:
+        print("\nUsing RAG chain to answer query", user_query)
+        rag_chain = settings["rag_chain"]
+        query_response = rag_chain.invoke({"question" : user_query})
+        query_answer = query_response["response"].content
     else:
+        print("\nUsing base chain to answer query", user_query)
+        query_response = base_chain.invoke({"question" : user_query})
+        query_answer = query_response.content
+    # Create and send the message stream
+    print('query_answer =', query_answer, '\n')
     msg = cl.Message(content=query_answer)
     await msg.send()
 @cl.action_callback("upload_pdf")
 async def upload_pdf_fn(action: cl.Action):
+    print("\nRunning PDF upload and RAG chain creation")
     # Wait for the user to upload a file
+    files = None
     while files == None:
         files = await cl.AskFileMessage(
+            content="Processing your file...",
             accept=["application/pdf"],
             max_size_mb=20,
             timeout=180,
         ).send()
     file_uploaded = files[0]
     print("\nUploaded file:", file_uploaded, "\n")
+    # Create the RAG chain and store it in the user session
+    rag_chain = utils.create_rag_chain_from_file(openai_chat_model, base_instructions, file_uploaded.path, file_uploaded.name)
+    settings = cl.user_session.get("settings")
+    settings["rag_chain"] = rag_chain
+    settings["rag_chain_available"] = True
+    cl.user_session.set("settings", settings)
+    msg = cl.Message(content="Ready to discuss the uploaded PDF file!")
+    await msg.send()
+@cl.action_callback("switch_default")
+async def switch_default_fn(action: cl.Action):
+    print("\nSwitching back to default base chain")
+    settings = cl.user_session.get("settings")
+    settings["rag_chain_available"] = False
     cl.user_session.set("settings", settings)
+    msg = cl.Message(content="Okay, I'm back to answering general questions. What would you like to try teaching me next?")
     await msg.send()

utils.py CHANGED Viewed

@@ -4,9 +4,7 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_openai.embeddings import OpenAIEmbeddings
 from langchain_community.vectorstores import Qdrant
 from langchain_core.prompts import ChatPromptTemplate
-from langchain_openai import ChatOpenAI
 from operator import itemgetter
-# from langchain.schema.output_parser import StrOutputParser
 from langchain.schema.runnable import RunnablePassthrough
@@ -23,77 +21,28 @@ def chunk_documents(docs, tiktoken_len):
         chunk_overlap = 0,
         length_function = tiktoken_len,
     )
     split_chunks = text_splitter.split_documents(docs)
     print('len(split_chunks) =', len(split_chunks))
     return split_chunks
-def create_raqa_chain_from_docs():
-    # Load the documents from a PDF file using PyMuPDFLoader
-    # docs = PyMuPDFLoader("data/c7318154-f6ae-4866-89fa-f0c589f2ee3d.pdf").load()
-    docs = PyMuPDFLoader("https://d18rn0p25nwr6d.cloudfront.net/CIK-0001326801/c7318154-f6ae-4866-89fa-f0c589f2ee3d.pdf").load()
-    # Print the number of loaded documents
-    print("Loaded", len(docs), "documents")
-    # Print the first document
-    print(docs[0])
-    # Split the documents into chunks based on their length
-    split_chunks = chunk_documents(docs, tiktoken_len)
-    # Create an instance of the OpenAIEmbeddings model for text embeddings
-    embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")
-    # Create a Qdrant vector store from the split chunks
-    qdrant_vectorstore = Qdrant.from_documents(
-        split_chunks,
-        embedding_model,
-        location=":memory:",
-        collection_name="Meta 10-k Filings",
-    )
-    # Create a retriever from the Qdrant vector store
-    qdrant_retriever = qdrant_vectorstore.as_retriever()
-    # Define the RAG prompt template
-    RAG_PROMPT = """
-    CONTEXT:
-    {context}
-    QUERY:
-    {question}
-    Use the provided context to answer the provided user query. Only use the provided context to answer the query. If you do not know the answer, respond with "I don't know".
-    """
-    # Create a ChatPromptTemplate instance from the RAG prompt template
-    rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)
-    # Create an instance of the ChatOpenAI model
-    openai_chat_model = ChatOpenAI(model="gpt-3.5-turbo")
-    # Define the retrieval augmented QA chain
-    retrieval_augmented_qa_chain = (
-        {"context": itemgetter("question") | qdrant_retriever, "question": itemgetter("question")}
-        | RunnablePassthrough.assign(context=itemgetter("context"))
-        | {"response": rag_prompt | openai_chat_model, "context": itemgetter("context")}
-    )
-    print("Created retrieval augmented QA chain from default PDF file")
-    return retrieval_augmented_qa_chain
-def create_raqa_chain_from_file(filepath_uploaded, filename_uploaded):
-    # # Load the documents from a PDF file using PyMuPDFLoader
-    # docs = PyMuPDFLoader("https://d18rn0p25nwr6d.cloudfront.net/CIK-0001326801/c7318154-f6ae-4866-89fa-f0c589f2ee3d.pdf").load()
-    docs = PyMuPDFLoader(filepath_uploaded).load()
     print("Loaded", len(docs), "documents")
-    print(docs[0])
     # Create a Qdrant vector store from the split chunks and embedding model, and obtain its retriever
     split_chunks = chunk_documents(docs, tiktoken_len)
@@ -102,35 +51,31 @@ def create_raqa_chain_from_file(filepath_uploaded, filename_uploaded):
         split_chunks,
         embedding_model,
         location=":memory:",
-        collection_name="LoadedPDF",
     )
     qdrant_retriever = qdrant_vectorstore.as_retriever()
     # Define the RAG prompt template
-    # RAG_PROMPT = """
-    # Assume you are a virtual student being taught by the user. You can ask clarifying questions to better understand the user's explanation. Your goal is to ensure that the user understands the concept they are explaining. You can also ask questions to help the user elaborate on their explanation. You can ask questions like "Can you explain that in simpler terms?" or "Can you provide an example?".
-    # USER MESSAGE:
-    # {question}
-    # """
     RAG_PROMPT = """
-    CONTEXT:
-    {context}
     QUERY:
     {question}
-    Use the provided context to answer the provided user query. Only use the provided context to answer the query. If you do not know the answer, respond with "I don't know".
     """
     rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)
     # Create the retrieval augmented QA chain using the Qdrant retriever, RAG prompt, and OpenAI chat model
-    openai_chat_model = ChatOpenAI(model="gpt-3.5-turbo")
-    retrieval_augmented_qa_chain = (
         {"context": itemgetter("question") | qdrant_retriever, "question": itemgetter("question")}
         | RunnablePassthrough.assign(context=itemgetter("context"))
         | {"response": rag_prompt | openai_chat_model, "context": itemgetter("context")}
     )
-    print("Created retrieval augmented QA chain from uploaded PDF file =", filename_uploaded, "\n")
-    return retrieval_augmented_qa_chain

 from langchain_openai.embeddings import OpenAIEmbeddings
 from langchain_community.vectorstores import Qdrant
 from langchain_core.prompts import ChatPromptTemplate
 from operator import itemgetter
 from langchain.schema.runnable import RunnablePassthrough
         chunk_overlap = 0,
         length_function = tiktoken_len,
     )
     split_chunks = text_splitter.split_documents(docs)
     print('len(split_chunks) =', len(split_chunks))
     return split_chunks
+def create_base_chain(openai_chat_model, base_instructions):
+    human_template = "{question}"
+    base_prompt = ChatPromptTemplate.from_messages([
+        ("system", base_instructions),
+        ("human", human_template)
+    ])
+    base_chain = base_prompt | openai_chat_model
+    print("Created base chain\n")
+    return base_chain
+def create_rag_chain_from_file(openai_chat_model, base_instructions, file_path, file_name):
+    # Load the documents from a PDF file using PyMuPDFLoader
+    docs = PyMuPDFLoader(file_path).load()
     print("Loaded", len(docs), "documents")
+    print("First document:\n", docs[0], "\n")
     # Create a Qdrant vector store from the split chunks and embedding model, and obtain its retriever
     split_chunks = chunk_documents(docs, tiktoken_len)
         split_chunks,
         embedding_model,
         location=":memory:",
+        collection_name=file_name,
     )
     qdrant_retriever = qdrant_vectorstore.as_retriever()
+    print("Created Qdrant vector store from uploaded PDF file =", file_name)
     # Define the RAG prompt template
     RAG_PROMPT = """
+    Use the provided context while replying to the user query. Only use the provided context to answer the query.
     QUERY:
     {question}
+    CONTEXT:
+    {context}
     """
+    RAG_PROMPT = base_instructions + RAG_PROMPT
+    print("RAG prompt template =", RAG_PROMPT)
     rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)
     # Create the retrieval augmented QA chain using the Qdrant retriever, RAG prompt, and OpenAI chat model
+    rag_chain = (
         {"context": itemgetter("question") | qdrant_retriever, "question": itemgetter("question")}
         | RunnablePassthrough.assign(context=itemgetter("context"))
         | {"response": rag_prompt | openai_chat_model, "context": itemgetter("context")}
     )
+    print("Created RAG chain from uploaded PDF file =", file_name, "\n")
+    return rag_chain