whymath committed on
Commit
3a1e28f
·
1 Parent(s): 53867fd

Add PyPDF processing option

Browse files
Files changed (2) hide show
  1. app.py +4 -4
  2. utils.py +17 -2
app.py CHANGED
@@ -48,17 +48,17 @@ async def main(message: cl.Message):
48
 
49
  # Generate the response from the chain
50
  if settings["current_mode"] == "rag_chain":
51
- print("\nUsing RAG chain to answer query", user_query)
52
  rag_chain = settings["rag_chain"]
53
  query_response = rag_chain.invoke({"question" : user_query})
54
  query_answer = query_response["response"].content
55
  elif settings["current_mode"] == "ai_student_chain":
56
- print("\nUsing AI student chain to answer query", user_query)
57
  ai_student_chain = settings["ai_student_chain"]
58
  query_response = ai_student_chain.invoke({"question" : user_query})
59
  query_answer = query_response.content
60
  else:
61
- print("\nUsing base chain to answer query", user_query)
62
  query_response = base_chain.invoke({"question" : user_query})
63
  query_answer = query_response.content
64
 
@@ -91,7 +91,7 @@ async def upload_pdf_fn(action: cl.Action):
91
  rag_instructions = aistudent_instructions
92
  else:
93
  rag_instructions = base_instructions
94
- rag_chain = utils.create_rag_chain_from_file(openai_chat_model, rag_instructions, file_uploaded.path, file_uploaded.name)
95
  settings["rag_chain"] = rag_chain
96
  settings["current_mode"] = "rag_chain"
97
  cl.user_session.set("settings", settings)
 
48
 
49
  # Generate the response from the chain
50
  if settings["current_mode"] == "rag_chain":
51
+ print("\nUsing RAG chain to answer query:", user_query)
52
  rag_chain = settings["rag_chain"]
53
  query_response = rag_chain.invoke({"question" : user_query})
54
  query_answer = query_response["response"].content
55
  elif settings["current_mode"] == "ai_student_chain":
56
+ print("\nUsing AI student chain to answer query:", user_query)
57
  ai_student_chain = settings["ai_student_chain"]
58
  query_response = ai_student_chain.invoke({"question" : user_query})
59
  query_answer = query_response.content
60
  else:
61
+ print("\nUsing base chain to answer query:", user_query)
62
  query_response = base_chain.invoke({"question" : user_query})
63
  query_answer = query_response.content
64
 
 
91
  rag_instructions = aistudent_instructions
92
  else:
93
  rag_instructions = base_instructions
94
+ rag_chain = utils.create_rag_chain_from_file(openai_chat_model, rag_instructions, file_uploaded, file_uploaded.name)
95
  settings["rag_chain"] = rag_chain
96
  settings["current_mode"] = "rag_chain"
97
  cl.user_session.set("settings", settings)
utils.py CHANGED
@@ -6,6 +6,8 @@ from langchain_community.vectorstores import Qdrant
6
  from langchain_core.prompts import ChatPromptTemplate
7
  from operator import itemgetter
8
  from langchain.schema.runnable import RunnablePassthrough
 
 
9
 
10
 
11
  def tiktoken_len(text):
@@ -25,6 +27,18 @@ def chunk_documents(docs, tiktoken_len):
25
  print('len(split_chunks) =', len(split_chunks))
26
  return split_chunks
27
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
  def create_base_chain(openai_chat_model, system_prompt):
30
  human_template = "{question}"
@@ -37,10 +51,11 @@ def create_base_chain(openai_chat_model, system_prompt):
37
  return base_chain
38
 
39
 
40
- def create_rag_chain_from_file(openai_chat_model, base_instructions, file_path, file_name):
41
 
42
  # Load the documents from a PDF file using PyMuPDFLoader
43
- docs = PyMuPDFLoader(file_path).load()
 
44
  print("Loaded", len(docs), "documents")
45
  print("First document:\n", docs[0], "\n")
46
 
 
6
  from langchain_core.prompts import ChatPromptTemplate
7
  from operator import itemgetter
8
  from langchain.schema.runnable import RunnablePassthrough
9
+ from chainlit.types import AskFileResponse
10
+ from langchain.document_loaders import PyPDFLoader
11
 
12
 
13
  def tiktoken_len(text):
 
27
  print('len(split_chunks) =', len(split_chunks))
28
  return split_chunks
29
 
30
def process_file(file: AskFileResponse):
    """Persist an uploaded PDF to a temporary file and extract its page texts.

    Args:
        file: The Chainlit upload response; only ``file.content`` (raw PDF
            bytes) is read here.

    Returns:
        list[str]: One string of page content per chunk produced by
        ``PyPDFLoader.load_and_split()``.
    """
    import tempfile

    # NamedTemporaryFile defaults to binary mode ("w+b"), so we can write the
    # uploaded bytes directly — no need for a second open().  delete=False
    # keeps the file on disk after the handle closes so the loader can read it.
    # NOTE: do not bind the file object to the name `tempfile` — that would
    # shadow the module.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
        tmp.write(file.content)
        tmp_path = tmp.name

    # Load outside the `with` block: on Windows a NamedTemporaryFile cannot be
    # reopened while the original handle is still open.
    pypdf_loader = PyPDFLoader(tmp_path)
    texts = pypdf_loader.load_and_split()
    return [text.page_content for text in texts]
41
+
42
 
43
  def create_base_chain(openai_chat_model, system_prompt):
44
  human_template = "{question}"
 
51
  return base_chain
52
 
53
 
54
+ def create_rag_chain_from_file(openai_chat_model, base_instructions, file_response, file_name):
55
 
56
  # Load the documents from a PDF file using PyMuPDFLoader
57
+ docs = PyMuPDFLoader(file_response.path).load()
58
+ # docs = process_file(file_response)
59
  print("Loaded", len(docs), "documents")
60
  print("First document:\n", docs[0], "\n")
61