whymath committed on
Commit
3a1e28f
·
1 Parent(s): 53867fd

Add PyPDF processing option

Browse files
Files changed (2) hide show
  1. app.py +4 -4
  2. utils.py +17 -2
app.py CHANGED
@@ -48,17 +48,17 @@ async def main(message: cl.Message):
48
 
49
  # Generate the response from the chain
50
  if settings["current_mode"] == "rag_chain":
51
- print("\nUsing RAG chain to answer query", user_query)
52
  rag_chain = settings["rag_chain"]
53
  query_response = rag_chain.invoke({"question" : user_query})
54
  query_answer = query_response["response"].content
55
  elif settings["current_mode"] == "ai_student_chain":
56
- print("\nUsing AI student chain to answer query", user_query)
57
  ai_student_chain = settings["ai_student_chain"]
58
  query_response = ai_student_chain.invoke({"question" : user_query})
59
  query_answer = query_response.content
60
  else:
61
- print("\nUsing base chain to answer query", user_query)
62
  query_response = base_chain.invoke({"question" : user_query})
63
  query_answer = query_response.content
64
 
@@ -91,7 +91,7 @@ async def upload_pdf_fn(action: cl.Action):
91
  rag_instructions = aistudent_instructions
92
  else:
93
  rag_instructions = base_instructions
94
- rag_chain = utils.create_rag_chain_from_file(openai_chat_model, rag_instructions, file_uploaded.path, file_uploaded.name)
95
  settings["rag_chain"] = rag_chain
96
  settings["current_mode"] = "rag_chain"
97
  cl.user_session.set("settings", settings)
 
48
 
49
  # Generate the response from the chain
50
  if settings["current_mode"] == "rag_chain":
51
+ print("\nUsing RAG chain to answer query:", user_query)
52
  rag_chain = settings["rag_chain"]
53
  query_response = rag_chain.invoke({"question" : user_query})
54
  query_answer = query_response["response"].content
55
  elif settings["current_mode"] == "ai_student_chain":
56
+ print("\nUsing AI student chain to answer query:", user_query)
57
  ai_student_chain = settings["ai_student_chain"]
58
  query_response = ai_student_chain.invoke({"question" : user_query})
59
  query_answer = query_response.content
60
  else:
61
+ print("\nUsing base chain to answer query:", user_query)
62
  query_response = base_chain.invoke({"question" : user_query})
63
  query_answer = query_response.content
64
 
 
91
  rag_instructions = aistudent_instructions
92
  else:
93
  rag_instructions = base_instructions
94
+ rag_chain = utils.create_rag_chain_from_file(openai_chat_model, rag_instructions, file_uploaded, file_uploaded.name)
95
  settings["rag_chain"] = rag_chain
96
  settings["current_mode"] = "rag_chain"
97
  cl.user_session.set("settings", settings)
utils.py CHANGED
@@ -6,6 +6,8 @@ from langchain_community.vectorstores import Qdrant
6
  from langchain_core.prompts import ChatPromptTemplate
7
  from operator import itemgetter
8
  from langchain.schema.runnable import RunnablePassthrough
 
 
9
 
10
 
11
  def tiktoken_len(text):
@@ -25,6 +27,18 @@ def chunk_documents(docs, tiktoken_len):
25
  print('len(split_chunks) =', len(split_chunks))
26
  return split_chunks
27
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
  def create_base_chain(openai_chat_model, system_prompt):
30
  human_template = "{question}"
@@ -37,10 +51,11 @@ def create_base_chain(openai_chat_model, system_prompt):
37
  return base_chain
38
 
39
 
40
- def create_rag_chain_from_file(openai_chat_model, base_instructions, file_path, file_name):
41
 
42
  # Load the documents from a PDF file using PyMuPDFLoader
43
- docs = PyMuPDFLoader(file_path).load()
 
44
  print("Loaded", len(docs), "documents")
45
  print("First document:\n", docs[0], "\n")
46
 
 
6
  from langchain_core.prompts import ChatPromptTemplate
7
  from operator import itemgetter
8
  from langchain.schema.runnable import RunnablePassthrough
9
+ from chainlit.types import AskFileResponse
10
+ from langchain.document_loaders import PyPDFLoader
11
 
12
 
13
  def tiktoken_len(text):
 
27
  print('len(split_chunks) =', len(split_chunks))
28
  return split_chunks
29
 
30
def process_file(file: AskFileResponse):
    """Persist an uploaded PDF to a temporary file and extract its page texts.

    Args:
        file: The Chainlit upload response; only ``file.content`` (raw PDF
            bytes) is read here.

    Returns:
        list[str]: One string of page content per chunk produced by
        ``PyPDFLoader.load_and_split()``.
    """
    import tempfile

    # NamedTemporaryFile defaults to binary mode ("w+b"), so we can write the
    # uploaded bytes directly — no need for a second open().  delete=False
    # keeps the file on disk after the handle closes so the loader can read it.
    # NOTE: do not bind the file object to the name `tempfile` — that would
    # shadow the module.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
        tmp.write(file.content)
        tmp_path = tmp.name

    # Load outside the `with` block: on Windows a NamedTemporaryFile cannot be
    # reopened while the original handle is still open.
    pypdf_loader = PyPDFLoader(tmp_path)
    texts = pypdf_loader.load_and_split()
    return [text.page_content for text in texts]
41
+
42
 
43
  def create_base_chain(openai_chat_model, system_prompt):
44
  human_template = "{question}"
 
51
  return base_chain
52
 
53
 
54
+ def create_rag_chain_from_file(openai_chat_model, base_instructions, file_response, file_name):
55
 
56
  # Load the documents from a PDF file using PyMuPDFLoader
57
+ docs = PyMuPDFLoader(file_response.path).load()
58
+ # docs = process_file(file_response)
59
  print("Loaded", len(docs), "documents")
60
  print("First document:\n", docs[0], "\n")
61