Spaces:
Sleeping
Sleeping
whymath
committed on
Commit
·
3a1e28f
1
Parent(s):
53867fd
Add PyPDF processing option
Browse files
app.py
CHANGED
|
@@ -48,17 +48,17 @@ async def main(message: cl.Message):
|
|
| 48 |
|
| 49 |
# Generate the response from the chain
|
| 50 |
if settings["current_mode"] == "rag_chain":
|
| 51 |
-
print("\nUsing RAG chain to answer query", user_query)
|
| 52 |
rag_chain = settings["rag_chain"]
|
| 53 |
query_response = rag_chain.invoke({"question" : user_query})
|
| 54 |
query_answer = query_response["response"].content
|
| 55 |
elif settings["current_mode"] == "ai_student_chain":
|
| 56 |
-
print("\nUsing AI student chain to answer query", user_query)
|
| 57 |
ai_student_chain = settings["ai_student_chain"]
|
| 58 |
query_response = ai_student_chain.invoke({"question" : user_query})
|
| 59 |
query_answer = query_response.content
|
| 60 |
else:
|
| 61 |
-
print("\nUsing base chain to answer query", user_query)
|
| 62 |
query_response = base_chain.invoke({"question" : user_query})
|
| 63 |
query_answer = query_response.content
|
| 64 |
|
|
@@ -91,7 +91,7 @@ async def upload_pdf_fn(action: cl.Action):
|
|
| 91 |
rag_instructions = aistudent_instructions
|
| 92 |
else:
|
| 93 |
rag_instructions = base_instructions
|
| 94 |
-
rag_chain = utils.create_rag_chain_from_file(openai_chat_model, rag_instructions, file_uploaded
|
| 95 |
settings["rag_chain"] = rag_chain
|
| 96 |
settings["current_mode"] = "rag_chain"
|
| 97 |
cl.user_session.set("settings", settings)
|
|
|
|
| 48 |
|
| 49 |
# Generate the response from the chain
|
| 50 |
if settings["current_mode"] == "rag_chain":
|
| 51 |
+
print("\nUsing RAG chain to answer query:", user_query)
|
| 52 |
rag_chain = settings["rag_chain"]
|
| 53 |
query_response = rag_chain.invoke({"question" : user_query})
|
| 54 |
query_answer = query_response["response"].content
|
| 55 |
elif settings["current_mode"] == "ai_student_chain":
|
| 56 |
+
print("\nUsing AI student chain to answer query:", user_query)
|
| 57 |
ai_student_chain = settings["ai_student_chain"]
|
| 58 |
query_response = ai_student_chain.invoke({"question" : user_query})
|
| 59 |
query_answer = query_response.content
|
| 60 |
else:
|
| 61 |
+
print("\nUsing base chain to answer query:", user_query)
|
| 62 |
query_response = base_chain.invoke({"question" : user_query})
|
| 63 |
query_answer = query_response.content
|
| 64 |
|
|
|
|
| 91 |
rag_instructions = aistudent_instructions
|
| 92 |
else:
|
| 93 |
rag_instructions = base_instructions
|
| 94 |
+
rag_chain = utils.create_rag_chain_from_file(openai_chat_model, rag_instructions, file_uploaded, file_uploaded.name)
|
| 95 |
settings["rag_chain"] = rag_chain
|
| 96 |
settings["current_mode"] = "rag_chain"
|
| 97 |
cl.user_session.set("settings", settings)
|
utils.py
CHANGED
|
@@ -6,6 +6,8 @@ from langchain_community.vectorstores import Qdrant
|
|
| 6 |
from langchain_core.prompts import ChatPromptTemplate
|
| 7 |
from operator import itemgetter
|
| 8 |
from langchain.schema.runnable import RunnablePassthrough
|
|
|
|
|
|
|
| 9 |
|
| 10 |
|
| 11 |
def tiktoken_len(text):
|
|
@@ -25,6 +27,18 @@ def chunk_documents(docs, tiktoken_len):
|
|
| 25 |
print('len(split_chunks) =', len(split_chunks))
|
| 26 |
return split_chunks
|
| 27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
|
| 29 |
def create_base_chain(openai_chat_model, system_prompt):
|
| 30 |
human_template = "{question}"
|
|
@@ -37,10 +51,11 @@ def create_base_chain(openai_chat_model, system_prompt):
|
|
| 37 |
return base_chain
|
| 38 |
|
| 39 |
|
| 40 |
-
def create_rag_chain_from_file(openai_chat_model, base_instructions,
|
| 41 |
|
| 42 |
# Load the documents from a PDF file using PyMuPDFLoader
|
| 43 |
-
docs = PyMuPDFLoader(
|
|
|
|
| 44 |
print("Loaded", len(docs), "documents")
|
| 45 |
print("First document:\n", docs[0], "\n")
|
| 46 |
|
|
|
|
| 6 |
from langchain_core.prompts import ChatPromptTemplate
|
| 7 |
from operator import itemgetter
|
| 8 |
from langchain.schema.runnable import RunnablePassthrough
|
| 9 |
+
from chainlit.types import AskFileResponse
|
| 10 |
+
from langchain.document_loaders import PyPDFLoader
|
| 11 |
|
| 12 |
|
| 13 |
def tiktoken_len(text):
|
|
|
|
| 27 |
print('len(split_chunks) =', len(split_chunks))
|
| 28 |
return split_chunks
|
| 29 |
|
| 30 |
+
def process_file(file: AskFileResponse):
|
| 31 |
+
import tempfile
|
| 32 |
+
|
| 33 |
+
with tempfile.NamedTemporaryFile(mode="w", delete=False) as tempfile:
|
| 34 |
+
with open(tempfile.name, "wb") as f:
|
| 35 |
+
f.write(file.content)
|
| 36 |
+
|
| 37 |
+
pypdf_loader = PyPDFLoader(tempfile.name)
|
| 38 |
+
texts = pypdf_loader.load_and_split()
|
| 39 |
+
texts = [text.page_content for text in texts]
|
| 40 |
+
return texts
|
| 41 |
+
|
| 42 |
|
| 43 |
def create_base_chain(openai_chat_model, system_prompt):
|
| 44 |
human_template = "{question}"
|
|
|
|
| 51 |
return base_chain
|
| 52 |
|
| 53 |
|
| 54 |
+
def create_rag_chain_from_file(openai_chat_model, base_instructions, file_response, file_name):
|
| 55 |
|
| 56 |
# Load the documents from a PDF file using PyMuPDFLoader
|
| 57 |
+
docs = PyMuPDFLoader(file_response.path).load()
|
| 58 |
+
# docs = process_file(file_response)
|
| 59 |
print("Loaded", len(docs), "documents")
|
| 60 |
print("First document:\n", docs[0], "\n")
|
| 61 |
|