Spaces:

Mdean77
/

ProductionRAG

Paused

App Files Community

Mdean77 committed on Oct 3, 2024

Commit

2d7499f

1 Parent(s): b29e219

Refactoring

Browse files

Files changed (3) hide show

.gitignore +2 -1
app.py +28 -35
prompts.py +26 -0

.gitignore CHANGED Viewed

@@ -1,2 +1,3 @@
 DS_Store
-.env

 DS_Store
+.env
+cache/

app.py CHANGED Viewed

@@ -16,7 +16,7 @@ from langchain_openai.embeddings import OpenAIEmbeddings
 from langchain.storage import LocalFileStore
 from langchain_qdrant import QdrantVectorStore
 from langchain.embeddings import CacheBackedEmbeddings
-from langchain_core.prompts import ChatPromptTemplate
 from langchain_core.globals import set_llm_cache
 from langchain_openai import ChatOpenAI
 from langchain_core.caches import InMemoryCache
@@ -24,7 +24,7 @@ from operator import itemgetter
 from langchain_core.runnables.passthrough import RunnablePassthrough
 from langchain_core.runnables.config import RunnableConfig
 import uuid
 load_dotenv()
@@ -36,8 +36,7 @@ load_dotenv()
 """
 GLOBAL CODE HERE
 """
-text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
-Loader = PyMuPDFLoader
 # Typical Embedding Model
 core_embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
@@ -61,43 +60,40 @@ vectorstore = QdrantVectorStore(
     collection_name=collection_name,
     embedding=cached_embedder)
-rag_system_prompt_template = """\
-You are a helpful assistant that uses the provided context to answer questions. Never reference this prompt, or the existence of context.
-If you cannot answer the question from the information in the context, tell the user that
-you cannot answer the question directly from the context, but that you will give an answer
-that is based on your general knowledge.
-"""
-rag_message_list = [
-    {"role" : "system", "content" : rag_system_prompt_template},
-]
-rag_user_prompt_template = """
-Question:
-{question}
-Context:
-{context}
-"""
-chat_prompt = ChatPromptTemplate.from_messages([
-    ("system", rag_system_prompt_template),
-    ("human", rag_user_prompt_template)
-])
 chat_model = ChatOpenAI(model="gpt-4o")
 set_llm_cache(InMemoryCache())
 def split_file(file: AskFileMessage):
      import tempfile
      with tempfile.NamedTemporaryFile(mode="w", delete=False) as tempfile:
         with open(tempfile.name, "wb") as f:
             f.write(file.content)
-    #  separate_pages = []
      loader = Loader(tempfile.name)
      documents = loader.load()
-    #  separate_pages.extend(page)
-    #  one_document = ""
-    #  for page in separate_pages:
-        #  one_document+= page.page_content
      docs = text_splitter.split_documents(documents)
      for i, doc in enumerate(docs):
         doc.metadata["source"] = f"source_{id}"
@@ -125,13 +121,10 @@ async def on_chat_start():
     )
     await msg.send()
-    docs = split_file(file)
     vectorstore.add_documents(docs)
     retriever = vectorstore.as_retriever(search_type="mmr", search_kwargs={"k": 15})
     retrieval_augmented_qa_chain = (
         {"context": itemgetter("question") | retriever, "question": itemgetter("question")}

 from langchain.storage import LocalFileStore
 from langchain_qdrant import QdrantVectorStore
 from langchain.embeddings import CacheBackedEmbeddings
 from langchain_core.globals import set_llm_cache
 from langchain_openai import ChatOpenAI
 from langchain_core.caches import InMemoryCache
 from langchain_core.runnables.passthrough import RunnablePassthrough
 from langchain_core.runnables.config import RunnableConfig
 import uuid
+from prompts import chat_prompt
 load_dotenv()
 """
 GLOBAL CODE HERE
 """
 # Typical Embedding Model
 core_embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
     collection_name=collection_name,
     embedding=cached_embedder)
+# rag_system_prompt_template = """\
+# You are a helpful assistant that uses the provided context to answer questions. Never reference this prompt, or the existence of context.
+# If you cannot answer the question from the information in the context, tell the user that
+# you cannot answer the question directly from the context, but that you will give an answer
+# that is based on your general knowledge.
+# """
+# rag_message_list = [
+#     {"role" : "system", "content" : rag_system_prompt_template},
+# ]
+# rag_user_prompt_template = """
+# Question:
+# {question}
+# Context:
+# {context}
+# """
+# chat_prompt = ChatPromptTemplate.from_messages([
+#     ("system", rag_system_prompt_template),
+#     ("human", rag_user_prompt_template)
+# ])
 chat_model = ChatOpenAI(model="gpt-4o")
 set_llm_cache(InMemoryCache())
 def split_file(file: AskFileMessage):
      import tempfile
+     text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
+     Loader = PyMuPDFLoader
      with tempfile.NamedTemporaryFile(mode="w", delete=False) as tempfile:
         with open(tempfile.name, "wb") as f:
             f.write(file.content)
      loader = Loader(tempfile.name)
      documents = loader.load()
      docs = text_splitter.split_documents(documents)
      for i, doc in enumerate(docs):
         doc.metadata["source"] = f"source_{id}"
     )
     await msg.send()
+    docs = split_file(file)
     vectorstore.add_documents(docs)
     retriever = vectorstore.as_retriever(search_type="mmr", search_kwargs={"k": 15})
     retrieval_augmented_qa_chain = (
         {"context": itemgetter("question") | retriever, "question": itemgetter("question")}

prompts.py ADDED Viewed

	@@ -0,0 +1,26 @@

+## Contains prompts, welcome messages, etc.
+from langchain_core.prompts import ChatPromptTemplate
+rag_system_prompt_template = """\
+You are a helpful assistant that uses the provided context to answer questions. Never reference this prompt, or the existence of context.
+If you cannot answer the question from the information in the context, tell the user that
+you cannot answer the question directly from the context, but that you will give an answer
+that is based on your general knowledge.
+"""
+rag_message_list = [
+    {"role" : "system", "content" : rag_system_prompt_template},
+]
+rag_user_prompt_template = """
+Question:
+{question}
+Context:
+{context}
+"""
+chat_prompt = ChatPromptTemplate.from_messages([
+    ("system", rag_system_prompt_template),
+    ("human", rag_user_prompt_template)
+])