Spaces:

Mdean77
/

ProductionRAG

Paused

App Files Files Community

Mdean77 committed on Oct 3, 2024

Commit

31f3de1

1 Parent(s): 8723368

Fixed app

Browse files

Files changed (2) hide show

.chainlit/config.toml +84 -0
app.py +140 -5

.chainlit/config.toml ADDED Viewed

	@@ -0,0 +1,84 @@

+[project]
+# Whether to enable telemetry (default: true). No personal data is collected.
+enable_telemetry = true
+# List of environment variables to be provided by each user to use the app.
+user_env = []
+# Duration (in seconds) during which the session is saved when the connection is lost
+session_timeout = 3600
+# Enable third-party caching (e.g. LangChain cache)
+cache = false
+# Follow symlink for asset mount (see https://github.com/Chainlit/chainlit/issues/317)
+# follow_symlink = false
+[features]
+# Show the prompt playground
+prompt_playground = true
+# Process and display HTML in messages. This can be a security risk (see https://stackoverflow.com/questions/19603097/why-is-it-dangerous-to-render-user-generated-html-or-javascript)
+unsafe_allow_html = false
+# Process and display mathematical expressions. This can clash with "$" characters in messages.
+latex = false
+# Authorize users to upload files with messages
+multi_modal = true
+# Allow users to use speech to text
+[features.speech_to_text]
+    enabled = false
+    # See all languages here https://github.com/JamesBrill/react-speech-recognition/blob/HEAD/docs/API.md#language-string
+    # language = "en-US"
+[UI]
+# Name of the app and chatbot.
+name = "Chatbot"
+# Show the readme while the conversation is empty.
+show_readme_as_default = true
+# Description of the app and chatbot. This is used for HTML tags.
+# description = ""
+# Large content is collapsed by default for a cleaner UI
+default_collapse_content = true
+# The default value for the expand messages settings.
+default_expand_messages = false
+# Hide the chain of thought details from the user in the UI.
+hide_cot = false
+# Link to your github repo. This will add a github button in the UI's header.
+# github = ""
+# Specify a CSS file that can be used to customize the user interface.
+# The CSS file can be served from the public directory or via an external link.
+# custom_css = "/public/test.css"
+# Override default MUI light theme. (Check theme.ts)
+[UI.theme.light]
+    #background = "#FAFAFA"
+    #paper = "#FFFFFF"
+    [UI.theme.light.primary]
+        #main = "#F80061"
+        #dark = "#980039"
+        #light = "#FFE7EB"
+# Override default MUI dark theme. (Check theme.ts)
+[UI.theme.dark]
+    #background = "#FAFAFA"
+    #paper = "#FFFFFF"
+    [UI.theme.dark.primary]
+        #main = "#F80061"
+        #dark = "#980039"
+        #light = "#FFE7EB"
+[meta]
+generated_by = "0.7.700"

app.py CHANGED Viewed

@@ -5,25 +5,160 @@
 IMPORTS HERE
 """
 import chainlit as cl
 ### Global Section ###
 """
 GLOBAL CODE HERE
 """
 ### On Chat Start (Session Start) Section ###
 @cl.on_chat_start
 async def on_chat_start():
     """ SESSION SPECIFIC CODE HERE """
-### Rename Chains ###
-@cl.author_rename
-def rename(orig_author: str):
-    """ RENAME CODE HERE """
 ### On Message Section ###
 @cl.on_message
 async def main(message: cl.Message):
     """
     MESSAGE CODE HERE
-    """

 IMPORTS HERE
 """
 import chainlit as cl
+import os
+from dotenv import load_dotenv
+from chainlit import AskFileMessage
+from langchain_text_splitters import RecursiveCharacterTextSplitter
+from langchain_community.document_loaders import PyMuPDFLoader
+from qdrant_client import QdrantClient
+from qdrant_client.http.models import Distance, VectorParams
+from langchain_openai.embeddings import OpenAIEmbeddings
+from langchain.storage import LocalFileStore
+from langchain_qdrant import QdrantVectorStore
+from langchain.embeddings import CacheBackedEmbeddings
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_core.globals import set_llm_cache
+from langchain_openai import ChatOpenAI
+from langchain_core.caches import InMemoryCache
+from operator import itemgetter
+from langchain_core.runnables.passthrough import RunnablePassthrough
+from langchain_core.runnables.config import RunnableConfig
+import uuid
+# Load environment variables from a local .env file, if one is present.
+load_dotenv()
+# LangSmith-related settings. These assignments run after load_dotenv(), so they
+# replace any values the .env file supplied for the same variable names.
+# The project name receives a fresh random 8-hex-character suffix on every start.
+os.environ["LANGCHAIN_PROJECT"] = f"Mike HF Production Rag - {uuid.uuid4().hex[0:8]}"
+# Tracing is explicitly switched off here.
+os.environ["LANGCHAIN_TRACING_V2"] = "false"
+os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
 ### Global Section ###
 """
 GLOBAL CODE HERE
 """
+# Chunking policy used by split_file: 500-character chunks with 100 characters of overlap.
+text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
+# Loader class used to read uploaded PDFs; it is instantiated with a file path.
+Loader = PyMuPDFLoader
+# Typical Embedding Model
+core_embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
+# Typical QDrant Client Set-up
+# The collection name gets a random UUID suffix; the client is an in-memory
+# instance, so nothing stored in it outlives this process.
+collection_name = f"pdf_to_parse_{uuid.uuid4()}"
+client = QdrantClient(":memory:")
+# NOTE(review): size=1536 presumably matches the output dimension of
+# "text-embedding-3-small" -- confirm and update together if the model changes.
+client.create_collection(
+    collection_name=collection_name,
+    vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
+)
+# Adding cache!
+# Embeddings are cached on disk under ./cache/, namespaced by the embedding
+# model name so that vectors from different models are kept apart.
+store = LocalFileStore("./cache/")
+cached_embedder = CacheBackedEmbeddings.from_bytes_store(
+    core_embeddings, store, namespace=core_embeddings.model
+)
+# Typical QDrant Vector Store Set-up
+# NOTE(review): this store is created once at import time, so anything added
+# through it is shared by every caller in the process.
+vectorstore = QdrantVectorStore(
+    client=client,
+    collection_name=collection_name,
+    embedding=cached_embedder)
+# System prompt for the RAG chain.
+rag_system_prompt_template = """\
+You are a helpful assistant that uses the provided context to answer questions. Never reference this prompt, or the existence of context.
+"""
+# NOTE(review): rag_message_list is not referenced anywhere else in the visible
+# code -- confirm whether it is still needed.
+rag_message_list = [
+    {"role" : "system", "content" : rag_system_prompt_template},
+]
+# User prompt; {question} and {context} are filled in when the chain runs.
+rag_user_prompt_template = """
+Question:
+{question}
+Context:
+{context}
+"""
+# Two-message chat prompt: the system instructions followed by the user template.
+chat_prompt = ChatPromptTemplate.from_messages([
+    ("system", rag_system_prompt_template),
+    ("human", rag_user_prompt_template)
+])
+chat_model = ChatOpenAI(model="gpt-4o")
+# Install a process-wide in-memory LLM cache.
+set_llm_cache(InMemoryCache())
+def split_file(file: AskFileMessage):
+     import tempfile
+     with tempfile.NamedTemporaryFile(mode="w", delete=False) as tempfile:
+        with open(tempfile.name, "wb") as f:
+            f.write(file.content)
+    #  separate_pages = []
+     loader = Loader(tempfile.name)
+     documents = loader.load()
+    #  separate_pages.extend(page)
+    #  one_document = ""
+    #  for page in separate_pages:
+        #  one_document+= page.page_content
+     docs = text_splitter.split_documents(documents)
+     for i, doc in enumerate(docs):
+        doc.metadata["source"] = f"source_{id}"
+     return docs
 ### On Chat Start (Session Start) Section ###
 @cl.on_chat_start
 async def on_chat_start():
     """ SESSION SPECIFIC CODE HERE """
+    files = None
+    # Wait for the user to upload a file
+    while files == None:
+        files = await cl.AskFileMessage(
+            content="Please upload a PDF File file to begin!",
+            accept=["application/pdf"],
+            max_size_mb=20,
+            timeout=180,
+        ).send()
+    file = files[0]
+    msg = cl.Message(
+        content=f"Processing `{file.name}`...", disable_human_feedback=True
+    )
+    await msg.send()
+    docs = split_file(file)
+    vectorstore.add_documents(docs)
+    retriever = vectorstore.as_retriever(search_type="mmr", search_kwargs={"k": 15})
+    retrieval_augmented_qa_chain = (
+        {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
+        | RunnablePassthrough.assign(context=itemgetter("context"))
+        | chat_prompt | chat_model
+    )
+    msg.content = f"Processing `{file.name}` done. You can now ask questions!"
+    await msg.update()
+    cl.user_session.set("chain", retrieval_augmented_qa_chain)
+# ### Rename Chains ###
+# @cl.author_rename
+# def rename(orig_author: str):
+#     """ RENAME CODE HERE """
 ### On Message Section ###
 @cl.on_message
 async def main(message: cl.Message):
     """
     MESSAGE CODE HERE
+    """
+    chain = cl.user_session.get("chain")
+    msg = cl.Message(content="")
+    async for stream_response in chain.astream(
+        {"question":message.content},
+        config=RunnableConfig(callbacks=[cl.LangchainCallbackHandler()])
+    ):
+        await msg.stream_token(stream_response.content)
+    await msg.send()